mandy 0.3.11 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,56 @@
1
+ #!/usr/bin/env ruby
2
+ require "rubygems"
3
+ require "mandy"
4
+ require 'optparse'
5
+ require 'ostruct'
6
+ require 'json'
7
+
8
+ options = OpenStruct.new
9
+
10
+ OptionParser.new do |opts|
11
+ opts.banner = "USAGE: mandy-run script input [options]"
12
+
13
+ opts.on("-p", "--payload PAYLOAD", "Add a working directory to be sent to the cluster.") do |payload|
14
+ options.payload = payload
15
+ end
16
+
17
+ opts.on("-o", "--output OUTPUT", "Specify output path for your results.") do |payload|
18
+ options.payload = payload
19
+ end
20
+
21
+ opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
22
+ options.config = config
23
+ end
24
+
25
+ opts.on("-j", '--json "{\"key\":\"1 value\"}"', "Pass JSON encoded parameters to jobs") do |config|
26
+ options.json = config
27
+ end
28
+
29
+ opts.on("-l", "--local", "Run script using mandy-local.") do
30
+ options.local = true
31
+ end
32
+
33
+ opts.on_tail("-h", "--help", "Show this message") do
34
+ puts opts
35
+ exit
36
+ end
37
+ end.parse!
38
+
39
+ file = ARGV[0]
40
+ input = File.new(ARGV[1])
41
+
42
+ params = {}
43
+ params[:output_file] = options.output if options.output
44
+ params[:lib] = options.payload if options.payload
45
+ params[:parameters] = JSON.parse(options.json) if options.json
46
+
47
+ if options.local
48
+ require 'wrappers/mandy_local_wrapper'
49
+ else
50
+ require 'wrappers/mandy_wrapper'
51
+ set_mandy_config options.config || 'cluster.xml'
52
+ end
53
+
54
+ output_file = run_mandy(file, input, params)
55
+
56
+ puts "Results stored in: #{output_file}"
@@ -3,12 +3,11 @@ module Mandy
3
3
  module Wrapper
4
4
  SESSION_ID = Process.pid
5
5
 
6
- def set_mandy_config(file_path)
7
- @@config_path = file_path
8
- end
9
-
10
6
  def run_mandy(script, input_files, options = {})
11
7
  begin
8
+ #doing this will load all the mandy jobs in memory which will be useful later on
9
+ require script
10
+
12
11
  input_file = concat_input_files(input_files)
13
12
  output_file_path = run_mandy_local(script, input_file, options)
14
13
  return output_file_path unless block_given?
@@ -37,8 +36,9 @@ module Mandy
37
36
  FileUtils.cp(script, options[:lib])
38
37
  script = File.join(options[:lib], File.basename(script))
39
38
  end
40
-
41
- output_file = `#{param_args} mandy-local #{script} #{input} #{generate_output_path}`
39
+
40
+ output_path = options[:output_file] || generate_output_path
41
+ output_file = `#{param_args} mandy-local #{script} #{input} #{output_path}`
42
42
  output_file = output_file.split("\n").last
43
43
  output_file
44
44
  ensure
@@ -46,9 +46,10 @@ module Mandy
46
46
  end
47
47
 
48
48
  def generate_output_path
49
- output_dir = "/tmp/mandy_test_output"
49
+ output_dir = "/tmp/mandy_local_output"
50
50
  FileUtils.mkdir_p(output_dir)
51
- "#{output_dir}/#{SESSION_ID}"
51
+ file_name = Mandy::Job.jobs.last.name.downcase.gsub(/\W/, '-')
52
+ "#{output_dir}/#{file_name}_#{DateTime.now.strftime('%Y%m%d%H%M%S')}"
52
53
  end
53
54
  end
54
55
  end
@@ -8,6 +8,9 @@ module Mandy
8
8
 
9
9
  def run_mandy(script, input_files, options = {})
10
10
  begin
11
+ #doing this will load all the mandy jobs in memory which will be useful later on
12
+ require script
13
+
11
14
  hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
12
15
  put_files_on_hdfs(hdfs_path, input_files)
13
16
  run_mandy_hadoop(hdfs_path, script, options)
@@ -59,7 +62,8 @@ module Mandy
59
62
  def generate_output_path
60
63
  output_dir = "/tmp/mandy_output"
61
64
  FileUtils.mkdir_p(output_dir)
62
- "#{output_dir}/#{SESSION_ID}"
65
+ file_name = Mandy::Job.jobs.last.name.downcase.gsub(/\W/, '-')
66
+ "#{output_dir}/#{file_name}_#{DateTime.now.strftime('%Y%m%d%H%M%S')}"
63
67
  end
64
68
 
65
69
  def get_hdfs_output(hdfs_output_path)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mandy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.11
4
+ version: 0.3.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Kent
@@ -35,6 +35,7 @@ executables:
35
35
  - mandy-reduce
36
36
  - mandy-rm
37
37
  - mandy-install
38
+ - mandy-run
38
39
  extensions: []
39
40
 
40
41
  extra_rdoc_files: []
@@ -46,6 +47,7 @@ files:
46
47
  - bin/mandy-get
47
48
  - bin/mandy-put
48
49
  - bin/mandy-reduce
50
+ - bin/mandy-run
49
51
  - readme.md
50
52
  - Rakefile
51
53
  - bootstrap.rb