mandy 0.3.11 → 0.3.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/mandy-run +56 -0
- data/lib/wrappers/mandy_local_wrapper.rb +9 -8
- data/lib/wrappers/mandy_wrapper.rb +5 -1
- metadata +3 -1
data/bin/mandy-run
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
#!/usr/bin/env ruby
# mandy-run: command-line front end that runs a Mandy script against an input
# file and prints the path where the results were stored.
#
# Usage: mandy-run script input [options]
#
# With --local the local wrapper is loaded; otherwise the cluster wrapper is
# loaded and pointed at the config given with --conf (default 'cluster.xml').
require "rubygems"
require "mandy"
require 'optparse'
require 'ostruct'
require 'json'

options = OpenStruct.new

parser = OptionParser.new do |opts|
  opts.banner = "USAGE: mandy-run script input [options]"

  opts.on("-p", "--payload PAYLOAD", "Add a working directory to be sent to the cluster.") do |payload|
    options.payload = payload
  end

  # The output path gets its own slot. It used to be written to
  # options.payload, which both discarded the requested output path and
  # overwrote any --payload value.
  opts.on("-o", "--output OUTPUT", "Specify output path for your results.") do |output|
    options.output = output
  end

  opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
    options.config = config
  end

  opts.on("-j", '--json "{\"key\":\"1 value\"}"', "Pass JSON encoded parameters to jobs") do |json|
    options.json = json
  end

  opts.on("-l", "--local", "Run script using mandy-local.") do
    options.local = true
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end
parser.parse!

# Both positional arguments are required. Without this check a missing input
# surfaces as a bare TypeError from File.new(nil).
abort parser.to_s if ARGV.size < 2

file = ARGV[0]
# run_mandy is handed an open File, exactly as before.
# NOTE(review): the handle is never closed explicitly - confirm whether the
# wrappers expect a File object or would accept a path.
input = File.new(ARGV[1])

# Only options that were actually supplied are forwarded to the wrapper.
params = {}
params[:output_file] = options.output if options.output
params[:lib] = options.payload if options.payload
params[:parameters] = JSON.parse(options.json) if options.json

if options.local
  require 'wrappers/mandy_local_wrapper'
else
  require 'wrappers/mandy_wrapper'
  set_mandy_config options.config || 'cluster.xml'
end

output_file = run_mandy(file, input, params)

puts "Results stored in: #{output_file}"
|
@@ -3,12 +3,11 @@ module Mandy
|
|
3
3
|
module Wrapper
|
4
4
|
SESSION_ID = Process.pid
|
5
5
|
|
6
|
-
def set_mandy_config(file_path)
|
7
|
-
@@config_path = file_path
|
8
|
-
end
|
9
|
-
|
10
6
|
def run_mandy(script, input_files, options = {})
|
11
7
|
begin
|
8
|
+
#doing this will load all the mandy jobs in memory which will be useful later on
|
9
|
+
require script
|
10
|
+
|
12
11
|
input_file = concat_input_files(input_files)
|
13
12
|
output_file_path = run_mandy_local(script, input_file, options)
|
14
13
|
return output_file_path unless block_given?
|
@@ -37,8 +36,9 @@ module Mandy
|
|
37
36
|
FileUtils.cp(script, options[:lib])
|
38
37
|
script = File.join(options[:lib], File.basename(script))
|
39
38
|
end
|
40
|
-
|
41
|
-
|
39
|
+
|
40
|
+
output_path = options[:output_file] || generate_output_path
|
41
|
+
output_file = `#{param_args} mandy-local #{script} #{input} #{output_path}`
|
42
42
|
output_file = output_file.split("\n").last
|
43
43
|
output_file
|
44
44
|
ensure
|
@@ -46,9 +46,10 @@ module Mandy
|
|
46
46
|
end
|
47
47
|
|
48
48
|
# Creates /tmp/mandy_local_output if needed and returns a path inside it.
# The file name is the downcased name of the last entry in Mandy::Job.jobs,
# with every non-word character replaced by '-', followed by '_' and a
# YYYYMMDDHHMMSS timestamp.
def generate_output_path
  base_dir = "/tmp/mandy_local_output"
  FileUtils.mkdir_p(base_dir)

  job_slug = Mandy::Job.jobs.last.name.downcase.gsub(/\W/, '-')
  stamp = DateTime.now.strftime('%Y%m%d%H%M%S')
  File.join(base_dir, "#{job_slug}_#{stamp}")
end
|
53
54
|
end
|
54
55
|
end
|
@@ -8,6 +8,9 @@ module Mandy
|
|
8
8
|
|
9
9
|
def run_mandy(script, input_files, options = {})
|
10
10
|
begin
|
11
|
+
#doing this will load all the mandy jobs in memory which will be useful later on
|
12
|
+
require script
|
13
|
+
|
11
14
|
hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
|
12
15
|
put_files_on_hdfs(hdfs_path, input_files)
|
13
16
|
run_mandy_hadoop(hdfs_path, script, options)
|
@@ -59,7 +62,8 @@ module Mandy
|
|
59
62
|
# Creates /tmp/mandy_output if needed and returns a path inside it.
# The file name is the downcased name of the last entry in Mandy::Job.jobs,
# with every non-word character replaced by '-', followed by '_' and a
# YYYYMMDDHHMMSS timestamp.
def generate_output_path
  base_dir = "/tmp/mandy_output"
  FileUtils.mkdir_p(base_dir)

  job_slug = Mandy::Job.jobs.last.name.downcase.gsub(/\W/, '-')
  stamp = DateTime.now.strftime('%Y%m%d%H%M%S')
  File.join(base_dir, "#{job_slug}_#{stamp}")
end
|
64
68
|
|
65
69
|
def get_hdfs_output(hdfs_output_path)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mandy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.11
|
4
|
+
version: 0.3.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Kent
|
@@ -35,6 +35,7 @@ executables:
|
|
35
35
|
- mandy-reduce
|
36
36
|
- mandy-rm
|
37
37
|
- mandy-install
|
38
|
+
- mandy-run
|
38
39
|
extensions: []
|
39
40
|
|
40
41
|
extra_rdoc_files: []
|
@@ -46,6 +47,7 @@ files:
|
|
46
47
|
- bin/mandy-get
|
47
48
|
- bin/mandy-put
|
48
49
|
- bin/mandy-reduce
|
50
|
+
- bin/mandy-run
|
49
51
|
- readme.md
|
50
52
|
- Rakefile
|
51
53
|
- bootstrap.rb
|