mandy 0.3.7 → 0.3.9
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/mandy-hadoop +2 -2
- data/lib/dsl.rb +1 -4
- data/lib/mandy.rb +10 -0
- data/lib/task.rb +1 -1
- data/lib/wrappers/mandy_local_wrapper.rb +57 -0
- data/lib/wrappers/mandy_wrapper.rb +73 -0
- metadata +3 -1
data/bin/mandy-hadoop
CHANGED
@@ -3,7 +3,7 @@ require "rubygems"
|
|
3
3
|
require "mandy"
|
4
4
|
require 'optparse'
|
5
5
|
require 'ostruct'
|
6
|
-
require '
|
6
|
+
require 'cgi'
|
7
7
|
|
8
8
|
options = OpenStruct.new
|
9
9
|
|
@@ -23,7 +23,7 @@ OptionParser.new do |opts|
|
|
23
23
|
end
|
24
24
|
|
25
25
|
opts.on("-j", '--json {"key":"1 value"}', "Pass JSON encoded parameters to jobs") do |config|
|
26
|
-
options.cmdenv = "json=#{
|
26
|
+
options.cmdenv = "json=#{CGI.escape(config)}"
|
27
27
|
end
|
28
28
|
|
29
29
|
opts.on("-g", '--gemfile filepath', "Path to your jobs Gemfile (defaults to ./Gemfile)") do |config|
|
data/lib/dsl.rb
CHANGED
data/lib/mandy.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "rubygems"
|
2
2
|
require "json"
|
3
3
|
require "uri"
|
4
|
+
require "cgi"
|
4
5
|
|
5
6
|
%w(
|
6
7
|
support/formatting
|
@@ -32,4 +33,13 @@ module Mandy
|
|
32
33
|
@stores||={}
|
33
34
|
end
|
34
35
|
end
|
36
|
+
|
37
|
+
|
38
|
+
def job(name, &blk)
|
39
|
+
job = Mandy::Job.new(name)
|
40
|
+
job.instance_eval(&blk) unless blk.nil?
|
41
|
+
Mandy::Job.jobs << job
|
42
|
+
job
|
43
|
+
end
|
44
|
+
module_function :job
|
35
45
|
end
|
data/lib/task.rb
CHANGED
data/lib/wrappers/mandy_local_wrapper.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
module Mandy
|
2
|
+
module Local
|
3
|
+
module Wrapper
|
4
|
+
SESSION_ID = Process.pid
|
5
|
+
|
6
|
+
def set_mandy_config(file_path)
|
7
|
+
@@config_path = file_path
|
8
|
+
end
|
9
|
+
|
10
|
+
def run_mandy(script, input_files, options = {})
|
11
|
+
begin
|
12
|
+
input_file = concat_input_files(input_files)
|
13
|
+
output_file_path = run_mandy_local(script, input_file, options)
|
14
|
+
return output_file_path unless block_given?
|
15
|
+
#if a block is given then yield the output file path and then delete this file before returning
|
16
|
+
yield output_file_path
|
17
|
+
ensure
|
18
|
+
File.delete(input_file) if File.exists?(input_file)
|
19
|
+
File.delete(output_file_path) if output_file_path && File.exists?(output_file_path) if block_given?
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
def concat_input_files(inputs)
|
25
|
+
inputs = [inputs] unless inputs.is_a?(Array)
|
26
|
+
base_dir = File.dirname(inputs.first.path)
|
27
|
+
input_file = "#{base_dir}/#{SESSION_ID}.csv"
|
28
|
+
`cat #{inputs.collect{|f| f.path}.join(' ')} > #{input_file}`
|
29
|
+
input_file
|
30
|
+
end
|
31
|
+
|
32
|
+
def run_mandy_local(script, input, options)
|
33
|
+
mandy_job_params = options.include?(:parameters) ? options[:parameters] : {}
|
34
|
+
param_args = "export json='#{mandy_job_params.to_json}' &&"
|
35
|
+
|
36
|
+
if options.include?(:lib)
|
37
|
+
FileUtils.cp(script, options[:lib])
|
38
|
+
script = File.join(options[:lib], File.basename(script))
|
39
|
+
end
|
40
|
+
|
41
|
+
output_file = `#{param_args} mandy-local #{script} #{input} #{generate_output_path}`
|
42
|
+
output_file = output_file.split("\n").last
|
43
|
+
output_file
|
44
|
+
ensure
|
45
|
+
File.delete(script) if options.include?(:lib)
|
46
|
+
end
|
47
|
+
|
48
|
+
def generate_output_path
|
49
|
+
output_dir = "/tmp/mandy_test_output"
|
50
|
+
FileUtils.mkdir_p(output_dir)
|
51
|
+
"#{output_dir}/#{SESSION_ID}"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
Object.send(:include, Mandy::Local::Wrapper)
|
data/lib/wrappers/mandy_wrapper.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
module Mandy
|
2
|
+
module Wrapper
|
3
|
+
SESSION_ID = Process.pid
|
4
|
+
|
5
|
+
def set_mandy_config(file_path)
|
6
|
+
@@config_path = file_path
|
7
|
+
end
|
8
|
+
|
9
|
+
def run_mandy(script, input_files, options = {})
|
10
|
+
begin
|
11
|
+
hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
|
12
|
+
put_files_on_hdfs(hdfs_path, input_files)
|
13
|
+
run_mandy_hadoop(hdfs_path, script, options)
|
14
|
+
output_file_path = get_file_from_hdfs(hdfs_path, options)
|
15
|
+
return output_file_path unless block_given?
|
16
|
+
#if a block is given then yield the output file path and then delete this file before returning
|
17
|
+
yield output_file_path
|
18
|
+
ensure
|
19
|
+
File.delete(output_file_path) if output_file_path && File.exists?(output_file_path) if block_given?
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
def put_files_on_hdfs(hdfs_path, input_files)
|
25
|
+
input_files = [input_files] unless input_files.is_a?(Array)
|
26
|
+
input_files.each do |input_file|
|
27
|
+
input_file_path = File.expand_path(input_file.path)
|
28
|
+
base_filename = input_file_path.split("/").last
|
29
|
+
dest_file = ["input/#{hdfs_path}", base_filename].join("/")
|
30
|
+
run_command "mandy-put #{input_file_path} #{dest_file}"
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def run_mandy_hadoop(hdfs_path, script, options)
|
35
|
+
mandy_job_params = options.include?(:parameters) ? options[:parameters] : {}
|
36
|
+
param_args = "-j '#{mandy_job_params.to_json}'"
|
37
|
+
param_args += " -p '#{options[:lib]}'" if options.include?(:lib)
|
38
|
+
|
39
|
+
hdfs_output_path = "output/#{hdfs_path}"
|
40
|
+
run_command "mandy-rm output/#{hdfs_path}"
|
41
|
+
run_command "mandy-hadoop #{script} input/#{hdfs_path} output/#{hdfs_path} #{param_args}"
|
42
|
+
end
|
43
|
+
|
44
|
+
def get_file_from_hdfs(hdfs_path, options)
|
45
|
+
output_file_path = options[:output_file] || generate_output_path
|
46
|
+
hdfs_output_path = "output/#{hdfs_path}"
|
47
|
+
run_command "mandy-get #{get_hdfs_output(hdfs_output_path)} #{output_file_path}"
|
48
|
+
run_command "mandy-rm input/#{hdfs_path}"
|
49
|
+
run_command "mandy-rm output/#{hdfs_path}"
|
50
|
+
output_file_path
|
51
|
+
end
|
52
|
+
|
53
|
+
def run_command(command)
|
54
|
+
command = "#{command} -c #{@@config_path}"
|
55
|
+
respond_to?(:logger) ? logger.info(command) : p(command)
|
56
|
+
@output = `#{command}`
|
57
|
+
end
|
58
|
+
|
59
|
+
def generate_output_path
|
60
|
+
output_dir = "/tmp/mandy_output"
|
61
|
+
FileUtils.mkdir_p(output_dir)
|
62
|
+
"#{output_dir}/#{SESSION_ID}"
|
63
|
+
end
|
64
|
+
|
65
|
+
def get_hdfs_output(hdfs_output_path)
|
66
|
+
@output.each_line do |line|
|
67
|
+
return line.chomp.strip if line.include?(hdfs_output_path)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
Object.send(:include, Mandy::Wrapper)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mandy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.7
|
4
|
+
version: 0.3.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Kent
|
@@ -76,6 +76,8 @@ files:
|
|
76
76
|
- lib/ruby-hbase/version.rb
|
77
77
|
- lib/ruby-hbase/xml_decoder.rb
|
78
78
|
- lib/test_runner.rb
|
79
|
+
- lib/wrappers/mandy_wrapper.rb
|
80
|
+
- lib/wrappers/mandy_local_wrapper.rb
|
79
81
|
has_rdoc: true
|
80
82
|
homepage: http://github.com/trafficbroker/mandy
|
81
83
|
licenses: []
|