mandy 0.3.7 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/mandy-hadoop +2 -2
- data/lib/dsl.rb +1 -4
- data/lib/mandy.rb +10 -0
- data/lib/task.rb +1 -1
- data/lib/wrappers/mandy_local_wrapper.rb +57 -0
- data/lib/wrappers/mandy_wrapper.rb +73 -0
- metadata +3 -1
data/bin/mandy-hadoop
CHANGED
@@ -3,7 +3,7 @@ require "rubygems"
|
|
3
3
|
require "mandy"
|
4
4
|
require 'optparse'
|
5
5
|
require 'ostruct'
|
6
|
-
require '
|
6
|
+
require 'cgi'
|
7
7
|
|
8
8
|
options = OpenStruct.new
|
9
9
|
|
@@ -23,7 +23,7 @@ OptionParser.new do |opts|
|
|
23
23
|
end
|
24
24
|
|
25
25
|
opts.on("-j", '--json {"key":"1 value"}', "Pass JSON encoded parameters to jobs") do |config|
|
26
|
-
options.cmdenv = "json=#{
|
26
|
+
options.cmdenv = "json=#{CGI.escape(config)}"
|
27
27
|
end
|
28
28
|
|
29
29
|
opts.on("-g", '--gemfile filepath', "Path to your jobs Gemfile (defaults to ./Gemfile)") do |config|
|
data/lib/dsl.rb
CHANGED
data/lib/mandy.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "rubygems"
|
2
2
|
require "json"
|
3
3
|
require "uri"
|
4
|
+
require "cgi"
|
4
5
|
|
5
6
|
%w(
|
6
7
|
support/formatting
|
@@ -32,4 +33,13 @@ module Mandy
|
|
32
33
|
@stores||={}
|
33
34
|
end
|
34
35
|
end
|
36
|
+
|
37
|
+
|
38
|
+
def job(name, &blk)
|
39
|
+
job = Mandy::Job.new(name)
|
40
|
+
job.instance_eval(&blk) unless blk.nil?
|
41
|
+
Mandy::Job.jobs << job
|
42
|
+
job
|
43
|
+
end
|
44
|
+
module_function :job
|
35
45
|
end
|
data/lib/task.rb
CHANGED
data/lib/wrappers/mandy_local_wrapper.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
module Mandy
|
2
|
+
module Local
|
3
|
+
module Wrapper
|
4
|
+
SESSION_ID = Process.pid
|
5
|
+
|
6
|
+
def set_mandy_config(file_path)
|
7
|
+
@@config_path = file_path
|
8
|
+
end
|
9
|
+
|
10
|
+
def run_mandy(script, input_files, options = {})
|
11
|
+
begin
|
12
|
+
input_file = concat_input_files(input_files)
|
13
|
+
output_file_path = run_mandy_local(script, input_file, options)
|
14
|
+
return output_file_path unless block_given?
|
15
|
+
#if a block is given then yield the output file path and then delete this file before returning
|
16
|
+
yield output_file_path
|
17
|
+
ensure
|
18
|
+
File.delete(input_file) if File.exists?(input_file)
|
19
|
+
File.delete(output_file_path) if output_file_path && File.exists?(output_file_path) if block_given?
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
def concat_input_files(inputs)
|
25
|
+
inputs = [inputs] unless inputs.is_a?(Array)
|
26
|
+
base_dir = File.dirname(inputs.first.path)
|
27
|
+
input_file = "#{base_dir}/#{SESSION_ID}.csv"
|
28
|
+
`cat #{inputs.collect{|f| f.path}.join(' ')} > #{input_file}`
|
29
|
+
input_file
|
30
|
+
end
|
31
|
+
|
32
|
+
def run_mandy_local(script, input, options)
|
33
|
+
mandy_job_params = options.include?(:parameters) ? options[:parameters] : {}
|
34
|
+
param_args = "export json='#{mandy_job_params.to_json}' &&"
|
35
|
+
|
36
|
+
if options.include?(:lib)
|
37
|
+
FileUtils.cp(script, options[:lib])
|
38
|
+
script = File.join(options[:lib], File.basename(script))
|
39
|
+
end
|
40
|
+
|
41
|
+
output_file = `#{param_args} mandy-local #{script} #{input} #{generate_output_path}`
|
42
|
+
output_file = output_file.split("\n").last
|
43
|
+
output_file
|
44
|
+
ensure
|
45
|
+
File.delete(script) if options.include?(:lib)
|
46
|
+
end
|
47
|
+
|
48
|
+
def generate_output_path
|
49
|
+
output_dir = "/tmp/mandy_test_output"
|
50
|
+
FileUtils.mkdir_p(output_dir)
|
51
|
+
"#{output_dir}/#{SESSION_ID}"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
Object.send(:include, Mandy::Local::Wrapper)
|
data/lib/wrappers/mandy_wrapper.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
module Mandy
|
2
|
+
module Wrapper
|
3
|
+
SESSION_ID = Process.pid
|
4
|
+
|
5
|
+
def set_mandy_config(file_path)
|
6
|
+
@@config_path = file_path
|
7
|
+
end
|
8
|
+
|
9
|
+
def run_mandy(script, input_files, options = {})
|
10
|
+
begin
|
11
|
+
hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
|
12
|
+
put_files_on_hdfs(hdfs_path, input_files)
|
13
|
+
run_mandy_hadoop(hdfs_path, script, options)
|
14
|
+
output_file_path = get_file_from_hdfs(hdfs_path, options)
|
15
|
+
return output_file_path unless block_given?
|
16
|
+
#if a block is given then yield the output file path and then delete this file before returning
|
17
|
+
yield output_file_path
|
18
|
+
ensure
|
19
|
+
File.delete(output_file_path) if output_file_path && File.exists?(output_file_path) if block_given?
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
def put_files_on_hdfs(hdfs_path, input_files)
|
25
|
+
input_files = [input_files] unless input_files.is_a?(Array)
|
26
|
+
input_files.each do |input_file|
|
27
|
+
input_file_path = File.expand_path(input_file.path)
|
28
|
+
base_filename = input_file_path.split("/").last
|
29
|
+
dest_file = ["input/#{hdfs_path}", base_filename].join("/")
|
30
|
+
run_command "mandy-put #{input_file_path} #{dest_file}"
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def run_mandy_hadoop(hdfs_path, script, options)
|
35
|
+
mandy_job_params = options.include?(:parameters) ? options[:parameters] : {}
|
36
|
+
param_args = "-j '#{mandy_job_params.to_json}'"
|
37
|
+
param_args += " -p '#{options[:lib]}'" if options.include?(:lib)
|
38
|
+
|
39
|
+
hdfs_output_path = "output/#{hdfs_path}"
|
40
|
+
run_command "mandy-rm output/#{hdfs_path}"
|
41
|
+
run_command "mandy-hadoop #{script} input/#{hdfs_path} output/#{hdfs_path} #{param_args}"
|
42
|
+
end
|
43
|
+
|
44
|
+
def get_file_from_hdfs(hdfs_path, options)
|
45
|
+
output_file_path = options[:output_file] || generate_output_path
|
46
|
+
hdfs_output_path = "output/#{hdfs_path}"
|
47
|
+
run_command "mandy-get #{get_hdfs_output(hdfs_output_path)} #{output_file_path}"
|
48
|
+
run_command "mandy-rm input/#{hdfs_path}"
|
49
|
+
run_command "mandy-rm output/#{hdfs_path}"
|
50
|
+
output_file_path
|
51
|
+
end
|
52
|
+
|
53
|
+
def run_command(command)
|
54
|
+
command = "#{command} -c #{@@config_path}"
|
55
|
+
respond_to?(:logger) ? logger.info(command) : p(command)
|
56
|
+
@output = `#{command}`
|
57
|
+
end
|
58
|
+
|
59
|
+
def generate_output_path
|
60
|
+
output_dir = "/tmp/mandy_output"
|
61
|
+
FileUtils.mkdir_p(output_dir)
|
62
|
+
"#{output_dir}/#{SESSION_ID}"
|
63
|
+
end
|
64
|
+
|
65
|
+
def get_hdfs_output(hdfs_output_path)
|
66
|
+
@output.each_line do |line|
|
67
|
+
return line.chomp.strip if line.include?(hdfs_output_path)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
Object.send(:include, Mandy::Wrapper)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mandy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.7
|
4
|
+
version: 0.3.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Kent
|
@@ -76,6 +76,8 @@ files:
|
|
76
76
|
- lib/ruby-hbase/version.rb
|
77
77
|
- lib/ruby-hbase/xml_decoder.rb
|
78
78
|
- lib/test_runner.rb
|
79
|
+
- lib/wrappers/mandy_wrapper.rb
|
80
|
+
- lib/wrappers/mandy_local_wrapper.rb
|
79
81
|
has_rdoc: true
|
80
82
|
homepage: http://github.com/trafficbroker/mandy
|
81
83
|
licenses: []
|