mandy 0.3.7 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,7 @@ require "rubygems"
3
3
  require "mandy"
4
4
  require 'optparse'
5
5
  require 'ostruct'
6
- require 'uri'
6
+ require 'cgi'
7
7
 
8
8
  options = OpenStruct.new
9
9
 
@@ -23,7 +23,7 @@ OptionParser.new do |opts|
23
23
  end
24
24
 
25
25
  opts.on("-j", '--json {"key":"1 value"}', "Pass JSON encoded parameters to jobs") do |config|
26
- options.cmdenv = "json=#{URI.encode(config)}"
26
+ options.cmdenv = "json=#{CGI.escape(config)}"
27
27
  end
28
28
 
29
29
  opts.on("-g", '--gemfile filepath', "Path to your jobs Gemfile (defaults to ./Gemfile)") do |config|
data/lib/dsl.rb CHANGED
@@ -1,10 +1,7 @@
1
1
  module Mandy
2
2
  module DSL
3
3
  def job(name, &blk)
4
- job = Mandy::Job.new(name)
5
- job.instance_eval(&blk) unless blk.nil?
6
- Mandy::Job.jobs << job
7
- job
4
+ raise "Mandy::DSL has been deprecated please use Mandy.job instead"
8
5
  end
9
6
  end
10
7
  end
@@ -1,6 +1,7 @@
1
1
  require "rubygems"
2
2
  require "json"
3
3
  require "uri"
4
+ require "cgi"
4
5
 
5
6
  %w(
6
7
  support/formatting
@@ -32,4 +33,13 @@ module Mandy
32
33
  @stores||={}
33
34
  end
34
35
  end
36
+
37
+
38
# Creates a Mandy::Job named +name+, evaluates the optional block in the
# context of that job, appends the job to Mandy::Job.jobs and returns it.
def job(name, &blk)
  definition = Mandy::Job.new(name)
  # The block is optional; without one the job is registered as-is.
  definition.instance_eval(&blk) if blk
  Mandy::Job.jobs << definition
  definition
end
# Makes +job+ callable on the enclosing module itself as well as via mixin.
module_function :job
35
45
  end
@@ -49,7 +49,7 @@ module Mandy
49
49
  end
50
50
 
51
51
  def find_json_param(name)
52
- @json_args ||= JSON.parse(URI.decode(ENV[JSON_PAYLOAD_KEY]))
52
+ @json_args ||= JSON.parse(CGI.unescape(ENV[JSON_PAYLOAD_KEY]))
53
53
  @json_args[name.to_s]
54
54
  end
55
55
 
@@ -0,0 +1,57 @@
1
require "fileutils"
require "json"
require "shellwords"

module Mandy
  module Local
    # Helpers that run a Mandy script through the +mandy-local+ command.
    # The module is mixed into Object at the bottom of this file, so
    # +run_mandy+ and +set_mandy_config+ are callable from anywhere.
    module Wrapper
      # Process id, used to name the per-process merged input and output files.
      SESSION_ID = Process.pid

      # Stores +file_path+ in a class variable. Nothing in this module reads
      # it back; the method is kept so callers can still invoke it.
      def set_mandy_config(file_path)
        @@config_path = file_path
      end

      # Runs +script+ against +input_files+ using mandy-local.
      #
      # script      - path of the Mandy script to run.
      # input_files - one object, or an Array of objects, responding to #path.
      # options     - Hash; :parameters is passed to the job as JSON, :lib is
      #               a location the script is temporarily copied to.
      #
      # Without a block, returns the output path reported by mandy-local.
      # With a block, yields that path, returns the block's value and deletes
      # the output file afterwards. The merged input file is always removed.
      #
      # Raises ArgumentError if +input_files+ is an empty Array.
      def run_mandy(script, input_files, options = {})
        input_file = concat_input_files(input_files)
        output_file_path = run_mandy_local(script, input_file, options)
        return output_file_path unless block_given?

        yield output_file_path
      ensure
        # Both locals are nil when an earlier step raised, so guard each one;
        # otherwise the cleanup itself would raise and hide the real error.
        # File.exist? is used because File.exists? was removed in Ruby 3.2.
        File.delete(input_file) if input_file && File.exist?(input_file)
        if block_given? && output_file_path && File.exist?(output_file_path)
          File.delete(output_file_path)
        end
      end

      private

      # Concatenates the given inputs into "<dir of first input>/<pid>.csv"
      # and returns that path. Copying is done in Ruby rather than through
      # `cat`, so paths containing spaces or shell metacharacters are safe.
      def concat_input_files(inputs)
        inputs = [inputs] unless inputs.is_a?(Array)
        raise ArgumentError, "no input files given" if inputs.empty?

        paths = inputs.map { |file| file.path }
        input_file = File.join(File.dirname(paths.first), "#{SESSION_ID}.csv")
        File.open(input_file, "wb") do |merged|
          paths.each { |path| IO.copy_stream(path, merged) }
        end
        input_file
      rescue StandardError
        # Do not leave a half-written merged file behind when a copy fails.
        File.delete(input_file) if input_file && File.exist?(input_file)
        raise
      end

      # Invokes mandy-local with the job parameters exported as the +json+
      # environment variable and returns the last line the command printed
      # (nil when it printed nothing).
      def run_mandy_local(script, input, options)
        job_params = options.include?(:parameters) ? options[:parameters] : {}

        copied_script = nil
        if options.include?(:lib)
          FileUtils.cp(script, options[:lib])
          # Only remember the copy once it exists, so the ensure clause can
          # never delete the caller's original script.
          copied_script = File.join(options[:lib], File.basename(script))
          script = copied_script
        end

        # Every value is shell-escaped: the JSON may contain single quotes
        # and the paths may contain spaces.
        command = "export json=#{Shellwords.escape(job_params.to_json)} && " \
                  "mandy-local #{Shellwords.join([script, input, generate_output_path])}"
        `#{command}`.split("\n").last
      ensure
        File.delete(copied_script) if copied_script && File.exist?(copied_script)
      end

      # Ensures /tmp/mandy_test_output exists and returns the per-process
      # output path inside it.
      def generate_output_path
        output_dir = "/tmp/mandy_test_output"
        FileUtils.mkdir_p(output_dir)
        "#{output_dir}/#{SESSION_ID}"
      end
    end
  end
end

Object.send(:include, Mandy::Local::Wrapper)
@@ -0,0 +1,73 @@
1
require "fileutils"
require "json"
require "shellwords"

module Mandy
  # Helpers that run a Mandy script on Hadoop by shelling out to the
  # mandy-put, mandy-hadoop, mandy-get and mandy-rm commands. The module is
  # mixed into Object at the bottom of this file.
  module Wrapper
    # Process id, used to build per-process HDFS and local output paths.
    SESSION_ID = Process.pid

    # Records the config file passed to every command via "-c".
    # Must be called before +run_mandy+: run_command reads the class
    # variable assigned here.
    def set_mandy_config(file_path)
      @@config_path = file_path
    end

    # Uploads +input_files+, runs +script+ on them and fetches the result.
    #
    # script      - path of the Mandy script to run.
    # input_files - one object, or an Array of objects, responding to #path.
    # options     - Hash; :parameters is passed as JSON via -j, :lib via -p,
    #               :output_file overrides the local output location.
    #
    # Without a block, returns the local output path. With a block, yields
    # that path, returns the block's value and deletes the file afterwards.
    #
    # Raises RuntimeError when the mandy-hadoop output names no file under
    # the job's output directory.
    def run_mandy(script, input_files, options = {})
      hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
      put_files_on_hdfs(hdfs_path, input_files)
      run_mandy_hadoop(hdfs_path, script, options)
      output_file_path = get_file_from_hdfs(hdfs_path, options)
      return output_file_path unless block_given?

      yield output_file_path
    ensure
      # output_file_path is nil when an earlier step raised.
      # File.exist? is used because File.exists? was removed in Ruby 3.2.
      if block_given? && output_file_path && File.exist?(output_file_path)
        File.delete(output_file_path)
      end
    end

    private

    # Uploads each input to "input/<hdfs_path>/<basename>" with mandy-put.
    def put_files_on_hdfs(hdfs_path, input_files)
      input_files = [input_files] unless input_files.is_a?(Array)
      input_files.each do |input_file|
        local_path = File.expand_path(input_file.path)
        remote_path = "input/#{hdfs_path}/#{File.basename(local_path)}"
        run_command "mandy-put #{Shellwords.join([local_path, remote_path])}"
      end
    end

    # Clears any previous output directory, then runs the job with
    # mandy-hadoop. Returns the output of the mandy-hadoop command.
    def run_mandy_hadoop(hdfs_path, script, options)
      job_params = options.include?(:parameters) ? options[:parameters] : {}
      remote_input = "input/#{hdfs_path}"
      remote_output = "output/#{hdfs_path}"

      # Arguments are shell-escaped individually: the JSON may contain
      # single quotes and paths may contain spaces.
      args = [script, remote_input, remote_output, "-j", job_params.to_json]
      args.push("-p", options[:lib]) if options.include?(:lib)

      run_command "mandy-rm #{Shellwords.escape(remote_output)}"
      run_command "mandy-hadoop #{Shellwords.join(args)}"
    end

    # Copies the job result to the local file system and returns the local
    # path. Must run directly after run_mandy_hadoop, because the remote
    # file name is parsed from the output of the last command executed.
    # The HDFS input and output directories are removed even on failure.
    def get_file_from_hdfs(hdfs_path, options)
      output_file_path = options[:output_file] || generate_output_path
      remote_output = "output/#{hdfs_path}"
      remote_file = get_hdfs_output(remote_output)
      raise "No output file under #{remote_output} was reported by mandy-hadoop" unless remote_file

      run_command "mandy-get #{Shellwords.join([remote_file, output_file_path])}"
      output_file_path
    ensure
      run_command "mandy-rm #{Shellwords.escape("input/#{hdfs_path}")}"
      run_command "mandy-rm #{Shellwords.escape("output/#{hdfs_path}")}"
    end

    # Appends the config option, logs the command (via #logger when the
    # receiver has one, otherwise via Kernel#p), runs it and remembers its
    # standard output in @output.
    def run_command(command)
      command = "#{command} -c #{Shellwords.escape(@@config_path)}"
      respond_to?(:logger) ? logger.info(command) : p(command)
      @output = `#{command}`
    end

    # Ensures /tmp/mandy_output exists and returns the per-process output
    # path inside it.
    def generate_output_path
      output_dir = "/tmp/mandy_output"
      FileUtils.mkdir_p(output_dir)
      "#{output_dir}/#{SESSION_ID}"
    end

    # Returns the first line of the last command's output that mentions
    # +hdfs_output_path+, stripped of surrounding whitespace, or nil when no
    # line matches (each_line alone would return the whole output string).
    def get_hdfs_output(hdfs_output_path)
      @output.each_line do |line|
        return line.strip if line.include?(hdfs_output_path)
      end
      nil
    end
  end
end

Object.send(:include, Mandy::Wrapper)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mandy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.7
4
+ version: 0.3.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Kent
@@ -76,6 +76,8 @@ files:
76
76
  - lib/ruby-hbase/version.rb
77
77
  - lib/ruby-hbase/xml_decoder.rb
78
78
  - lib/test_runner.rb
79
+ - lib/wrappers/mandy_wrapper.rb
80
+ - lib/wrappers/mandy_local_wrapper.rb
79
81
  has_rdoc: true
80
82
  homepage: http://github.com/trafficbroker/mandy
81
83
  licenses: []