mandy 0.4.2 → 0.4.3
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/mandy-hadoop +3 -2
- data/lib/job.rb +8 -4
- data/lib/wrappers/mandy_wrapper.rb +1 -1
- metadata +1 -1
data/bin/mandy-hadoop
CHANGED
@@ -75,14 +75,15 @@ Mandy::Job.jobs.each_with_index do |job, i|
|
|
75
75
|
output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
|
76
76
|
|
77
77
|
bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
|
78
|
+
inputreader = job.input_format == :xml ? "StreamXmlRecordReader,begin=<#{job.input_format_options[:xml_tag]} ,end=</#{job.input_format_options[:xml_tag]}>" : nil
|
78
79
|
|
79
80
|
command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar #{jobconf}\
|
81
|
+
-files "#{payload}","#{bootstrap_file}" \
|
80
82
|
-conf '#{config}' \
|
81
83
|
-input "#{input}" \
|
84
|
+
#{ inputreader.nil? ? '' : "-inputreader \"#{inputreader}\"" } \
|
82
85
|
-mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
|
83
86
|
-reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
|
84
|
-
-file "#{payload}" \
|
85
|
-
-file "#{bootstrap_file}" \
|
86
87
|
#{ cmdenv.nil? ? '' : "-cmdenv #{cmdenv}" }\
|
87
88
|
-output "#{output}")
|
88
89
|
|
data/lib/job.rb
CHANGED
@@ -12,7 +12,8 @@ module Mandy
|
|
12
12
|
|
13
13
|
attr_reader :settings
|
14
14
|
attr_reader :name
|
15
|
-
|
15
|
+
attr_reader :input_format_options
|
16
|
+
|
16
17
|
def initialize(name, &blk)
|
17
18
|
@name = name
|
18
19
|
@settings = {}
|
@@ -25,11 +26,14 @@ module Mandy
|
|
25
26
|
modules.each {|m| @modules << m}
|
26
27
|
end
|
27
28
|
alias_method :serialize, :mixin
|
28
|
-
|
29
|
-
def input_format(format)
|
29
|
+
|
30
|
+
def input_format(format=nil, options={})
|
31
|
+
return @input_format if format.nil?
|
32
|
+
|
30
33
|
@input_format = format
|
34
|
+
@input_format_options = options
|
31
35
|
end
|
32
|
-
|
36
|
+
|
33
37
|
def output_format(format)
|
34
38
|
@output_format = format
|
35
39
|
end
|
data/lib/wrappers/mandy_wrapper.rb
CHANGED
@@ -27,7 +27,7 @@ module Mandy
|
|
27
27
|
def put_files_on_hdfs(hdfs_path, input_files)
|
28
28
|
input_files = [input_files] unless input_files.is_a?(Array)
|
29
29
|
input_files.each do |input_file|
|
30
|
-
input_file_path = File.expand_path(input_file.path)
|
30
|
+
input_file_path = input_file.is_a?(File) ? File.expand_path(input_file.path) : input_file
|
31
31
|
base_filename = input_file_path.split("/").last
|
32
32
|
dest_file = ["input/#{hdfs_path}", base_filename].join("/")
|
33
33
|
run_command "mandy-put #{input_file_path} #{dest_file}"
|