mandy 0.5.11 → 0.5.13
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/bin/mandy-hadoop +16 -4
- data/lib/mandy/errors.rb +2 -2
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.11
|
1
|
+
0.5.13
|
data/bin/mandy-hadoop
CHANGED
@@ -19,6 +19,14 @@ OptionParser.new do |opts|
|
|
19
19
|
opts.on("-i", "--input-format class", "Set the hadoop input format type") do |format_class|
|
20
20
|
options.input_format = format_class
|
21
21
|
end
|
22
|
+
|
23
|
+
opts.on("-o", "--output-format class", "Set the hadoop output format type") do |format_class|
|
24
|
+
options.output_format = format_class
|
25
|
+
end
|
26
|
+
|
27
|
+
opts.on("-l", "--libjars jar files", "comma-separated jar files to include in the classpath") do |libjars|
|
28
|
+
options.libjars = libjars
|
29
|
+
end
|
22
30
|
|
23
31
|
opts.on("-p", "--payload PAYLOAD", "Add a working directory to be sent to the cluster.") do |payload|
|
24
32
|
options.payload = payload
|
@@ -73,11 +81,12 @@ input = inputs.map {|path| "-input \"#{path}\""}.join(" ")
|
|
73
81
|
output_folder = ARGV[2]
|
74
82
|
config = absolute_path(options.config || 'cluster.xml')
|
75
83
|
puts "Packaging code for distribution..."
|
76
|
-
payload = Mandy::Packer.pack(file, options.payload || ARGV[0], gemfile(options.gemfile))
|
84
|
+
payload = Mandy::Packer.pack(file, options.payload || ARGV[0], gemfile(options.gemfile))
|
77
85
|
cmdenv = options.cmdenv
|
78
86
|
set_env(cmdenv)
|
79
87
|
|
80
88
|
inputformat = !options.input_format.nil? ? options.input_format : "TextInputFormat"
|
89
|
+
outputformat = !options.output_format.nil? ? options.output_format : "org.apache.hadoop.mapred.TextOutputFormat"
|
81
90
|
|
82
91
|
at_exit do
|
83
92
|
puts
|
@@ -100,18 +109,21 @@ begin
|
|
100
109
|
|
101
110
|
bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
|
102
111
|
inputreader = job.input_format == :xml ? "StreamXmlRecordReader,begin=<#{job.input_format_options[:xml_tag]} ,end=</#{job.input_format_options[:xml_tag]}>" : nil
|
103
|
-
|
104
|
-
command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop*streaming*.jar
|
112
|
+
|
113
|
+
command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop*streaming*.jar \
|
114
|
+
#{options.libjars.nil? ? '' : "-libjars \"#{options.libjars}\""} \
|
115
|
+
#{jobconf}\
|
105
116
|
-files "#{payload}","#{bootstrap_file}" \
|
106
117
|
-conf '#{config}' \
|
107
118
|
-inputformat '#{inputformat}' \
|
119
|
+
-outputformat '#{outputformat}' \
|
108
120
|
#{input} \
|
109
121
|
#{ inputreader.nil? ? '' : "-inputreader \"#{inputreader}\"" } \
|
110
122
|
-mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
|
111
123
|
-reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
|
112
124
|
#{ cmdenv.nil? ? '' : "-cmdenv #{cmdenv}" }\
|
113
125
|
-output "#{output}" 2>&1)
|
114
|
-
|
126
|
+
|
115
127
|
result = []
|
116
128
|
IO.popen(command, 'r') do |subprocess|
|
117
129
|
while line = subprocess.gets
|
data/lib/mandy/errors.rb
CHANGED
@@ -17,13 +17,13 @@ module Mandy
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def hadoop_error
|
20
|
-
@output
|
20
|
+
@output
|
21
21
|
end
|
22
22
|
|
23
23
|
def to_s
|
24
24
|
output = []
|
25
|
-
output << %(Hadoop ERROR: #{hadoop_error || 'Unkown Error'})
|
26
25
|
output << %(Mandy Job Name: #{job_name})
|
26
|
+
output << %(Hadoop ERROR: #{hadoop_error})
|
27
27
|
output << %(Tracking URL: #{tracking_url}) if tracking_url
|
28
28
|
output*"\n"
|
29
29
|
end
|