mandy 0.5.11 → 0.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/bin/mandy-hadoop +16 -4
- data/lib/mandy/errors.rb +2 -2
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.11
|
1
|
+
0.5.13
|
data/bin/mandy-hadoop
CHANGED
@@ -19,6 +19,14 @@ OptionParser.new do |opts|
|
|
19
19
|
opts.on("-i", "--input-format class", "Set the hadoop input format type") do |format_class|
|
20
20
|
options.input_format = format_class
|
21
21
|
end
|
22
|
+
|
23
|
+
opts.on("-o", "--output-format class", "Set the hadoop output format type") do |format_class|
|
24
|
+
options.output_format = format_class
|
25
|
+
end
|
26
|
+
|
27
|
+
opts.on("-l", "--libjars jar files", "comma-separated jar files to include in the classpath") do |libjars|
|
28
|
+
options.libjars = libjars
|
29
|
+
end
|
22
30
|
|
23
31
|
opts.on("-p", "--payload PAYLOAD", "Add a working directory to be sent to the cluster.") do |payload|
|
24
32
|
options.payload = payload
|
@@ -73,11 +81,12 @@ input = inputs.map {|path| "-input \"#{path}\""}.join(" ")
|
|
73
81
|
output_folder = ARGV[2]
|
74
82
|
config = absolute_path(options.config || 'cluster.xml')
|
75
83
|
puts "Packaging code for distribution..."
|
76
|
-
payload = Mandy::Packer.pack(file, options.payload || ARGV[0], gemfile(options.gemfile))
|
84
|
+
payload = Mandy::Packer.pack(file, options.payload || ARGV[0], gemfile(options.gemfile))
|
77
85
|
cmdenv = options.cmdenv
|
78
86
|
set_env(cmdenv)
|
79
87
|
|
80
88
|
inputformat = !options.input_format.nil? ? options.input_format : "TextInputFormat"
|
89
|
+
outputformat = !options.output_format.nil? ? options.output_format : "org.apache.hadoop.mapred.TextOutputFormat"
|
81
90
|
|
82
91
|
at_exit do
|
83
92
|
puts
|
@@ -100,18 +109,21 @@ begin
|
|
100
109
|
|
101
110
|
bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
|
102
111
|
inputreader = job.input_format == :xml ? "StreamXmlRecordReader,begin=<#{job.input_format_options[:xml_tag]} ,end=</#{job.input_format_options[:xml_tag]}>" : nil
|
103
|
-
|
104
|
-
command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop*streaming*.jar
|
112
|
+
|
113
|
+
command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop*streaming*.jar \
|
114
|
+
#{options.libjars.nil? ? '' : "-libjars \"#{options.libjars}\""} \
|
115
|
+
#{jobconf}\
|
105
116
|
-files "#{payload}","#{bootstrap_file}" \
|
106
117
|
-conf '#{config}' \
|
107
118
|
-inputformat '#{inputformat}' \
|
119
|
+
-outputformat '#{outputformat}' \
|
108
120
|
#{input} \
|
109
121
|
#{ inputreader.nil? ? '' : "-inputreader \"#{inputreader}\"" } \
|
110
122
|
-mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
|
111
123
|
-reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
|
112
124
|
#{ cmdenv.nil? ? '' : "-cmdenv #{cmdenv}" }\
|
113
125
|
-output "#{output}" 2>&1)
|
114
|
-
|
126
|
+
|
115
127
|
result = []
|
116
128
|
IO.popen(command, 'r') do |subprocess|
|
117
129
|
while line = subprocess.gets
|
data/lib/mandy/errors.rb
CHANGED
@@ -17,13 +17,13 @@ module Mandy
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def hadoop_error
|
20
|
-
@output
|
20
|
+
@output
|
21
21
|
end
|
22
22
|
|
23
23
|
def to_s
|
24
24
|
output = []
|
25
|
-
output << %(Hadoop ERROR: #{hadoop_error || 'Unkown Error'})
|
26
25
|
output << %(Mandy Job Name: #{job_name})
|
26
|
+
output << %(Hadoop ERROR: #{hadoop_error})
|
27
27
|
output << %(Tracking URL: #{tracking_url}) if tracking_url
|
28
28
|
output*"\n"
|
29
29
|
end
|