mandy 0.5.11 → 0.5.13

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. data/VERSION +1 -1
  2. data/bin/mandy-hadoop +16 -4
  3. data/lib/mandy/errors.rb +2 -2
  4. metadata +2 -2
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.11
1
+ 0.5.13
data/bin/mandy-hadoop CHANGED
@@ -19,6 +19,14 @@ OptionParser.new do |opts|
19
19
  opts.on("-i", "--input-format class", "Set the hadoop input format type") do |format_class|
20
20
  options.input_format = format_class
21
21
  end
22
+
23
+ opts.on("-o", "--output-format class", "Set the hadoop output format type") do |format_class|
24
+ options.output_format = format_class
25
+ end
26
+
27
+ opts.on("-l", "--libjars jar files", "comma-separated jar files to include in the classpath") do |libjars|
28
+ options.libjars = libjars
29
+ end
22
30
 
23
31
  opts.on("-p", "--payload PAYLOAD", "Add a working directory to be sent to the cluster.") do |payload|
24
32
  options.payload = payload
@@ -73,11 +81,12 @@ input = inputs.map {|path| "-input \"#{path}\""}.join(" ")
73
81
  output_folder = ARGV[2]
74
82
  config = absolute_path(options.config || 'cluster.xml')
75
83
  puts "Packaging code for distribution..."
76
- payload = Mandy::Packer.pack(file, options.payload || ARGV[0], gemfile(options.gemfile))
84
+ payload = Mandy::Packer.pack(file, options.payload || ARGV[0], gemfile(options.gemfile))
77
85
  cmdenv = options.cmdenv
78
86
  set_env(cmdenv)
79
87
 
80
88
  inputformat = !options.input_format.nil? ? options.input_format : "TextInputFormat"
89
+ outputformat = !options.output_format.nil? ? options.output_format : "org.apache.hadoop.mapred.TextOutputFormat"
81
90
 
82
91
  at_exit do
83
92
  puts
@@ -100,18 +109,21 @@ begin
100
109
 
101
110
  bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
102
111
  inputreader = job.input_format == :xml ? "StreamXmlRecordReader,begin=<#{job.input_format_options[:xml_tag]} ,end=</#{job.input_format_options[:xml_tag]}>" : nil
103
-
104
- command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop*streaming*.jar #{jobconf}\
112
+
113
+ command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop*streaming*.jar \
114
+ #{options.libjars.nil? ? '' : "-libjars \"#{options.libjars}\""} \
115
+ #{jobconf}\
105
116
  -files "#{payload}","#{bootstrap_file}" \
106
117
  -conf '#{config}' \
107
118
  -inputformat '#{inputformat}' \
119
+ -outputformat '#{outputformat}' \
108
120
  #{input} \
109
121
  #{ inputreader.nil? ? '' : "-inputreader \"#{inputreader}\"" } \
110
122
  -mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
111
123
  -reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
112
124
  #{ cmdenv.nil? ? '' : "-cmdenv #{cmdenv}" }\
113
125
  -output "#{output}" 2>&1)
114
-
126
+
115
127
  result = []
116
128
  IO.popen(command, 'r') do |subprocess|
117
129
  while line = subprocess.gets
data/lib/mandy/errors.rb CHANGED
@@ -17,13 +17,13 @@ module Mandy
17
17
  end
18
18
 
19
19
  def hadoop_error
20
- @output.split("\n").find {|line| line =~ /ERROR/ }
20
+ @output
21
21
  end
22
22
 
23
23
  def to_s
24
24
  output = []
25
- output << %(Hadoop ERROR: #{hadoop_error || 'Unkown Error'})
26
25
  output << %(Mandy Job Name: #{job_name})
26
+ output << %(Hadoop ERROR: #{hadoop_error})
27
27
  output << %(Tracking URL: #{tracking_url}) if tracking_url
28
28
  output*"\n"
29
29
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 5
8
- - 11
9
- version: 0.5.11
8
+ - 13
9
+ version: 0.5.13
10
10
  platform: ruby
11
11
  authors:
12
12
  - Andy Kent