mandy 0.5.11 → 0.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. data/VERSION +1 -1
  2. data/bin/mandy-hadoop +16 -4
  3. data/lib/mandy/errors.rb +2 -2
  4. metadata +2 -2
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.11
1
+ 0.5.13
data/bin/mandy-hadoop CHANGED
@@ -19,6 +19,14 @@ OptionParser.new do |opts|
19
19
  opts.on("-i", "--input-format class", "Set the hadoop input format type") do |format_class|
20
20
  options.input_format = format_class
21
21
  end
22
+
23
+ opts.on("-o", "--output-format class", "Set the hadoop output format type") do |format_class|
24
+ options.output_format = format_class
25
+ end
26
+
27
+ opts.on("-l", "--libjars jar files", "comma-separated jar files to include in the classpath") do |libjars|
28
+ options.libjars = libjars
29
+ end
22
30
 
23
31
  opts.on("-p", "--payload PAYLOAD", "Add a working directory to be sent to the cluster.") do |payload|
24
32
  options.payload = payload
@@ -73,11 +81,12 @@ input = inputs.map {|path| "-input \"#{path}\""}.join(" ")
73
81
  output_folder = ARGV[2]
74
82
  config = absolute_path(options.config || 'cluster.xml')
75
83
  puts "Packaging code for distribution..."
76
- payload = Mandy::Packer.pack(file, options.payload || ARGV[0], gemfile(options.gemfile))
84
+ payload = Mandy::Packer.pack(file, options.payload || ARGV[0], gemfile(options.gemfile))
77
85
  cmdenv = options.cmdenv
78
86
  set_env(cmdenv)
79
87
 
80
88
  inputformat = !options.input_format.nil? ? options.input_format : "TextInputFormat"
89
+ outputformat = !options.output_format.nil? ? options.output_format : "org.apache.hadoop.mapred.TextOutputFormat"
81
90
 
82
91
  at_exit do
83
92
  puts
@@ -100,18 +109,21 @@ begin
100
109
 
101
110
  bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
102
111
  inputreader = job.input_format == :xml ? "StreamXmlRecordReader,begin=<#{job.input_format_options[:xml_tag]} ,end=</#{job.input_format_options[:xml_tag]}>" : nil
103
-
104
- command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop*streaming*.jar #{jobconf}\
112
+
113
+ command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop*streaming*.jar \
114
+ #{options.libjars.nil? ? '' : "-libjars \"#{options.libjars}\""} \
115
+ #{jobconf}\
105
116
  -files "#{payload}","#{bootstrap_file}" \
106
117
  -conf '#{config}' \
107
118
  -inputformat '#{inputformat}' \
119
+ -outputformat '#{outputformat}' \
108
120
  #{input} \
109
121
  #{ inputreader.nil? ? '' : "-inputreader \"#{inputreader}\"" } \
110
122
  -mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
111
123
  -reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
112
124
  #{ cmdenv.nil? ? '' : "-cmdenv #{cmdenv}" }\
113
125
  -output "#{output}" 2>&1)
114
-
126
+
115
127
  result = []
116
128
  IO.popen(command, 'r') do |subprocess|
117
129
  while line = subprocess.gets
data/lib/mandy/errors.rb CHANGED
@@ -17,13 +17,13 @@ module Mandy
17
17
  end
18
18
 
19
19
  def hadoop_error
20
- @output.split("\n").find {|line| line =~ /ERROR/ }
20
+ @output
21
21
  end
22
22
 
23
23
  def to_s
24
24
  output = []
25
- output << %(Hadoop ERROR: #{hadoop_error || 'Unkown Error'})
26
25
  output << %(Mandy Job Name: #{job_name})
26
+ output << %(Hadoop ERROR: #{hadoop_error})
27
27
  output << %(Tracking URL: #{tracking_url}) if tracking_url
28
28
  output*"\n"
29
29
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 5
8
- - 11
9
- version: 0.5.11
8
+ - 13
9
+ version: 0.5.13
10
10
  platform: ruby
11
11
  authors:
12
12
  - Andy Kent