mandy 0.4.98 → 0.4.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6)
  1. data/Rakefile +1 -1
  2. data/VERSION +1 -1
  3. data/bin/mandy-hadoop +29 -26
  4. data/lib/errors.rb +31 -0
  5. data/lib/mandy.rb +1 -0
  6. metadata +4 -3
data/Rakefile CHANGED
@@ -17,7 +17,7 @@ namespace :gem do
17
17
  end
18
18
 
19
19
  task :install => :build do
20
- `sudo gem install pkg/mandy-*.gem`
20
+ `sudo gem install pkg/mandy-*.gem --no-rdoc --no-ri`
21
21
  end
22
22
 
23
23
  task :push => :build do
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.98
1
+ 0.4.99
data/bin/mandy-hadoop CHANGED
@@ -56,7 +56,7 @@ puts "Packaging Gems for distribution..."
56
56
  payload = Mandy::Packer.pack(file, options.payload || ARGV[0], gemfile(options.gemfile))
57
57
  cmdenv = options.cmdenv
58
58
 
59
- at_exit do
59
+ at_exit do
60
60
  puts "Cleaning up..."
61
61
  Mandy::Packer.cleanup!(payload)
62
62
  puts "All done!"
@@ -66,32 +66,35 @@ puts "Loading Mandy scripts..."
66
66
  require absolute_path(file)
67
67
 
68
68
  output = nil
69
-
70
- puts "Sending jobs to Hadoop..."
71
-
72
- Mandy::Job.jobs.each_with_index do |job, i|
73
-
74
- jobconf = job.settings.map { |key, value| %(-D #{key}='#{value}') }.join(' ')
75
- output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
69
+ begin
70
+ Mandy::Job.jobs.each_with_index do |job, i|
71
+ puts "Running Job [#{i+1}] #{job.name}..."
72
+
73
+ jobconf = job.settings.map { |key, value| %(-D #{key}='#{value}') }.join(' ')
74
+ output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
76
75
 
77
- bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
78
- inputreader = job.input_format == :xml ? "StreamXmlRecordReader,begin=<#{job.input_format_options[:xml_tag]} ,end=</#{job.input_format_options[:xml_tag]}>" : nil
76
+ bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
77
+ inputreader = job.input_format == :xml ? "StreamXmlRecordReader,begin=<#{job.input_format_options[:xml_tag]} ,end=</#{job.input_format_options[:xml_tag]}>" : nil
79
78
 
80
- command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar #{jobconf}\
81
- -files "#{payload}","#{bootstrap_file}" \
82
- -conf '#{config}' \
83
- -input "#{input}" \
84
- #{ inputreader.nil? ? '' : "-inputreader \"#{inputreader}\"" } \
85
- -mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
86
- -reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
87
- #{ cmdenv.nil? ? '' : "-cmdenv #{cmdenv}" }\
88
- -output "#{output}")
79
+ command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar #{jobconf}\
80
+ -files "#{payload}","#{bootstrap_file}" \
81
+ -conf '#{config}' \
82
+ -input "#{input}" \
83
+ #{ inputreader.nil? ? '' : "-inputreader \"#{inputreader}\"" } \
84
+ -mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
85
+ -reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
86
+ #{ cmdenv.nil? ? '' : "-cmdenv #{cmdenv}" }\
87
+ -output "#{output}" 2>&1)
89
88
 
90
- `#{command}`
89
+ result = `#{command}`
90
+ raise(Mandy::HadoopJobFailure.new(job, result)) unless $?.to_i==0
91
91
 
92
- # puts "#{command}"
93
- input = output
94
- end
95
-
96
- # print out the output location so caller can know where to get the results from
97
- puts output
92
+ # puts "#{command}"
93
+ input = output
94
+ end
95
+ # print out the output location so caller can know where to get the results from
96
+ puts output
97
+ rescue Mandy::HadoopJobFailure => e
98
+ STDERR.puts e.to_s
99
+ exit(1)
100
+ end
data/lib/errors.rb ADDED
@@ -0,0 +1,31 @@
module Mandy
  # Error describing a Hadoop job that did not complete successfully.
  #
  # Wraps the Mandy job together with the raw text captured from the hadoop
  # command so that callers can print a short, readable failure report via
  # #to_s (and therefore #message).
  class HadoopJobFailure < StandardError
    # The raw captured output, exactly as it was passed to the constructor.
    attr_reader :output

    # job    - the Mandy job that failed; only needs to respond to #name.
    # output - text captured from the hadoop command; nil is treated as
    #          empty output by the parsing helpers below.
    def initialize(job, output)
      @job = job
      @output = output
    end

    # Name of the failed Mandy job.
    def job_name
      @job.name
    end

    # Returns whatever follows 'Tracking URL: ' on the first output line that
    # mentions a tracking URL, or nil when no such line exists.
    def tracking_url
      url_line = output_lines.find { |text| text =~ /Tracking URL/ }
      return nil unless url_line
      url_line.split('Tracking URL: ').last
    end

    # Returns the first output line containing 'ERROR', or nil if none does.
    def hadoop_error
      output_lines.find { |text| text =~ /ERROR/ }
    end

    # Builds the multi-line failure report: the Hadoop error (or a generic
    # fallback), the job name and, when one was found, the tracking URL.
    def to_s
      url = tracking_url # scan the output once instead of twice
      report = []
      report << %(Hadoop ERROR: #{hadoop_error || 'Unknown Error'})
      report << %(Mandy Job Name: #{job_name})
      report << %(Tracking URL: #{url}) if url
      report.join("\n")
    end

    private

    # Captured output split into lines; to_s guards against a nil output so
    # building the report can never raise NoMethodError itself.
    def output_lines
      @output.to_s.split("\n")
    end
  end
end
data/lib/mandy.rb CHANGED
@@ -26,6 +26,7 @@ require "cgi"
26
26
  stores/in_memory
27
27
  test_runner
28
28
  ruby-hbase
29
+ errors
29
30
  ).each {|file| require File.join(File.dirname(__FILE__), file) }
30
31
 
31
32
  module Mandy
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mandy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.98
4
+ version: 0.4.99
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Kent
@@ -23,7 +23,7 @@ dependencies:
23
23
  - !ruby/object:Gem::Version
24
24
  version: "0"
25
25
  version:
26
- description: Map/Reduce
26
+ description: Mandy is a Ruby Map/Reduce Framework built on top of the Hadoop Distributed computing platform.
27
27
  email: andy.kent@me.com
28
28
  executables:
29
29
  - mandy
@@ -64,6 +64,7 @@ files:
64
64
  - Rakefile
65
65
  - bootstrap.rb
66
66
  - geminstaller.yml
67
+ - lib/errors.rb
67
68
  - lib/mandy.rb
68
69
  - lib/support/tuple.rb
69
70
  - lib/support/formatting.rb
@@ -118,6 +119,6 @@ rubyforge_project:
118
119
  rubygems_version: 1.3.5
119
120
  signing_key:
120
121
  specification_version: 2
121
- summary: Map/Reduce
122
+ summary: Map/Reduce Framework
122
123
  test_files: []
123
124