mandy 0.4.98 → 0.4.99

Sign up to get free protection for your applications and to get access to all the features.
Files changed (6)
  1. data/Rakefile +1 -1
  2. data/VERSION +1 -1
  3. data/bin/mandy-hadoop +29 -26
  4. data/lib/errors.rb +31 -0
  5. data/lib/mandy.rb +1 -0
  6. metadata +4 -3
data/Rakefile CHANGED
@@ -17,7 +17,7 @@ namespace :gem do
17
17
  end
18
18
 
19
19
  task :install => :build do
20
- `sudo gem install pkg/mandy-*.gem`
20
+ `sudo gem install pkg/mandy-*.gem --no-rdoc --no-ri`
21
21
  end
22
22
 
23
23
  task :push => :build do
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.98
1
+ 0.4.99
data/bin/mandy-hadoop CHANGED
@@ -56,7 +56,7 @@ puts "Packaging Gems for distribution..."
56
56
  payload = Mandy::Packer.pack(file, options.payload || ARGV[0], gemfile(options.gemfile))
57
57
  cmdenv = options.cmdenv
58
58
 
59
- at_exit do
59
+ at_exit do
60
60
  puts "Cleaning up..."
61
61
  Mandy::Packer.cleanup!(payload)
62
62
  puts "All done!"
@@ -66,32 +66,35 @@ puts "Loading Mandy scripts..."
66
66
  require absolute_path(file)
67
67
 
68
68
  output = nil
69
-
70
- puts "Sending jobs to Hadoop..."
71
-
72
- Mandy::Job.jobs.each_with_index do |job, i|
73
-
74
- jobconf = job.settings.map { |key, value| %(-D #{key}='#{value}') }.join(' ')
75
- output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
69
+ begin
70
+ Mandy::Job.jobs.each_with_index do |job, i|
71
+ puts "Running Job [#{i+1}] #{job.name}..."
72
+
73
+ jobconf = job.settings.map { |key, value| %(-D #{key}='#{value}') }.join(' ')
74
+ output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
76
75
 
77
- bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
78
- inputreader = job.input_format == :xml ? "StreamXmlRecordReader,begin=<#{job.input_format_options[:xml_tag]} ,end=</#{job.input_format_options[:xml_tag]}>" : nil
76
+ bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
77
+ inputreader = job.input_format == :xml ? "StreamXmlRecordReader,begin=<#{job.input_format_options[:xml_tag]} ,end=</#{job.input_format_options[:xml_tag]}>" : nil
79
78
 
80
- command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar #{jobconf}\
81
- -files "#{payload}","#{bootstrap_file}" \
82
- -conf '#{config}' \
83
- -input "#{input}" \
84
- #{ inputreader.nil? ? '' : "-inputreader \"#{inputreader}\"" } \
85
- -mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
86
- -reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
87
- #{ cmdenv.nil? ? '' : "-cmdenv #{cmdenv}" }\
88
- -output "#{output}")
79
+ command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar #{jobconf}\
80
+ -files "#{payload}","#{bootstrap_file}" \
81
+ -conf '#{config}' \
82
+ -input "#{input}" \
83
+ #{ inputreader.nil? ? '' : "-inputreader \"#{inputreader}\"" } \
84
+ -mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
85
+ -reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
86
+ #{ cmdenv.nil? ? '' : "-cmdenv #{cmdenv}" }\
87
+ -output "#{output}" 2>&1)
89
88
 
90
- `#{command}`
89
+ result = `#{command}`
90
+ raise(Mandy::HadoopJobFailure.new(job, result)) unless $?.to_i==0
91
91
 
92
- # puts "#{command}"
93
- input = output
94
- end
95
-
96
- # print out the output location so caller can know where to get the results from
97
- puts output
92
+ # puts "#{command}"
93
+ input = output
94
+ end
95
+ # print out the output location so caller can know where to get the results from
96
+ puts output
97
+ rescue Mandy::HadoopJobFailure => e
98
+ STDERR.puts e.to_s
99
+ exit(1)
100
+ end
data/lib/errors.rb ADDED
@@ -0,0 +1,31 @@
1
+ module Mandy
2
+ class HadoopJobFailure < StandardError
3
+ attr_reader :output
4
+
5
+ def initialize(job, output)
6
+ @job, @output = job, output
7
+ end
8
+
9
+ def job_name
10
+ @job.name
11
+ end
12
+
13
+ def tracking_url
14
+ line = @output.split("\n").find {|line| line =~ /Tracking URL/ }
15
+ return nil unless line
16
+ line.split('Tracking URL: ').last
17
+ end
18
+
19
+ def hadoop_error
20
+ @output.split("\n").find {|line| line =~ /ERROR/ }
21
+ end
22
+
23
+ def to_s
24
+ output = []
25
+ output << %(Hadoop ERROR: #{hadoop_error || 'Unkown Error'})
26
+ output << %(Mandy Job Name: #{job_name})
27
+ output << %(Tracking URL: #{tracking_url}) if tracking_url
28
+ output*"\n"
29
+ end
30
+ end
31
+ end
data/lib/mandy.rb CHANGED
@@ -26,6 +26,7 @@ require "cgi"
26
26
  stores/in_memory
27
27
  test_runner
28
28
  ruby-hbase
29
+ errors
29
30
  ).each {|file| require File.join(File.dirname(__FILE__), file) }
30
31
 
31
32
  module Mandy
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mandy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.98
4
+ version: 0.4.99
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Kent
@@ -23,7 +23,7 @@ dependencies:
23
23
  - !ruby/object:Gem::Version
24
24
  version: "0"
25
25
  version:
26
- description: Map/Reduce
26
+ description: Mandy is Ruby Map/Reduce Framework built onto of the Hadoop Distributed computing platform.
27
27
  email: andy.kent@me.com
28
28
  executables:
29
29
  - mandy
@@ -64,6 +64,7 @@ files:
64
64
  - Rakefile
65
65
  - bootstrap.rb
66
66
  - geminstaller.yml
67
+ - lib/errors.rb
67
68
  - lib/mandy.rb
68
69
  - lib/support/tuple.rb
69
70
  - lib/support/formatting.rb
@@ -118,6 +119,6 @@ rubyforge_project:
118
119
  rubygems_version: 1.3.5
119
120
  signing_key:
120
121
  specification_version: 2
121
- summary: Map/Reduce
122
+ summary: Map/Reduce Framework
122
123
  test_files: []
123
124