mandy 0.4.98 → 0.4.99
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/bin/mandy-hadoop +29 -26
- data/lib/errors.rb +31 -0
- data/lib/mandy.rb +1 -0
- metadata +4 -3
data/Rakefile
CHANGED
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.4.98
|
|
1
|
+
0.4.99
|
data/bin/mandy-hadoop
CHANGED
|
@@ -56,7 +56,7 @@ puts "Packaging Gems for distribution..."
|
|
|
56
56
|
payload = Mandy::Packer.pack(file, options.payload || ARGV[0], gemfile(options.gemfile))
|
|
57
57
|
cmdenv = options.cmdenv
|
|
58
58
|
|
|
59
|
-
at_exit do
|
|
59
|
+
at_exit do
|
|
60
60
|
puts "Cleaning up..."
|
|
61
61
|
Mandy::Packer.cleanup!(payload)
|
|
62
62
|
puts "All done!"
|
|
@@ -66,32 +66,35 @@ puts "Loading Mandy scripts..."
|
|
|
66
66
|
require absolute_path(file)
|
|
67
67
|
|
|
68
68
|
output = nil
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
|
|
69
|
+
begin
|
|
70
|
+
Mandy::Job.jobs.each_with_index do |job, i|
|
|
71
|
+
puts "Running Job [#{i+1}] #{job.name}..."
|
|
72
|
+
|
|
73
|
+
jobconf = job.settings.map { |key, value| %(-D #{key}='#{value}') }.join(' ')
|
|
74
|
+
output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
|
|
76
75
|
|
|
77
|
-
|
|
78
|
-
|
|
76
|
+
bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
|
|
77
|
+
inputreader = job.input_format == :xml ? "StreamXmlRecordReader,begin=<#{job.input_format_options[:xml_tag]} ,end=</#{job.input_format_options[:xml_tag]}>" : nil
|
|
79
78
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
79
|
+
command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar #{jobconf}\
|
|
80
|
+
-files "#{payload}","#{bootstrap_file}" \
|
|
81
|
+
-conf '#{config}' \
|
|
82
|
+
-input "#{input}" \
|
|
83
|
+
#{ inputreader.nil? ? '' : "-inputreader \"#{inputreader}\"" } \
|
|
84
|
+
-mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
|
|
85
|
+
-reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
|
|
86
|
+
#{ cmdenv.nil? ? '' : "-cmdenv #{cmdenv}" }\
|
|
87
|
+
-output "#{output}" 2>&1)
|
|
89
88
|
|
|
90
|
-
|
|
89
|
+
result = `#{command}`
|
|
90
|
+
raise(Mandy::HadoopJobFailure.new(job, result)) unless $?.to_i==0
|
|
91
91
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
92
|
+
# puts "#{command}"
|
|
93
|
+
input = output
|
|
94
|
+
end
|
|
95
|
+
# print out the output location so caller can know where to get the results from
|
|
96
|
+
puts output
|
|
97
|
+
rescue Mandy::HadoopJobFailure => e
|
|
98
|
+
STDERR.puts e.to_s
|
|
99
|
+
exit(1)
|
|
100
|
+
end
|
data/lib/errors.rb
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
module Mandy
|
|
2
|
+
class HadoopJobFailure < StandardError
|
|
3
|
+
attr_reader :output
|
|
4
|
+
|
|
5
|
+
def initialize(job, output)
|
|
6
|
+
@job, @output = job, output
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
def job_name
|
|
10
|
+
@job.name
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def tracking_url
|
|
14
|
+
line = @output.split("\n").find {|line| line =~ /Tracking URL/ }
|
|
15
|
+
return nil unless line
|
|
16
|
+
line.split('Tracking URL: ').last
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def hadoop_error
|
|
20
|
+
@output.split("\n").find {|line| line =~ /ERROR/ }
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def to_s
|
|
24
|
+
output = []
|
|
25
|
+
output << %(Hadoop ERROR: #{hadoop_error || 'Unkown Error'})
|
|
26
|
+
output << %(Mandy Job Name: #{job_name})
|
|
27
|
+
output << %(Tracking URL: #{tracking_url}) if tracking_url
|
|
28
|
+
output*"\n"
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
data/lib/mandy.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: mandy
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.98
|
|
4
|
+
version: 0.4.99
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andy Kent
|
|
@@ -23,7 +23,7 @@ dependencies:
|
|
|
23
23
|
- !ruby/object:Gem::Version
|
|
24
24
|
version: "0"
|
|
25
25
|
version:
|
|
26
|
-
description: Map/Reduce
|
|
26
|
+
description: Mandy is Ruby Map/Reduce Framework built onto of the Hadoop Distributed computing platform.
|
|
27
27
|
email: andy.kent@me.com
|
|
28
28
|
executables:
|
|
29
29
|
- mandy
|
|
@@ -64,6 +64,7 @@ files:
|
|
|
64
64
|
- Rakefile
|
|
65
65
|
- bootstrap.rb
|
|
66
66
|
- geminstaller.yml
|
|
67
|
+
- lib/errors.rb
|
|
67
68
|
- lib/mandy.rb
|
|
68
69
|
- lib/support/tuple.rb
|
|
69
70
|
- lib/support/formatting.rb
|
|
@@ -118,6 +119,6 @@ rubyforge_project:
|
|
|
118
119
|
rubygems_version: 1.3.5
|
|
119
120
|
signing_key:
|
|
120
121
|
specification_version: 2
|
|
121
|
-
summary: Map/Reduce
|
|
122
|
+
summary: Map/Reduce Framework
|
|
122
123
|
test_files: []
|
|
123
124
|
|