mandy 0.2.15 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -0
- data/bin/mandy-hadoop +24 -4
- data/bin/mandy-map +3 -7
- data/bin/mandy-reduce +2 -1
- data/bootstrap.rb +29 -0
- data/lib/mappers/base_mapper.rb +2 -1
- data/lib/mappers/pass_through_mapper.rb +2 -2
- data/lib/packer.rb +9 -6
- metadata +16 -5
data/Gemfile
ADDED
data/bin/mandy-hadoop
CHANGED
@@ -26,6 +26,10 @@ OptionParser.new do |opts|
|
|
26
26
|
options.cmdenv = "json=#{URI.encode(config)}"
|
27
27
|
end
|
28
28
|
|
29
|
+
opts.on("-g", '--gemfile filepath', "Path to your jobs Gemfile (defaults to ./Gemfile)") do |config|
|
30
|
+
options.gemfile = config
|
31
|
+
end
|
32
|
+
|
29
33
|
opts.on_tail("-h", "--help", "Show this message") do
|
30
34
|
puts opts
|
31
35
|
exit
|
@@ -36,31 +40,47 @@ def absolute_path(path)
|
|
36
40
|
path =~ /^\// ? path : File.join(Dir.pwd, path)
|
37
41
|
end
|
38
42
|
|
43
|
+
def gemfile(file)
|
44
|
+
path = absolute_path(file || 'Gemfile')
|
45
|
+
File.exist?(path) ? path : File.expand_path(File.join(File.dirname(__FILE__), '..', 'Gemfile'))
|
46
|
+
end
|
47
|
+
|
39
48
|
file = ARGV[0]
|
40
49
|
filename = File.basename(file)
|
41
50
|
input = ARGV[1]
|
42
51
|
output_folder = ARGV[2]
|
43
52
|
config = options.config || 'cluster.xml'
|
44
|
-
|
53
|
+
puts "Packaging Gems for distribution..."
|
54
|
+
payload = Mandy::Packer.pack(options.payload || ARGV[0], gemfile(options.gemfile))
|
45
55
|
cmdenv = options.cmdenv
|
46
56
|
|
47
|
-
at_exit
|
57
|
+
at_exit do
|
58
|
+
puts "Cleaning up..."
|
59
|
+
Mandy::Packer.cleanup!(payload)
|
60
|
+
puts "All done!"
|
61
|
+
end
|
48
62
|
|
63
|
+
puts "Loading Mandy scripts..."
|
49
64
|
require absolute_path(file)
|
50
65
|
|
51
66
|
output = nil
|
52
67
|
|
68
|
+
puts "Sending jobs to Hadoop..."
|
69
|
+
|
53
70
|
Mandy::Job.jobs.each_with_index do |job, i|
|
54
71
|
|
55
72
|
jobconf = job.settings.map { |key, value| %(-D #{key}='#{value}') }.join(' ')
|
56
73
|
output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
|
57
74
|
|
75
|
+
bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
|
76
|
+
|
58
77
|
command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar #{jobconf}\
|
59
78
|
-conf '#{config}' \
|
60
79
|
-input "#{input}" \
|
61
|
-
-mapper "
|
62
|
-
-reducer "
|
80
|
+
-mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
|
81
|
+
-reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
|
63
82
|
-file "#{payload}" \
|
83
|
+
-file "#{bootstrap_file}" \
|
64
84
|
-cmdenv #{cmdenv} \
|
65
85
|
-output "#{output}")
|
66
86
|
|
data/bin/mandy-map
CHANGED
@@ -1,17 +1,13 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
2
|
+
environment = File.expand_path(File.join(File.dirname(__FILE__), '..', 'vendor', 'gems', 'environment.rb'))
|
3
|
+
require File.exist?(environment) ? environment : "rubygems"
|
3
4
|
require "mandy"
|
4
5
|
|
5
6
|
if ARGV.size==0
|
6
|
-
puts "USAGE: mandy-map my_script.rb 'Job Name'
|
7
|
+
puts "USAGE: mandy-map my_script.rb 'Job Name'"
|
7
8
|
exit
|
8
9
|
end
|
9
10
|
|
10
|
-
if ARGV.size > 2
|
11
|
-
payload = ARGV[2]
|
12
|
-
Mandy::Packer.unpack(payload)
|
13
|
-
end
|
14
|
-
|
15
11
|
def absolute_path(path)
|
16
12
|
path =~ /^\// ? path : File.join(Dir.pwd, path)
|
17
13
|
end
|
data/bin/mandy-reduce
CHANGED
data/bootstrap.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require "fileutils"
|
2
|
+
|
3
|
+
module Mandy
|
4
|
+
class Packer
|
5
|
+
TMP_DIR = '/tmp/mandy'
|
6
|
+
|
7
|
+
def self.pack(dir)
|
8
|
+
return dir if File.file?(dir)
|
9
|
+
FileUtils.mkdir_p(TMP_DIR)
|
10
|
+
tmp_path = "#{TMP_DIR}/packed-job-#{Time.now.to_i}.tar"
|
11
|
+
Dir.chdir(dir) { `tar -cf #{tmp_path} *` }
|
12
|
+
tmp_path
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.unpack(file)
|
16
|
+
return false unless File.extname(file) == '.tar'
|
17
|
+
`tar -xf #{file}`
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.cleanup!(file)
|
21
|
+
return false unless File.extname(file) == '.tar'
|
22
|
+
`rm #{file}`
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
Mandy::Packer.unpack(ARGV[0])
|
28
|
+
|
29
|
+
`bin/mandy-#{ARGV[1]} #{ARGV[2]} '#{ARGV[3]}'`
|
data/lib/mappers/base_mapper.rb
CHANGED
@@ -16,7 +16,8 @@ module Mandy
|
|
16
16
|
key, value = line.split(KEY_VALUE_SEPERATOR, 2)
|
17
17
|
key, value = nil, key if value.nil?
|
18
18
|
value.chomp!
|
19
|
-
|
19
|
+
args = [input_deserialize_key(key), input_deserialize_value(value)].compact
|
20
|
+
mapper(*args)
|
20
21
|
end
|
21
22
|
end
|
22
23
|
|
data/lib/packer.rb
CHANGED
@@ -4,12 +4,15 @@ module Mandy
|
|
4
4
|
class Packer
|
5
5
|
TMP_DIR = '/tmp/mandy'
|
6
6
|
|
7
|
-
def self.pack(dir)
|
8
|
-
|
9
|
-
FileUtils.mkdir_p(
|
10
|
-
|
11
|
-
Dir.
|
12
|
-
tmp_path
|
7
|
+
def self.pack(dir, gemfile)
|
8
|
+
tmp_path = "#{TMP_DIR}/packed-job-#{Time.now.to_i}"
|
9
|
+
FileUtils.mkdir_p(tmp_path)
|
10
|
+
to_be_copied = File.file?(dir) ? dir : File.join(dir, '*')
|
11
|
+
FileUtils.cp_r(Dir.glob(to_be_copied), tmp_path)
|
12
|
+
FileUtils.cp_r(gemfile, tmp_path)
|
13
|
+
Dir.chdir(tmp_path) { `gem bundle` }
|
14
|
+
Dir.chdir(tmp_path) { `tar -cf bundle.tar *` }
|
15
|
+
File.join(tmp_path, 'bundle.tar')
|
13
16
|
end
|
14
17
|
|
15
18
|
def self.unpack(file)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mandy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.15
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Kent
|
@@ -10,10 +10,19 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2009-
|
13
|
+
date: 2009-10-02 00:00:00 +01:00
|
14
14
|
default_executable:
|
15
|
-
dependencies:
|
16
|
-
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: bundler
|
18
|
+
type: :runtime
|
19
|
+
version_requirement:
|
20
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: "0"
|
25
|
+
version:
|
17
26
|
description: Map/Reduce
|
18
27
|
email: andy.kent@me.com
|
19
28
|
executables:
|
@@ -39,6 +48,8 @@ files:
|
|
39
48
|
- bin/mandy-reduce
|
40
49
|
- readme.md
|
41
50
|
- Rakefile
|
51
|
+
- bootstrap.rb
|
52
|
+
- Gemfile
|
42
53
|
- lib/mandy.rb
|
43
54
|
- lib/support/tuple.rb
|
44
55
|
- lib/support/formatting.rb
|
@@ -89,7 +100,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
89
100
|
requirements: []
|
90
101
|
|
91
102
|
rubyforge_project:
|
92
|
-
rubygems_version: 1.3.
|
103
|
+
rubygems_version: 1.3.5
|
93
104
|
signing_key:
|
94
105
|
specification_version: 2
|
95
106
|
summary: Map/Reduce
|