mandy 0.2.15 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -0
- data/bin/mandy-hadoop +24 -4
- data/bin/mandy-map +3 -7
- data/bin/mandy-reduce +2 -1
- data/bootstrap.rb +29 -0
- data/lib/mappers/base_mapper.rb +2 -1
- data/lib/mappers/pass_through_mapper.rb +2 -2
- data/lib/packer.rb +9 -6
- metadata +16 -5
data/Gemfile
ADDED
data/bin/mandy-hadoop
CHANGED
@@ -26,6 +26,10 @@ OptionParser.new do |opts|
|
|
26
26
|
options.cmdenv = "json=#{URI.encode(config)}"
|
27
27
|
end
|
28
28
|
|
29
|
+
opts.on("-g", '--gemfile filepath', "Path to your jobs Gemfile (defaults to ./Gemfile)") do |config|
|
30
|
+
options.gemfile = config
|
31
|
+
end
|
32
|
+
|
29
33
|
opts.on_tail("-h", "--help", "Show this message") do
|
30
34
|
puts opts
|
31
35
|
exit
|
@@ -36,31 +40,47 @@ def absolute_path(path)
|
|
36
40
|
path =~ /^\// ? path : File.join(Dir.pwd, path)
|
37
41
|
end
|
38
42
|
|
43
|
+
def gemfile(file)
|
44
|
+
path = absolute_path(file || 'Gemfile')
|
45
|
+
File.exist?(path) ? path : File.expand_path(File.join(File.dirname(__FILE__), '..', 'Gemfile'))
|
46
|
+
end
|
47
|
+
|
39
48
|
file = ARGV[0]
|
40
49
|
filename = File.basename(file)
|
41
50
|
input = ARGV[1]
|
42
51
|
output_folder = ARGV[2]
|
43
52
|
config = options.config || 'cluster.xml'
|
44
|
-
|
53
|
+
puts "Packaging Gems for distribution..."
|
54
|
+
payload = Mandy::Packer.pack(options.payload || ARGV[0], gemfile(options.gemfile))
|
45
55
|
cmdenv = options.cmdenv
|
46
56
|
|
47
|
-
at_exit
|
57
|
+
at_exit do
|
58
|
+
puts "Cleaning up..."
|
59
|
+
Mandy::Packer.cleanup!(payload)
|
60
|
+
puts "All done!"
|
61
|
+
end
|
48
62
|
|
63
|
+
puts "Loading Mandy scripts..."
|
49
64
|
require absolute_path(file)
|
50
65
|
|
51
66
|
output = nil
|
52
67
|
|
68
|
+
puts "Sending jobs to Hadoop..."
|
69
|
+
|
53
70
|
Mandy::Job.jobs.each_with_index do |job, i|
|
54
71
|
|
55
72
|
jobconf = job.settings.map { |key, value| %(-D #{key}='#{value}') }.join(' ')
|
56
73
|
output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
|
57
74
|
|
75
|
+
bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
|
76
|
+
|
58
77
|
command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar #{jobconf}\
|
59
78
|
-conf '#{config}' \
|
60
79
|
-input "#{input}" \
|
61
|
-
-mapper "
|
62
|
-
-reducer "
|
80
|
+
-mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
|
81
|
+
-reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
|
63
82
|
-file "#{payload}" \
|
83
|
+
-file "#{bootstrap_file}" \
|
64
84
|
-cmdenv #{cmdenv} \
|
65
85
|
-output "#{output}")
|
66
86
|
|
data/bin/mandy-map
CHANGED
@@ -1,17 +1,13 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
2
|
+
environment = File.expand_path(File.join(File.dirname(__FILE__), '..', 'vendor', 'gems', 'environment.rb'))
|
3
|
+
require File.exist?(environment) ? environment : "rubygems"
|
3
4
|
require "mandy"
|
4
5
|
|
5
6
|
if ARGV.size==0
|
6
|
-
puts "USAGE: mandy-map my_script.rb 'Job Name'
|
7
|
+
puts "USAGE: mandy-map my_script.rb 'Job Name'"
|
7
8
|
exit
|
8
9
|
end
|
9
10
|
|
10
|
-
if ARGV.size > 2
|
11
|
-
payload = ARGV[2]
|
12
|
-
Mandy::Packer.unpack(payload)
|
13
|
-
end
|
14
|
-
|
15
11
|
def absolute_path(path)
|
16
12
|
path =~ /^\// ? path : File.join(Dir.pwd, path)
|
17
13
|
end
|
data/bin/mandy-reduce
CHANGED
data/bootstrap.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require "fileutils"
|
2
|
+
|
3
|
+
module Mandy
|
4
|
+
class Packer
|
5
|
+
TMP_DIR = '/tmp/mandy'
|
6
|
+
|
7
|
+
def self.pack(dir)
|
8
|
+
return dir if File.file?(dir)
|
9
|
+
FileUtils.mkdir_p(TMP_DIR)
|
10
|
+
tmp_path = "#{TMP_DIR}/packed-job-#{Time.now.to_i}.tar"
|
11
|
+
Dir.chdir(dir) { `tar -cf #{tmp_path} *` }
|
12
|
+
tmp_path
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.unpack(file)
|
16
|
+
return false unless File.extname(file) == '.tar'
|
17
|
+
`tar -xf #{file}`
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.cleanup!(file)
|
21
|
+
return false unless File.extname(file) == '.tar'
|
22
|
+
`rm #{file}`
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
Mandy::Packer.unpack(ARGV[0])
|
28
|
+
|
29
|
+
`bin/mandy-#{ARGV[1]} #{ARGV[2]} '#{ARGV[3]}'`
|
data/lib/mappers/base_mapper.rb
CHANGED
@@ -16,7 +16,8 @@ module Mandy
|
|
16
16
|
key, value = line.split(KEY_VALUE_SEPERATOR, 2)
|
17
17
|
key, value = nil, key if value.nil?
|
18
18
|
value.chomp!
|
19
|
-
|
19
|
+
args = [input_deserialize_key(key), input_deserialize_value(value)].compact
|
20
|
+
mapper(*args)
|
20
21
|
end
|
21
22
|
end
|
22
23
|
|
data/lib/packer.rb
CHANGED
@@ -4,12 +4,15 @@ module Mandy
|
|
4
4
|
class Packer
|
5
5
|
TMP_DIR = '/tmp/mandy'
|
6
6
|
|
7
|
-
def self.pack(dir)
|
8
|
-
|
9
|
-
FileUtils.mkdir_p(
|
10
|
-
|
11
|
-
Dir.
|
12
|
-
tmp_path
|
7
|
+
def self.pack(dir, gemfile)
|
8
|
+
tmp_path = "#{TMP_DIR}/packed-job-#{Time.now.to_i}"
|
9
|
+
FileUtils.mkdir_p(tmp_path)
|
10
|
+
to_be_copied = File.file?(dir) ? dir : File.join(dir, '*')
|
11
|
+
FileUtils.cp_r(Dir.glob(to_be_copied), tmp_path)
|
12
|
+
FileUtils.cp_r(gemfile, tmp_path)
|
13
|
+
Dir.chdir(tmp_path) { `gem bundle` }
|
14
|
+
Dir.chdir(tmp_path) { `tar -cf bundle.tar *` }
|
15
|
+
File.join(tmp_path, 'bundle.tar')
|
13
16
|
end
|
14
17
|
|
15
18
|
def self.unpack(file)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mandy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Kent
|
@@ -10,10 +10,19 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2009-
|
13
|
+
date: 2009-10-02 00:00:00 +01:00
|
14
14
|
default_executable:
|
15
|
-
dependencies:
|
16
|
-
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: bundler
|
18
|
+
type: :runtime
|
19
|
+
version_requirement:
|
20
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: "0"
|
25
|
+
version:
|
17
26
|
description: Map/Reduce
|
18
27
|
email: andy.kent@me.com
|
19
28
|
executables:
|
@@ -39,6 +48,8 @@ files:
|
|
39
48
|
- bin/mandy-reduce
|
40
49
|
- readme.md
|
41
50
|
- Rakefile
|
51
|
+
- bootstrap.rb
|
52
|
+
- Gemfile
|
42
53
|
- lib/mandy.rb
|
43
54
|
- lib/support/tuple.rb
|
44
55
|
- lib/support/formatting.rb
|
@@ -89,7 +100,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
89
100
|
requirements: []
|
90
101
|
|
91
102
|
rubyforge_project:
|
92
|
-
rubygems_version: 1.3.
|
103
|
+
rubygems_version: 1.3.5
|
93
104
|
signing_key:
|
94
105
|
specification_version: 2
|
95
106
|
summary: Map/Reduce
|