mandy 0.2.15 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source "http://gemcutter.org"
2
+ gem 'mandy'
data/bin/mandy-hadoop CHANGED
@@ -26,6 +26,10 @@ OptionParser.new do |opts|
26
26
  options.cmdenv = "json=#{URI.encode(config)}"
27
27
  end
28
28
 
29
+ opts.on("-g", '--gemfile filepath', "Path to your jobs Gemfile (defaults to ./Gemfile)") do |config|
30
+ options.gemfile = config
31
+ end
32
+
29
33
  opts.on_tail("-h", "--help", "Show this message") do
30
34
  puts opts
31
35
  exit
@@ -36,31 +40,47 @@ def absolute_path(path)
36
40
  path =~ /^\// ? path : File.join(Dir.pwd, path)
37
41
  end
38
42
 
43
+ def gemfile(file)
44
+ path = absolute_path(file || 'Gemfile')
45
+ File.exist?(path) ? path : File.expand_path(File.join(File.dirname(__FILE__), '..', 'Gemfile'))
46
+ end
47
+
39
48
  file = ARGV[0]
40
49
  filename = File.basename(file)
41
50
  input = ARGV[1]
42
51
  output_folder = ARGV[2]
43
52
  config = options.config || 'cluster.xml'
44
- payload = options.payload ? Mandy::Packer.pack(options.payload) : ARGV[0]
53
+ puts "Packaging Gems for distribution..."
54
+ payload = Mandy::Packer.pack(options.payload || ARGV[0], gemfile(options.gemfile))
45
55
  cmdenv = options.cmdenv
46
56
 
47
- at_exit { Mandy::Packer.cleanup!(payload) }
57
+ at_exit do
58
+ puts "Cleaning up..."
59
+ Mandy::Packer.cleanup!(payload)
60
+ puts "All done!"
61
+ end
48
62
 
63
+ puts "Loading Mandy scripts..."
49
64
  require absolute_path(file)
50
65
 
51
66
  output = nil
52
67
 
68
+ puts "Sending jobs to Hadoop..."
69
+
53
70
  Mandy::Job.jobs.each_with_index do |job, i|
54
71
 
55
72
  jobconf = job.settings.map { |key, value| %(-D #{key}='#{value}') }.join(' ')
56
73
  output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
57
74
 
75
+ bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
76
+
58
77
  command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar #{jobconf}\
59
78
  -conf '#{config}' \
60
79
  -input "#{input}" \
61
- -mapper "mandy-map #{filename} '#{job.name}' #{File.basename(payload)}" \
62
- -reducer "mandy-reduce #{filename} '#{job.name}' #{File.basename(payload)}" \
80
+ -mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
81
+ -reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
63
82
  -file "#{payload}" \
83
+ -file "#{bootstrap_file}" \
64
84
  -cmdenv #{cmdenv} \
65
85
  -output "#{output}")
66
86
 
data/bin/mandy-map CHANGED
@@ -1,17 +1,13 @@
1
1
  #!/usr/bin/env ruby
2
- require "rubygems"
2
+ environment = File.expand_path(File.join(File.dirname(__FILE__), '..', 'vendor', 'gems', 'environment.rb'))
3
+ require File.exist?(environment) ? environment : "rubygems"
3
4
  require "mandy"
4
5
 
5
6
  if ARGV.size==0
6
- puts "USAGE: mandy-map my_script.rb 'Job Name' [payload]"
7
+ puts "USAGE: mandy-map my_script.rb 'Job Name'"
7
8
  exit
8
9
  end
9
10
 
10
- if ARGV.size > 2
11
- payload = ARGV[2]
12
- Mandy::Packer.unpack(payload)
13
- end
14
-
15
11
  def absolute_path(path)
16
12
  path =~ /^\// ? path : File.join(Dir.pwd, path)
17
13
  end
data/bin/mandy-reduce CHANGED
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
- require "rubygems"
2
+ environment = File.expand_path(File.join(File.dirname(__FILE__), '..', 'vendor', 'gems', 'environment.rb'))
3
+ require File.exist?(environment) ? environment : "rubygems"
3
4
  require "mandy"
4
5
 
5
6
  if ARGV.size==0
data/bootstrap.rb ADDED
@@ -0,0 +1,29 @@
1
+ require "fileutils"
2
+
3
+ module Mandy
4
+ class Packer
5
+ TMP_DIR = '/tmp/mandy'
6
+
7
+ def self.pack(dir)
8
+ return dir if File.file?(dir)
9
+ FileUtils.mkdir_p(TMP_DIR)
10
+ tmp_path = "#{TMP_DIR}/packed-job-#{Time.now.to_i}.tar"
11
+ Dir.chdir(dir) { `tar -cf #{tmp_path} *` }
12
+ tmp_path
13
+ end
14
+
15
+ def self.unpack(file)
16
+ return false unless File.extname(file) == '.tar'
17
+ `tar -xf #{file}`
18
+ end
19
+
20
+ def self.cleanup!(file)
21
+ return false unless File.extname(file) == '.tar'
22
+ `rm #{file}`
23
+ end
24
+ end
25
+ end
26
+
27
+ Mandy::Packer.unpack(ARGV[0])
28
+
29
+ `bin/mandy-#{ARGV[1]} #{ARGV[2]} '#{ARGV[3]}'`
@@ -16,7 +16,8 @@ module Mandy
16
16
  key, value = line.split(KEY_VALUE_SEPERATOR, 2)
17
17
  key, value = nil, key if value.nil?
18
18
  value.chomp!
19
- mapper(input_deserialize_key(key), input_deserialize_value(value))
19
+ args = [input_deserialize_key(key), input_deserialize_value(value)].compact
20
+ mapper(*args)
20
21
  end
21
22
  end
22
23
 
@@ -1,9 +1,9 @@
1
1
  module Mandy
2
2
  module Mappers
3
3
  class PassThroughMapper < Base
4
- def mapper(key,value)
4
+ def mapper(*params)
5
5
  # default map is simply a pass-through
6
- emit(key, value)
6
+ params.size == 1 ? emit(params[0]) : emit(params[0], params[1])
7
7
  end
8
8
  end
9
9
  end
data/lib/packer.rb CHANGED
@@ -4,12 +4,15 @@ module Mandy
4
4
  class Packer
5
5
  TMP_DIR = '/tmp/mandy'
6
6
 
7
- def self.pack(dir)
8
- return dir if File.file?(dir)
9
- FileUtils.mkdir_p(TMP_DIR)
10
- tmp_path = "#{TMP_DIR}/packed-job-#{Time.now.to_i}.tar"
11
- Dir.chdir(dir) { `tar -cf #{tmp_path} *` }
12
- tmp_path
7
+ def self.pack(dir, gemfile)
8
+ tmp_path = "#{TMP_DIR}/packed-job-#{Time.now.to_i}"
9
+ FileUtils.mkdir_p(tmp_path)
10
+ to_be_copied = File.file?(dir) ? dir : File.join(dir, '*')
11
+ FileUtils.cp_r(Dir.glob(to_be_copied), tmp_path)
12
+ FileUtils.cp_r(gemfile, tmp_path)
13
+ Dir.chdir(tmp_path) { `gem bundle` }
14
+ Dir.chdir(tmp_path) { `tar -cf bundle.tar *` }
15
+ File.join(tmp_path, 'bundle.tar')
13
16
  end
14
17
 
15
18
  def self.unpack(file)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mandy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.15
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Kent
@@ -10,10 +10,19 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2009-07-09 00:00:00 +01:00
13
+ date: 2009-10-02 00:00:00 +01:00
14
14
  default_executable:
15
- dependencies: []
16
-
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: bundler
18
+ type: :runtime
19
+ version_requirement:
20
+ version_requirements: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: "0"
25
+ version:
17
26
  description: Map/Reduce
18
27
  email: andy.kent@me.com
19
28
  executables:
@@ -39,6 +48,8 @@ files:
39
48
  - bin/mandy-reduce
40
49
  - readme.md
41
50
  - Rakefile
51
+ - bootstrap.rb
52
+ - Gemfile
42
53
  - lib/mandy.rb
43
54
  - lib/support/tuple.rb
44
55
  - lib/support/formatting.rb
@@ -89,7 +100,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
89
100
  requirements: []
90
101
 
91
102
  rubyforge_project:
92
- rubygems_version: 1.3.4
103
+ rubygems_version: 1.3.5
93
104
  signing_key:
94
105
  specification_version: 2
95
106
  summary: Map/Reduce