mandy 0.2.15 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source "http://gemcutter.org"
2
+ gem 'mandy'
data/bin/mandy-hadoop CHANGED
@@ -26,6 +26,10 @@ OptionParser.new do |opts|
26
26
  options.cmdenv = "json=#{URI.encode(config)}"
27
27
  end
28
28
 
29
+ opts.on("-g", '--gemfile filepath', "Path to your jobs Gemfile (defaults to ./Gemfile)") do |config|
30
+ options.gemfile = config
31
+ end
32
+
29
33
  opts.on_tail("-h", "--help", "Show this message") do
30
34
  puts opts
31
35
  exit
@@ -36,31 +40,47 @@ def absolute_path(path)
36
40
  path =~ /^\// ? path : File.join(Dir.pwd, path)
37
41
  end
38
42
 
43
+ def gemfile(file)
44
+ path = absolute_path(file || 'Gemfile')
45
+ File.exist?(path) ? path : File.expand_path(File.join(File.dirname(__FILE__), '..', 'Gemfile'))
46
+ end
47
+
39
48
  file = ARGV[0]
40
49
  filename = File.basename(file)
41
50
  input = ARGV[1]
42
51
  output_folder = ARGV[2]
43
52
  config = options.config || 'cluster.xml'
44
- payload = options.payload ? Mandy::Packer.pack(options.payload) : ARGV[0]
53
+ puts "Packaging Gems for distribution..."
54
+ payload = Mandy::Packer.pack(options.payload || ARGV[0], gemfile(options.gemfile))
45
55
  cmdenv = options.cmdenv
46
56
 
47
- at_exit { Mandy::Packer.cleanup!(payload) }
57
+ at_exit do
58
+ puts "Cleaning up..."
59
+ Mandy::Packer.cleanup!(payload)
60
+ puts "All done!"
61
+ end
48
62
 
63
+ puts "Loading Mandy scripts..."
49
64
  require absolute_path(file)
50
65
 
51
66
  output = nil
52
67
 
68
+ puts "Sending jobs to Hadoop..."
69
+
53
70
  Mandy::Job.jobs.each_with_index do |job, i|
54
71
 
55
72
  jobconf = job.settings.map { |key, value| %(-D #{key}='#{value}') }.join(' ')
56
73
  output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
57
74
 
75
+ bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
76
+
58
77
  command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar #{jobconf}\
59
78
  -conf '#{config}' \
60
79
  -input "#{input}" \
61
- -mapper "mandy-map #{filename} '#{job.name}' #{File.basename(payload)}" \
62
- -reducer "mandy-reduce #{filename} '#{job.name}' #{File.basename(payload)}" \
80
+ -mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
81
+ -reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
63
82
  -file "#{payload}" \
83
+ -file "#{bootstrap_file}" \
64
84
  -cmdenv #{cmdenv} \
65
85
  -output "#{output}")
66
86
 
data/bin/mandy-map CHANGED
@@ -1,17 +1,13 @@
1
1
  #!/usr/bin/env ruby
2
- require "rubygems"
2
+ environment = File.expand_path(File.join(File.dirname(__FILE__), '..', 'vendor', 'gems', 'environment.rb'))
3
+ require File.exist?(environment) ? environment : "rubygems"
3
4
  require "mandy"
4
5
 
5
6
  if ARGV.size==0
6
- puts "USAGE: mandy-map my_script.rb 'Job Name' [payload]"
7
+ puts "USAGE: mandy-map my_script.rb 'Job Name'"
7
8
  exit
8
9
  end
9
10
 
10
- if ARGV.size > 2
11
- payload = ARGV[2]
12
- Mandy::Packer.unpack(payload)
13
- end
14
-
15
11
  def absolute_path(path)
16
12
  path =~ /^\// ? path : File.join(Dir.pwd, path)
17
13
  end
data/bin/mandy-reduce CHANGED
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
- require "rubygems"
2
+ environment = File.expand_path(File.join(File.dirname(__FILE__), '..', 'vendor', 'gems', 'environment.rb'))
3
+ require File.exist?(environment) ? environment : "rubygems"
3
4
  require "mandy"
4
5
 
5
6
  if ARGV.size==0
data/bootstrap.rb ADDED
@@ -0,0 +1,29 @@
1
+ require "fileutils"
2
+
3
+ module Mandy
4
+ class Packer
5
+ TMP_DIR = '/tmp/mandy'
6
+
7
+ def self.pack(dir)
8
+ return dir if File.file?(dir)
9
+ FileUtils.mkdir_p(TMP_DIR)
10
+ tmp_path = "#{TMP_DIR}/packed-job-#{Time.now.to_i}.tar"
11
+ Dir.chdir(dir) { `tar -cf #{tmp_path} *` }
12
+ tmp_path
13
+ end
14
+
15
+ def self.unpack(file)
16
+ return false unless File.extname(file) == '.tar'
17
+ `tar -xf #{file}`
18
+ end
19
+
20
+ def self.cleanup!(file)
21
+ return false unless File.extname(file) == '.tar'
22
+ `rm #{file}`
23
+ end
24
+ end
25
+ end
26
+
27
+ Mandy::Packer.unpack(ARGV[0])
28
+
29
+ `bin/mandy-#{ARGV[1]} #{ARGV[2]} '#{ARGV[3]}'`
@@ -16,7 +16,8 @@ module Mandy
16
16
  key, value = line.split(KEY_VALUE_SEPERATOR, 2)
17
17
  key, value = nil, key if value.nil?
18
18
  value.chomp!
19
- mapper(input_deserialize_key(key), input_deserialize_value(value))
19
+ args = [input_deserialize_key(key), input_deserialize_value(value)].compact
20
+ mapper(*args)
20
21
  end
21
22
  end
22
23
 
@@ -1,9 +1,9 @@
1
1
  module Mandy
2
2
  module Mappers
3
3
  class PassThroughMapper < Base
4
- def mapper(key,value)
4
+ def mapper(*params)
5
5
  # default map is simply a pass-through
6
- emit(key, value)
6
+ params.size == 1 ? emit(params[0]) : emit(params[0], params[1])
7
7
  end
8
8
  end
9
9
  end
data/lib/packer.rb CHANGED
@@ -4,12 +4,15 @@ module Mandy
4
4
  class Packer
5
5
  TMP_DIR = '/tmp/mandy'
6
6
 
7
- def self.pack(dir)
8
- return dir if File.file?(dir)
9
- FileUtils.mkdir_p(TMP_DIR)
10
- tmp_path = "#{TMP_DIR}/packed-job-#{Time.now.to_i}.tar"
11
- Dir.chdir(dir) { `tar -cf #{tmp_path} *` }
12
- tmp_path
7
+ def self.pack(dir, gemfile)
8
+ tmp_path = "#{TMP_DIR}/packed-job-#{Time.now.to_i}"
9
+ FileUtils.mkdir_p(tmp_path)
10
+ to_be_copied = File.file?(dir) ? dir : File.join(dir, '*')
11
+ FileUtils.cp_r(Dir.glob(to_be_copied), tmp_path)
12
+ FileUtils.cp_r(gemfile, tmp_path)
13
+ Dir.chdir(tmp_path) { `gem bundle` }
14
+ Dir.chdir(tmp_path) { `tar -cf bundle.tar *` }
15
+ File.join(tmp_path, 'bundle.tar')
13
16
  end
14
17
 
15
18
  def self.unpack(file)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mandy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.15
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Kent
@@ -10,10 +10,19 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2009-07-09 00:00:00 +01:00
13
+ date: 2009-10-02 00:00:00 +01:00
14
14
  default_executable:
15
- dependencies: []
16
-
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: bundler
18
+ type: :runtime
19
+ version_requirement:
20
+ version_requirements: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: "0"
25
+ version:
17
26
  description: Map/Reduce
18
27
  email: andy.kent@me.com
19
28
  executables:
@@ -39,6 +48,8 @@ files:
39
48
  - bin/mandy-reduce
40
49
  - readme.md
41
50
  - Rakefile
51
+ - bootstrap.rb
52
+ - Gemfile
42
53
  - lib/mandy.rb
43
54
  - lib/support/tuple.rb
44
55
  - lib/support/formatting.rb
@@ -89,7 +100,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
89
100
  requirements: []
90
101
 
91
102
  rubyforge_project:
92
- rubygems_version: 1.3.4
103
+ rubygems_version: 1.3.5
93
104
  signing_key:
94
105
  specification_version: 2
95
106
  summary: Map/Reduce