trafficbroker-mandy 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/mandy-hadoop CHANGED
@@ -4,6 +4,11 @@ def absolute_path(path)
4
4
  path =~ /^\// ? path : File.join(Dir.pwd, path)
5
5
  end
6
6
 
7
+ if ARGV.size==0
8
+ puts "USAGE: mandy-hadoop my_script.rb input_file_or_folder_on_hdfs output_folder_on_hdfs cluster-config.xml"
9
+ exit
10
+ end
11
+
7
12
  file = ARGV[0]
8
13
  filename = File.basename(file)
9
14
  input = ARGV[1]
data/bin/mandy-install CHANGED
@@ -1,5 +1,10 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ if ARGV.size==0
4
+ puts "USAGE: mandy-install user@server1.com [user@server2.com ...]"
5
+ exit
6
+ end
7
+
3
8
  ARGV.each do |server|
4
9
  puts "Installing on #{server}..."
5
10
  system %(ssh #{server} "sudo gem install trafficbroker-mandy --source http://gems.github.com")
data/bin/mandy-local CHANGED
@@ -1,5 +1,10 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ if ARGV.size==0
4
+ puts "USAGE: mandy-local my_script.rb local_input_file local_output_folder"
5
+ exit
6
+ end
7
+
3
8
  require "fileutils"
4
9
 
5
10
  def absolute_path(path)
@@ -13,6 +18,9 @@ require file
13
18
 
14
19
  Mandy::Job.jobs.each_with_index do |job, i|
15
20
  out = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
21
+ puts "Running #{job.name}..."
16
22
  `cat #{input} | mandy-map #{file} "#{job.name}" | sort | mandy-reduce #{file} "#{job.name}" > #{out}`
17
23
  input = out
18
- end
24
+ end
25
+
26
+ puts "All Done! [#{input}]"
data/bin/mandy-map CHANGED
@@ -1,5 +1,10 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ if ARGV.size==0
4
+ puts "USAGE: mandy-map my_script.rb 'Job Name'"
5
+ exit
6
+ end
7
+
3
8
  def absolute_path(path)
4
9
  path =~ /^\// ? path : File.join(Dir.pwd, path)
5
10
  end
data/bin/mandy-put CHANGED
@@ -1,5 +1,10 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ if ARGV.size==0
4
+ puts "USAGE: mandy-put local_file_or_folder hdfs_destination_location cluster-config.xml"
5
+ exit
6
+ end
7
+
3
8
  def absolute_path(path)
4
9
  path =~ /^\// ? path : File.join(Dir.pwd, path)
5
10
  end
data/bin/mandy-reduce CHANGED
@@ -1,5 +1,10 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ if ARGV.size==0
4
+ puts "USAGE: mandy-reduce my_script.rb 'Job Name'"
5
+ exit
6
+ end
7
+
3
8
  def absolute_path(path)
4
9
  path =~ /^\// ? path : File.join(Dir.pwd, path)
5
10
  end
data/bin/mandy-rm CHANGED
@@ -1,6 +1,11 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- config = ARGV[0]
3
+ if ARGV.size==0
4
+ puts "USAGE: mandy-rm file_or_folder_on_hdfs cluster-config.xml"
5
+ exit
6
+ end
7
+
4
8
  file = ARGV[1]
9
+ config = ARGV[0]
5
10
 
6
11
  `$HADOOP_HOME/bin/hadoop fs -conf #{config} -rmr #{file}`
data/lib/job.rb CHANGED
@@ -16,8 +16,8 @@ module Mandy
16
16
  def initialize(name, &blk)
17
17
  @name = name
18
18
  @settings = {}
19
- @mapper_class = Mandy::Mapper
20
- @reducer_class = Mandy::Reducer
19
+ @mapper_class = Mandy::Mappers::PassThroughMapper
20
+ @reducer_class = Mandy::Reducers::PassThroughReducer
21
21
  set('mapred.job.name', name)
22
22
  instance_eval(&blk) if blk
23
23
  end
@@ -26,12 +26,20 @@ module Mandy
26
26
  @settings[key.to_s] = value.to_s
27
27
  end
28
28
 
29
- def map(&blk)
30
- @mapper_class = Mandy::Mapper.compile(&blk)
29
+ def map_tasks(count)
30
+ set('mapred.map.tasks', count)
31
31
  end
32
32
 
33
- def reduce(&blk)
34
- @reducer_class = Mandy::Reducer.compile(&blk)
33
+ def reduce_tasks(count)
34
+ set('mapred.reduce.tasks', count)
35
+ end
36
+
37
+ def map(klass=nil, &blk)
38
+ @mapper_class = klass || Mandy::Mappers::Base.compile(&blk)
39
+ end
40
+
41
+ def reduce(klass=nil, &blk)
42
+ @reducer_class = klass || Mandy::Reducers::Base.compile(&blk)
35
43
  end
36
44
 
37
45
  def run_map(input=STDIN, output=STDOUT, &blk)
data/lib/mandy.rb CHANGED
@@ -1 +1,12 @@
1
- %w(tuple array_serializer mapper reducer dsl job test_runner).each {|file| require File.join(File.dirname(__FILE__), file) }
1
+ %w(
2
+ support/tuple
3
+ support/array_serializer
4
+ mappers/base_mapper
5
+ mappers/pass_through_mapper
6
+ reducers/base_reducer
7
+ reducers/pass_through_reducer
8
+ reducers/sum_reducer
9
+ dsl
10
+ job
11
+ test_runner
12
+ ).each {|file| require File.join(File.dirname(__FILE__), file) }
@@ -0,0 +1,40 @@
1
+ module Mandy
2
+ module Mappers
3
+ class Base
4
+
5
+ KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
6
+
7
+ def initialize(input=STDIN, output=STDOUT)
8
+ @input, @output = input, output
9
+ end
10
+
11
+ def self.compile(&blk)
12
+ Class.new(Mandy::Mappers::Base) do
13
+ self.class_eval do
14
+ define_method(:mapper, blk) if blk
15
+ end
16
+ end
17
+ end
18
+
19
+ def execute
20
+ @input.each_line do |line|
21
+ key, value = line.split(KEY_VALUE_SEPERATOR)
22
+ key, value = nil, key if value.nil?
23
+ value.chomp!
24
+ mapper(key, value)
25
+ end
26
+ end
27
+
28
+ def emit(key, value=nil)
29
+ key = 'nil' if key.nil?
30
+ @output.puts(value.nil? ? key.to_s : "#{key}\t#{value}")
31
+ end
32
+
33
+ private
34
+
35
+ def mapper(key,value)
36
+ #nil
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,10 @@
1
+ module Mandy
2
+ module Mappers
3
+ class PassThroughMapper < Base
4
+ def mapper(key,value)
5
+ # default map is simply a pass-through
6
+ emit(key, value)
7
+ end
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,45 @@
1
+ module Mandy
2
+ module Reducers
3
+ class Base
4
+ KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
5
+
6
+ def initialize(input=STDIN, output=STDOUT)
7
+ @input, @output = input, output
8
+ end
9
+
10
+ def self.compile(&blk)
11
+ Class.new(Mandy::Reducers::Base) do
12
+ self.class_eval do
13
+ define_method(:reducer, blk) if blk
14
+ end
15
+ end
16
+ end
17
+
18
+ def execute
19
+ last_key, values = nil, []
20
+ @input.each_line do |line|
21
+ key, value = line.split(KEY_VALUE_SEPERATOR)
22
+ value.chomp!
23
+ last_key = key if last_key.nil?
24
+ if key != last_key
25
+ reducer(last_key, values)
26
+ last_key, values = key, []
27
+ end
28
+ values << value
29
+ end
30
+ reducer(last_key, values)
31
+ end
32
+
33
+ def emit(key, value=nil)
34
+ key = 'nil' if key.nil?
35
+ @output.puts(value.nil? ? key.to_s : "#{key}\t#{value}")
36
+ end
37
+
38
+ private
39
+
40
+ def reducer(key,values)
41
+ #nil
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,9 @@
1
+ module Mandy
2
+ module Reducers
3
+ class PassThroughReducer < Base
4
+ def reducer(key,values)
5
+ values.each {|value| emit(key, value) }
6
+ end
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,9 @@
1
+ module Mandy
2
+ module Reducers
3
+ class SumReducer < Base
4
+ def reducer(key,values)
5
+ emit(key, values.inject(0) {|sum,count| sum+count.to_f})
6
+ end
7
+ end
8
+ end
9
+ end
File without changes
data/lib/test_runner.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  module Mandy
2
2
  class TestRunner
3
- def initialize(job=Mandy::Job.jobs.first)
4
- @job = job
3
+ def initialize(job=Mandy::Job.jobs.first.name)
4
+ @job = Mandy::Job.find_by_name(job)
5
5
  end
6
6
 
7
7
  def map(input, output_stream=StringIO.new(''), &blk)
data/readme.md CHANGED
@@ -3,7 +3,7 @@ Mandy - Simplified Hadoop distribution for Ruby code
3
3
 
4
4
  Mandy hides the differences and complexities between running map/reduce tasks locally or distributed or in test environments.
5
5
 
6
- It provides a simple DSL to define new jobs for distribution. See examples/word_count.rb for a very simple demo.
6
+ It provides a simple DSL to define new jobs for distribution. See examples/word_count.rb for a demo of some functionality.
7
7
  Run the word count example locally with...
8
8
 
9
- bin/mandy local examples/word_count.rb examples/alice.txt examples/output.txt
9
+ mandy-local examples/word_count.rb examples/alice.txt examples/output
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: trafficbroker-mandy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Kent
@@ -37,13 +37,16 @@ files:
37
37
  - readme.md
38
38
  - Rakefile
39
39
  - lib/mandy.rb
40
- - lib/array_serializer.rb
40
+ - lib/support/tuple.rb
41
+ - lib/support/array_serializer.rb
41
42
  - lib/dsl.rb
42
43
  - lib/job.rb
43
- - lib/mapper.rb
44
- - lib/reducer.rb
44
+ - lib/mappers/base_mapper.rb
45
+ - lib/mappers/pass_through_mapper.rb
46
+ - lib/reducers/base_reducer.rb
47
+ - lib/reducers/pass_through_reducer.rb
48
+ - lib/reducers/sum_reducer.rb
45
49
  - lib/test_runner.rb
46
- - lib/tuple.rb
47
50
  has_rdoc: false
48
51
  homepage:
49
52
  post_install_message:
data/lib/mapper.rb DELETED
@@ -1,40 +0,0 @@
1
- module Mandy
2
- class Mapper
3
-
4
- KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
5
-
6
- def initialize(input=STDIN, output=STDOUT)
7
- @input, @output = input, output
8
-
9
- end
10
-
11
- def self.compile(&blk)
12
- Class.new(Mandy::Mapper) do
13
- self.class_eval do
14
- define_method(:mapper, blk) if blk
15
- end
16
- end
17
- end
18
-
19
- def execute
20
- @input.each_line do |line|
21
- key, value = line.split(KEY_VALUE_SEPERATOR)
22
- key, value = nil, key if value.nil?
23
- value.chomp!
24
- mapper(key, value)
25
- end
26
- end
27
-
28
- def emit(key, value=nil)
29
- key = 'nil' if key.nil?
30
- @output.puts(value.nil? ? key.to_s : "#{key}\t#{value}")
31
- end
32
-
33
- private
34
-
35
- def mapper(key,value)
36
- # default map is simply a pass-through
37
- emit(key, value)
38
- end
39
- end
40
- end
data/lib/reducer.rb DELETED
@@ -1,44 +0,0 @@
1
- module Mandy
2
- class Reducer
3
-
4
- KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
5
-
6
- def initialize(input=STDIN, output=STDOUT)
7
- @input, @output = input, output
8
- end
9
-
10
- def self.compile(&blk)
11
- Class.new(Mandy::Reducer) do
12
- self.class_eval do
13
- define_method(:reducer, blk) if blk
14
- end
15
- end
16
- end
17
-
18
- def execute
19
- last_key, values = nil, []
20
- @input.each_line do |line|
21
- key, value = line.split(KEY_VALUE_SEPERATOR)
22
- value.chomp!
23
- last_key = key if last_key.nil?
24
- if key != last_key
25
- reducer(last_key, values)
26
- last_key, values = key, []
27
- end
28
- values << value
29
- end
30
- end
31
-
32
- def emit(key, value=nil)
33
- key = 'nil' if key.nil?
34
- @output.puts(value.nil? ? key.to_s : "#{key}\t#{value}")
35
- end
36
-
37
- private
38
-
39
- def reducer(key,value)
40
- # default reducer is simply a pass-through
41
- emit(key, value)
42
- end
43
- end
44
- end