trafficbroker-mandy 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/mandy-hadoop +5 -0
- data/bin/mandy-install +5 -0
- data/bin/mandy-local +9 -1
- data/bin/mandy-map +5 -0
- data/bin/mandy-put +5 -0
- data/bin/mandy-reduce +5 -0
- data/bin/mandy-rm +6 -1
- data/lib/job.rb +14 -6
- data/lib/mandy.rb +12 -1
- data/lib/mappers/base_mapper.rb +40 -0
- data/lib/mappers/pass_through_mapper.rb +10 -0
- data/lib/reducers/base_reducer.rb +45 -0
- data/lib/reducers/pass_through_reducer.rb +9 -0
- data/lib/reducers/sum_reducer.rb +9 -0
- data/lib/{array_serializer.rb → support/array_serializer.rb} +0 -0
- data/lib/{tuple.rb → support/tuple.rb} +0 -0
- data/lib/test_runner.rb +2 -2
- data/readme.md +2 -2
- metadata +8 -5
- data/lib/mapper.rb +0 -40
- data/lib/reducer.rb +0 -44
data/bin/mandy-hadoop
CHANGED
@@ -4,6 +4,11 @@ def absolute_path(path)
|
|
4
4
|
path =~ /^\// ? path : File.join(Dir.pwd, path)
|
5
5
|
end
|
6
6
|
|
7
|
+
if ARGV.size==0
|
8
|
+
puts "USAGE: mandy-hadoop my_script.rb input_file_or_folder_on_hdfs output_folder_on_hdfs cluster-config.xml"
|
9
|
+
exit
|
10
|
+
end
|
11
|
+
|
7
12
|
file = ARGV[0]
|
8
13
|
filename = File.basename(file)
|
9
14
|
input = ARGV[1]
|
data/bin/mandy-install
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
if ARGV.size==0
|
4
|
+
puts "USAGE: mandy-install user@server1.com [user@server2.com ...]"
|
5
|
+
exit
|
6
|
+
end
|
7
|
+
|
3
8
|
ARGV.each do |server|
|
4
9
|
puts "Installing on #{server}..."
|
5
10
|
system %(ssh #{server} "sudo gem install trafficbroker-mandy --source http://gems.github.com")
|
data/bin/mandy-local
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
if ARGV.size==0
|
4
|
+
puts "USAGE: mandy-local my_script.rb local_input_file local_output_folder"
|
5
|
+
exit
|
6
|
+
end
|
7
|
+
|
3
8
|
require "fileutils"
|
4
9
|
|
5
10
|
def absolute_path(path)
|
@@ -13,6 +18,9 @@ require file
|
|
13
18
|
|
14
19
|
Mandy::Job.jobs.each_with_index do |job, i|
|
15
20
|
out = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
|
21
|
+
puts "Running #{job.name}..."
|
16
22
|
`cat #{input} | mandy-map #{file} "#{job.name}" | sort | mandy-reduce #{file} "#{job.name}" > #{out}`
|
17
23
|
input = out
|
18
|
-
end
|
24
|
+
end
|
25
|
+
|
26
|
+
puts "All Done! [#{input}]"
|
data/bin/mandy-map
CHANGED
data/bin/mandy-put
CHANGED
data/bin/mandy-reduce
CHANGED
data/bin/mandy-rm
CHANGED
data/lib/job.rb
CHANGED
@@ -16,8 +16,8 @@ module Mandy
|
|
16
16
|
def initialize(name, &blk)
|
17
17
|
@name = name
|
18
18
|
@settings = {}
|
19
|
-
@mapper_class = Mandy::
|
20
|
-
@reducer_class = Mandy::
|
19
|
+
@mapper_class = Mandy::Mappers::PassThroughMapper
|
20
|
+
@reducer_class = Mandy::Reducers::PassThroughReducer
|
21
21
|
set('mapred.job.name', name)
|
22
22
|
instance_eval(&blk) if blk
|
23
23
|
end
|
@@ -26,12 +26,20 @@ module Mandy
|
|
26
26
|
@settings[key.to_s] = value.to_s
|
27
27
|
end
|
28
28
|
|
29
|
-
def
|
30
|
-
|
29
|
+
def map_tasks(count)
|
30
|
+
set('mapred.map.tasks', count)
|
31
31
|
end
|
32
32
|
|
33
|
-
def
|
34
|
-
|
33
|
+
def reduce_tasks(count)
|
34
|
+
set('mapred.reduce.tasks', count)
|
35
|
+
end
|
36
|
+
|
37
|
+
def map(klass=nil, &blk)
|
38
|
+
@mapper_class = klass || Mandy::Mappers::Base.compile(&blk)
|
39
|
+
end
|
40
|
+
|
41
|
+
def reduce(klass=nil, &blk)
|
42
|
+
@reducer_class = klass || Mandy::Reducers::Base.compile(&blk)
|
35
43
|
end
|
36
44
|
|
37
45
|
def run_map(input=STDIN, output=STDOUT, &blk)
|
data/lib/mandy.rb
CHANGED
@@ -1 +1,12 @@
|
|
1
|
-
%w(
|
1
|
+
%w(
|
2
|
+
support/tuple
|
3
|
+
support/array_serializer
|
4
|
+
mappers/base_mapper
|
5
|
+
mappers/pass_through_mapper
|
6
|
+
reducers/base_reducer
|
7
|
+
reducers/pass_through_reducer
|
8
|
+
reducers/sum_reducer
|
9
|
+
dsl
|
10
|
+
job
|
11
|
+
test_runner
|
12
|
+
).each {|file| require File.join(File.dirname(__FILE__), file) }
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Mandy
|
2
|
+
module Mappers
|
3
|
+
class Base
|
4
|
+
|
5
|
+
KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
|
6
|
+
|
7
|
+
def initialize(input=STDIN, output=STDOUT)
|
8
|
+
@input, @output = input, output
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.compile(&blk)
|
12
|
+
Class.new(Mandy::Mappers::Base) do
|
13
|
+
self.class_eval do
|
14
|
+
define_method(:mapper, blk) if blk
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def execute
|
20
|
+
@input.each_line do |line|
|
21
|
+
key, value = line.split(KEY_VALUE_SEPERATOR)
|
22
|
+
key, value = nil, key if value.nil?
|
23
|
+
value.chomp!
|
24
|
+
mapper(key, value)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def emit(key, value=nil)
|
29
|
+
key = 'nil' if key.nil?
|
30
|
+
@output.puts(value.nil? ? key.to_s : "#{key}\t#{value}")
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
def mapper(key,value)
|
36
|
+
#nil
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Mandy
|
2
|
+
module Reducers
|
3
|
+
class Base
|
4
|
+
KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
|
5
|
+
|
6
|
+
def initialize(input=STDIN, output=STDOUT)
|
7
|
+
@input, @output = input, output
|
8
|
+
end
|
9
|
+
|
10
|
+
def self.compile(&blk)
|
11
|
+
Class.new(Mandy::Reducers::Base) do
|
12
|
+
self.class_eval do
|
13
|
+
define_method(:reducer, blk) if blk
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def execute
|
19
|
+
last_key, values = nil, []
|
20
|
+
@input.each_line do |line|
|
21
|
+
key, value = line.split(KEY_VALUE_SEPERATOR)
|
22
|
+
value.chomp!
|
23
|
+
last_key = key if last_key.nil?
|
24
|
+
if key != last_key
|
25
|
+
reducer(last_key, values)
|
26
|
+
last_key, values = key, []
|
27
|
+
end
|
28
|
+
values << value
|
29
|
+
end
|
30
|
+
reducer(last_key, values)
|
31
|
+
end
|
32
|
+
|
33
|
+
def emit(key, value=nil)
|
34
|
+
key = 'nil' if key.nil?
|
35
|
+
@output.puts(value.nil? ? key.to_s : "#{key}\t#{value}")
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def reducer(key,values)
|
41
|
+
#nil
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
File without changes
|
File without changes
|
data/lib/test_runner.rb
CHANGED
data/readme.md
CHANGED
@@ -3,7 +3,7 @@ Mandy - Simplified Hadoop distribution for Ruby code
|
|
3
3
|
|
4
4
|
Mandy hides the differences and complexities between running map/reduce tasks locally or distributed or in test environments.
|
5
5
|
|
6
|
-
It provides a simple DSL to define new jobs for distribution. See examples/word_count.rb for a
|
6
|
+
It provides a simple DSL to define new jobs for distribution. See examples/word_count.rb for a demo of some functionality.
|
7
7
|
Run the word count example locally with...
|
8
8
|
|
9
|
-
|
9
|
+
mandy-local examples/word_count.rb examples/alice.txt examples/output
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: trafficbroker-mandy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Kent
|
@@ -37,13 +37,16 @@ files:
|
|
37
37
|
- readme.md
|
38
38
|
- Rakefile
|
39
39
|
- lib/mandy.rb
|
40
|
-
- lib/
|
40
|
+
- lib/support/tuple.rb
|
41
|
+
- lib/support/array_serializer.rb
|
41
42
|
- lib/dsl.rb
|
42
43
|
- lib/job.rb
|
43
|
-
- lib/
|
44
|
-
- lib/
|
44
|
+
- lib/mappers/base_mapper.rb
|
45
|
+
- lib/mappers/pass_through_mapper.rb
|
46
|
+
- lib/reducers/base_reducer.rb
|
47
|
+
- lib/reducers/pass_through_reducer.rb
|
48
|
+
- lib/reducers/sum_reducer.rb
|
45
49
|
- lib/test_runner.rb
|
46
|
-
- lib/tuple.rb
|
47
50
|
has_rdoc: false
|
48
51
|
homepage:
|
49
52
|
post_install_message:
|
data/lib/mapper.rb
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
module Mandy
|
2
|
-
class Mapper
|
3
|
-
|
4
|
-
KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
|
5
|
-
|
6
|
-
def initialize(input=STDIN, output=STDOUT)
|
7
|
-
@input, @output = input, output
|
8
|
-
|
9
|
-
end
|
10
|
-
|
11
|
-
def self.compile(&blk)
|
12
|
-
Class.new(Mandy::Mapper) do
|
13
|
-
self.class_eval do
|
14
|
-
define_method(:mapper, blk) if blk
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
def execute
|
20
|
-
@input.each_line do |line|
|
21
|
-
key, value = line.split(KEY_VALUE_SEPERATOR)
|
22
|
-
key, value = nil, key if value.nil?
|
23
|
-
value.chomp!
|
24
|
-
mapper(key, value)
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
def emit(key, value=nil)
|
29
|
-
key = 'nil' if key.nil?
|
30
|
-
@output.puts(value.nil? ? key.to_s : "#{key}\t#{value}")
|
31
|
-
end
|
32
|
-
|
33
|
-
private
|
34
|
-
|
35
|
-
def mapper(key,value)
|
36
|
-
# default map is simply a pass-through
|
37
|
-
emit(key, value)
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
data/lib/reducer.rb
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
module Mandy
|
2
|
-
class Reducer
|
3
|
-
|
4
|
-
KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
|
5
|
-
|
6
|
-
def initialize(input=STDIN, output=STDOUT)
|
7
|
-
@input, @output = input, output
|
8
|
-
end
|
9
|
-
|
10
|
-
def self.compile(&blk)
|
11
|
-
Class.new(Mandy::Reducer) do
|
12
|
-
self.class_eval do
|
13
|
-
define_method(:reducer, blk) if blk
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def execute
|
19
|
-
last_key, values = nil, []
|
20
|
-
@input.each_line do |line|
|
21
|
-
key, value = line.split(KEY_VALUE_SEPERATOR)
|
22
|
-
value.chomp!
|
23
|
-
last_key = key if last_key.nil?
|
24
|
-
if key != last_key
|
25
|
-
reducer(last_key, values)
|
26
|
-
last_key, values = key, []
|
27
|
-
end
|
28
|
-
values << value
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
def emit(key, value=nil)
|
33
|
-
key = 'nil' if key.nil?
|
34
|
-
@output.puts(value.nil? ? key.to_s : "#{key}\t#{value}")
|
35
|
-
end
|
36
|
-
|
37
|
-
private
|
38
|
-
|
39
|
-
def reducer(key,value)
|
40
|
-
# default reducer is simply a pass-through
|
41
|
-
emit(key, value)
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|