snail-map-reduce 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +10 -0
- data/MapReduceMatrixProduct.rb +85 -0
- data/README +2 -0
- data/map_reduce.rb +45 -0
- data/matrix_block_mixin.rb +18 -0
- data/snail.gemspec +13 -0
- metadata +52 -0
data/.gitignore
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'statsample'
|
3
|
+
require './matrix_block_mixin'
|
4
|
+
require './map_reduce'
|
5
|
+
|
6
|
+
# Collects the primitive block products needed to multiply two square
# matrices by recursive quadrant decomposition.
#
# After #setup, +inputs+ holds one hash per primitive product of the form
# {:key => String, :a => Matrix, :b => Matrix}.
class Inputs
  # Quadrant pairings for one level of block multiplication. Each entry is
  # [key suffix, [row, column] of the block taken from a, [row, column] of
  # the block taken from b]. The two digits of the suffix name the result
  # quadrant; "A"/"B" distinguish the two products that target that quadrant.
  QUADRANT_PRODUCTS = [
    ["00A", [0, 0], [0, 0]],
    ["00B", [0, 1], [1, 0]],
    ["01A", [0, 0], [0, 1]],
    ["01B", [0, 1], [1, 1]],
    ["10A", [1, 0], [0, 0]],
    ["10B", [1, 1], [1, 0]],
    ["11A", [1, 0], [0, 1]],
    ["11B", [1, 1], [1, 1]]
  ].freeze

  attr_accessor :inputs

  def initialize
    @inputs = []
  end

  # Recursively splits +a+ and +b+ into quadrants and appends every primitive
  # product pair to +inputs+.
  #
  # @param a [Matrix] left operand; must respond to +row_size+ and +block+
  # @param b [Matrix] right operand; must respond to +block+
  # @param key [String] key prefix; a three-character suffix is appended per
  #   recursion level
  # @return [nil]
  def setup(a, b, key)
    # Base case: operands of order 2 or smaller are multiplied directly.
    # Using <= (rather than ==) keeps an order-1 or empty operand from being
    # split again and again without ever reaching the base case.
    if a.row_size <= 2
      @inputs << {:key => key, :a => a, :b => b}
      return
    end

    QUADRANT_PRODUCTS.each do |suffix, (a_row, a_col), (b_row, b_col)|
      setup(a.block(a_row, a_col), b.block(b_row, b_col), key + suffix)
    end
    nil
  end
end
|
31
|
+
|
32
|
+
# Concatenates two blocks side by side, row by row.
#
# @param left_block [Matrix] block supplying the leading columns
# @param right_block [Matrix] block supplying the trailing columns
# @return [Array<Array>] one array per row: the left row followed by the
#   right row
# @raise [ArgumentError] if the blocks do not have the same number of rows
def join(left_block, right_block)
  # The row count used to be taken from the left block alone, so a right
  # block of a different height was never detected.
  unless left_block.row_size == right_block.row_size
    raise ArgumentError,
          "Cannot join blocks with #{left_block.row_size} and #{right_block.row_size} rows"
  end

  left_block.to_a.zip(right_block.to_a).map { |left_row, right_row| left_row + right_row }
end
|
40
|
+
|
41
|
+
# Builds a square matrix of random non-negative integers.
#
# @param order [Integer] number of rows and columns
# @param max [Integer] exclusive upper bound passed to +rand+ for every
#   entry; defaults to 20, the value that used to be hard-coded
# @return [Matrix] an order x order matrix
def m(order, max = 20)
  Matrix.build(order, order) { rand(max) }
end
|
44
|
+
|
45
|
+
# Reducer step that stitches the four quadrants of a block into one matrix.
#
# @param key [String] partition key; its last character becomes the
#   identity of the emitted value and the rest becomes the emitted key
# @param values [Array<Hash>] entries of the form
#   {:identity => "00"|"01"|"10"|"11", :matrix => Matrix}
# @return [Hash] {:key => String, :value => {:identity => ..., :matrix => Matrix}}
# @raise [ArgumentError] if any of the four quadrants is absent from +values+
def block_join_reduce(key, values)
  p00, p01, p10, p11 = %w[00 01 10 11].map do |identity|
    entry = values.find { |v| v[:identity] == identity }
    # Fail with a descriptive message instead of an opaque error from
    # indexing the array with nil.
    raise ArgumentError, "Missing quadrant #{identity} for key #{key.inspect}" unless entry
    entry[:matrix]
  end

  # Top half is [p00 | p01], bottom half is [p10 | p11].
  joined = Matrix.rows(join(p00, p01) + join(p10, p11))
  {:key => key[0..-2], :value => {:identity => key[-1], :matrix => joined}}
end
|
52
|
+
|
53
|
+
# Reducer step that adds together all matrices sharing a partition key.
#
# @param key [String] partition key; its last two characters become the
#   identity of the emitted value and the rest becomes the emitted key
# @param values [Array<Hash>] non-empty list of entries with a :matrix
# @return [Hash] {:key => String, :value => {:matrix => Matrix, :identity => String}}
def block_matrix_sum(key, values)
  # Seed the fold with a zero matrix of the same order as the first entry.
  order = values.first[:matrix].row_size
  total = values.inject(Matrix.zero(order)) { |acc, entry| acc + entry[:matrix] }
  {:key => key[0..-3], :value => {:matrix => total, :identity => key[-2..-1]}}
end
|
58
|
+
|
59
|
+
# Mapper step: multiplies the two blocks of one primitive input.
#
# @param key [String] input key; its last character is dropped to form
#   both the emitted key and the emitted identity
# @param value [Hash] entry holding the operands under :a and :b
# @return [Hash] {:key => String, :value => {:matrix => product, :identity => String}}
def primitive_map(key, value)
  product = value[:a] * value[:b]
  payload = {:matrix => product, :identity => key[0...-1]}
  {:key => key[0...-1], :value => payload}
end
|
62
|
+
|
63
|
+
# Driver: multiplies two random order x order matrices through the
# MapReduce pipeline and compares the outcome with a direct product.
order = 32
# One (sum, join) reducer pair is queued per reduction round.
reductions = (Math.log2(order) - 1).to_i
m1 = m(order)
m2 = m(order)

inputs = Inputs.new
inputs.setup(m1, m2, "X")
space = inputs.inputs

mappers = [->(k, v) { primitive_map(k, v) }]
reducers = []
reductions.times do
  reducers.push(->(k, v) { block_matrix_sum(k, v) }, ->(k, v) { block_join_reduce(k, v) })
end

result = MapReduceRunner.new(mappers, reducers).run(space)
product = result[0][:value][:matrix]
puts result
puts product
# Prints true when the MapReduce product matches the direct product.
puts m1 * m2 == product
|
85
|
+
|
data/README
ADDED
@@ -0,0 +1,2 @@
|
|
1
|
+
Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby for quickly prototyping and testing parallel algorithms. It is so called because all of its operations run in a single thread, so it runs really slowly on even medium-sized datasets. However, Snail is intended to help prototype the logic and verify the correctness of a MapReduce algorithm, so speed and scaling are not its aims.
|
2
|
+
|
data/map_reduce.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# Groups emitted items by key ahead of a reduce step.
class Partitioner
  # @param space [Enumerable<Hash>] items carrying :key and :value
  # @return [Hash] each distinct key mapped to the array of its values, in
  #   the order the items were encountered
  def run(space)
    space.each_with_object({}) do |item, partitions|
      (partitions[item[:key]] ||= []) << item[:value]
    end
  end
end
|
12
|
+
|
13
|
+
# Applies a reduce block to every partition.
class Reducer
  # @param partitions [Hash] key => array of values
  # @yield [key, values] invoked once per partition
  # @return [Array] the block's results, one per partition, in iteration order
  def run(partitions)
    partitions.map { |key, values| yield(key, values) }
  end
end
|
22
|
+
|
23
|
+
# Applies a map block to every item of the input space.
class Mapper
  # @param space [Enumerable<Hash>] items carrying a :key
  # @yield [key, item] the item's key followed by the whole item
  # @return [Array] the block's results, one per item
  def run(space)
    space.map do |entry|
      yield entry[:key], entry
    end
  end
end
|
28
|
+
|
29
|
+
# Runs a list of mappers followed by a list of reducers over an input space.
class MapReduceRunner
  # @param mappers [Array<#call>] callables taking (key, item) and returning
  #   an item with :key and :value
  # @param reducers [Array<#call>] callables taking (key, values) and
  #   returning an item with :key and :value
  def initialize(mappers, reducers)
    @mappers = mappers
    @reducers = reducers
  end

  # Executes the pipeline.
  #
  # Mappers are applied in order, each consuming the output of the previous
  # one (the first consumes +space+). Previously every mapper was run on
  # +space+ and only the last mapper's output was kept, so earlier mappers
  # had no effect. Reducers are then applied in order, each preceded by a
  # partitioning of the current results by key.
  #
  # @param space [Enumerable<Hash>] input items
  # @return [Array] output of the final stage; an empty mapper list yields
  #   an empty mapped set, as before
  def run(space)
    mapped = @mappers.empty? ? [] : @mappers.inject(space) do |current, mapper|
      Mapper.new.run(current) { |k, v| mapper.call(k, v) }
    end

    @reducers.inject(mapped) do |current, reducer|
      partitions = Partitioner.new.run(current)
      Reducer.new.run(partitions) { |k, v| reducer.call(k, v) }
    end
  end
end
|
45
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Ensure the standard-library Matrix is loaded before it is reopened, so the
# extension does not depend on the order in which files are required.
require 'matrix'

# Extends Matrix with quadrant extraction for square matrices whose order is
# a power of two.
class Matrix
  # Returns one quadrant of the matrix.
  #
  # @param block_row [Integer] 0 for the top half, 1 for the bottom half
  # @param block_column [Integer] 0 for the left half, 1 for the right half
  # @return [Matrix] the (row_size / 2) x (row_size / 2) quadrant
  # @raise [RuntimeError] if a dimension is not a power of two, the matrix
  #   is not square, or its order is below 2
  # @raise [ArgumentError] if a block index is neither 0 nor 1
  def block(block_row, block_column)
    raise "Non 2^n matrix" if (row_size & (row_size - 1)) != 0 || (column_size & (column_size - 1)) != 0
    # Both dimensions are halved using row_size, which only makes sense for
    # square matrices.
    raise "Non-square matrix" unless row_size == column_size
    # Orders 0 and 1 pass the bit test above but cannot be quartered.
    raise "Matrix too small to split into blocks" if row_size < 2
    unless [0, 1].include?(block_row) && [0, 1].include?(block_column)
      raise ArgumentError, "Block indices must be 0 or 1, got (#{block_row}, #{block_column})"
    end

    lower_order = row_size / 2
    minor(block_row * lower_order, lower_order, block_column * lower_order, lower_order)
  end
end
|
18
|
+
|
data/snail.gemspec
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Gem specification for snail-map-reduce.
spec = Gem::Specification.new do |s|
  s.name = "snail-map-reduce"
  s.version = "0.0.1"
  s.author = "Avishek Sen Gupta"
  s.email = "avishek.sen.gupta@gmail.com"
  s.homepage = "http://avishek.net/blog"
  s.platform = Gem::Platform::RUBY
  # Package every file tracked by git.
  s.files = `git ls-files`.split("\n")
  # The earlier placeholder summary ("Some description") was dead code: it
  # was overwritten by this assignment, so it has been removed.
  s.summary = %q{Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby to quickly prototype and test parallel algorithms.}
  s.description = %q{Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby to quickly prototype and test parallel algorithms.}
end
|
13
|
+
|
metadata
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: snail-map-reduce
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Avishek Sen Gupta
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-11-10 00:00:00.000000000Z
|
13
|
+
dependencies: []
|
14
|
+
description: Snail is a single-threaded, in-memory, barebones MapReduce framework
|
15
|
+
written in Ruby to quickly prototype and test parallel algorithms.
|
16
|
+
email: avishek.sen.gupta@gmail.com
|
17
|
+
executables: []
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- .gitignore
|
22
|
+
- MapReduceMatrixProduct.rb
|
23
|
+
- README
|
24
|
+
- map_reduce.rb
|
25
|
+
- matrix_block_mixin.rb
|
26
|
+
- snail.gemspec
|
27
|
+
homepage: http://avishek.net/blog
|
28
|
+
licenses: []
|
29
|
+
post_install_message:
|
30
|
+
rdoc_options: []
|
31
|
+
require_paths:
|
32
|
+
- lib
|
33
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
34
|
+
none: false
|
35
|
+
requirements:
|
36
|
+
- - ! '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ! '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
requirements: []
|
46
|
+
rubyforge_project:
|
47
|
+
rubygems_version: 1.8.6
|
48
|
+
signing_key:
|
49
|
+
specification_version: 3
|
50
|
+
summary: Snail is a single-threaded, in-memory, barebones MapReduce framework written
|
51
|
+
in Ruby to quickly prototype and test parallel algorithms.
|
52
|
+
test_files: []
|