snail-map-reduce 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +10 -0
- data/MapReduceMatrixProduct.rb +85 -0
- data/README +2 -0
- data/map_reduce.rb +45 -0
- data/matrix_block_mixin.rb +18 -0
- data/snail.gemspec +13 -0
- metadata +52 -0
data/.gitignore
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
require 'rubygems'
|
|
2
|
+
require 'statsample'
|
|
3
|
+
require './matrix_block_mixin'
|
|
4
|
+
require './map_reduce'
|
|
5
|
+
|
|
6
|
+
# Collects the leaf multiplication tasks for a blocked (divide-and-conquer)
# matrix product.
#
# After #setup, #inputs holds one record per 2x2 sub-product, each of the
# form {:key => String, :a => matrix, :b => matrix}.
class Inputs
  attr_accessor :inputs

  def initialize
    @inputs = []
  end

  # Recursively splits +a+ and +b+ into quadrants until +a+ is 2x2, then
  # records the pair under +key+.
  #
  # Every recursion level appends three characters to the key: the block row
  # and block column of the output quadrant, followed by "A" or "B" to tell
  # apart the two partial products that make up that quadrant
  # (a[i][0] * b[0][j] is "A", a[i][1] * b[1][j] is "B").
  #
  # a, b - objects responding to +row_size+ and +block(block_row, block_column)+.
  # key  - String prefix for the generated record keys.
  #
  # Returns nil. Raises ArgumentError when +a+ has fewer than 2 rows, since
  # such a matrix can never reach the 2x2 base case and would recurse forever.
  def setup(a, b, key)
    if a.row_size < 2
      raise ArgumentError, "matrix order must be at least 2, got #{a.row_size}"
    end

    if a.row_size == 2
      @inputs << { :key => key, :a => a, :b => b }
      return
    end

    # Same order as the hand-written sequence: 00A, 00B, 01A, 01B, 10A, ...
    [0, 1].each do |i|
      [0, 1].each do |j|
        setup(a.block(i, 0), b.block(0, j), "#{key}#{i}#{j}A")
        setup(a.block(i, 1), b.block(1, j), "#{key}#{i}#{j}B")
      end
    end
    nil
  end
end
|
|
31
|
+
|
|
32
|
+
# Places two blocks side by side.
#
# left_block, right_block - objects responding to +row_size+ and +row(index)+,
#                           where each row responds to +to_a+.
#
# Returns an Array of row Arrays, one per row of +left_block+, each being the
# left row followed by the right row at the same index.
def join(left_block, right_block)
  (0...left_block.row_size).map do |index|
    left_block.row(index).to_a + right_block.row(index).to_a
  end
end
|
|
40
|
+
|
|
41
|
+
# Builds a random square test matrix.
#
# order - number of rows and columns.
#
# Returns an order x order Matrix whose entries come from rand(20).
def m(order)
  cells = Array.new(order) { Array.new(order) { rand(20) } }
  Matrix.rows(cells)
end
|
|
44
|
+
|
|
45
|
+
# Reducer step: stitches four quadrant matrices into one matrix.
#
# key    - String partition key. Its last character becomes the :identity of
#          the output record; the rest becomes the output :key.
# values - Array of Hashes, each with :identity ('00', '01', '10' or '11',
#          read as block row then block column) and :matrix.
#
# Returns {:key => String, :value => {:identity => String, :matrix => Matrix}}.
# Raises ArgumentError when one of the four quadrants is not present in
# +values+ (previously this surfaced as an opaque TypeError from indexing
# with nil).
def block_join_reduce(key, values)
  quadrant = lambda do |identity|
    entry = values.find { |v| v[:identity] == identity }
    if entry.nil?
      raise ArgumentError, "missing quadrant #{identity} for key #{key.inspect}"
    end
    entry[:matrix]
  end

  p00 = quadrant.call('00')
  p01 = quadrant.call('01')
  p10 = quadrant.call('10')
  p11 = quadrant.call('11')

  # Top half rows followed by bottom half rows.
  joined = Matrix.rows(join(p00, p01) + join(p10, p11))
  { :key => key[0..-2], :value => { :identity => key[-1], :matrix => joined } }
end
|
|
52
|
+
|
|
53
|
+
# Reducer step: adds up all matrices that share a partition key.
#
# key    - String partition key. Its last two characters become the
#          :identity of the output record; the rest becomes the output :key.
# values - non-empty Array of Hashes, each carrying a square :matrix of the
#          same order.
#
# Returns {:key => String, :value => {:matrix => Matrix, :identity => String}}.
def block_matrix_sum(key, values)
  matrices = values.map { |entry| entry[:matrix] }
  zero = Matrix.zero(matrices.first.row_size)
  total = matrices.inject(zero) { |acc, mat| acc + mat }
  { :key => key[0..-3], :value => { :matrix => total, :identity => key[-2..-1] } }
end
|
|
58
|
+
|
|
59
|
+
# Mapper step: multiplies the two operands of a leaf record.
#
# key   - String record key; its last character is dropped in the output so
#         that records differing only in that character land in one partition.
# value - Hash with operands under :a and :b (anything supporting +*+).
#
# Returns {:key => String, :value => {:matrix => product, :identity => String}},
# where both :key and :identity are +key+ without its final character.
def primitive_map(key, value)
  product = value[:a] * value[:b]
  {
    :key => key[0...-1],
    :value => { :matrix => product, :identity => key[0...-1] }
  }
end
|
|
62
|
+
|
|
63
|
+
# Demo driver: multiplies two random matrices through the MapReduce pipeline
# and prints whether the outcome equals the direct product.
order = 32
# Leaf products are 2x2, and each sum + join round doubles the block order.
reductions = (Math.log2(order) - 1).to_i
m1 = m(order)
m2 = m(order)

inputs = Inputs.new
inputs.setup(m1, m2, "X")
space = inputs.inputs

mappers = [method(:primitive_map)]

# Alternate "sum partial products" and "join quadrants" once per round.
sum_step = method(:block_matrix_sum)
join_step = method(:block_join_reduce)
reducers = (1..reductions).flat_map { [sum_step, join_step] }

result = MapReduceRunner.new(mappers, reducers).run(space)
product = result[0][:value][:matrix]
puts result
puts product
puts m1 * m2 == product
|
|
85
|
+
|
data/README
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby for quickly prototyping and testing parallel algorithms. It is so called because all of its operations run in a single thread, so it is really slow even for medium-sized datasets. However, Snail is intended to help prototype the logic and verify the correctness of a MapReduce algorithm, so speed and scaling are not its aims.
|
|
2
|
+
|
data/map_reduce.rb
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Groups records by key ahead of a reduce step.
class Partitioner
  # space - enumerable of Hashes with :key and :value entries.
  #
  # Returns a Hash mapping each distinct :key to the Array of :value entries
  # seen for it, in encounter order.
  def run(space)
    space.each_with_object({}) do |record, buckets|
      (buckets[record[:key]] ||= []) << record[:value]
    end
  end
end
|
|
12
|
+
|
|
13
|
+
# Applies a reduce block to every partition.
class Reducer
  # partitions - Hash of key => Array of values.
  #
  # Yields each key together with its values and returns an Array of the
  # block's results, in partition order.
  def run(partitions)
    partitions.map { |key, values| yield(key, values) }
  end
end
|
|
22
|
+
|
|
23
|
+
# Applies a map block to every record.
class Mapper
  # space - collection of Hashes that each have a :key entry.
  #
  # Yields the record's :key and the whole record (not just a :value entry),
  # and returns an Array of the block's results.
  def run(space)
    space.map { |record| yield(record[:key], record) }
  end
end
|
|
28
|
+
|
|
29
|
+
# Runs a pipeline of map steps followed by a pipeline of reduce steps.
class MapReduceRunner
  # mappers  - Array of callables taking (key, record) and returning a record
  #            Hash with :key and :value.
  # reducers - Array of callables taking (key, values) and returning a record
  #            Hash with :key and :value.
  def initialize(mappers, reducers)
    @mappers = mappers
    @reducers = reducers
  end

  # Feeds +space+ through every mapper in order, then through every reducer
  # in order, partitioning by :key before each reducer.
  #
  # Mappers are chained: each one consumes the previous mapper's output, the
  # same way reducers already consume the previous reducer's output. The
  # earlier code ran every mapper against the original +space+ and kept only
  # the last result, silently discarding the work of all other mappers (and
  # dropping the input entirely when no mapper was given).
  #
  # space - collection of record Hashes, each with a :key entry.
  #
  # Returns the Array of records produced by the final step.
  def run(space)
    mapped = @mappers.inject(space) do |records, mapper|
      Mapper.new.run(records) { |k, v| mapper.call(k, v) }
    end

    @reducers.inject(mapped) do |records, reducer|
      partitions = Partitioner.new.run(records)
      Reducer.new.run(partitions) { |k, v| reducer.call(k, v) }
    end
  end
end
|
|
45
|
+
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Extension of the standard Matrix class with 2x2 block partitioning.
class Matrix
  # Returns one quadrant of a square matrix whose order is a power of two.
  #
  # block_row    - 0 for the top half, 1 for the bottom half.
  # block_column - 0 for the left half, 1 for the right half.
  #
  # Returns a new matrix of half the order.
  # Raises RuntimeError ("Non 2^n matrix") when either dimension is not a
  # power of two, and ArgumentError when the matrix is not square or a block
  # index is not 0 or 1. Out-of-range indices used to produce a matrix filled
  # with nil, and non-square input used to be sliced using the row count for
  # both axes.
  def block(block_row, block_column)
    raise "Non 2^n matrix" if (row_size & (row_size - 1)) != 0 || (column_size & (column_size - 1)) != 0
    if row_size != column_size
      raise ArgumentError, "matrix must be square, got #{row_size}x#{column_size}"
    end
    unless [0, 1].include?(block_row) && [0, 1].include?(block_column)
      raise ArgumentError, "block indices must be 0 or 1, got (#{block_row}, #{block_column})"
    end

    lower_order = row_size / 2
    minor(block_row * lower_order, lower_order, block_column * lower_order, lower_order)
  end
end
|
|
18
|
+
|
data/snail.gemspec
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Gem specification for snail-map-reduce.
spec = Gem::Specification.new do |s|
  s.name = "snail-map-reduce"
  s.version = "0.0.1"
  s.author = "Avishek Sen Gupta"
  s.email = "avishek.sen.gupta@gmail.com"
  s.homepage = "http://avishek.net/blog"
  s.platform = Gem::Platform::RUBY
  # Package whatever git tracks; this shells out to git when the spec is loaded.
  s.files = `git ls-files`.split("\n")
  # The earlier placeholder summary ("Some description") was dead code: it was
  # overwritten by this assignment before the spec was ever used.
  s.summary = %q{Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby to quickly prototype and test parallel algorithms.}
  s.description = %q{Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby to quickly prototype and test parallel algorithms.}
end
|
|
13
|
+
|
metadata
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: snail-map-reduce
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
prerelease:
|
|
6
|
+
platform: ruby
|
|
7
|
+
authors:
|
|
8
|
+
- Avishek Sen Gupta
|
|
9
|
+
autorequire:
|
|
10
|
+
bindir: bin
|
|
11
|
+
cert_chain: []
|
|
12
|
+
date: 2011-11-10 00:00:00.000000000Z
|
|
13
|
+
dependencies: []
|
|
14
|
+
description: Snail is a single-threaded, in-memory, barebones MapReduce framework
|
|
15
|
+
written in Ruby to quickly prototype and test parallel algorithms.
|
|
16
|
+
email: avishek.sen.gupta@gmail.com
|
|
17
|
+
executables: []
|
|
18
|
+
extensions: []
|
|
19
|
+
extra_rdoc_files: []
|
|
20
|
+
files:
|
|
21
|
+
- .gitignore
|
|
22
|
+
- MapReduceMatrixProduct.rb
|
|
23
|
+
- README
|
|
24
|
+
- map_reduce.rb
|
|
25
|
+
- matrix_block_mixin.rb
|
|
26
|
+
- snail.gemspec
|
|
27
|
+
homepage: http://avishek.net/blog
|
|
28
|
+
licenses: []
|
|
29
|
+
post_install_message:
|
|
30
|
+
rdoc_options: []
|
|
31
|
+
require_paths:
|
|
32
|
+
- lib
|
|
33
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
34
|
+
none: false
|
|
35
|
+
requirements:
|
|
36
|
+
- - ! '>='
|
|
37
|
+
- !ruby/object:Gem::Version
|
|
38
|
+
version: '0'
|
|
39
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
40
|
+
none: false
|
|
41
|
+
requirements:
|
|
42
|
+
- - ! '>='
|
|
43
|
+
- !ruby/object:Gem::Version
|
|
44
|
+
version: '0'
|
|
45
|
+
requirements: []
|
|
46
|
+
rubyforge_project:
|
|
47
|
+
rubygems_version: 1.8.6
|
|
48
|
+
signing_key:
|
|
49
|
+
specification_version: 3
|
|
50
|
+
summary: Snail is a single-threaded, in-memory, barebones MapReduce framework written
|
|
51
|
+
in Ruby to quickly prototype and test parallel algorithms.
|
|
52
|
+
test_files: []
|