snail-map-reduce 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MapReduceMatrixProduct.rb +37 -32
- data/README +2 -0
- data/map_reduce.rb +11 -7
- data/snail.gemspec +1 -1
- metadata +2 -2
data/MapReduceMatrixProduct.rb
CHANGED
@@ -2,35 +2,33 @@ require 'rubygems'
|
|
2
2
|
require 'statsample'
|
3
3
|
require './matrix_block_mixin'
|
4
4
|
require './map_reduce'
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
require 'benchmark'
|
6
|
+
|
7
|
+
def setup(key, value)
|
8
|
+
inputs = []
|
9
|
+
a = value[:a]
|
10
|
+
b = value[:b]
|
11
|
+
if a.row_size == 2
|
12
|
+
inputs << {:key=> key, :a => a, :b => b}
|
13
|
+
return
|
10
14
|
end
|
15
|
+
inputs << {:key => key + "00A", :value => {:a => a.block(0,0), :b => b.block(0,0)}}
|
16
|
+
inputs << {:key => key + "00B", :value => {:a => a.block(0,1), :b => b.block(1,0)}}
|
11
17
|
|
12
|
-
|
13
|
-
|
14
|
-
@inputs << {:key=> key, :a => a, :b => b}
|
15
|
-
return
|
16
|
-
end
|
17
|
-
setup(a.block(0,0), b.block(0,0), key + "00A")
|
18
|
-
setup(a.block(0,1), b.block(1,0), key + "00B")
|
18
|
+
inputs << {:key => key + "01A", :value => {:a => a.block(0,0), :b => b.block(0,1)}}
|
19
|
+
inputs << {:key => key + "01B", :value => {:a => a.block(0,1), :b => b.block(1,1)}}
|
19
20
|
|
20
|
-
|
21
|
-
|
21
|
+
inputs << {:key => key + "10A", :value => {:a => a.block(1,0), :b => b.block(0,0)}}
|
22
|
+
inputs << {:key => key + "10B", :value => {:a => a.block(1,1), :b => b.block(1,0)}}
|
22
23
|
|
23
|
-
|
24
|
-
|
24
|
+
inputs << {:key => key + "11A", :value => {:a => a.block(1,0), :b => b.block(0,1)}}
|
25
|
+
inputs << {:key => key + "11B", :value => {:a => a.block(1,1), :b => b.block(1,1)}}
|
25
26
|
|
26
|
-
|
27
|
-
setup(a.block(1,1), b.block(1,1), key + "11B")
|
28
|
-
|
29
|
-
end
|
27
|
+
inputs
|
30
28
|
end
|
31
29
|
|
32
30
|
def join(left_block, right_block)
|
33
|
-
rows = []
|
31
|
+
rows = []
|
34
32
|
lower_order = left_block.row_size
|
35
33
|
lower_order.times do |t|
|
36
34
|
rows << left_block.row(t).to_a + right_block.row(t).to_a
|
@@ -57,20 +55,20 @@ def block_matrix_sum(key, values)
|
|
57
55
|
end
|
58
56
|
|
59
57
|
def primitive_map(key, value)
|
60
|
-
{:key => key[0..-2], :value => {:matrix => value[:a] * value[:b], :identity => key[0..-2]}}
|
58
|
+
[{:key => key[0..-2], :value => {:matrix => value[:a] * value[:b], :identity => key[0..-2]}}]
|
61
59
|
end
|
62
60
|
|
63
|
-
order =
|
64
|
-
reductions = (Math.log2(order) - 1).to_i
|
61
|
+
order = 64
|
62
|
+
mappings = reductions = (Math.log2(order) - 1).to_i
|
65
63
|
m1 = m(order)
|
66
64
|
m2 = m(order)
|
67
65
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
66
|
+
mappers = []
|
67
|
+
mappings.times do
|
68
|
+
mappers << ->(k,v) {setup(k,v)}
|
69
|
+
end
|
72
70
|
|
73
|
-
mappers
|
71
|
+
mappers << ->(k,v) {primitive_map(k,v)}
|
74
72
|
reducers = []
|
75
73
|
|
76
74
|
reductions.times do
|
@@ -78,8 +76,15 @@ reductions.times do
|
|
78
76
|
reducers << ->(k,v) {block_join_reduce(k,v)}
|
79
77
|
end
|
80
78
|
|
81
|
-
result =
|
82
|
-
|
83
|
-
|
79
|
+
result = []
|
80
|
+
mr_time = Benchmark.measure do
|
81
|
+
result = MapReduceRunner.new(mappers, reducers).run([{:key => "X", :value => {:a => m1, :b => m2}}])
|
82
|
+
end
|
83
|
+
plain_time = Benchmark.measure do
|
84
|
+
m1*m2
|
85
|
+
end
|
86
|
+
puts "Unthreaded time = #{plain_time}"
|
87
|
+
puts "MR time = #{mr_time}"
|
88
|
+
|
84
89
|
puts m1*m2 == result[0][:value][:matrix]
|
85
90
|
|
data/README
CHANGED
@@ -1,2 +1,4 @@
|
|
1
1
|
Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby to quickly prototype and test parallel algorithms. It is so called because all of its operations run in a single thread, so it is really slow for even medium-sized datasets. However, Snail is intended to help prototype the logic and verify the correctness of a MapReduce algorithm; speed and scaling are not its aims.
|
2
|
+
You can install it using:
|
2
3
|
|
4
|
+
gem install snail-map-reduce
|
data/map_reduce.rb
CHANGED
@@ -21,8 +21,12 @@ class Reducer
|
|
21
21
|
end
|
22
22
|
|
23
23
|
class Mapper
|
24
|
-
def run(
|
25
|
-
|
24
|
+
def run(pairs)
|
25
|
+
mapped_pairs = []
|
26
|
+
pairs.each do |pair|
|
27
|
+
mapped_pairs += yield(pair[:key], pair[:value])
|
28
|
+
end
|
29
|
+
mapped_pairs
|
26
30
|
end
|
27
31
|
end
|
28
32
|
|
@@ -32,14 +36,14 @@ class MapReduceRunner
|
|
32
36
|
@reducers = reducers
|
33
37
|
end
|
34
38
|
|
35
|
-
def run(
|
39
|
+
def run(pairs)
|
36
40
|
results = []
|
37
|
-
@mappers.each {|mapper|
|
41
|
+
@mappers.each {|mapper| pairs = Mapper.new.run(pairs) {|k,v| mapper.call(k,v)}}
|
38
42
|
@reducers.each do |reducer|
|
39
|
-
partitions = Partitioner.new.run(
|
40
|
-
|
43
|
+
partitions = Partitioner.new.run(pairs)
|
44
|
+
pairs = Reducer.new.run(partitions) {|k,v| reducer.call(k,v)}
|
41
45
|
end
|
42
|
-
|
46
|
+
pairs
|
43
47
|
end
|
44
48
|
end
|
45
49
|
|
data/snail.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: snail-map-reduce
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-11-
|
12
|
+
date: 2011-11-11 00:00:00.000000000Z
|
13
13
|
dependencies: []
|
14
14
|
description: Snail is a single-threaded, in-memory, barebones MapReduce framework
|
15
15
|
written in Ruby to quickly prototype and test parallel algorithms.
|