snail-map-reduce 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/MapReduceMatrixProduct.rb +37 -32
- data/README +2 -0
- data/map_reduce.rb +11 -7
- data/snail.gemspec +1 -1
- metadata +2 -2
data/MapReduceMatrixProduct.rb
CHANGED
@@ -2,35 +2,33 @@ require 'rubygems'
|
|
2
2
|
require 'statsample'
|
3
3
|
require './matrix_block_mixin'
|
4
4
|
require './map_reduce'
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
require 'benchmark'
|
6
|
+
|
7
|
+
def setup(key, value)
|
8
|
+
inputs = []
|
9
|
+
a = value[:a]
|
10
|
+
b = value[:b]
|
11
|
+
if a.row_size == 2
|
12
|
+
inputs << {:key=> key, :a => a, :b => b}
|
13
|
+
return
|
10
14
|
end
|
15
|
+
inputs << {:key => key + "00A", :value => {:a => a.block(0,0), :b => b.block(0,0)}}
|
16
|
+
inputs << {:key => key + "00B", :value => {:a => a.block(0,1), :b => b.block(1,0)}}
|
11
17
|
|
12
|
-
|
13
|
-
|
14
|
-
@inputs << {:key=> key, :a => a, :b => b}
|
15
|
-
return
|
16
|
-
end
|
17
|
-
setup(a.block(0,0), b.block(0,0), key + "00A")
|
18
|
-
setup(a.block(0,1), b.block(1,0), key + "00B")
|
18
|
+
inputs << {:key => key + "01A", :value => {:a => a.block(0,0), :b => b.block(0,1)}}
|
19
|
+
inputs << {:key => key + "01B", :value => {:a => a.block(0,1), :b => b.block(1,1)}}
|
19
20
|
|
20
|
-
|
21
|
-
|
21
|
+
inputs << {:key => key + "10A", :value => {:a => a.block(1,0), :b => b.block(0,0)}}
|
22
|
+
inputs << {:key => key + "10B", :value => {:a => a.block(1,1), :b => b.block(1,0)}}
|
22
23
|
|
23
|
-
|
24
|
-
|
24
|
+
inputs << {:key => key + "11A", :value => {:a => a.block(1,0), :b => b.block(0,1)}}
|
25
|
+
inputs << {:key => key + "11B", :value => {:a => a.block(1,1), :b => b.block(1,1)}}
|
25
26
|
|
26
|
-
|
27
|
-
setup(a.block(1,1), b.block(1,1), key + "11B")
|
28
|
-
|
29
|
-
end
|
27
|
+
inputs
|
30
28
|
end
|
31
29
|
|
32
30
|
def join(left_block, right_block)
|
33
|
-
rows = []
|
31
|
+
rows = []
|
34
32
|
lower_order = left_block.row_size
|
35
33
|
lower_order.times do |t|
|
36
34
|
rows << left_block.row(t).to_a + right_block.row(t).to_a
|
@@ -57,20 +55,20 @@ def block_matrix_sum(key, values)
|
|
57
55
|
end
|
58
56
|
|
59
57
|
def primitive_map(key, value)
|
60
|
-
{:key => key[0..-2], :value => {:matrix => value[:a] * value[:b], :identity => key[0..-2]}}
|
58
|
+
[{:key => key[0..-2], :value => {:matrix => value[:a] * value[:b], :identity => key[0..-2]}}]
|
61
59
|
end
|
62
60
|
|
63
|
-
order =
|
64
|
-
reductions = (Math.log2(order) - 1).to_i
|
61
|
+
order = 64
|
62
|
+
mappings = reductions = (Math.log2(order) - 1).to_i
|
65
63
|
m1 = m(order)
|
66
64
|
m2 = m(order)
|
67
65
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
66
|
+
mappers = []
|
67
|
+
mappings.times do
|
68
|
+
mappers << ->(k,v) {setup(k,v)}
|
69
|
+
end
|
72
70
|
|
73
|
-
mappers
|
71
|
+
mappers << ->(k,v) {primitive_map(k,v)}
|
74
72
|
reducers = []
|
75
73
|
|
76
74
|
reductions.times do
|
@@ -78,8 +76,15 @@ reductions.times do
|
|
78
76
|
reducers << ->(k,v) {block_join_reduce(k,v)}
|
79
77
|
end
|
80
78
|
|
81
|
-
result =
|
82
|
-
|
83
|
-
|
79
|
+
result = []
|
80
|
+
mr_time = Benchmark.measure do
|
81
|
+
result = MapReduceRunner.new(mappers, reducers).run([{:key => "X", :value => {:a => m1, :b => m2}}])
|
82
|
+
end
|
83
|
+
plain_time = Benchmark.measure do
|
84
|
+
m1*m2
|
85
|
+
end
|
86
|
+
puts "Unthreaded time = #{plain_time}"
|
87
|
+
puts "MR time = #{mr_time}"
|
88
|
+
|
84
89
|
puts m1*m2 == result[0][:value][:matrix]
|
85
90
|
|
data/README
CHANGED
@@ -1,2 +1,4 @@
|
|
1
1
|
Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby to quickly prototype and test parallel algorithms. It is so called because all of its operations run in a single thread, so it runs really slowly for even medium-sized datasets. However, Snail is intended to help prototype the logic and verify the correctness of a MapReduce algorithm, so speed and scaling are not its aims.
|
2
|
+
You can install it using:
|
2
3
|
|
4
|
+
gem install snail-map-reduce
|
data/map_reduce.rb
CHANGED
@@ -21,8 +21,12 @@ class Reducer
|
|
21
21
|
end
|
22
22
|
|
23
23
|
class Mapper
|
24
|
-
def run(
|
25
|
-
|
24
|
+
def run(pairs)
|
25
|
+
mapped_pairs = []
|
26
|
+
pairs.each do |pair|
|
27
|
+
mapped_pairs += yield(pair[:key], pair[:value])
|
28
|
+
end
|
29
|
+
mapped_pairs
|
26
30
|
end
|
27
31
|
end
|
28
32
|
|
@@ -32,14 +36,14 @@ class MapReduceRunner
|
|
32
36
|
@reducers = reducers
|
33
37
|
end
|
34
38
|
|
35
|
-
def run(
|
39
|
+
def run(pairs)
|
36
40
|
results = []
|
37
|
-
@mappers.each {|mapper|
|
41
|
+
@mappers.each {|mapper| pairs = Mapper.new.run(pairs) {|k,v| mapper.call(k,v)}}
|
38
42
|
@reducers.each do |reducer|
|
39
|
-
partitions = Partitioner.new.run(
|
40
|
-
|
43
|
+
partitions = Partitioner.new.run(pairs)
|
44
|
+
pairs = Reducer.new.run(partitions) {|k,v| reducer.call(k,v)}
|
41
45
|
end
|
42
|
-
|
46
|
+
pairs
|
43
47
|
end
|
44
48
|
end
|
45
49
|
|
data/snail.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: snail-map-reduce
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-11-
|
12
|
+
date: 2011-11-11 00:00:00.000000000Z
|
13
13
|
dependencies: []
|
14
14
|
description: Snail is a single-threaded, in-memory, barebones MapReduce framework
|
15
15
|
written in Ruby to quickly prototype and test parallel algorithms.
|