snail-map-reduce 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,35 +2,33 @@ require 'rubygems'
2
2
  require 'statsample'
3
3
  require './matrix_block_mixin'
4
4
  require './map_reduce'
5
-
6
- class Inputs
7
- attr_accessor :inputs
8
- def initialize
9
- @inputs = []
5
+ require 'benchmark'
6
+
7
+ def setup(key, value)
8
+ inputs = []
9
+ a = value[:a]
10
+ b = value[:b]
11
+ if a.row_size == 2
12
+ inputs << {:key=> key, :a => a, :b => b}
13
+ return
10
14
  end
15
+ inputs << {:key => key + "00A", :value => {:a => a.block(0,0), :b => b.block(0,0)}}
16
+ inputs << {:key => key + "00B", :value => {:a => a.block(0,1), :b => b.block(1,0)}}
11
17
 
12
- def setup(a,b,key)
13
- if a.row_size == 2
14
- @inputs << {:key=> key, :a => a, :b => b}
15
- return
16
- end
17
- setup(a.block(0,0), b.block(0,0), key + "00A")
18
- setup(a.block(0,1), b.block(1,0), key + "00B")
18
+ inputs << {:key => key + "01A", :value => {:a => a.block(0,0), :b => b.block(0,1)}}
19
+ inputs << {:key => key + "01B", :value => {:a => a.block(0,1), :b => b.block(1,1)}}
19
20
 
20
- setup(a.block(0,0), b.block(0,1), key + "01A")
21
- setup(a.block(0,1), b.block(1,1), key + "01B")
21
+ inputs << {:key => key + "10A", :value => {:a => a.block(1,0), :b => b.block(0,0)}}
22
+ inputs << {:key => key + "10B", :value => {:a => a.block(1,1), :b => b.block(1,0)}}
22
23
 
23
- setup(a.block(1,0), b.block(0,0), key + "10A")
24
- setup(a.block(1,1), b.block(1,0), key + "10B")
24
+ inputs << {:key => key + "11A", :value => {:a => a.block(1,0), :b => b.block(0,1)}}
25
+ inputs << {:key => key + "11B", :value => {:a => a.block(1,1), :b => b.block(1,1)}}
25
26
 
26
- setup(a.block(1,0), b.block(0,1), key + "11A")
27
- setup(a.block(1,1), b.block(1,1), key + "11B")
28
-
29
- end
27
+ inputs
30
28
  end
31
29
 
32
30
  def join(left_block, right_block)
33
- rows = []
31
+ rows = []
34
32
  lower_order = left_block.row_size
35
33
  lower_order.times do |t|
36
34
  rows << left_block.row(t).to_a + right_block.row(t).to_a
@@ -57,20 +55,20 @@ def block_matrix_sum(key, values)
57
55
  end
58
56
 
59
57
  def primitive_map(key, value)
60
- {:key => key[0..-2], :value => {:matrix => value[:a] * value[:b], :identity => key[0..-2]}}
58
+ [{:key => key[0..-2], :value => {:matrix => value[:a] * value[:b], :identity => key[0..-2]}}]
61
59
  end
62
60
 
63
- order = 32
64
- reductions = (Math.log2(order) - 1).to_i
61
+ order = 64
62
+ mappings = reductions = (Math.log2(order) - 1).to_i
65
63
  m1 = m(order)
66
64
  m2 = m(order)
67
65
 
68
- inputs = Inputs.new
69
- inputs.setup(m1,m2,"X")
70
- space = inputs.inputs
71
-
66
+ mappers = []
67
+ mappings.times do
68
+ mappers << ->(k,v) {setup(k,v)}
69
+ end
72
70
 
73
- mappers = [->(k,v) {primitive_map(k,v)}]
71
+ mappers << ->(k,v) {primitive_map(k,v)}
74
72
  reducers = []
75
73
 
76
74
  reductions.times do
@@ -78,8 +76,15 @@ reductions.times do
78
76
  reducers << ->(k,v) {block_join_reduce(k,v)}
79
77
  end
80
78
 
81
- result = MapReduceRunner.new(mappers, reducers).run(space)
82
- puts result
83
- puts result[0][:value][:matrix]
79
+ result = []
80
+ mr_time = Benchmark.measure do
81
+ result = MapReduceRunner.new(mappers, reducers).run([{:key => "X", :value => {:a => m1, :b => m2}}])
82
+ end
83
+ plain_time = Benchmark.measure do
84
+ m1*m2
85
+ end
86
+ puts "Unthreaded time = #{plain_time}"
87
+ puts "MR time = #{mr_time}"
88
+
84
89
  puts m1*m2 == result[0][:value][:matrix]
85
90
 
data/README CHANGED
@@ -1,2 +1,4 @@
1
1
  Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby to quickly prototype and test parallel algorithms. It is so called because all of its operations run in a single thread, so it runs really slowly for even medium-sized datasets. However, Snail is intended to help prototype the logic and verify the correctness of a MapReduce algorithm; speed and scaling are not goals.
2
+ You can install it using:
2
3
 
4
+ gem install snail-map-reduce
data/map_reduce.rb CHANGED
@@ -21,8 +21,12 @@ class Reducer
21
21
  end
22
22
 
23
23
  class Mapper
24
- def run(space)
25
- space.collect {|i| yield(i[:key], i)}
24
+ def run(pairs)
25
+ mapped_pairs = []
26
+ pairs.each do |pair|
27
+ mapped_pairs += yield(pair[:key], pair[:value])
28
+ end
29
+ mapped_pairs
26
30
  end
27
31
  end
28
32
 
@@ -32,14 +36,14 @@ class MapReduceRunner
32
36
  @reducers = reducers
33
37
  end
34
38
 
35
- def run(space)
39
+ def run(pairs)
36
40
  results = []
37
- @mappers.each {|mapper| results = Mapper.new.run(space) {|k,v| mapper.call(k,v)}}
41
+ @mappers.each {|mapper| pairs = Mapper.new.run(pairs) {|k,v| mapper.call(k,v)}}
38
42
  @reducers.each do |reducer|
39
- partitions = Partitioner.new.run(results)
40
- results = Reducer.new.run(partitions) {|k,v| reducer.call(k,v)}
43
+ partitions = Partitioner.new.run(pairs)
44
+ pairs = Reducer.new.run(partitions) {|k,v| reducer.call(k,v)}
41
45
  end
42
- results
46
+ pairs
43
47
  end
44
48
  end
45
49
 
data/snail.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  spec = Gem::Specification.new do |s|
2
2
  s.name = "snail-map-reduce"
3
- s.version = "0.0.1"
3
+ s.version = "0.0.2"
4
4
  s.author = "Avishek Sen Gupta"
5
5
  s.email = "avishek.sen.gupta@gmail.com"
6
6
  s.homepage = "http://avishek.net/blog"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: snail-map-reduce
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-11-10 00:00:00.000000000Z
12
+ date: 2011-11-11 00:00:00.000000000Z
13
13
  dependencies: []
14
14
  description: Snail is a single-threaded, in-memory, barebones MapReduce framework
15
15
  written in Ruby to quickly prototype and test parallel algorithms.