snail-map-reduce 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,35 +2,33 @@ require 'rubygems'
2
2
  require 'statsample'
3
3
  require './matrix_block_mixin'
4
4
  require './map_reduce'
5
-
6
- class Inputs
7
- attr_accessor :inputs
8
- def initialize
9
- @inputs = []
5
+ require 'benchmark'
6
+
7
+ def setup(key, value)
8
+ inputs = []
9
+ a = value[:a]
10
+ b = value[:b]
11
+ if a.row_size == 2
12
+ inputs << {:key=> key, :a => a, :b => b}
13
+ return
10
14
  end
15
+ inputs << {:key => key + "00A", :value => {:a => a.block(0,0), :b => b.block(0,0)}}
16
+ inputs << {:key => key + "00B", :value => {:a => a.block(0,1), :b => b.block(1,0)}}
11
17
 
12
- def setup(a,b,key)
13
- if a.row_size == 2
14
- @inputs << {:key=> key, :a => a, :b => b}
15
- return
16
- end
17
- setup(a.block(0,0), b.block(0,0), key + "00A")
18
- setup(a.block(0,1), b.block(1,0), key + "00B")
18
+ inputs << {:key => key + "01A", :value => {:a => a.block(0,0), :b => b.block(0,1)}}
19
+ inputs << {:key => key + "01B", :value => {:a => a.block(0,1), :b => b.block(1,1)}}
19
20
 
20
- setup(a.block(0,0), b.block(0,1), key + "01A")
21
- setup(a.block(0,1), b.block(1,1), key + "01B")
21
+ inputs << {:key => key + "10A", :value => {:a => a.block(1,0), :b => b.block(0,0)}}
22
+ inputs << {:key => key + "10B", :value => {:a => a.block(1,1), :b => b.block(1,0)}}
22
23
 
23
- setup(a.block(1,0), b.block(0,0), key + "10A")
24
- setup(a.block(1,1), b.block(1,0), key + "10B")
24
+ inputs << {:key => key + "11A", :value => {:a => a.block(1,0), :b => b.block(0,1)}}
25
+ inputs << {:key => key + "11B", :value => {:a => a.block(1,1), :b => b.block(1,1)}}
25
26
 
26
- setup(a.block(1,0), b.block(0,1), key + "11A")
27
- setup(a.block(1,1), b.block(1,1), key + "11B")
28
-
29
- end
27
+ inputs
30
28
  end
31
29
 
32
30
  def join(left_block, right_block)
33
- rows = []
31
+ rows = []
34
32
  lower_order = left_block.row_size
35
33
  lower_order.times do |t|
36
34
  rows << left_block.row(t).to_a + right_block.row(t).to_a
@@ -57,20 +55,20 @@ def block_matrix_sum(key, values)
57
55
  end
58
56
 
59
57
  def primitive_map(key, value)
60
- {:key => key[0..-2], :value => {:matrix => value[:a] * value[:b], :identity => key[0..-2]}}
58
+ [{:key => key[0..-2], :value => {:matrix => value[:a] * value[:b], :identity => key[0..-2]}}]
61
59
  end
62
60
 
63
- order = 32
64
- reductions = (Math.log2(order) - 1).to_i
61
+ order = 64
62
+ mappings = reductions = (Math.log2(order) - 1).to_i
65
63
  m1 = m(order)
66
64
  m2 = m(order)
67
65
 
68
- inputs = Inputs.new
69
- inputs.setup(m1,m2,"X")
70
- space = inputs.inputs
71
-
66
+ mappers = []
67
+ mappings.times do
68
+ mappers << ->(k,v) {setup(k,v)}
69
+ end
72
70
 
73
- mappers = [->(k,v) {primitive_map(k,v)}]
71
+ mappers << ->(k,v) {primitive_map(k,v)}
74
72
  reducers = []
75
73
 
76
74
  reductions.times do
@@ -78,8 +76,15 @@ reductions.times do
78
76
  reducers << ->(k,v) {block_join_reduce(k,v)}
79
77
  end
80
78
 
81
- result = MapReduceRunner.new(mappers, reducers).run(space)
82
- puts result
83
- puts result[0][:value][:matrix]
79
+ result = []
80
+ mr_time = Benchmark.measure do
81
+ result = MapReduceRunner.new(mappers, reducers).run([{:key => "X", :value => {:a => m1, :b => m2}}])
82
+ end
83
+ plain_time = Benchmark.measure do
84
+ m1*m2
85
+ end
86
+ puts "Unthreaded time = #{plain_time}"
87
+ puts "MR time = #{mr_time}"
88
+
84
89
  puts m1*m2 == result[0][:value][:matrix]
85
90
 
data/README CHANGED
@@ -1,2 +1,4 @@
1
1
  Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby to quickly prototype and test parallel algorithms. It is so called because all of its operations run in a single thread, so it runs really slowly even for medium-sized datasets. However, Snail is intended to help prototype the logic and verify the correctness of a MapReduce algorithm; speed and scaling are not its aims.
2
+ You can install it using:
2
3
 
4
+ gem install snail-map-reduce
data/map_reduce.rb CHANGED
@@ -21,8 +21,12 @@ class Reducer
21
21
  end
22
22
 
23
23
  class Mapper
24
- def run(space)
25
- space.collect {|i| yield(i[:key], i)}
24
+ def run(pairs)
25
+ mapped_pairs = []
26
+ pairs.each do |pair|
27
+ mapped_pairs += yield(pair[:key], pair[:value])
28
+ end
29
+ mapped_pairs
26
30
  end
27
31
  end
28
32
 
@@ -32,14 +36,14 @@ class MapReduceRunner
32
36
  @reducers = reducers
33
37
  end
34
38
 
35
- def run(space)
39
+ def run(pairs)
36
40
  results = []
37
- @mappers.each {|mapper| results = Mapper.new.run(space) {|k,v| mapper.call(k,v)}}
41
+ @mappers.each {|mapper| pairs = Mapper.new.run(pairs) {|k,v| mapper.call(k,v)}}
38
42
  @reducers.each do |reducer|
39
- partitions = Partitioner.new.run(results)
40
- results = Reducer.new.run(partitions) {|k,v| reducer.call(k,v)}
43
+ partitions = Partitioner.new.run(pairs)
44
+ pairs = Reducer.new.run(partitions) {|k,v| reducer.call(k,v)}
41
45
  end
42
- results
46
+ pairs
43
47
  end
44
48
  end
45
49
 
data/snail.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  spec = Gem::Specification.new do |s|
2
2
  s.name = "snail-map-reduce"
3
- s.version = "0.0.1"
3
+ s.version = "0.0.2"
4
4
  s.author = "Avishek Sen Gupta"
5
5
  s.email = "avishek.sen.gupta@gmail.com"
6
6
  s.homepage = "http://avishek.net/blog"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: snail-map-reduce
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-11-10 00:00:00.000000000Z
12
+ date: 2011-11-11 00:00:00.000000000Z
13
13
  dependencies: []
14
14
  description: Snail is a single-threaded, in-memory, barebones MapReduce framework
15
15
  written in Ruby to quickly prototype and test parallel algorithms.