snail-map-reduce 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ *.*~
2
+ artifacts
3
+ csv
4
+ para
5
+ som
6
+ *.aux
7
+ *.dvi
8
+ *.log
9
+ *.swp
10
+ *~
@@ -0,0 +1,85 @@
1
+ require 'rubygems'
2
+ require 'statsample'
3
+ require './matrix_block_mixin'
4
+ require './map_reduce'
5
+
6
# Collects the leaf-level multiplication tasks for a blocked matrix product.
#
# Each task is a Hash of the form {:key => String, :a => Matrix, :b => Matrix}
# where :a and :b are 2x2 operands and :key records the path of quadrant
# choices that led to them.
class Inputs
  # Flat list of leaf tasks accumulated by #setup.
  attr_accessor :inputs

  def initialize
    @inputs = []
  end

  # Recursively decomposes the product a * b into products of quadrant blocks
  # until the operands are 2x2, appending one task per leaf to #inputs.
  #
  # At every level the key is extended by three characters: the block-row
  # index taken from +a+, the block-column index taken from +b+, and "A" or
  # "B" to distinguish the two partial products that belong to that result
  # quadrant.
  #
  # @param a [Matrix] left operand; must respond to #row_size and #block
  # @param b [Matrix] right operand, split with #block in step with +a+
  # @param key [String] key prefix for every task generated below this call
  # @return [nil]
  # @raise [ArgumentError] if +a+ has fewer than two rows
  def setup(a, b, key)
    # Orders 0 and 1 never hit the 2x2 base case and would otherwise recurse
    # until the stack overflows, so reject them up front.
    raise ArgumentError, "matrix order must be at least 2, got #{a.row_size}" if a.row_size < 2

    if a.row_size == 2
      @inputs << {:key => key, :a => a, :b => b}
      return
    end

    [0, 1].each do |i|
      [0, 1].each do |j|
        # Result quadrant (i, j) is the sum of two partial products.
        setup(a.block(i, 0), b.block(0, j), key + "#{i}#{j}A")
        setup(a.block(i, 1), b.block(1, j), key + "#{i}#{j}B")
      end
    end
    nil
  end
end
31
+
32
# Places two blocks side by side, row by row.
#
# @param left_block [Matrix] supplies the row count and the leading columns
# @param right_block [Matrix] supplies the trailing columns of each row
# @return [Array<Array>] one array per row of +left_block+, holding that row's
#   elements followed by the elements of the same row of +right_block+
def join(left_block, right_block)
  Array.new(left_block.row_size) do |index|
    left_block.row(index).to_a + right_block.row(index).to_a
  end
end
40
+
41
# Builds a square matrix filled with random integers.
#
# @param order [Integer] number of rows and columns
# @return [Matrix] order x order matrix whose entries come from rand(20)
def m(order)
  Matrix.build(order) { rand(20) }
end
44
+
45
# Reducer step that stitches four quadrant matrices into one larger matrix.
#
# @param key [String] partition key; its last character becomes the
#   :identity of the emitted value and is dropped from the emitted key
# @param values [Array<Hash>] entries of the form
#   {:identity => '00'|'01'|'10'|'11', :matrix => Matrix}
# @return [Hash] {:key => String, :value => {:identity => String, :matrix => Matrix}}
# @raise [ArgumentError] if any of the four quadrants is absent from +values+
def block_join_reduce(key, values)
  # Look a quadrant up by identity and fail loudly when it is missing,
  # instead of indexing the array with nil.
  quadrant = lambda do |identity|
    entry = values.find { |v| v[:identity] == identity }
    raise ArgumentError, "missing quadrant #{identity} for key #{key.inspect}" unless entry
    entry[:matrix]
  end

  top = join(quadrant.call('00'), quadrant.call('01'))
  bottom = join(quadrant.call('10'), quadrant.call('11'))
  {:key => key[0..-2], :value => {:identity => key[-1], :matrix => Matrix.rows(top + bottom)}}
end
52
+
53
# Reducer step that adds together all matrices sharing a partition key.
#
# @param key [String] partition key; its last two characters become the
#   :identity of the emitted value and are dropped from the emitted key
# @param values [Array<Hash>] entries carrying a Matrix under :matrix
# @return [Hash] {:key => String, :value => {:matrix => Matrix, :identity => String}}
def block_matrix_sum(key, values)
  order = values.first[:matrix].row_size
  total = values.inject(Matrix.zero(order)) { |acc, entry| acc + entry[:matrix] }
  {:key => key[0..-3], :value => {:matrix => total, :identity => key[-2..-1]}}
end
58
+
59
# Mapper step that multiplies the two operand blocks of a leaf task.
#
# @param key [String] task key; its last character is dropped from the
#   emitted key
# @param value [Hash] task record holding the operands under :a and :b
# @return [Hash] {:key => String, :value => {:matrix => a * b, :identity => String}}
def primitive_map(key, value)
  block_key = key[0..-2]
  product = value[:a] * value[:b]
  payload = {:matrix => product, :identity => key[0..-2]}
  {:key => block_key, :value => payload}
end
62
+
63
# Demo driver: multiplies two random square matrices through the MapReduce
# pipeline and prints whether the result matches the direct product.
order = 32
# Inputs#setup splits down to 2x2 operands, so log2(order) - 1 levels of
# sum-then-join are needed to rebuild the full-size product.
reductions = (Math.log2(order) - 1).to_i
m1 = m(order)
m2 = m(order)

inputs = Inputs.new
inputs.setup(m1, m2, "X")
space = inputs.inputs

mappers = [->(k, v) { primitive_map(k, v) }]
# Each level first sums the partial products, then joins the four quadrants.
reducers = reductions.times.flat_map do
  [
    ->(k, v) { block_matrix_sum(k, v) },
    ->(k, v) { block_join_reduce(k, v) }
  ]
end

result = MapReduceRunner.new(mappers, reducers).run(space)
puts result
product = result[0][:value][:matrix]
puts product
puts(m1 * m2 == product)
85
+
data/README ADDED
@@ -0,0 +1,2 @@
1
+ Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby for quickly prototyping and testing parallel algorithms. It is so called because all of its operations run in a single thread, so it runs really slowly on even medium-sized datasets. However, Snail is intended to help prototype the logic and verify the correctness of a MapReduce algorithm; speed and scaling are not its aims.
2
+
@@ -0,0 +1,45 @@
1
# Groups emitted records by key ahead of a reduce step.
class Partitioner
  # @param space [Enumerable<Hash>] records exposing :key and :value
  # @return [Hash] each distinct key mapped to the Array of its :value
  #   entries, in the order the records were encountered
  def run(space)
    space.each_with_object({}) do |record, buckets|
      (buckets[record[:key]] ||= []) << record[:value]
    end
  end
end
12
+
13
# Applies a reduce block to every partition.
class Reducer
  # @param partitions [Hash] key mapped to the list of values for that key
  # @yield [key, values] invoked once per partition
  # @return [Array] the block's results, one per partition, in iteration order
  def run(partitions)
    partitions.each_pair.map { |key, values| yield(key, values) }
  end
end
22
+
23
# Applies a map block to every record of the input space.
class Mapper
  # @param space [Enumerable<Hash>] records exposing :key
  # @yield [key, record] the record's :key together with the whole record
  #   (not just a :value entry)
  # @return [Array] the block's results, one per record
  def run(space)
    space.map do |record|
      yield(record[:key], record)
    end
  end
end
28
+
29
# Runs a pipeline of map stages followed by a pipeline of reduce stages,
# entirely in memory and in the calling thread.
class MapReduceRunner
  # @param mappers [Array<#call>] callables taking (key, record) and
  #   returning a record with :key and :value
  # @param reducers [Array<#call>] callables taking (key, values) and
  #   returning a record with :key and :value
  def initialize(mappers, reducers)
    @mappers = mappers
    @reducers = reducers
  end

  # Feeds +space+ through every mapper in order, then through every reducer
  # in order, partitioning by key before each reducer.
  #
  # Mappers are chained: each one consumes the previous mapper's output, so no
  # stage's work is discarded. With an empty mapper list the input records go
  # to the reducers unchanged.
  #
  # @param space [Array<Hash>] input records exposing :key
  # @return [Array] output of the final stage
  def run(space)
    mapped = @mappers.inject(space) do |records, mapper|
      Mapper.new.run(records) { |k, v| mapper.call(k, v) }
    end

    @reducers.inject(mapped) do |records, reducer|
      partitions = Partitioner.new.run(records)
      Reducer.new.run(partitions) { |k, v| reducer.call(k, v) }
    end
  end
end
45
+
@@ -0,0 +1,18 @@
1
# Make sure the standard library class exists before it is reopened below.
require 'matrix'

# Adds quadrant extraction to the standard library Matrix.
class Matrix
  # Returns one of the four quadrants of the matrix as a new Matrix.
  #
  # Rows and columns are halved independently, so a rectangular matrix whose
  # dimensions are both powers of two is split correctly as well.
  #
  # @param block_row [Integer] 0 for the upper half, 1 for the lower half
  # @param block_column [Integer] 0 for the left half, 1 for the right half
  # @return [Matrix] the selected quadrant
  # @raise [RuntimeError] if either dimension is not a power of two
  def block(block_row, block_column)
    # n & (n - 1) is zero only when n has at most one bit set.
    raise "Non 2^n matrix" if (row_size & (row_size - 1)) != 0 || (column_size & (column_size - 1)) != 0

    block_rows = row_size / 2
    block_columns = column_size / 2
    start_row = block_row * block_rows
    start_column = block_column * block_columns

    Matrix.build(block_rows, block_columns) do |r, c|
      self[start_row + r, start_column + c]
    end
  end
end
18
+
@@ -0,0 +1,13 @@
1
# Gem specification for snail-map-reduce.
spec = Gem::Specification.new do |s|
  s.name = "snail-map-reduce"
  s.version = "0.0.1"
  s.author = "Avishek Sen Gupta"
  s.email = "avishek.sen.gupta@gmail.com"
  s.homepage = "http://avishek.net/blog"
  s.platform = Gem::Platform::RUBY
  # Package whatever `git ls-files` reports for the current directory.
  s.files = `git ls-files`.split("\n")
  # NOTE(review): no require_paths is set here; confirm the packaged files are
  # reachable through the default load path after installation.
  s.summary = %q{Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby to quickly prototype and test parallel algorithms.}
  s.description = %q{Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby to quickly prototype and test parallel algorithms.}
end
13
+
metadata ADDED
@@ -0,0 +1,52 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: snail-map-reduce
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Avishek Sen Gupta
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-11-10 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: Snail is a single-threaded, in-memory, barebones MapReduce framework
15
+ written in Ruby to quickly prototype and test parallel algorithms.
16
+ email: avishek.sen.gupta@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - .gitignore
22
+ - MapReduceMatrixProduct.rb
23
+ - README
24
+ - map_reduce.rb
25
+ - matrix_block_mixin.rb
26
+ - snail.gemspec
27
+ homepage: http://avishek.net/blog
28
+ licenses: []
29
+ post_install_message:
30
+ rdoc_options: []
31
+ require_paths:
32
+ - lib
33
+ required_ruby_version: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ requirements: []
46
+ rubyforge_project:
47
+ rubygems_version: 1.8.6
48
+ signing_key:
49
+ specification_version: 3
50
+ summary: Snail is a single-threaded, in-memory, barebones MapReduce framework written
51
+ in Ruby to quickly prototype and test parallel algorithms.
52
+ test_files: []