snail-map-reduce 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,10 @@
1
+ *.*~
2
+ artifacts
3
+ csv
4
+ para
5
+ som
6
+ *.aux
7
+ *.dvi
8
+ *.log
9
+ *.swp
10
+ *~
@@ -0,0 +1,85 @@
1
+ require 'rubygems'
2
+ require 'statsample'
3
+ require './matrix_block_mixin'
4
+ require './map_reduce'
5
+
6
# Recursively splits a pair of matrices into the primitive block products
# needed to multiply them, tagging every pair with a routing key.
#
# Each level of recursion appends three characters to the key: the row and
# column index of the result quadrant the product contributes to, followed by
# "A" or "B" to distinguish the two products that make up that quadrant.
class Inputs
  attr_accessor :inputs

  def initialize
    @inputs = []
  end

  # Appends one {:key, :a, :b} hash to +inputs+ per primitive block product.
  #
  # a, b - operand matrices; must respond to +row_size+ and +block(row, col)+.
  # key  - String prefix accumulated so far.
  #
  # Returns nil.
  def setup(a, b, key)
    # `<= 2` rather than `== 2`: an operand with fewer than two rows can never
    # reach a row count of exactly 2 by halving, so the recursion would not
    # terminate. Such operands are emitted as a single primitive product.
    if a.row_size <= 2
      @inputs << {:key => key, :a => a, :b => b}
      return
    end

    # Result quadrant (i, j) is a.block(i, 0) * b.block(0, j) plus
    # a.block(i, 1) * b.block(1, j); the two terms are tagged "A" and "B".
    [0, 1].each do |i|
      [0, 1].each do |j|
        %w[A B].each_with_index do |tag, k|
          setup(a.block(i, k), b.block(k, j), key + "#{i}#{j}#{tag}")
        end
      end
    end
    nil
  end
end
31
+
32
# Places two blocks side by side.
#
# Returns an Array of row Arrays, one per row of +left_block+, each being the
# left block's row followed by the right block's row at the same index.
def join(left_block, right_block)
  (0...left_block.row_size).map do |index|
    left_block.row(index).to_a + right_block.row(index).to_a
  end
end
40
+
41
# Builds an order x order Matrix whose entries are each drawn from rand(20).
def m(order)
  Matrix.build(order) { |_row, _col| rand(20) }
end
44
+
45
# Reducer that stitches four quadrant matrices back into one matrix.
#
# key    - String; its last character becomes the :identity of the result and
#          the remainder becomes the outgoing :key.
# values - Array of hashes with :identity ('00', '01', '10' or '11') and
#          :matrix; all four identities must be present.
#
# Returns {:key, :value => {:identity, :matrix}}.
def block_join_reduce(key, values)
  p00, p01, p10, p11 = %w[00 01 10 11].map do |quadrant|
    values[values.index { |entry| entry[:identity] == quadrant }][:matrix]
  end

  # Top half is 00|01, bottom half is 10|11.
  combined = Matrix.rows(join(p00, p01) + join(p10, p11))
  {:key => key[0..-2], :value => {:identity => key[-1], :matrix => combined}}
end
52
+
53
# Reducer that adds up all matrices sharing a key.
#
# key    - String; its last two characters become the :identity of the result
#          and the remainder becomes the outgoing :key.
# values - non-empty Array of hashes with a :matrix entry.
#
# Returns {:key, :value => {:matrix, :identity}}.
def block_matrix_sum(key, values)
  zero = Matrix.zero(values.first[:matrix].row_size)
  total = values.inject(zero) { |acc, entry| acc + entry[:matrix] }
  {:key => key[0..-3], :value => {:matrix => total, :identity => key[-2..-1]}}
end
58
+
59
# Mapper that multiplies one pair of primitive blocks.
#
# key   - String routing key; its last character is dropped in the output.
# value - hash holding the operands under :a and :b.
#
# Returns {:key, :value => {:matrix, :identity}} where both :key and
# :identity are the incoming key without its last character.
def primitive_map(key, value)
  outgoing_key = key[0..-2]
  product = value[:a] * value[:b]
  identity = key[0..-2]
  {:key => outgoing_key, :value => {:matrix => product, :identity => identity}}
end
62
+
63
# Driver: multiplies two random order x order matrices through the
# map/reduce pipeline and compares the outcome with Matrix#*.
order = 32
# One sum/join reducer pair is queued per level of block decomposition.
reductions = (Math.log2(order) - 1).to_i
m1 = m(order)
m2 = m(order)

inputs = Inputs.new
inputs.setup(m1, m2, "X")
space = inputs.inputs

mappers = [->(k, v) { primitive_map(k, v) }]
reducers = (1..reductions).flat_map do
  [
    ->(k, v) { block_matrix_sum(k, v) },
    ->(k, v) { block_join_reduce(k, v) }
  ]
end

result = MapReduceRunner.new(mappers, reducers).run(space)
puts result

product = result[0][:value][:matrix]
puts product
puts m1*m2 == product
85
+
data/README ADDED
@@ -0,0 +1,2 @@
1
+ Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby to quickly prototype and test parallel algorithms. It is so called because all of its operations run in a single thread, so it is really slow even for medium-sized datasets. However, Snail is intended to help prototype the logic and verify the correctness of a MapReduce algorithm, so speed and scalability are not goals.
2
+
@@ -0,0 +1,45 @@
1
# Groups emitted items by key.
class Partitioner
  # space - collection of hashes carrying :key and :value.
  #
  # Returns a Hash mapping each distinct :key to the Array of :value entries
  # emitted under it, in encounter order.
  def run(space)
    space.each_with_object({}) do |item, buckets|
      (buckets[item[:key]] ||= []) << item[:value]
    end
  end
end
12
+
13
# Applies a reduce block to every partition.
class Reducer
  # partitions - Hash of key => values.
  #
  # Yields each key with its values and returns the Array of block results.
  def run(partitions)
    partitions.each_pair.map { |key, values| yield(key, values) }
  end
end
22
+
23
# Applies a map block to every item of the input space.
class Mapper
  # Yields each item's :key together with the whole item and returns the
  # Array of block results.
  def run(space)
    space.map do |item|
      yield(item[:key], item)
    end
  end
end
28
+
29
# Runs a list of mappers followed by a list of reducers over an input space.
class MapReduceRunner
  # mappers  - Array of callables invoked as call(key, item).
  # reducers - Array of callables invoked as call(key, values).
  def initialize(mappers, reducers)
    @mappers = mappers
    @reducers = reducers
  end

  # Feeds +space+ through every mapper in order, then repeatedly partitions
  # the current results by key and applies the next reducer.
  #
  # Returns the Array produced by the last stage. With no mappers the
  # reducers start from an empty result set.
  def run(space)
    results = []
    stage_input = space
    @mappers.each do |mapper|
      results = Mapper.new.run(stage_input) { |k, v| mapper.call(k, v) }
      # Chain the stages: the next mapper consumes this mapper's output
      # instead of the original space, so earlier mappers are not discarded.
      stage_input = results
    end

    @reducers.each do |reducer|
      partitions = Partitioner.new.run(results)
      results = Reducer.new.run(partitions) { |k, v| reducer.call(k, v) }
    end
    results
  end
end
45
+
@@ -0,0 +1,18 @@
1
require 'matrix'

# Adds quadrant extraction to the standard library Matrix.
class Matrix
  # Returns one quadrant of the matrix as a new Matrix.
  #
  # block_row    - 0 for the top half, 1 for the bottom half.
  # block_column - 0 for the left half, 1 for the right half.
  #
  # Raises RuntimeError ("Non 2^n matrix") unless both the row count and the
  # column count pass the power-of-two bit test below.
  def block(block_row, block_column)
    raise "Non 2^n matrix" if (row_size & (row_size - 1)) != 0 || (column_size & (column_size - 1)) != 0

    # Halve each dimension independently so that a non-square matrix is
    # split correctly; for square matrices both halves are equal.
    half_rows = row_size / 2
    half_columns = column_size / 2
    first_row = block_row * half_rows
    first_column = block_column * half_columns

    cells = Array.new(half_rows) do |r|
      Array.new(half_columns) { |c| self[first_row + r, first_column + c] }
    end
    Matrix.rows(cells)
  end
end
18
+
@@ -0,0 +1,13 @@
1
# Gem specification for snail-map-reduce.
spec = Gem::Specification.new do |s|
  s.name = "snail-map-reduce"
  s.version = "0.0.1"
  s.author = "Avishek Sen Gupta"
  s.email = "avishek.sen.gupta@gmail.com"
  s.homepage = "http://avishek.net/blog"
  s.platform = Gem::Platform::RUBY
  # Package every file tracked by git.
  s.files = `git ls-files`.split("\n")
  # The summary is assigned once; the earlier placeholder assignment was
  # overwritten and has been dropped.
  s.summary = %q{Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby to quickly prototype and test parallel algorithms.}
  s.description = %q{Snail is a single-threaded, in-memory, barebones MapReduce framework written in Ruby to quickly prototype and test parallel algorithms.}
end
13
+
metadata ADDED
@@ -0,0 +1,52 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: snail-map-reduce
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Avishek Sen Gupta
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-11-10 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: Snail is a single-threaded, in-memory, barebones MapReduce framework
15
+ written in Ruby to quickly prototype and test parallel algorithms.
16
+ email: avishek.sen.gupta@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - .gitignore
22
+ - MapReduceMatrixProduct.rb
23
+ - README
24
+ - map_reduce.rb
25
+ - matrix_block_mixin.rb
26
+ - snail.gemspec
27
+ homepage: http://avishek.net/blog
28
+ licenses: []
29
+ post_install_message:
30
+ rdoc_options: []
31
+ require_paths:
32
+ - lib
33
+ required_ruby_version: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ requirements: []
46
+ rubyforge_project:
47
+ rubygems_version: 1.8.6
48
+ signing_key:
49
+ specification_version: 3
50
+ summary: Snail is a single-threaded, in-memory, barebones MapReduce framework written
51
+ in Ruby to quickly prototype and test parallel algorithms.
52
+ test_files: []