learn_mapreduce 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/learn_mapreduce.rb +72 -0
- metadata +45 -0
@@ -0,0 +1,72 @@
|
|
1
|
+
# Small framework to learn MapReduce
|
2
|
+
class MapReduce
  # Creates an instance with neither a mapper nor a reducer configured.
  def initialize
    @mapper = nil
    @reducer = nil
  end

  # Call this function with a block to set the mapper function.
  # The mapper takes a record as argument. During #run the block is evaluated
  # with a MapContext as +self+, so it can call +emit(key, value)+.
  # Returns the stored block (nil when called without a block).
  def map(&mapper)
    @mapper = mapper
  end

  # Call this function with a block to set the reducer function.
  # The reducer takes a key and a list of values as arguments. During #run the
  # block is evaluated with a ReduceContext as +self+, so it can call
  # +emit(value)+.
  # Returns the stored block (nil when called without a block).
  def reduce(&reducer)
    @reducer = reducer
  end

  # Run a MapReduce algorithm on the data provided.
  #
  # +data+ must respond to +each+; every yielded record is handed to the mapper.
  #
  # Returns a three-element array:
  #   [emitted tuples, shuffled tuples (hash of key => values), reducer output]
  #
  # Raises ArgumentError when the mapper or the reducer has not been set.
  def run(data)
    # `raise`, not `throw`: throw is the catch/throw control-flow primitive and
    # would only surface as an "uncaught throw" error with a mangled message.
    raise ArgumentError, "Mapper missing!" unless @mapper
    raise ArgumentError, "Reducer missing!" unless @reducer

    # instance_exec makes the context object `self` inside the user's block,
    # which is what exposes `emit` to it.
    map_context = MapContext.new
    data.each { |record| map_context.instance_exec(record, &@mapper) }

    reduce_context = ReduceContext.new
    map_context.shuffled.each do |key, list_of_values|
      reduce_context.instance_exec(key, list_of_values, &@reducer)
    end

    [map_context.emitted, map_context.shuffled, reduce_context.output]
  end
end
|
43
|
+
|
44
|
+
# The context within the mapper
|
45
|
+
# Used to give a custom 'emit' function
|
46
|
+
class MapContext
  # Collects everything a mapper block emits.
  #
  # emitted:: array of [key, value] pairs in emission order
  # shuffled:: hash mapping each key to the array of values emitted for it
  attr_accessor :emitted, :shuffled

  # Starts with no emitted pairs and no grouped values.
  def initialize
    @emitted = []
    @shuffled = {}
  end

  # Records one key/value pair from the mapper.
  # The pair is appended to +emitted+ and the value is appended to the list
  # stored under +key+ in +shuffled+ (the list is created on first use).
  # Returns the list of values currently stored for +key+.
  def emit(key, value)
    @emitted << [key, value]
    (@shuffled[key] ||= []) << value
  end
end
|
59
|
+
|
60
|
+
# The context within the reducer
|
61
|
+
# Used to give a custom 'emit' function
|
62
|
+
class ReduceContext
  # Collects everything a reducer block emits.
  #
  # output:: array of emitted values in emission order
  attr_accessor :output

  # Starts with an empty output list.
  def initialize
    @output = []
  end

  # Records one result value from the reducer by appending it to +output+.
  # Returns the output array.
  def emit(value)
    @output << value
  end
end
|
metadata
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: learn_mapreduce
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Aegis
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-05-28 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: A tiny implementation of the MapReduce Framework for learning purposes!
|
15
|
+
email:
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/learn_mapreduce.rb
|
21
|
+
homepage:
|
22
|
+
licenses: []
|
23
|
+
post_install_message:
|
24
|
+
rdoc_options: []
|
25
|
+
require_paths:
|
26
|
+
- lib
|
27
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
none: false
|
35
|
+
requirements:
|
36
|
+
- - ! '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
requirements: []
|
40
|
+
rubyforge_project:
|
41
|
+
rubygems_version: 1.8.24
|
42
|
+
signing_key:
|
43
|
+
specification_version: 3
|
44
|
+
summary: Tiny implementation of the MapReduce Framework.
|
45
|
+
test_files: []
|