mapredus 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,102 @@
1
# Spec helper process that counts how often each character occurs in a stored
# string, wiring CharStream -> CharCounter -> Adder -> ToRedisHash.
class GetCharCount < MapRedus::Process
  # Per-character counts the spec compares against when this process is run
  # over GetWordCount::TEST.
  EXPECTED_ANSWER = {
    "k" => 2, "v" => 1, " " => 54, "," => 3, "w" => 7, "a" => 17,
    "l" => 12, "b" => 2, "m" => 4, "c" => 3, "." => 2, "y" => 3,
    "n" => 18, "D" => 1, "d" => 15, "o" => 13, "p" => 14, "e" => 34,
    "f" => 6, "r" => 13, "g" => 6, "S" => 1, "s" => 12, "h" => 19,
    "H" => 1, "t" => 20, "i" => 16, "u" => 5, "j" => 1
  }

  class << self
    # Returns the hash naming the classes used for each stage of the job.
    # No :keyname is given here; Document passes a key template when it
    # registers this process.
    def specification
      {
        :inputter  => CharStream,
        :mapper    => CharCounter,
        :reducer   => Adder,
        :finalizer => ToRedisHash,
        :outputter => MapRedus::RedisHasher,
        :ordered   => false
      }
    end
  end
end
14
+
15
# Spec helper process that counts case-insensitive word occurrences in a
# stored string, wiring WordStream -> WordCounter -> Adder -> ToRedisHash.
class GetWordCount < MapRedus::Process
  # Sample passage used as input data throughout the specs.
  TEST = "He pointed his finger in friendly jest and went over to the parapet laughing to himself. Stephen Dedalus stepped up, followed him wearily halfway and sat down on the edge of the gunrest, watching him still as he propped his mirror on the parapet, dipped the brush in the bowl and lathered cheeks and neck."

  # Expected count for every lower-cased word in TEST.
  EXPECTED_ANSWER = {
    "gunrest" => 1, "over" => 1, "still" => 1, "of" => 1, "him" => 2,
    "and" => 4, "bowl" => 1, "himself" => 1, "went" => 1, "friendly" => 1,
    "finger" => 1, "propped" => 1, "cheeks" => 1, "dipped" => 1, "down" => 1,
    "wearily" => 1, "up" => 1, "stepped" => 1, "dedalus" => 1, "to" => 2,
    "in" => 2, "sat" => 1, "the" => 6, "pointed" => 1, "as" => 1,
    "followed" => 1, "stephen" => 1, "laughing" => 1, "his" => 2, "he" => 2,
    "brush" => 1, "jest" => 1, "neck" => 1, "mirror" => 1, "edge" => 1,
    "on" => 2, "parapet" => 2, "lathered" => 1, "watching" => 1, "halfway" => 1
  }

  class << self
    # Returns the hash naming the classes used for each stage of the job and
    # the key ("test:result") under which the finalizer stores its output.
    def specification
      {
        :inputter  => WordStream,
        :mapper    => WordCounter,
        :reducer   => Adder,
        :finalizer => ToRedisHash,
        :outputter => MapRedus::RedisHasher,
        :ordered   => false,
        :keyname   => "test:result"
      }
    end
  end
end
30
+
31
# Input stream that hands the mapper a stored string ten words at a time.
class WordStream < MapRedus::InputStream
  # The data_object is a reference to something stored on your system (in
  # this example, the key of a redis string). The scanner breaks that object
  # into manageable pieces for the mapper.
  #
  # Yields (chunk_index, chunk) where chunk is up to ten whitespace-separated
  # words re-joined with single spaces.
  def self.scan(data_object)
    words_per_chunk = 10
    words = MapRedus::FileSystem.get(data_object).split

    words.each_slice(words_per_chunk).with_index do |chunk, chunk_index|
      yield(chunk_index, chunk.join(" "))
    end
  end
end
46
+
47
# Input stream that hands the mapper a stored string in 30-character slices.
class CharStream < MapRedus::InputStream
  # Fetches the string referenced by data_object and yields
  # (start_offset, slice) for each non-empty 30-character slice.
  def self.scan(data_object)
    slice_length = 30
    text = MapRedus::FileSystem.get(data_object)

    0.step(text.size, slice_length) do |offset|
      slice = text[offset, slice_length]
      # The final step can land exactly on the end of the string, which
      # produces an empty slice that must not be emitted.
      yield(offset, slice) unless slice.empty?
    end
  end
end
58
+
59
# Mapper that emits (word, 1) for every word in its input chunk.
class WordCounter < MapRedus::Mapper
  # Splits map_data on every non-word character and yields each non-empty
  # token, lower-cased, paired with a count of 1.
  def self.map(map_data)
    map_data.split(/\W/).each do |token|
      # Adjacent separators (e.g. ", ") leave empty tokens behind; skip them.
      yield(token.downcase, 1) unless token.empty?
    end
  end
end
67
+
68
# Mapper that emits (character, 1) for every character in its input chunk.
class CharCounter < MapRedus::Mapper
  # Yields each character of map_data, unchanged, paired with a count of 1.
  def self.map(map_data)
    map_data.each_char { |character| yield(character, 1) }
  end
end
75
+
76
# Reducer that sums the values emitted for a single key.
class Adder < MapRedus::Reducer
  # Converts every entry of value_list with to_i, adds them up and yields the
  # total once.
  def self.reduce(value_list)
    total = value_list.inject(0) { |sum, value| sum + value.to_i }
    yield(total)
  end
end
81
+
82
# Finalizer that writes every reduced (key, value) pair through the process's
# outputter under the process's keyname.
class ToRedisHash < MapRedus::Finalizer
  class << self
    # Walks the reduced results of the given process and encodes each pair
    # with process.outputter, using process.keyname as the destination.
    def finalize(process)
      process.each_key_reduced_value do |result_key, reduced|
        process.outputter.encode(process.keyname, result_key, reduced)
      end
    end
  end
end
89
+
90
# Example model showing how a class can expose a map reduce job through
# MapRedus::Support.
class Document
  include MapRedus::Support

  # Registers GetCharCount under the name :char_count with the key template
  # "document:count:ID".
  mapreduce_process :char_count, GetCharCount, "document:count:ID"

  attr_accessor :id

  # id: identifier passed along when the character count job is started.
  def initialize(id)
    self.id = id
  end

  # Starts the character count job for the data stored at data_reference,
  # passing this document's id as the second argument.
  def calculate_chars(data_reference)
    mapreduce.char_count(data_reference, id)
  end
end
@@ -0,0 +1,295 @@
1
+ require File.dirname(__FILE__) + '/helper'
2
+
3
describe "MapRedus" do
  # Runs before each example: empty the store, create a fresh word count
  # process and store the sample text for it to read.
  before(:each) do
    MapRedus::FileSystem.flushall
    @process = GetWordCount.create
    MapRedus::FileSystem.setnx("wordstream:test", GetWordCount::TEST)
  end

  # Checks shared by the synchronous and asynchronous runs: one map key per
  # expected word, exactly one reduced value per key, and decoded output
  # equal to the expected count.
  def verify_word_count_results
    @process.map_keys.size.should == GetWordCount::EXPECTED_ANSWER.size

    @process.map_keys.each do |map_key|
      @process.reduce_values(map_key).size.should == 1
    end

    @process.each_key_reduced_value do |word, _reduced|
      decoded = @process.outputter.decode(@process.keyname, word)
      decoded.to_i.should == GetWordCount::EXPECTED_ANSWER[word]
    end
  end

  it "creates a process successfully" do
    reopened = GetWordCount.open(@process.pid)

    reopened.inputter.should == WordStream
    reopened.mapper.should == WordCounter
    reopened.reducer.should == Adder
    reopened.finalizer.should == ToRedisHash
    reopened.outputter.should == MapRedus::RedisHasher
  end

  it "runs a map reduce process synchronously" do
    ##
    ## In general map reduce shouldn't be running operations synchronously
    ##
    @process.run("wordstream:test", true) # second argument: run synchronously
    verify_word_count_results
  end

  it "runs a map reduce process asynchronously" do
    @process.run("wordstream:test", false) # second argument: run asynchronously
    work_off
    verify_word_count_results
  end
end
53
+
54
describe "MapRedus Process" do
  # Every example starts from an empty store and a freshly created process.
  before(:each) do
    MapRedus::FileSystem.flushall
    @process = GetWordCount.create
  end

  it "saves a process" do
    @process.mapper = CharCounter
    @process.synchronous = true
    @process.save

    @process = MapRedus::Process.open(@process.pid)

    @process.mapper.should == CharCounter
    @process.synchronous.should == true
  end

  it "updates a process" do
    @process.update(:mapper => CharCounter, :ordered => true)
    @process = MapRedus::Process.open(@process.pid)

    @process.mapper.should == CharCounter
    @process.ordered.should == true
  end

  it "deletes a process" do
    @process.delete

    # Named `reopened` rather than `proc` so Kernel#proc is not shadowed.
    reopened = MapRedus::Process.open(@process.pid)
    reopened.should == nil
  end

  it "kills a process" do
    @process.run(GetWordCount::TEST)
    MapRedus::Process.kill(@process.pid)
    Resque.size(:mapredus).should == 0
  end

  it "kills a process that is started" do
    @process.run(GetWordCount::TEST)

    worker = Resque::Worker.new("*")
    worker.perform(worker.reserve) # do some work

    MapRedus::Process.kill(@process.pid)
    Resque.size(:mapredus).should == 0
  end

  it "kills all process" do
    proc_1 = GetWordCount.create
    proc_2 = GetWordCount.create
    proc_3 = GetWordCount.create
    proc_4 = GetWordCount.create
    proc_5 = GetWordCount.create
    proc_6 = GetWordCount.create

    proc_1.run(GetWordCount::TEST)
    proc_2.run(GetWordCount::TEST)
    proc_3.run(GetWordCount::TEST)

    # Work off a few jobs so some processes are already under way.
    worker = Resque::Worker.new("*")
    6.times do
      worker.perform(worker.reserve)
    end

    proc_4.run(GetWordCount::TEST)
    proc_5.run(GetWordCount::TEST)
    proc_6.run(GetWordCount::TEST)

    6.times do
      worker.perform(worker.reserve)
    end

    MapRedus::Process.kill_all
    # This must be an expectation: a bare `== []` comparison is evaluated and
    # thrown away, so the example could never fail.
    Resque.peek(:mapredus, 0, -1).should == []
  end

  it "responses to next state correctly" do
    @process.state.should == MapRedus::NOT_STARTED
    @process.next_state
    @process.state.should == MapRedus::INPUT_MAP_IN_PROGRESS
    work_off

    @process.next_state
    @process.state.should == MapRedus::REDUCE_IN_PROGRESS
    work_off

    @process.next_state
    @process.state.should == MapRedus::FINALIZER_IN_PROGRESS
    work_off

    @process.next_state
    @process.state.should == MapRedus::COMPLETE
  end

  it "emit_intermediate unordered successfully" do
    @process.emit_intermediate("hell", "yeah")
    result = []
    @process.each_key_nonreduced_value do |key, value|
      result << [key, value]
    end

    result.should == [["hell", "yeah"]]
  end

  it "emit_intermediate on an ordered process" do
    @process.update(:ordered => true)
    # On an ordered process the first argument is the ordering rank.
    @process.emit_intermediate(1, "number", "one")
    @process.emit_intermediate(2, "place", "two")
    res = []
    @process.each_key_nonreduced_value do |key, value|
      res << [key, value]
    end

    res.should == [["number", "one"], ["place", "two"]]
  end

  it "emit successfully" do
    @process.emit_intermediate("something", "non_reduced_value")
    @process.emit("something", "reduced")
    result = []
    @process.each_key_reduced_value do |key, rv|
      result << [key, rv]
    end
    result.should == [["something", "reduced"]]
  end

  it "produces the correct map keys" do
    @process.emit_intermediate("map key 1", "value")
    @process.emit_intermediate("map key 1", "value")
    @process.emit_intermediate("map key 2", "value")

    @process.map_keys.sort.should == ["map key 1", "map key 2"]
  end

  it "produces the correct map/reduce values" do
    MapRedus::FileSystem.setnx("wordstream:test", GetWordCount::TEST)
    @process.run("wordstream:test")
    work_off
    @process.map_keys.sort.should == GetWordCount::EXPECTED_ANSWER.keys.sort

    @process.each_key_reduced_value do |key, reduced_value|
      reduced_value.to_i.should == GetWordCount::EXPECTED_ANSWER[key]
      # The mapper emits "1" per occurrence, so a key has as many map values
      # as its reduced count.
      @process.map_values(key).should == ["1"] * reduced_value.to_i
    end
  end
end
201
+
202
describe "MapRedus Master" do
  # Empty store, a small piece of stored data and a fresh process per example.
  before(:each) do
    MapRedus::FileSystem.flushall
    MapRedus::FileSystem.setnx("test", "some data")
    @process = GetWordCount.create
  end

  it "handles slaves (enslaving) correctly" do
    MapRedus::Master.enslave(@process, WordCounter, @process.pid, "test")

    queued = Resque.peek(:mapredus, 0, -1)
    queued.should == [{ "args" => [@process.pid, "test"], "class" => "WordCounter" }]

    MapRedus::Master.slaves(@process.pid).should == ["1"]
  end

  it "handles slaves (freeing) correctly" do
    2.times do
      MapRedus::Master.enslave(@process, WordCounter, @process.pid, "test")
    end

    MapRedus::Master.slaves(@process.pid).should == ["1", "1"]

    2.times { MapRedus::Master.free_slave(@process.pid) }
    MapRedus::Master.slaves(@process.pid).should == []
  end

  # Pending: no body has been written for this example yet.
  it "handles redundant multiple workers (same output regardless of how many workers complete)"
end
228
+
229
describe "MapRedus Mapper/Reducer/Finalizer" do
  # Empty store, the one-word input "data" and a fresh process per example.
  before(:each) do
    MapRedus::FileSystem.flushall
    MapRedus::FileSystem.setnx("wordstream:test", "data")
    @process = GetWordCount.create
  end

  # Everything currently waiting on the :mapredus Resque queue.
  def queued_jobs
    Resque.peek(:mapredus, 0, -1)
  end

  it "runs a map correctly proceeding to the next state" do
    @process.update(:state => MapRedus::INPUT_MAP_IN_PROGRESS)
    @process.state.should == MapRedus::INPUT_MAP_IN_PROGRESS

    # The inputter should enqueue one mapper job, for chunk 0.
    @process.inputter.perform(@process.pid, "wordstream:test")
    queued_jobs.should == [{ "args" => [@process.pid, 0], "class" => "WordCounter" }]

    # Take that job off the queue and run the mapper by hand instead.
    Resque.pop(:mapredus)
    @process.mapper.perform(@process.pid, 0)
    @process.reload

    @process.state.should == MapRedus::REDUCE_IN_PROGRESS
    queued_jobs.should == [{ "args" => [@process.pid, "data"], "class" => "Adder" }]
  end

  it "runs a reduce correctly proceeding to the correct next state" do
    @process.update(:state => MapRedus::REDUCE_IN_PROGRESS)
    @process.state.should == MapRedus::REDUCE_IN_PROGRESS

    @process.emit_intermediate("data", "1")
    @process.reducer.perform(@process.pid, "data")
    @process.reload

    @process.state.should == MapRedus::FINALIZER_IN_PROGRESS
    queued_jobs.should == [{ "args" => [@process.pid], "class" => "ToRedisHash" }]
  end

  it "should test that the finalizer correctly saves" do
    @process.update(:state => MapRedus::FINALIZER_IN_PROGRESS)
    @process.state.should == MapRedus::FINALIZER_IN_PROGRESS

    @process.emit_intermediate("data", "1")
    @process.emit("data", "1")
    @process.finalizer.perform(@process.pid)
    @process.reload

    @process.state.should == MapRedus::COMPLETE
    queued_jobs.should == []
    @process.outputter.decode("test:result", "data").should == "1"
  end
end
270
+
271
describe "MapReduce Support" do
  # Empty store and two documents with different ids per example.
  before(:each) do
    MapRedus::FileSystem.flushall
    @doc = Document.new(10)
    @other_doc = Document.new(15)
  end

  it "should be simple to create a mapredus as a part of a job" do
    MapRedus::FileSystem.setnx("wordstream:test", GetWordCount::TEST)
    MapRedus::FileSystem.setnx("charstream:test", "simpler test")
    # Character counts of "simpler test".
    other_answer = {" "=>1, "l"=>1, "m"=>1, "e"=>2, "p"=>1, "r"=>1, "s"=>2, "t"=>2, "i"=>1}

    @doc.calculate_chars("wordstream:test")
    @other_doc.calculate_chars("charstream:test")
    work_off

    # Results are compared as strings against the expected integer counts.
    GetCharCount::EXPECTED_ANSWER.each do |char, expected_count|
      @doc.mapreduce.char_count_result(char).should == expected_count.to_s
    end

    other_answer.each do |char, expected_count|
      @other_doc.mapreduce.char_count_result(char).should == expected_count.to_s
    end
  end
end
metadata ADDED
@@ -0,0 +1,144 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mapredus
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - John Le
14
+ - Brian O'Rourke
15
+ autorequire:
16
+ bindir: bin
17
+ cert_chain: []
18
+
19
+ date: 2010-07-06 00:00:00 -07:00
20
+ default_executable:
21
+ dependencies:
22
+ - !ruby/object:Gem::Dependency
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 31
29
+ segments:
30
+ - 1
31
+ - 0
32
+ - 4
33
+ version: 1.0.4
34
+ type: :runtime
35
+ name: redis
36
+ prerelease: false
37
+ version_requirements: *id001
38
+ - !ruby/object:Gem::Dependency
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 31
45
+ segments:
46
+ - 1
47
+ - 8
48
+ version: "1.8"
49
+ type: :runtime
50
+ name: resque
51
+ prerelease: false
52
+ version_requirements: *id002
53
+ - !ruby/object:Gem::Dependency
54
+ requirement: &id003 !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ hash: 3
60
+ segments:
61
+ - 0
62
+ version: "0"
63
+ type: :runtime
64
+ name: resque-scheduler
65
+ prerelease: false
66
+ version_requirements: *id003
67
+ - !ruby/object:Gem::Dependency
68
+ requirement: &id004 !ruby/object:Gem::Requirement
69
+ none: false
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ hash: 3
74
+ segments:
75
+ - 0
76
+ version: "0"
77
+ type: :runtime
78
+ name: redis_support
79
+ prerelease: false
80
+ version_requirements: *id004
81
+ description: simple mapreduce framework using redis and resque
82
+ email: john@doloreslabs.com
83
+ executables: []
84
+
85
+ extensions: []
86
+
87
+ extra_rdoc_files:
88
+ - LICENSE
89
+ - README.md
90
+ files:
91
+ - lib/mapredus.rb
92
+ - lib/mapredus/filesystem.rb
93
+ - lib/mapredus/finalizer.rb
94
+ - lib/mapredus/inputter.rb
95
+ - lib/mapredus/keys.rb
96
+ - lib/mapredus/mapper.rb
97
+ - lib/mapredus/master.rb
98
+ - lib/mapredus/outputter.rb
99
+ - lib/mapredus/process.rb
100
+ - lib/mapredus/reducer.rb
101
+ - lib/mapredus/support.rb
102
+ - LICENSE
103
+ - README.md
104
+ - spec/helper.rb
105
+ - spec/helper_classes.rb
106
+ - spec/mapredus_spec.rb
107
+ has_rdoc: true
108
+ homepage: http://github.com/dolores/mapredus
109
+ licenses: []
110
+
111
+ post_install_message:
112
+ rdoc_options:
113
+ - --charset=UTF-8
114
+ require_paths:
115
+ - lib
116
+ required_ruby_version: !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ hash: 3
122
+ segments:
123
+ - 0
124
+ version: "0"
125
+ required_rubygems_version: !ruby/object:Gem::Requirement
126
+ none: false
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ hash: 3
131
+ segments:
132
+ - 0
133
+ version: "0"
134
+ requirements: []
135
+
136
+ rubyforge_project:
137
+ rubygems_version: 1.3.7
138
+ signing_key:
139
+ specification_version: 3
140
+ summary: mapredus initial
141
+ test_files:
142
+ - spec/helper.rb
143
+ - spec/helper_classes.rb
144
+ - spec/mapredus_spec.rb