mapredus 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -114,10 +114,13 @@ an example:
114
114
  end
115
115
  end
116
116
 
117
- The process.keyname refers the final result key that is stored in
118
- redis. The outputter is needed to define how exactly that encoding is
119
- defined. We provided an outputter that encodes your data into a redis
120
- hash.
117
+ The process.result_key refers to the final result key that is stored in
118
+ redis. The result_key may take arguments which define the output of
119
+ the key. The process will also incorporate initially given key
120
+ arguments into the result_key. result_keys are defined exactly as a
121
+ redis_key in the redis_support gem. The outputter is needed to define
122
+ how exactly that encoding is defined. We provided an outputter that
123
+ encodes your data into a redis hash.
121
124
 
122
125
  class RedisHasher < MapRedus::Outputter
123
126
  def encode(result_key, k, v)
@@ -166,6 +169,7 @@ Requirements
166
169
  TODO
167
170
  ----
168
171
  not necessarily in the given order
172
+ * Ensure that the type that is inputted is the type that is outputted
169
173
 
170
174
  * if a process fails we do what we are supposed to do i.e. add a
171
175
  failure_hook which does something if your process fails
@@ -39,6 +39,10 @@ module MapRedus
39
39
  end
40
40
 
41
41
  class RedisHasher < Outputter
42
+ def self.values(result_key)
43
+ FileSystem.hvals(result_key)
44
+ end
45
+
42
46
  def self.keys(result_key)
43
47
  FileSystem.hkeys(result_key)
44
48
  end
data/lib/mapredus/keys.rb CHANGED
@@ -3,7 +3,7 @@ module MapRedus
3
3
  ProcessInfo = RedisKey
4
4
 
5
5
  #### USED WITHIN process.rb ####
6
-
6
+
7
7
  # Holds the current map reduce processes that are either running or which still have data lying around
8
8
  #
9
9
  redis_key :processes, "mapredus:processes"
@@ -36,6 +36,10 @@ module MapRedus
36
36
  #
37
37
  redis_key :temp, "mapredus:process:PID:temp_reduce_key:HASHED_KEY:UNIQUE_REDUCE_HOSTNAME:UNIQUE_REDUCE_PROCESS_ID"
38
38
 
39
+ # The default location for the result
40
+ #
41
+ DEFAULT_RESULT_KEY = "mapredus:process:PID:result"
42
+
39
43
  #### USED WITHIN master.rb ####
40
44
 
41
45
  # Keeps track of the current slaves (by appending "1" to a redis list)
@@ -3,7 +3,7 @@ module MapRedus
3
3
  # a master interface with Resque
4
4
  #
5
5
  # Does bookkeeping to keep track of how many slaves are doing work. If we have
6
- # no slaves doing work for a process then the process is done. While there is work available
6
+ # no slaves doing work for a process then the process is done. While there is work available
7
7
  # the slaves will always be doing work.
8
8
  #
9
9
  class Master < QueueProcess
@@ -49,9 +49,23 @@ module MapRedus
49
49
  enslave( process, process.inputter, process.pid, data_object )
50
50
  end
51
51
 
52
+ # Enslave the reducers:
53
+ #
54
+ # For each key, enslave a reducer to process the values on that
55
+ # key. If there were no keys produced during the map operation we
56
+ # must set off the finalizer.
57
+ #
58
+ # TODO: inject optimizations here for special reducers like the
59
+ # identity reduce
60
+ #
61
+ # returns nothing
52
62
  def self.enslave_reducers( process )
53
- process.map_keys.each do |key|
54
- enslave_reduce( process, key )
63
+ if( process.num_keys > 0 )
64
+ process.map_keys.each do |key|
65
+ enslave_reduce( process, key )
66
+ end
67
+ else
68
+ process.next_state
55
69
  end
56
70
  end
57
71
 
@@ -79,7 +93,7 @@ module MapRedus
79
93
  #
80
94
  def self.enslave( process, klass, *args )
81
95
  FileSystem.rpush(ProcessInfo.slaves(process.pid), 1)
82
-
96
+
83
97
  if( process.synchronous )
84
98
  klass.perform(*args)
85
99
  else
@@ -236,6 +236,14 @@ module MapRedus
236
236
  end
237
237
  end
238
238
 
239
+ def num_keys()
240
+ if( not @ordered )
241
+ FileSystem.scard( ProcessInfo.keys(@pid) )
242
+ else
243
+ FileSystem.zcard( ProcessInfo.keys(@pid) )
244
+ end
245
+ end
246
+
239
247
  # values that the map operation produced, for a key
240
248
  #
241
249
  # Examples
@@ -265,12 +273,23 @@ module MapRedus
265
273
  FileSystem.lrange( ProcessInfo.reduce(@pid, hashed_key), 0, -1 )
266
274
  end
267
275
 
276
+ # functions to manage the location of the result in the FileSystem
277
+ #
278
+ # Examples
279
+ # process.result_key(extra, arguments)
280
+ # Process.result_key(all, needed, arguments)
281
+ # # => "something:that:uses:the:extra:arguments"
282
+ #
283
+ # SomeProcessSubclass.set_result_key("something:ARG:something:VAR")
284
+ # sets the result key for the class (CAPITALIZED segments require arguments to fill in their values)
268
285
  def result_key(*args)
269
286
  Helper.class_get(@type).result_key(*[@key_args, args].flatten)
270
287
  end
271
288
 
272
289
  def self.result_key(*args)
273
- ProcessInfo.send( "#{self.to_s.gsub(/\W/,"_")}_result_cache", *args )
290
+ key_maker = "#{self.to_s.gsub(/\W/,"_")}_result_cache"
291
+ key_maker = ProcessInfo.respond_to?(key_maker) ? key_maker : "#{MapRedus::Process.to_s.gsub(/\W/,"_")}_result_cache"
292
+ ProcessInfo.send( key_maker, *args )
274
293
  end
275
294
 
276
295
  def self.set_result_key(key_struct)
@@ -303,11 +322,11 @@ module MapRedus
303
322
  #
304
323
  # Example
305
324
  # class AnswerDistribution < MapRedus::Process
306
- # inputter = JudgmentStream
307
- # mapper = ResponseFrequencyMap
308
- # reducer = Adder
309
- # finalizer = AnswerCount
310
- # outputter = MapRedus::RedisHasher
325
+ # inputter JudgmentStream
326
+ # mapper ResponseFrequencyMap
327
+ # reducer Adder
328
+ # finalizer AnswerCount
329
+ # outputter MapRedus::RedisHasher
311
330
  # end
312
331
  class << self; attr_reader *ATTRS; end
313
332
 
@@ -343,6 +362,7 @@ module MapRedus
343
362
  finalizer ToRedisHash
344
363
  outputter RedisHasher
345
364
  type Process
365
+ set_result_key DEFAULT_RESULT_KEY
346
366
 
347
367
  # This function returns all the redis keys produced associated
348
368
  # with a process's process id.
data/spec/helper.rb CHANGED
@@ -35,6 +35,7 @@ end
35
35
  #
36
36
  # Set the redis server
37
37
  #
38
+ RedisSupport.redis = 'localhost:9736:0'
38
39
  MapRedus.redis = 'localhost:9736:0'
39
40
  Resque.redis = MapRedus.redis
40
41
  require 'resque/failure/redis'
@@ -18,18 +18,49 @@ class CharCounter < MapRedus::Mapper
18
18
  end
19
19
  end
20
20
 
21
+ class ExtraResultKeyHash < MapRedus::Finalizer
22
+ def self.finalize(process)
23
+ process.each_key_reduced_value do |key, value|
24
+ process.outputter.encode(process.result_key("extra"), key, value)
25
+ end
26
+ end
27
+ end
28
+
21
29
  class GetCharCount < MapRedus::Process
22
30
  EXPECTED_ANSWER = {"k"=>2, "v"=>1, " "=>54, ","=>3, "w"=>7, "a"=>17, "l"=>12, "b"=>2, "m"=>4, "c"=>3, "."=>2, "y"=>3, "n"=>18, "D"=>1, "d"=>15, "o"=>13, "p"=>14, "e"=>34, "f"=>6, "r"=>13, "g"=>6, "S"=>1, "s"=>12, "h"=>19, "H"=>1, "t"=>20, "i"=>16, "u"=>5, "j"=>1}
23
31
  inputter CharStream
24
32
  mapper CharCounter
25
33
  end
26
34
 
35
+ class CharCountTest < MapRedus::Process
36
+ inputter CharStream
37
+ mapper CharCounter
38
+ end
39
+
27
40
  class GetWordCount < MapRedus::Process
28
41
  TEST = "He pointed his finger in friendly jest and went over to the parapet laughing to himself. Stephen Dedalus stepped up, followed him wearily halfway and sat down on the edge of the gunrest, watching him still as he propped his mirror on the parapet, dipped the brush in the bowl and lathered cheeks and neck."
29
42
  EXPECTED_ANSWER = {"gunrest"=>1, "over"=>1, "still"=>1, "of"=>1, "him"=>2, "and"=>4, "bowl"=>1, "himself"=>1, "went"=>1, "friendly"=>1, "finger"=>1, "propped"=>1, "cheeks"=>1, "dipped"=>1, "down"=>1, "wearily"=>1, "up"=>1, "stepped"=>1, "dedalus"=>1, "to"=>2, "in"=>2, "sat"=>1, "the"=>6, "pointed"=>1, "as"=>1, "followed"=>1, "stephen"=>1, "laughing"=>1, "his"=>2, "he"=>2, "brush"=>1, "jest"=>1, "neck"=>1, "mirror"=>1, "edge"=>1, "on"=>2, "parapet"=>2, "lathered"=>1, "watching"=>1, "halfway"=>1}
30
43
  set_result_key "test:result"
31
44
  end
32
45
 
46
+ class TestHash < MapRedus::Finalizer
47
+ def self.finalize(process)
48
+ process.each_key_reduced_value do |key, value|
49
+ process.outputter.encode(process.result_key("extra_arg"), key, value)
50
+ end
51
+ end
52
+ end
53
+
54
+ class TestResultKeyArguments < MapRedus::Process
55
+ #
56
+ # EXTRA_KEY_ARG is not known at the time the process is run
57
+ # but it is known by the time the finalizer is running
58
+ #
59
+ finalizer TestHash
60
+ set_result_key "test:KEY_ARG:test:EXTRA_KEY_ARG"
61
+ key_args ["key_argument"]
62
+ end
63
+
33
64
  class Document
34
65
  include MapRedus::Support
35
66
  mapreduce_process :char_count, GetCharCount, "document:count:ID"
@@ -78,6 +78,59 @@ describe "MapRedus" do
78
78
  @process.outputter.decode(@process.result_key, key).to_i.should == GetWordCount::EXPECTED_ANSWER[key]
79
79
  end
80
80
  end
81
+
82
+ it "runs the default process" do
83
+ process = MapRedus::Process.create
84
+ process.update(:key_args => [process.pid])
85
+ process.result_key.should == "mapredus:process:#{process.pid}:result"
86
+ process.run("wordstream:test")
87
+ work_off
88
+
89
+ process.map_keys.size.should == GetWordCount::EXPECTED_ANSWER.size
90
+ process.map_keys.each do |key|
91
+ reduce_values = process.reduce_values(key)
92
+ reduce_values.size.should == 1
93
+ end
94
+
95
+ process.each_key_reduced_value do |key, value|
96
+ process.outputter.decode(process.result_key, key).to_i.should == GetWordCount::EXPECTED_ANSWER[key]
97
+ end
98
+ end
99
+
100
+ it "runs a process without result_key being set (using the default key location)" do
101
+ process = CharCountTest.create
102
+ process.update(:key_args => [process.pid])
103
+ process.result_key.should == "mapredus:process:#{process.pid}:result"
104
+ process.run("wordstream:test")
105
+ work_off
106
+
107
+ process.map_keys.size.should == GetCharCount::EXPECTED_ANSWER.size
108
+ process.map_keys.each do |key|
109
+ reduce_values = process.reduce_values(key)
110
+ reduce_values.size.should == 1
111
+ end
112
+
113
+ process.each_key_reduced_value do |key, value|
114
+ process.outputter.decode(process.result_key, key).to_i.should == GetCharCount::EXPECTED_ANSWER[key]
115
+ end
116
+ end
117
+
118
+ it "runs a process where key arguments exist and extra arguments are used" do
119
+ process = TestResultKeyArguments.create
120
+ process.result_key("extra_arg").should == "test:key_argument:test:extra_arg"
121
+ process.run("wordstream:test")
122
+ work_off
123
+
124
+ process.map_keys.size.should == GetWordCount::EXPECTED_ANSWER.size
125
+ process.map_keys.each do |key|
126
+ reduce_values = process.reduce_values(key)
127
+ reduce_values.size.should == 1
128
+ end
129
+
130
+ process.each_key_reduced_value do |key, value|
131
+ process.outputter.decode(process.result_key("extra_arg"), key).to_i.should == GetWordCount::EXPECTED_ANSWER[key]
132
+ end
133
+ end
81
134
  end
82
135
 
83
136
  describe "MapRedus Process" do
@@ -154,19 +207,40 @@ describe "MapRedus Process" do
154
207
  end
155
208
 
156
209
  MapRedus::Process.kill_all
157
- Resque.peek(:mapredus, 0, -1) == []
210
+ Resque.peek(:mapredus, 0, 100) == []
158
211
  end
159
212
 
160
- it "responses to next state correctly" do
213
+ it "responds to next state correctly" do
161
214
  @process.state.should == MapRedus::NOT_STARTED
162
215
  @process.next_state
163
216
  @process.state.should == MapRedus::INPUT_MAP_IN_PROGRESS
164
217
  work_off
165
218
 
219
+ ##
220
+ ## Since there are no map keys produced in this test, the next state
221
+ ## should go directly to the finalizer
222
+ ##
223
+
166
224
  @process.next_state
167
- @process.state.should == MapRedus::REDUCE_IN_PROGRESS
225
+ @process.state.should == MapRedus::FINALIZER_IN_PROGRESS
168
226
  work_off
227
+
228
+ @process.next_state
229
+ @process.state.should == MapRedus::COMPLETE
230
+ end
169
231
 
232
+ it "responds to next state correcty when keys are produced" do
233
+ @process.state.should == MapRedus::NOT_STARTED
234
+ @process.next_state
235
+ @process.state.should == MapRedus::INPUT_MAP_IN_PROGRESS
236
+ work_off
237
+
238
+ @process.emit_intermediate("hell", "yeah")
239
+
240
+ @process.next_state
241
+ @process.state.should == MapRedus::REDUCE_IN_PROGRESS
242
+ work_off
243
+
170
244
  @process.next_state
171
245
  @process.state.should == MapRedus::FINALIZER_IN_PROGRESS
172
246
  work_off
@@ -237,7 +311,7 @@ describe "MapRedus Master" do
237
311
 
238
312
  it "handles slaves (enslaving) correctly" do
239
313
  MapRedus::Master.enslave(@process, MapRedus::WordCounter, @process.pid, "test")
240
- Resque.peek(:mapredus, 0, -1).should == [{"args"=>[@process.pid, "test"], "class"=>"MapRedus::WordCounter"}]
314
+ Resque.peek(:mapredus, 0, 1).should == {"args"=>[@process.pid, "test"], "class"=>"MapRedus::WordCounter"}
241
315
  MapRedus::Master.slaves(@process.pid).should == ["1"]
242
316
  end
243
317
 
@@ -266,12 +340,14 @@ describe "MapRedus Mapper/Reducer/Finalizer" do
266
340
  @process.update(:state => MapRedus::INPUT_MAP_IN_PROGRESS)
267
341
  @process.state.should == MapRedus::INPUT_MAP_IN_PROGRESS
268
342
  @process.inputter.perform(@process.pid, "wordstream:test")
269
- Resque.peek(:mapredus, 0, -1).should == [{"args"=>[@process.pid, 0], "class"=>"MapRedus::WordCounter"}]
343
+ Resque.peek(:mapredus, 0, 1).should == {"args"=>[@process.pid, 0], "class"=>"MapRedus::WordCounter"}
270
344
  Resque.pop(:mapredus)
271
345
  @process.mapper.perform(@process.pid, 0)
272
346
  @process.reload
273
347
  @process.state.should == MapRedus::REDUCE_IN_PROGRESS
274
- Resque.peek(:mapredus, 0, -1).should == [{"args"=>[@process.pid, "data"], "class"=>"MapRedus::Adder"}]
348
+ Resque.peek(:mapredus, 0, 1).should == {"args"=>[@process.pid, "data"], "class"=>"MapRedus::Adder"}
349
+
350
+ MapRedus::Process.open(@process.pid).state.should == MapRedus::REDUCE_IN_PROGRESS
275
351
  end
276
352
 
277
353
  it "runs a reduce correctly proceeding to the correct next state" do
@@ -281,7 +357,9 @@ describe "MapRedus Mapper/Reducer/Finalizer" do
281
357
  @process.reducer.perform(@process.pid, "data")
282
358
  @process.reload
283
359
  @process.state.should == MapRedus::FINALIZER_IN_PROGRESS
284
- Resque.peek(:mapredus, 0, -1).should == [{"args"=>[@process.pid], "class"=>"MapRedus::ToRedisHash"}]
360
+ Resque.peek(:mapredus, 0, 1).should == {"args"=>[@process.pid], "class"=>"MapRedus::ToRedisHash"}
361
+
362
+ MapRedus::Process.open(@process.pid).state.should == MapRedus::FINALIZER_IN_PROGRESS
285
363
  end
286
364
 
287
365
  it "should test that the finalizer correctly saves" do
@@ -292,8 +370,10 @@ describe "MapRedus Mapper/Reducer/Finalizer" do
292
370
  @process.finalizer.perform(@process.pid)
293
371
  @process.reload
294
372
  @process.state.should == MapRedus::COMPLETE
295
- Resque.peek(:mapredus, 0, -1).should == []
373
+ Resque.peek(:mapredus, 0, 100).should == []
296
374
  @process.outputter.decode("test:result", "data").should == "1"
375
+
376
+ MapRedus::Process.open(@process.pid).state.should == MapRedus::COMPLETE
297
377
  end
298
378
  end
299
379
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mapredus
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - John Le
@@ -16,11 +16,13 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2010-07-09 00:00:00 -07:00
19
+ date: 2010-07-15 00:00:00 -07:00
20
20
  default_executable:
21
21
  dependencies:
22
22
  - !ruby/object:Gem::Dependency
23
- requirement: &id001 !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ name: redis
25
+ version_requirements: &id001 !ruby/object:Gem::Requirement
24
26
  none: false
25
27
  requirements:
26
28
  - - ">="
@@ -31,12 +33,12 @@ dependencies:
31
33
  - 0
32
34
  - 4
33
35
  version: 1.0.4
36
+ requirement: *id001
34
37
  type: :runtime
35
- name: redis
36
- prerelease: false
37
- version_requirements: *id001
38
38
  - !ruby/object:Gem::Dependency
39
- requirement: &id002 !ruby/object:Gem::Requirement
39
+ prerelease: false
40
+ name: resque
41
+ version_requirements: &id002 !ruby/object:Gem::Requirement
40
42
  none: false
41
43
  requirements:
42
44
  - - ">="
@@ -46,12 +48,12 @@ dependencies:
46
48
  - 1
47
49
  - 8
48
50
  version: "1.8"
51
+ requirement: *id002
49
52
  type: :runtime
50
- name: resque
51
- prerelease: false
52
- version_requirements: *id002
53
53
  - !ruby/object:Gem::Dependency
54
- requirement: &id003 !ruby/object:Gem::Requirement
54
+ prerelease: false
55
+ name: resque-scheduler
56
+ version_requirements: &id003 !ruby/object:Gem::Requirement
55
57
  none: false
56
58
  requirements:
57
59
  - - ">="
@@ -60,12 +62,12 @@ dependencies:
60
62
  segments:
61
63
  - 0
62
64
  version: "0"
65
+ requirement: *id003
63
66
  type: :runtime
64
- name: resque-scheduler
65
- prerelease: false
66
- version_requirements: *id003
67
67
  - !ruby/object:Gem::Dependency
68
- requirement: &id004 !ruby/object:Gem::Requirement
68
+ prerelease: false
69
+ name: redis_support
70
+ version_requirements: &id004 !ruby/object:Gem::Requirement
69
71
  none: false
70
72
  requirements:
71
73
  - - ">="
@@ -74,10 +76,8 @@ dependencies:
74
76
  segments:
75
77
  - 0
76
78
  version: "0"
79
+ requirement: *id004
77
80
  type: :runtime
78
- name: redis_support
79
- prerelease: false
80
- version_requirements: *id004
81
81
  description: simple mapreduce framework using redis and resque
82
82
  email: john@doloreslabs.com
83
83
  executables: []
@@ -102,9 +102,9 @@ files:
102
102
  - lib/mapredus/support.rb
103
103
  - LICENSE
104
104
  - README.md
105
- - spec/helper.rb
106
105
  - spec/helper_classes.rb
107
106
  - spec/mapredus_spec.rb
107
+ - spec/helper.rb
108
108
  has_rdoc: true
109
109
  homepage: http://github.com/dolores/mapredus
110
110
  licenses: []
@@ -140,6 +140,6 @@ signing_key:
140
140
  specification_version: 3
141
141
  summary: mapredus initial
142
142
  test_files:
143
- - spec/helper.rb
144
143
  - spec/helper_classes.rb
145
144
  - spec/mapredus_spec.rb
145
+ - spec/helper.rb