mapredus 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -70,11 +70,11 @@ module MapRedus
70
70
  #
71
71
  # Examples
72
72
  #
73
- # Support::hash( key )
73
+ # Support::key_hash( key )
74
74
  # # => '8dd8hflf8dhod8doh9hef'
75
75
  #
76
76
  # Returns the hash.
77
- def self.hash( key )
77
+ def self.key_hash( key )
78
78
  key.to_s.hash.abs.to_s(16)
79
79
  end
80
80
 
@@ -15,6 +15,8 @@ module MapRedus
15
15
  end
16
16
  end
17
17
 
18
+ ################################################################################
19
+
18
20
  class WordCounter < Mapper
19
21
  def self.map(map_data)
20
22
  map_data.split(/\W/).each do |word|
@@ -24,12 +26,51 @@ module MapRedus
24
26
  end
25
27
  end
26
28
 
29
+ ####################################REDUCERS####################################
30
+
27
31
  class Adder < Reducer
28
32
  def self.reduce(value_list)
29
33
  yield( value_list.reduce(0) { |r, v| r += v.to_i } )
30
34
  end
31
35
  end
32
36
 
37
+ # Emits the identity function on the map values.
38
+ #
39
+ # The identity reducer should never actually have to reduce: as a
40
+ # special class in mapredus, the values should just be copied from
41
+ # one key to a new key directly in redis.
42
+ class Identity < Reducer
43
+ def self.reduce_perform(process, key)
44
+ FileSystem.copy( process.map_key(key), process.reduce_key(key) )
45
+ end
46
+
47
+ def self.reduce(value_list)
48
+ value_list.each do |v|
49
+ yield v
50
+ end
51
+ end
52
+ end
53
+
54
+ # Emits the length of the mapped value list.
55
+ #
56
+ # The counter reducer tells how many values were emitted by the
57
+ # mapper. In situations where an adder could be used but only has to
58
+ # sum up 1's, counter will be much faster.
59
+ #
60
+ # This works in MapRedus because all the values produced for one key
61
+ # are processed (reduced) by a single worker.
62
+ class Counter < Reducer
63
+ def self.reduce_perform(process, key)
64
+ process.emit(key, FileSystem.llen(process.map_key(key)))
65
+ end
66
+
67
+ def self.reduce(value_list)
68
+ yield value_list.size
69
+ end
70
+ end
71
+
72
+ ################################################################################
73
+
33
74
  class ToRedisHash < Finalizer
34
75
  def self.finalize(process)
35
76
  process.each_key_reduced_value do |key, value|
@@ -18,7 +18,32 @@ module MapRedus
18
18
  end
19
19
 
20
20
  def self.method_missing(method, *args, &block)
21
- storage.send(method, *args)
21
+ if storage.respond_to?(method)
22
+ storage.send(method, *args)
23
+ else
24
+ super
25
+ end
26
+ end
27
+
28
+ # Copy the values from one key to a second key
29
+ #
30
+ # NOTE TODO: currently only works for the redis list data
31
+ # structure but will be extended for arbitrary data types.
32
+ #
33
+ # NOTE: this does not account for the key being changed during the
34
+ # copy, so should not be used in situations where the first_key
35
+ # value can change during the running of copy.
36
+ #
37
+ # Examples
38
+ # FileSystem.copy("key_one", "key_two")
39
+ #
40
+ # Returns true after every element of first_key has been pushed onto second_key.
41
+ def self.copy(first_key, second_key)
42
+ list_length = storage.llen(first_key)
43
+ list_length.times do |index|
44
+ storage.rpush(second_key, storage.lindex(first_key, index))
45
+ end
46
+ true
22
47
  end
23
48
 
24
49
  # Setup locks on results using RedisSupport lock functionality
@@ -186,7 +186,7 @@ module MapRedus
186
186
  if( not @ordered )
187
187
  key, value = key_value
188
188
  FileSystem.sadd( ProcessInfo.keys(@pid), key )
189
- hashed_key = Helper.hash(key)
189
+ hashed_key = Helper.key_hash(key)
190
190
  FileSystem.rpush( ProcessInfo.map(@pid, hashed_key), value )
191
191
  else
192
192
  # if there's an order for the process then we should use a zset above
@@ -194,7 +194,7 @@ module MapRedus
194
194
  #
195
195
  rank, key, value = key_value
196
196
  FileSystem.zadd( ProcessInfo.keys(@pid), rank, key )
197
- hashed_key = Helper.hash(key)
197
+ hashed_key = Helper.key_hash(key)
198
198
  FileSystem.rpush( ProcessInfo.map(@pid, hashed_key), value )
199
199
  end
200
200
  raise "Key Collision: key:#{key}, #{key.class} => hashed key:#{hashed_key}" if key_collision?(hashed_key, key)
@@ -212,7 +212,7 @@ module MapRedus
212
212
  #
213
213
  # Returns "OK" on success.
214
214
  def emit(key, reduce_val)
215
- hashed_key = Helper.hash(key)
215
+ hashed_key = Helper.key_hash(key)
216
216
  FileSystem.rpush( ProcessInfo.reduce(@pid, hashed_key), reduce_val )
217
217
  end
218
218
 
@@ -221,6 +221,21 @@ module MapRedus
221
221
  FileSystem.get( ProcessInfo.hash_to_key(@pid, hashed_key) ) == key.to_s )
222
222
  end
223
223
 
224
+ # Convenience methods to get the mapredus internal key string for a key
225
+ #
226
+ # Examples
227
+ # reduce_key("document")
228
+ # # => mapredus:process:PID:map_key:<Helper.key_hash("document")>:reduce
229
+ # map_key("document")
230
+ # # => mapredus:process:PID:map_key:<Helper.key_hash("document")>
231
+ #
232
+ # Returns the internal mapreduce string key for a given key.
233
+ [:reduce, :map].each do |internal_key|
234
+ define_method("#{internal_key}_key") do |key|
235
+ ProcessInfo.send(internal_key, @pid, Helper.key_hash(key))
236
+ end
237
+ end
238
+
224
239
  # Keys that the map operation produced
225
240
  #
226
241
  # Examples
@@ -252,12 +267,12 @@ module MapRedus
252
267
  #
253
268
  # Returns the values.
254
269
  def map_values(key)
255
- hashed_key = Helper.hash(key)
270
+ hashed_key = Helper.key_hash(key)
256
271
  FileSystem.lrange( ProcessInfo.map(@pid, hashed_key), 0, -1 )
257
272
  end
258
273
 
259
274
  def num_values(key)
260
- hashed_key = Helper.hash(key)
275
+ hashed_key = Helper.key_hash(key)
261
276
  FileSystem.llen( ProcessInfo.map(@pid, hashed_key) )
262
277
  end
263
278
 
@@ -269,7 +284,7 @@ module MapRedus
269
284
  #
270
285
  # Returns the values.
271
286
  def reduce_values(key)
272
- hashed_key = Helper.hash(key)
287
+ hashed_key = Helper.key_hash(key)
273
288
  FileSystem.lrange( ProcessInfo.reduce(@pid, hashed_key), 0, -1 )
274
289
  end
275
290
 
@@ -21,14 +21,24 @@ module MapRedus
21
21
 
22
22
  def self.reduce(values); raise InvalidReducer; end
23
23
 
24
+ #
25
+ # The overridable portion of a reducer perform. In some default
26
+ # classes like Identity and Counter we do not call self.reduce but
27
+ # provide optimization for the reduction by overriding this
28
+ # method.
29
+ #
30
+ def self.reduce_perform(process, key)
31
+ reduce(process.map_values(key)) do |reduce_val|
32
+ process.emit( key, reduce_val )
33
+ end
34
+ end
35
+
24
36
  # Doesn't handle redundant workers and fault tolerance
25
37
  #
26
38
  # TODO: Resque::AutoRetry might mess this up.
27
39
  def self.perform(pid, key)
28
40
  process = Process.open(pid)
29
- reduce(process.map_values(key)) do |reduce_val|
30
- process.emit( key, reduce_val )
31
- end
41
+ reduce_perform(process, key)
32
42
  rescue MapRedus::RecoverableFail
33
43
  Master.enslave_later_reduce(process, key)
34
44
  ensure
@@ -402,3 +402,27 @@ describe "MapRedus Support" do
402
402
  end
403
403
  end
404
404
  end
405
+
406
+ describe "MapRedus Default Classes" do
407
+ before(:each) do
408
+ MapRedus::FileSystem.flushall
409
+ @process = GetWordCount.create
410
+ end
411
+
412
+ it "testing that the identity copy actually does a copy" do
413
+ MapRedus::FileSystem.rpush(@process.map_key("test_key"), "whatever")
414
+ MapRedus::FileSystem.rpush(@process.map_key("test_key"), "yeah")
415
+ MapRedus::Identity.perform(@process.pid, "test_key")
416
+
417
+ @process.map_values("test_key").should == ["whatever", "yeah"]
418
+ @process.reduce_values("test_key").should == ["whatever", "yeah"]
419
+ end
420
+
421
+ it "should properly do a count" do
422
+ MapRedus::FileSystem.rpush(@process.map_key("test_key"), "whatever")
423
+ MapRedus::FileSystem.rpush(@process.map_key("test_key"), "yeah")
424
+ MapRedus::Counter.perform(@process.pid, "test_key")
425
+
426
+ @process.reduce_values("test_key").should == ["2"]
427
+ end
428
+ end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mapredus
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 5
10
- version: 0.0.5
9
+ - 6
10
+ version: 0.0.6
11
11
  platform: ruby
12
12
  authors:
13
13
  - John Le
@@ -16,11 +16,13 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2010-08-02 00:00:00 -07:00
19
+ date: 2010-08-05 00:00:00 -07:00
20
20
  default_executable:
21
21
  dependencies:
22
22
  - !ruby/object:Gem::Dependency
23
- requirement: &id001 !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ name: redis
25
+ version_requirements: &id001 !ruby/object:Gem::Requirement
24
26
  none: false
25
27
  requirements:
26
28
  - - ">="
@@ -31,12 +33,12 @@ dependencies:
31
33
  - 0
32
34
  - 4
33
35
  version: 1.0.4
36
+ requirement: *id001
34
37
  type: :runtime
35
- name: redis
36
- prerelease: false
37
- version_requirements: *id001
38
38
  - !ruby/object:Gem::Dependency
39
- requirement: &id002 !ruby/object:Gem::Requirement
39
+ prerelease: false
40
+ name: resque
41
+ version_requirements: &id002 !ruby/object:Gem::Requirement
40
42
  none: false
41
43
  requirements:
42
44
  - - ">="
@@ -46,12 +48,12 @@ dependencies:
46
48
  - 1
47
49
  - 8
48
50
  version: "1.8"
51
+ requirement: *id002
49
52
  type: :runtime
50
- name: resque
51
- prerelease: false
52
- version_requirements: *id002
53
53
  - !ruby/object:Gem::Dependency
54
- requirement: &id003 !ruby/object:Gem::Requirement
54
+ prerelease: false
55
+ name: resque-scheduler
56
+ version_requirements: &id003 !ruby/object:Gem::Requirement
55
57
  none: false
56
58
  requirements:
57
59
  - - ">="
@@ -60,12 +62,12 @@ dependencies:
60
62
  segments:
61
63
  - 0
62
64
  version: "0"
65
+ requirement: *id003
63
66
  type: :runtime
64
- name: resque-scheduler
65
- prerelease: false
66
- version_requirements: *id003
67
67
  - !ruby/object:Gem::Dependency
68
- requirement: &id004 !ruby/object:Gem::Requirement
68
+ prerelease: false
69
+ name: redis_support
70
+ version_requirements: &id004 !ruby/object:Gem::Requirement
69
71
  none: false
70
72
  requirements:
71
73
  - - ">="
@@ -74,10 +76,8 @@ dependencies:
74
76
  segments:
75
77
  - 0
76
78
  version: "0"
79
+ requirement: *id004
77
80
  type: :runtime
78
- name: redis_support
79
- prerelease: false
80
- version_requirements: *id004
81
81
  description: simple mapreduce framework using redis and resque
82
82
  email: john@doloreslabs.com
83
83
  executables: []
@@ -102,9 +102,9 @@ files:
102
102
  - lib/mapredus/support.rb
103
103
  - LICENSE
104
104
  - README.md
105
- - spec/helper.rb
106
105
  - spec/helper_classes.rb
107
106
  - spec/mapredus_spec.rb
107
+ - spec/helper.rb
108
108
  has_rdoc: true
109
109
  homepage: http://github.com/dolores/mapredus
110
110
  licenses: []
@@ -140,6 +140,6 @@ signing_key:
140
140
  specification_version: 3
141
141
  summary: mapredus initial
142
142
  test_files:
143
- - spec/helper.rb
144
143
  - spec/helper_classes.rb
145
144
  - spec/mapredus_spec.rb
145
+ - spec/helper.rb