mapredus 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -70,11 +70,11 @@ module MapRedus
70
70
  #
71
71
  # Examples
72
72
  #
73
- # Support::hash( key )
73
+ # Support::key_hash( key )
74
74
  # # => '8dd8hflf8dhod8doh9hef'
75
75
  #
76
76
  # Returns the hash.
77
- def self.hash( key )
77
+ def self.key_hash( key )
78
78
  key.to_s.hash.abs.to_s(16)
79
79
  end
80
80
 
@@ -15,6 +15,8 @@ module MapRedus
15
15
  end
16
16
  end
17
17
 
18
+ ################################################################################
19
+
18
20
  class WordCounter < Mapper
19
21
  def self.map(map_data)
20
22
  map_data.split(/\W/).each do |word|
@@ -24,12 +26,51 @@ module MapRedus
24
26
  end
25
27
  end
26
28
 
29
+ ####################################REDUCERS####################################
30
+
27
31
  class Adder < Reducer
28
32
  def self.reduce(value_list)
29
33
  yield( value_list.reduce(0) { |r, v| r += v.to_i } )
30
34
  end
31
35
  end
32
36
 
37
+ # Emits the identity function on the map values.
38
+ #
39
+ # The identity reducer should never actually have to reduce: as a
40
+ # special class in mapredus, its values are simply copied from
41
+ # one key to a new key directly in redis.
42
+ class Identity < Reducer
43
+ def self.reduce_perform(process, key)
44
+ FileSystem.copy( process.map_key(key), process.reduce_key(key) )
45
+ end
46
+
47
+ def self.reduce(value_list)
48
+ value_list.each do |v|
49
+ yield v
50
+ end
51
+ end
52
+ end
53
+
54
+ # Emits the length of the mapped value list.
55
+ #
56
+ # The counter reducer tells how many values were emitted by the
57
+ # mapper. In situations where an adder could be used but only has to
58
+ # sum up 1's, counter will be much faster.
59
+ #
60
+ # This works in MapRedus because all the values produced for one key
61
+ # are processed (reduced) by a single worker.
62
+ class Counter < Reducer
63
+ def self.reduce_perform(process, key)
64
+ process.emit(key, FileSystem.llen(process.map_key(key)))
65
+ end
66
+
67
+ def self.reduce(value_list)
68
+ yield value_list.size
69
+ end
70
+ end
71
+
72
+ ################################################################################
73
+
33
74
  class ToRedisHash < Finalizer
34
75
  def self.finalize(process)
35
76
  process.each_key_reduced_value do |key, value|
@@ -18,7 +18,32 @@ module MapRedus
18
18
  end
19
19
 
20
20
  def self.method_missing(method, *args, &block)
21
- storage.send(method, *args)
21
+ if storage.respond_to?(method)
22
+ storage.send(method, *args)
23
+ else
24
+ super
25
+ end
26
+ end
27
+
28
+ # Copy the values from one key to a second key
29
+ #
30
+ # NOTE TODO: currently only works for the redis list data
31
+ # structure but will be extended for arbitrary data types.
32
+ #
33
+ # NOTE: this does not account for the key being changed during the
34
+ # copy, so should not be used in situations where the first_key
35
+ # value can change during the running of copy.
36
+ #
37
+ # Examples
38
+ # FileSystem.copy("key_one", "key_two")
39
+ #
40
+ # Returns true once every value has been copied.
41
+ def self.copy(first_key, second_key)
42
+ list_length = storage.llen(first_key)
43
+ list_length.times do |index|
44
+ storage.rpush(second_key, storage.lindex(first_key, index))
45
+ end
46
+ true
22
47
  end
23
48
 
24
49
  # Setup locks on results using RedisSupport lock functionality
@@ -186,7 +186,7 @@ module MapRedus
186
186
  if( not @ordered )
187
187
  key, value = key_value
188
188
  FileSystem.sadd( ProcessInfo.keys(@pid), key )
189
- hashed_key = Helper.hash(key)
189
+ hashed_key = Helper.key_hash(key)
190
190
  FileSystem.rpush( ProcessInfo.map(@pid, hashed_key), value )
191
191
  else
192
192
  # if there's an order for the process then we should use a zset above
@@ -194,7 +194,7 @@ module MapRedus
194
194
  #
195
195
  rank, key, value = key_value
196
196
  FileSystem.zadd( ProcessInfo.keys(@pid), rank, key )
197
- hashed_key = Helper.hash(key)
197
+ hashed_key = Helper.key_hash(key)
198
198
  FileSystem.rpush( ProcessInfo.map(@pid, hashed_key), value )
199
199
  end
200
200
  raise "Key Collision: key:#{key}, #{key.class} => hashed key:#{hashed_key}" if key_collision?(hashed_key, key)
@@ -212,7 +212,7 @@ module MapRedus
212
212
  #
213
213
  # Returns "OK" on success.
214
214
  def emit(key, reduce_val)
215
- hashed_key = Helper.hash(key)
215
+ hashed_key = Helper.key_hash(key)
216
216
  FileSystem.rpush( ProcessInfo.reduce(@pid, hashed_key), reduce_val )
217
217
  end
218
218
 
@@ -221,6 +221,21 @@ module MapRedus
221
221
  FileSystem.get( ProcessInfo.hash_to_key(@pid, hashed_key) ) == key.to_s )
222
222
  end
223
223
 
224
+ # Convenience methods to get the mapredus internal key string for a key
225
+ #
226
+ # Examples
227
+ # reduce_key("document")
228
+ # # => mapredus:process:PID:map_key:<Helper.key_hash("document")>:reduce
229
+ # map_key("document")
230
+ # # => mapredus:process:PID:map_key:<Helper.key_hash("document")>
231
+ #
232
+ # Returns the internal mapreduce string key for a given key.
233
+ [:reduce, :map].each do |internal_key|
234
+ define_method("#{internal_key}_key") do |key|
235
+ ProcessInfo.send(internal_key, @pid, Helper.key_hash(key))
236
+ end
237
+ end
238
+
224
239
  # Keys that the map operation produced
225
240
  #
226
241
  # Examples
@@ -252,12 +267,12 @@ module MapRedus
252
267
  #
253
268
  # Returns the values.
254
269
  def map_values(key)
255
- hashed_key = Helper.hash(key)
270
+ hashed_key = Helper.key_hash(key)
256
271
  FileSystem.lrange( ProcessInfo.map(@pid, hashed_key), 0, -1 )
257
272
  end
258
273
 
259
274
  def num_values(key)
260
- hashed_key = Helper.hash(key)
275
+ hashed_key = Helper.key_hash(key)
261
276
  FileSystem.llen( ProcessInfo.map(@pid, hashed_key) )
262
277
  end
263
278
 
@@ -269,7 +284,7 @@ module MapRedus
269
284
  #
270
285
  # Returns the values.
271
286
  def reduce_values(key)
272
- hashed_key = Helper.hash(key)
287
+ hashed_key = Helper.key_hash(key)
273
288
  FileSystem.lrange( ProcessInfo.reduce(@pid, hashed_key), 0, -1 )
274
289
  end
275
290
 
@@ -21,14 +21,24 @@ module MapRedus
21
21
 
22
22
  def self.reduce(values); raise InvalidReducer; end
23
23
 
24
+ #
25
+ # The overridable portion of a reducer perform. In some default
26
+ # classes like Identity and Counter we do not call self.reduce but
27
+ # provide optimization for the reduction by overriding this
28
+ # method.
29
+ #
30
+ def self.reduce_perform(process, key)
31
+ reduce(process.map_values(key)) do |reduce_val|
32
+ process.emit( key, reduce_val )
33
+ end
34
+ end
35
+
24
36
  # Doesn't handle redundant workers and fault tolerance
25
37
  #
26
38
  # TODO: Resque::AutoRetry might mess this up.
27
39
  def self.perform(pid, key)
28
40
  process = Process.open(pid)
29
- reduce(process.map_values(key)) do |reduce_val|
30
- process.emit( key, reduce_val )
31
- end
41
+ reduce_perform(process, key)
32
42
  rescue MapRedus::RecoverableFail
33
43
  Master.enslave_later_reduce(process, key)
34
44
  ensure
@@ -402,3 +402,27 @@ describe "MapRedus Support" do
402
402
  end
403
403
  end
404
404
  end
405
+
406
+ describe "MapRedus Default Classes" do
407
+ before(:each) do
408
+ MapRedus::FileSystem.flushall
409
+ @process = GetWordCount.create
410
+ end
411
+
412
+ it "testing that the identity copy actually does a copy" do
413
+ MapRedus::FileSystem.rpush(@process.map_key("test_key"), "whatever")
414
+ MapRedus::FileSystem.rpush(@process.map_key("test_key"), "yeah")
415
+ MapRedus::Identity.perform(@process.pid, "test_key")
416
+
417
+ @process.map_values("test_key").should == ["whatever", "yeah"]
418
+ @process.reduce_values("test_key").should == ["whatever", "yeah"]
419
+ end
420
+
421
+ it "should properly do a count" do
422
+ MapRedus::FileSystem.rpush(@process.map_key("test_key"), "whatever")
423
+ MapRedus::FileSystem.rpush(@process.map_key("test_key"), "yeah")
424
+ MapRedus::Counter.perform(@process.pid, "test_key")
425
+
426
+ @process.reduce_values("test_key").should == ["2"]
427
+ end
428
+ end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mapredus
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 5
10
- version: 0.0.5
9
+ - 6
10
+ version: 0.0.6
11
11
  platform: ruby
12
12
  authors:
13
13
  - John Le
@@ -16,11 +16,13 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2010-08-02 00:00:00 -07:00
19
+ date: 2010-08-05 00:00:00 -07:00
20
20
  default_executable:
21
21
  dependencies:
22
22
  - !ruby/object:Gem::Dependency
23
- requirement: &id001 !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ name: redis
25
+ version_requirements: &id001 !ruby/object:Gem::Requirement
24
26
  none: false
25
27
  requirements:
26
28
  - - ">="
@@ -31,12 +33,12 @@ dependencies:
31
33
  - 0
32
34
  - 4
33
35
  version: 1.0.4
36
+ requirement: *id001
34
37
  type: :runtime
35
- name: redis
36
- prerelease: false
37
- version_requirements: *id001
38
38
  - !ruby/object:Gem::Dependency
39
- requirement: &id002 !ruby/object:Gem::Requirement
39
+ prerelease: false
40
+ name: resque
41
+ version_requirements: &id002 !ruby/object:Gem::Requirement
40
42
  none: false
41
43
  requirements:
42
44
  - - ">="
@@ -46,12 +48,12 @@ dependencies:
46
48
  - 1
47
49
  - 8
48
50
  version: "1.8"
51
+ requirement: *id002
49
52
  type: :runtime
50
- name: resque
51
- prerelease: false
52
- version_requirements: *id002
53
53
  - !ruby/object:Gem::Dependency
54
- requirement: &id003 !ruby/object:Gem::Requirement
54
+ prerelease: false
55
+ name: resque-scheduler
56
+ version_requirements: &id003 !ruby/object:Gem::Requirement
55
57
  none: false
56
58
  requirements:
57
59
  - - ">="
@@ -60,12 +62,12 @@ dependencies:
60
62
  segments:
61
63
  - 0
62
64
  version: "0"
65
+ requirement: *id003
63
66
  type: :runtime
64
- name: resque-scheduler
65
- prerelease: false
66
- version_requirements: *id003
67
67
  - !ruby/object:Gem::Dependency
68
- requirement: &id004 !ruby/object:Gem::Requirement
68
+ prerelease: false
69
+ name: redis_support
70
+ version_requirements: &id004 !ruby/object:Gem::Requirement
69
71
  none: false
70
72
  requirements:
71
73
  - - ">="
@@ -74,10 +76,8 @@ dependencies:
74
76
  segments:
75
77
  - 0
76
78
  version: "0"
79
+ requirement: *id004
77
80
  type: :runtime
78
- name: redis_support
79
- prerelease: false
80
- version_requirements: *id004
81
81
  description: simple mapreduce framework using redis and resque
82
82
  email: john@doloreslabs.com
83
83
  executables: []
@@ -102,9 +102,9 @@ files:
102
102
  - lib/mapredus/support.rb
103
103
  - LICENSE
104
104
  - README.md
105
- - spec/helper.rb
106
105
  - spec/helper_classes.rb
107
106
  - spec/mapredus_spec.rb
107
+ - spec/helper.rb
108
108
  has_rdoc: true
109
109
  homepage: http://github.com/dolores/mapredus
110
110
  licenses: []
@@ -140,6 +140,6 @@ signing_key:
140
140
  specification_version: 3
141
141
  summary: mapredus initial
142
142
  test_files:
143
- - spec/helper.rb
144
143
  - spec/helper_classes.rb
145
144
  - spec/mapredus_spec.rb
145
+ - spec/helper.rb