mapredus 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/mapredus.rb +2 -2
- data/lib/mapredus/default_classes.rb +41 -0
- data/lib/mapredus/filesystem.rb +26 -1
- data/lib/mapredus/process.rb +21 -6
- data/lib/mapredus/reducer.rb +13 -3
- data/spec/mapredus_spec.rb +24 -0
- metadata +22 -22
data/lib/mapredus.rb
CHANGED
@@ -70,11 +70,11 @@ module MapRedus
|
|
70
70
|
#
|
71
71
|
# Examples
|
72
72
|
#
|
73
|
-
# Support::
|
73
|
+
# Support::key_hash( key )
|
74
74
|
# # => '8dd8hflf8dhod8doh9hef'
|
75
75
|
#
|
76
76
|
# Returns the hash.
|
77
|
-
def self.
|
77
|
+
def self.key_hash( key )
|
78
78
|
key.to_s.hash.abs.to_s(16)
|
79
79
|
end
|
80
80
|
|
@@ -15,6 +15,8 @@ module MapRedus
|
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
|
+
################################################################################
|
19
|
+
|
18
20
|
class WordCounter < Mapper
|
19
21
|
def self.map(map_data)
|
20
22
|
map_data.split(/\W/).each do |word|
|
@@ -24,12 +26,51 @@ module MapRedus
|
|
24
26
|
end
|
25
27
|
end
|
26
28
|
|
29
|
+
####################################REDUCERS####################################
|
30
|
+
|
27
31
|
class Adder < Reducer
|
28
32
|
def self.reduce(value_list)
|
29
33
|
yield( value_list.reduce(0) { |r, v| r += v.to_i } )
|
30
34
|
end
|
31
35
|
end
|
32
36
|
|
37
|
+
# Emits the identity function on the map values.
|
38
|
+
#
|
39
|
+
# The identity reducer should never actually have to reduce; as a
|
40
|
+
# special class in mapredus, the values should just be copied from
|
41
|
+
# one key to a new key directly in redis.
|
42
|
+
class Identity < Reducer
|
43
|
+
def self.reduce_perform(process, key)
|
44
|
+
FileSystem.copy( process.map_key(key), process.reduce_key(key) )
|
45
|
+
end
|
46
|
+
|
47
|
+
def self.reduce(value_list)
|
48
|
+
value_list.each do |v|
|
49
|
+
yield v
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
# Emits the length of the mapped value list.
|
55
|
+
#
|
56
|
+
# The counter reducer tells how many values were emitted by the
|
57
|
+
# mapper. In situations where an adder could be used but only has to
|
58
|
+
# sum up 1's, counter will be much faster.
|
59
|
+
#
|
60
|
+
# This works in MapRedus because all the values produced for one key
|
61
|
+
# are processed (reduced) by a single worker.
|
62
|
+
class Counter < Reducer
|
63
|
+
def self.reduce_perform(process, key)
|
64
|
+
process.emit(key, FileSystem.llen(process.map_key(key)))
|
65
|
+
end
|
66
|
+
|
67
|
+
def self.reduce(value_list)
|
68
|
+
yield value_list.size
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
################################################################################
|
73
|
+
|
33
74
|
class ToRedisHash < Finalizer
|
34
75
|
def self.finalize(process)
|
35
76
|
process.each_key_reduced_value do |key, value|
|
data/lib/mapredus/filesystem.rb
CHANGED
@@ -18,7 +18,32 @@ module MapRedus
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def self.method_missing(method, *args, &block)
|
21
|
-
storage.
|
21
|
+
if storage.respond_to?(method)
|
22
|
+
storage.send(method, *args)
|
23
|
+
else
|
24
|
+
super
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# Copy the values from one key to a second key
|
29
|
+
#
|
30
|
+
# NOTE TODO: currently only works for the redis list data
|
31
|
+
# structure but will be extended for arbitrary data types.
|
32
|
+
#
|
33
|
+
# NOTE: this does not account for the key being changed during the
|
34
|
+
# copy, so should not be used in situations where the first_key
|
35
|
+
# value can change during the running of copy.
|
36
|
+
#
|
37
|
+
# Examples
|
38
|
+
# FileSystem.copy("key_one", "key_two")
|
39
|
+
#
|
40
|
+
# Returns true on success, false otherwise.
|
41
|
+
def self.copy(first_key, second_key)
|
42
|
+
list_length = storage.llen(first_key)
|
43
|
+
list_length.times do |index|
|
44
|
+
storage.rpush(second_key, storage.lindex(first_key, index))
|
45
|
+
end
|
46
|
+
true
|
22
47
|
end
|
23
48
|
|
24
49
|
# Setup locks on results using RedisSupport lock functionality
|
data/lib/mapredus/process.rb
CHANGED
@@ -186,7 +186,7 @@ module MapRedus
|
|
186
186
|
if( not @ordered )
|
187
187
|
key, value = key_value
|
188
188
|
FileSystem.sadd( ProcessInfo.keys(@pid), key )
|
189
|
-
hashed_key = Helper.
|
189
|
+
hashed_key = Helper.key_hash(key)
|
190
190
|
FileSystem.rpush( ProcessInfo.map(@pid, hashed_key), value )
|
191
191
|
else
|
192
192
|
# if there's an order for the process then we should use a zset above
|
@@ -194,7 +194,7 @@ module MapRedus
|
|
194
194
|
#
|
195
195
|
rank, key, value = key_value
|
196
196
|
FileSystem.zadd( ProcessInfo.keys(@pid), rank, key )
|
197
|
-
hashed_key = Helper.
|
197
|
+
hashed_key = Helper.key_hash(key)
|
198
198
|
FileSystem.rpush( ProcessInfo.map(@pid, hashed_key), value )
|
199
199
|
end
|
200
200
|
raise "Key Collision: key:#{key}, #{key.class} => hashed key:#{hashed_key}" if key_collision?(hashed_key, key)
|
@@ -212,7 +212,7 @@ module MapRedus
|
|
212
212
|
#
|
213
213
|
# Returns "OK" on success.
|
214
214
|
def emit(key, reduce_val)
|
215
|
-
hashed_key = Helper.
|
215
|
+
hashed_key = Helper.key_hash(key)
|
216
216
|
FileSystem.rpush( ProcessInfo.reduce(@pid, hashed_key), reduce_val )
|
217
217
|
end
|
218
218
|
|
@@ -221,6 +221,21 @@ module MapRedus
|
|
221
221
|
FileSystem.get( ProcessInfo.hash_to_key(@pid, hashed_key) ) == key.to_s )
|
222
222
|
end
|
223
223
|
|
224
|
+
# Convenience methods to get the mapredus internal key string for a key
|
225
|
+
#
|
226
|
+
# Examples
|
227
|
+
# reduce_key("document")
|
228
|
+
# # => mapredus:process:PID:map_key:<Helper.key_hash("document")>:reduce
|
229
|
+
# map_key("document")
|
230
|
+
# # => mapredus:process:PID:map_key:<Helper.key_hash("document")>
|
231
|
+
#
|
232
|
+
# Returns the internal mapreduce string key for a given key.
|
233
|
+
[:reduce, :map].each do |internal_key|
|
234
|
+
define_method("#{internal_key}_key") do |key|
|
235
|
+
ProcessInfo.send(internal_key, @pid, Helper.key_hash(key))
|
236
|
+
end
|
237
|
+
end
|
238
|
+
|
224
239
|
# Keys that the map operation produced
|
225
240
|
#
|
226
241
|
# Examples
|
@@ -252,12 +267,12 @@ module MapRedus
|
|
252
267
|
#
|
253
268
|
# Returns the values.
|
254
269
|
def map_values(key)
|
255
|
-
hashed_key = Helper.
|
270
|
+
hashed_key = Helper.key_hash(key)
|
256
271
|
FileSystem.lrange( ProcessInfo.map(@pid, hashed_key), 0, -1 )
|
257
272
|
end
|
258
273
|
|
259
274
|
def num_values(key)
|
260
|
-
hashed_key = Helper.
|
275
|
+
hashed_key = Helper.key_hash(key)
|
261
276
|
FileSystem.llen( ProcessInfo.map(@pid, hashed_key) )
|
262
277
|
end
|
263
278
|
|
@@ -269,7 +284,7 @@ module MapRedus
|
|
269
284
|
#
|
270
285
|
# Returns the values.
|
271
286
|
def reduce_values(key)
|
272
|
-
hashed_key = Helper.
|
287
|
+
hashed_key = Helper.key_hash(key)
|
273
288
|
FileSystem.lrange( ProcessInfo.reduce(@pid, hashed_key), 0, -1 )
|
274
289
|
end
|
275
290
|
|
data/lib/mapredus/reducer.rb
CHANGED
@@ -21,14 +21,24 @@ module MapRedus
|
|
21
21
|
|
22
22
|
def self.reduce(values); raise InvalidReducer; end
|
23
23
|
|
24
|
+
#
|
25
|
+
# The overridable portion of a reducer perform. In some default
|
26
|
+
# classes like Identity and Counter we do not call self.reduce but
|
27
|
+
# provide optimization for the reduction by overriding this
|
28
|
+
# method.
|
29
|
+
#
|
30
|
+
def self.reduce_perform(process, key)
|
31
|
+
reduce(process.map_values(key)) do |reduce_val|
|
32
|
+
process.emit( key, reduce_val )
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
24
36
|
# Doesn't handle redundant workers and fault tolerance
|
25
37
|
#
|
26
38
|
# TODO: Resque::AutoRetry might mess this up.
|
27
39
|
def self.perform(pid, key)
|
28
40
|
process = Process.open(pid)
|
29
|
-
|
30
|
-
process.emit( key, reduce_val )
|
31
|
-
end
|
41
|
+
reduce_perform(process, key)
|
32
42
|
rescue MapRedus::RecoverableFail
|
33
43
|
Master.enslave_later_reduce(process, key)
|
34
44
|
ensure
|
data/spec/mapredus_spec.rb
CHANGED
@@ -402,3 +402,27 @@ describe "MapRedus Support" do
|
|
402
402
|
end
|
403
403
|
end
|
404
404
|
end
|
405
|
+
|
406
|
+
describe "MapRedus Default Classes" do
|
407
|
+
before(:each) do
|
408
|
+
MapRedus::FileSystem.flushall
|
409
|
+
@process = GetWordCount.create
|
410
|
+
end
|
411
|
+
|
412
|
+
it "testing that the identity copy actually does a copy" do
|
413
|
+
MapRedus::FileSystem.rpush(@process.map_key("test_key"), "whatever")
|
414
|
+
MapRedus::FileSystem.rpush(@process.map_key("test_key"), "yeah")
|
415
|
+
MapRedus::Identity.perform(@process.pid, "test_key")
|
416
|
+
|
417
|
+
@process.map_values("test_key").should == ["whatever", "yeah"]
|
418
|
+
@process.reduce_values("test_key").should == ["whatever", "yeah"]
|
419
|
+
end
|
420
|
+
|
421
|
+
it "should properly do a count" do
|
422
|
+
MapRedus::FileSystem.rpush(@process.map_key("test_key"), "whatever")
|
423
|
+
MapRedus::FileSystem.rpush(@process.map_key("test_key"), "yeah")
|
424
|
+
MapRedus::Counter.perform(@process.pid, "test_key")
|
425
|
+
|
426
|
+
@process.reduce_values("test_key").should == ["2"]
|
427
|
+
end
|
428
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mapredus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 6
|
10
|
+
version: 0.0.6
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- John Le
|
@@ -16,11 +16,13 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2010-08-
|
19
|
+
date: 2010-08-05 00:00:00 -07:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
23
|
-
|
23
|
+
prerelease: false
|
24
|
+
name: redis
|
25
|
+
version_requirements: &id001 !ruby/object:Gem::Requirement
|
24
26
|
none: false
|
25
27
|
requirements:
|
26
28
|
- - ">="
|
@@ -31,12 +33,12 @@ dependencies:
|
|
31
33
|
- 0
|
32
34
|
- 4
|
33
35
|
version: 1.0.4
|
36
|
+
requirement: *id001
|
34
37
|
type: :runtime
|
35
|
-
name: redis
|
36
|
-
prerelease: false
|
37
|
-
version_requirements: *id001
|
38
38
|
- !ruby/object:Gem::Dependency
|
39
|
-
|
39
|
+
prerelease: false
|
40
|
+
name: resque
|
41
|
+
version_requirements: &id002 !ruby/object:Gem::Requirement
|
40
42
|
none: false
|
41
43
|
requirements:
|
42
44
|
- - ">="
|
@@ -46,12 +48,12 @@ dependencies:
|
|
46
48
|
- 1
|
47
49
|
- 8
|
48
50
|
version: "1.8"
|
51
|
+
requirement: *id002
|
49
52
|
type: :runtime
|
50
|
-
name: resque
|
51
|
-
prerelease: false
|
52
|
-
version_requirements: *id002
|
53
53
|
- !ruby/object:Gem::Dependency
|
54
|
-
|
54
|
+
prerelease: false
|
55
|
+
name: resque-scheduler
|
56
|
+
version_requirements: &id003 !ruby/object:Gem::Requirement
|
55
57
|
none: false
|
56
58
|
requirements:
|
57
59
|
- - ">="
|
@@ -60,12 +62,12 @@ dependencies:
|
|
60
62
|
segments:
|
61
63
|
- 0
|
62
64
|
version: "0"
|
65
|
+
requirement: *id003
|
63
66
|
type: :runtime
|
64
|
-
name: resque-scheduler
|
65
|
-
prerelease: false
|
66
|
-
version_requirements: *id003
|
67
67
|
- !ruby/object:Gem::Dependency
|
68
|
-
|
68
|
+
prerelease: false
|
69
|
+
name: redis_support
|
70
|
+
version_requirements: &id004 !ruby/object:Gem::Requirement
|
69
71
|
none: false
|
70
72
|
requirements:
|
71
73
|
- - ">="
|
@@ -74,10 +76,8 @@ dependencies:
|
|
74
76
|
segments:
|
75
77
|
- 0
|
76
78
|
version: "0"
|
79
|
+
requirement: *id004
|
77
80
|
type: :runtime
|
78
|
-
name: redis_support
|
79
|
-
prerelease: false
|
80
|
-
version_requirements: *id004
|
81
81
|
description: simple mapreduce framework using redis and resque
|
82
82
|
email: john@doloreslabs.com
|
83
83
|
executables: []
|
@@ -102,9 +102,9 @@ files:
|
|
102
102
|
- lib/mapredus/support.rb
|
103
103
|
- LICENSE
|
104
104
|
- README.md
|
105
|
-
- spec/helper.rb
|
106
105
|
- spec/helper_classes.rb
|
107
106
|
- spec/mapredus_spec.rb
|
107
|
+
- spec/helper.rb
|
108
108
|
has_rdoc: true
|
109
109
|
homepage: http://github.com/dolores/mapredus
|
110
110
|
licenses: []
|
@@ -140,6 +140,6 @@ signing_key:
|
|
140
140
|
specification_version: 3
|
141
141
|
summary: mapredus initial
|
142
142
|
test_files:
|
143
|
-
- spec/helper.rb
|
144
143
|
- spec/helper_classes.rb
|
145
144
|
- spec/mapredus_spec.rb
|
145
|
+
- spec/helper.rb
|