mapredus 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/mapredus.rb +2 -2
- data/lib/mapredus/default_classes.rb +41 -0
- data/lib/mapredus/filesystem.rb +26 -1
- data/lib/mapredus/process.rb +21 -6
- data/lib/mapredus/reducer.rb +13 -3
- data/spec/mapredus_spec.rb +24 -0
- metadata +22 -22
data/lib/mapredus.rb
CHANGED
@@ -70,11 +70,11 @@ module MapRedus
|
|
70
70
|
#
|
71
71
|
# Examples
|
72
72
|
#
|
73
|
-
# Support::
|
73
|
+
# Support::key_hash( key )
|
74
74
|
# # => '8dd8hflf8dhod8doh9hef'
|
75
75
|
#
|
76
76
|
# Returns the hash.
|
77
|
-
def self.
|
77
|
+
def self.key_hash( key )
|
78
78
|
key.to_s.hash.abs.to_s(16)
|
79
79
|
end
|
80
80
|
|
@@ -15,6 +15,8 @@ module MapRedus
|
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
|
+
################################################################################
|
19
|
+
|
18
20
|
class WordCounter < Mapper
|
19
21
|
def self.map(map_data)
|
20
22
|
map_data.split(/\W/).each do |word|
|
@@ -24,12 +26,51 @@ module MapRedus
|
|
24
26
|
end
|
25
27
|
end
|
26
28
|
|
29
|
+
####################################REDUCERS####################################
|
30
|
+
|
27
31
|
class Adder < Reducer
|
28
32
|
def self.reduce(value_list)
|
29
33
|
yield( value_list.reduce(0) { |r, v| r += v.to_i } )
|
30
34
|
end
|
31
35
|
end
|
32
36
|
|
37
|
+
# Emits the identity function on the map values.
|
38
|
+
#
|
39
|
+
# The identity reducer should never actually have to reduce as a
|
40
|
+
# special class in mapredus; the values should just be copied from
|
41
|
+
# one key to a new key directly in redis.
|
42
|
+
class Identity < Reducer
|
43
|
+
def self.reduce_perform(process, key)
|
44
|
+
FileSystem.copy( process.map_key(key), process.reduce_key(key) )
|
45
|
+
end
|
46
|
+
|
47
|
+
def self.reduce(value_list)
|
48
|
+
value_list.each do |v|
|
49
|
+
yield v
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
# Emits the length of the mapped value list.
|
55
|
+
#
|
56
|
+
# The counter reducer tells how many values were emitted by the
|
57
|
+
# mapper. In situations where an adder could be used but only has to
|
58
|
+
# sum up 1's, counter will be much faster.
|
59
|
+
#
|
60
|
+
# This works in MapRedus because all the values produced for one key
|
61
|
+
# are processed (reduced) by a single worker.
|
62
|
+
class Counter < Reducer
|
63
|
+
def self.reduce_perform(process, key)
|
64
|
+
process.emit(key, FileSystem.llen(process.map_key(key)))
|
65
|
+
end
|
66
|
+
|
67
|
+
def self.reduce(value_list)
|
68
|
+
yield value_list.size
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
################################################################################
|
73
|
+
|
33
74
|
class ToRedisHash < Finalizer
|
34
75
|
def self.finalize(process)
|
35
76
|
process.each_key_reduced_value do |key, value|
|
data/lib/mapredus/filesystem.rb
CHANGED
@@ -18,7 +18,32 @@ module MapRedus
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def self.method_missing(method, *args, &block)
|
21
|
-
storage.
|
21
|
+
if storage.respond_to?(method)
|
22
|
+
storage.send(method, *args)
|
23
|
+
else
|
24
|
+
super
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# Copy the values from one key to a second key
|
29
|
+
#
|
30
|
+
# NOTE TODO: currently only works for the redis list data
|
31
|
+
# structure but will be extended for arbitrary data types.
|
32
|
+
#
|
33
|
+
# NOTE: this does not account for the key being changed during the
|
34
|
+
# copy, so should not be used in situations where the first_key
|
35
|
+
# value can change during the running of copy.
|
36
|
+
#
|
37
|
+
# Examples
|
38
|
+
# FileSystem.copy("key_one", "key_two")
|
39
|
+
#
|
40
|
+
# Returns true on success, false otherwise.
|
41
|
+
def self.copy(first_key, second_key)
|
42
|
+
list_length = storage.llen(first_key)
|
43
|
+
list_length.times do |index|
|
44
|
+
storage.rpush(second_key, storage.lindex(first_key, index))
|
45
|
+
end
|
46
|
+
true
|
22
47
|
end
|
23
48
|
|
24
49
|
# Setup locks on results using RedisSupport lock functionality
|
data/lib/mapredus/process.rb
CHANGED
@@ -186,7 +186,7 @@ module MapRedus
|
|
186
186
|
if( not @ordered )
|
187
187
|
key, value = key_value
|
188
188
|
FileSystem.sadd( ProcessInfo.keys(@pid), key )
|
189
|
-
hashed_key = Helper.
|
189
|
+
hashed_key = Helper.key_hash(key)
|
190
190
|
FileSystem.rpush( ProcessInfo.map(@pid, hashed_key), value )
|
191
191
|
else
|
192
192
|
# if there's an order for the process then we should use a zset above
|
@@ -194,7 +194,7 @@ module MapRedus
|
|
194
194
|
#
|
195
195
|
rank, key, value = key_value
|
196
196
|
FileSystem.zadd( ProcessInfo.keys(@pid), rank, key )
|
197
|
-
hashed_key = Helper.
|
197
|
+
hashed_key = Helper.key_hash(key)
|
198
198
|
FileSystem.rpush( ProcessInfo.map(@pid, hashed_key), value )
|
199
199
|
end
|
200
200
|
raise "Key Collision: key:#{key}, #{key.class} => hashed key:#{hashed_key}" if key_collision?(hashed_key, key)
|
@@ -212,7 +212,7 @@ module MapRedus
|
|
212
212
|
#
|
213
213
|
# Returns "OK" on success.
|
214
214
|
def emit(key, reduce_val)
|
215
|
-
hashed_key = Helper.
|
215
|
+
hashed_key = Helper.key_hash(key)
|
216
216
|
FileSystem.rpush( ProcessInfo.reduce(@pid, hashed_key), reduce_val )
|
217
217
|
end
|
218
218
|
|
@@ -221,6 +221,21 @@ module MapRedus
|
|
221
221
|
FileSystem.get( ProcessInfo.hash_to_key(@pid, hashed_key) ) == key.to_s )
|
222
222
|
end
|
223
223
|
|
224
|
+
# Convenience methods to get the mapredus internal key string for a key
|
225
|
+
#
|
226
|
+
# Examples
|
227
|
+
# reduce_key("document")
|
228
|
+
# # => mapredus:process:PID:map_key:<Helper.key_hash("document")>:reduce
|
229
|
+
# map_key("document")
|
230
|
+
# # => mapredus:process:PID:map_key:<Helper.key_hash("document")>
|
231
|
+
#
|
232
|
+
# Returns the internal mapreduce string key for a given key.
|
233
|
+
[:reduce, :map].each do |internal_key|
|
234
|
+
define_method("#{internal_key}_key") do |key|
|
235
|
+
ProcessInfo.send(internal_key, @pid, Helper.key_hash(key))
|
236
|
+
end
|
237
|
+
end
|
238
|
+
|
224
239
|
# Keys that the map operation produced
|
225
240
|
#
|
226
241
|
# Examples
|
@@ -252,12 +267,12 @@ module MapRedus
|
|
252
267
|
#
|
253
268
|
# Returns the values.
|
254
269
|
def map_values(key)
|
255
|
-
hashed_key = Helper.
|
270
|
+
hashed_key = Helper.key_hash(key)
|
256
271
|
FileSystem.lrange( ProcessInfo.map(@pid, hashed_key), 0, -1 )
|
257
272
|
end
|
258
273
|
|
259
274
|
def num_values(key)
|
260
|
-
hashed_key = Helper.
|
275
|
+
hashed_key = Helper.key_hash(key)
|
261
276
|
FileSystem.llen( ProcessInfo.map(@pid, hashed_key) )
|
262
277
|
end
|
263
278
|
|
@@ -269,7 +284,7 @@ module MapRedus
|
|
269
284
|
#
|
270
285
|
# Returns the values.
|
271
286
|
def reduce_values(key)
|
272
|
-
hashed_key = Helper.
|
287
|
+
hashed_key = Helper.key_hash(key)
|
273
288
|
FileSystem.lrange( ProcessInfo.reduce(@pid, hashed_key), 0, -1 )
|
274
289
|
end
|
275
290
|
|
data/lib/mapredus/reducer.rb
CHANGED
@@ -21,14 +21,24 @@ module MapRedus
|
|
21
21
|
|
22
22
|
def self.reduce(values); raise InvalidReducer; end
|
23
23
|
|
24
|
+
#
|
25
|
+
# The overridable portion of a reducer perform. In some default
|
26
|
+
# classes like Identity and Counter we do not call self.reduce but
|
27
|
+
# provide optimization for the reduction by overriding this
|
28
|
+
# method.
|
29
|
+
#
|
30
|
+
def self.reduce_perform(process, key)
|
31
|
+
reduce(process.map_values(key)) do |reduce_val|
|
32
|
+
process.emit( key, reduce_val )
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
24
36
|
# Doesn't handle redundant workers and fault tolerance
|
25
37
|
#
|
26
38
|
# TODO: Resque::AutoRetry might mess this up.
|
27
39
|
def self.perform(pid, key)
|
28
40
|
process = Process.open(pid)
|
29
|
-
|
30
|
-
process.emit( key, reduce_val )
|
31
|
-
end
|
41
|
+
reduce_perform(process, key)
|
32
42
|
rescue MapRedus::RecoverableFail
|
33
43
|
Master.enslave_later_reduce(process, key)
|
34
44
|
ensure
|
data/spec/mapredus_spec.rb
CHANGED
@@ -402,3 +402,27 @@ describe "MapRedus Support" do
|
|
402
402
|
end
|
403
403
|
end
|
404
404
|
end
|
405
|
+
|
406
|
+
describe "MapRedus Default Classes" do
|
407
|
+
before(:each) do
|
408
|
+
MapRedus::FileSystem.flushall
|
409
|
+
@process = GetWordCount.create
|
410
|
+
end
|
411
|
+
|
412
|
+
it "testing that the identity copy actually does a copy" do
|
413
|
+
MapRedus::FileSystem.rpush(@process.map_key("test_key"), "whatever")
|
414
|
+
MapRedus::FileSystem.rpush(@process.map_key("test_key"), "yeah")
|
415
|
+
MapRedus::Identity.perform(@process.pid, "test_key")
|
416
|
+
|
417
|
+
@process.map_values("test_key").should == ["whatever", "yeah"]
|
418
|
+
@process.reduce_values("test_key").should == ["whatever", "yeah"]
|
419
|
+
end
|
420
|
+
|
421
|
+
it "should properly do a count" do
|
422
|
+
MapRedus::FileSystem.rpush(@process.map_key("test_key"), "whatever")
|
423
|
+
MapRedus::FileSystem.rpush(@process.map_key("test_key"), "yeah")
|
424
|
+
MapRedus::Counter.perform(@process.pid, "test_key")
|
425
|
+
|
426
|
+
@process.reduce_values("test_key").should == ["2"]
|
427
|
+
end
|
428
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mapredus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 6
|
10
|
+
version: 0.0.6
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- John Le
|
@@ -16,11 +16,13 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2010-08-
|
19
|
+
date: 2010-08-05 00:00:00 -07:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
23
|
-
|
23
|
+
prerelease: false
|
24
|
+
name: redis
|
25
|
+
version_requirements: &id001 !ruby/object:Gem::Requirement
|
24
26
|
none: false
|
25
27
|
requirements:
|
26
28
|
- - ">="
|
@@ -31,12 +33,12 @@ dependencies:
|
|
31
33
|
- 0
|
32
34
|
- 4
|
33
35
|
version: 1.0.4
|
36
|
+
requirement: *id001
|
34
37
|
type: :runtime
|
35
|
-
name: redis
|
36
|
-
prerelease: false
|
37
|
-
version_requirements: *id001
|
38
38
|
- !ruby/object:Gem::Dependency
|
39
|
-
|
39
|
+
prerelease: false
|
40
|
+
name: resque
|
41
|
+
version_requirements: &id002 !ruby/object:Gem::Requirement
|
40
42
|
none: false
|
41
43
|
requirements:
|
42
44
|
- - ">="
|
@@ -46,12 +48,12 @@ dependencies:
|
|
46
48
|
- 1
|
47
49
|
- 8
|
48
50
|
version: "1.8"
|
51
|
+
requirement: *id002
|
49
52
|
type: :runtime
|
50
|
-
name: resque
|
51
|
-
prerelease: false
|
52
|
-
version_requirements: *id002
|
53
53
|
- !ruby/object:Gem::Dependency
|
54
|
-
|
54
|
+
prerelease: false
|
55
|
+
name: resque-scheduler
|
56
|
+
version_requirements: &id003 !ruby/object:Gem::Requirement
|
55
57
|
none: false
|
56
58
|
requirements:
|
57
59
|
- - ">="
|
@@ -60,12 +62,12 @@ dependencies:
|
|
60
62
|
segments:
|
61
63
|
- 0
|
62
64
|
version: "0"
|
65
|
+
requirement: *id003
|
63
66
|
type: :runtime
|
64
|
-
name: resque-scheduler
|
65
|
-
prerelease: false
|
66
|
-
version_requirements: *id003
|
67
67
|
- !ruby/object:Gem::Dependency
|
68
|
-
|
68
|
+
prerelease: false
|
69
|
+
name: redis_support
|
70
|
+
version_requirements: &id004 !ruby/object:Gem::Requirement
|
69
71
|
none: false
|
70
72
|
requirements:
|
71
73
|
- - ">="
|
@@ -74,10 +76,8 @@ dependencies:
|
|
74
76
|
segments:
|
75
77
|
- 0
|
76
78
|
version: "0"
|
79
|
+
requirement: *id004
|
77
80
|
type: :runtime
|
78
|
-
name: redis_support
|
79
|
-
prerelease: false
|
80
|
-
version_requirements: *id004
|
81
81
|
description: simple mapreduce framework using redis and resque
|
82
82
|
email: john@doloreslabs.com
|
83
83
|
executables: []
|
@@ -102,9 +102,9 @@ files:
|
|
102
102
|
- lib/mapredus/support.rb
|
103
103
|
- LICENSE
|
104
104
|
- README.md
|
105
|
-
- spec/helper.rb
|
106
105
|
- spec/helper_classes.rb
|
107
106
|
- spec/mapredus_spec.rb
|
107
|
+
- spec/helper.rb
|
108
108
|
has_rdoc: true
|
109
109
|
homepage: http://github.com/dolores/mapredus
|
110
110
|
licenses: []
|
@@ -140,6 +140,6 @@ signing_key:
|
|
140
140
|
specification_version: 3
|
141
141
|
summary: mapredus initial
|
142
142
|
test_files:
|
143
|
-
- spec/helper.rb
|
144
143
|
- spec/helper_classes.rb
|
145
144
|
- spec/mapredus_spec.rb
|
145
|
+
- spec/helper.rb
|