mapredus 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README.md +227 -0
- data/lib/mapredus/filesystem.rb +43 -0
- data/lib/mapredus/finalizer.rb +33 -0
- data/lib/mapredus/inputter.rb +31 -0
- data/lib/mapredus/keys.rb +86 -0
- data/lib/mapredus/mapper.rb +27 -0
- data/lib/mapredus/master.rb +182 -0
- data/lib/mapredus/outputter.rb +42 -0
- data/lib/mapredus/process.rb +366 -0
- data/lib/mapredus/reducer.rb +39 -0
- data/lib/mapredus/support.rb +56 -0
- data/lib/mapredus.rb +106 -0
- data/spec/helper.rb +47 -0
- data/spec/helper_classes.rb +102 -0
- data/spec/mapredus_spec.rb +295 -0
- metadata +144 -0
@@ -0,0 +1,366 @@
|
|
1
|
+
module MapRedus

  # This is what keeps track of our map reduce processes
  #
  # We use a redis key to identify the id of map reduce process
  # the value of the redis object is a json object which contains:
  #
  # {
  #   mapper : mapclass,
  #   reducer : reduceclass,
  #   finalizer : finalizerclass,
  #   partitioner : <not supported>,
  #   combiner : <not supported>,
  #   ordered : true_or_false ## ensures ordering keys from the map output --> [ order, key, value ],
  #   synchronous : true_or_false ## runs the process synchronously or not (generally used for testing)
  #   result_timeout : length of time a result is saved ## 3600 * 24
  #   keyname : the location to the save the result of the process (cache location)
  #   state : the current state of the process (shouldn't be set by the process and starts off as nil)
  # }
  #
  # The user has the ability in subclassing this class to create extra features if needed
  #
  class Process
    # Public: Keep track of information that may show up as the redis json value
    # This is so we know exactly what might show up in the json hash
    READERS = [:pid]
    ATTRS = [:inputter, :mapper, :reducer, :finalizer, :outputter, :ordered, :synchronous, :result_timeout, :keyname, :state]
    READERS.each { |r| attr_reader r }
    ATTRS.each { |a| attr_accessor a }

    DEFAULT_TIME = 3600 * 24

    def initialize(pid, json_info)
      @pid = pid
      read(json_info)
    end

    # Populate this process's attributes from a decoded json hash.
    # Keys may arrive as strings (from redis) or symbols (from ruby callers).
    def read(json_info)
      @inputter = Helper.class_get(json_helper(json_info, :inputter))
      @mapper = Helper.class_get(json_helper(json_info, :mapper))
      @reducer = Helper.class_get(json_helper(json_info, :reducer))
      @finalizer = Helper.class_get(json_helper(json_info, :finalizer))
      @ordered = json_helper(json_info, :ordered)
      @synchronous = json_helper(json_info, :synchronous)
      @result_timeout = json_helper(json_info, :result_timeout) || DEFAULT_TIME
      @keyname = json_helper(json_info, :keyname)
      @state = json_helper(json_info, :state) || NOT_STARTED
      @outputter = json_helper(json_info, :outputter)
      @outputter = @outputter ? Helper.class_get(@outputter) : MapRedus::Outputter
    end

    # Fetch a value by string key first, then symbol key.
    def json_helper(json_info, key)
      json_info[key.to_s] || json_info[key.to_sym]
    end

    def to_s; to_json; end

    def to_hash
      (ATTRS + READERS).inject({}) do |h, attr|
        h[attr] = send(attr)
        h
      end
    end

    def to_json
      Helper.encode(to_hash)
    end

    # Persist this process: register the pid in the process set and store
    # the json spec under the pid key. Returns self for chaining.
    def save
      FileSystem.sadd( ProcessInfo.processes, @pid )
      FileSystem.save( ProcessInfo.pid(@pid), to_json )
      self
    end

    def update(attrs = {})
      attrs.each do |attr, val|
        send("#{attr}=", val)
      end
      save
    end

    def reload
      read(Helper.decode(FileSystem.get(ProcessInfo.pid(@pid))))
      self
    end

    # This will not delete if the master is working
    # It can't get ahold of the files to shred while the master is working
    #
    # if safe is set to false, this will delete all the redis stores associated
    # with this process, but will not kill the process from the queue, if it is
    # on the queue. The process operations will fail to work when its data is deleted
    #
    # Examples
    #   delete(safe)
    #   # => true or false
    #
    # Returns true as long as the master is not working.
    def delete(safe = true)
      return false if (safe && Master.working?(@pid))
      FileSystem.keys("mapredus:process:#{@pid}*").each do |k|
        FileSystem.del(k)
      end
      FileSystem.srem(ProcessInfo.processes, @pid)
      FileSystem.set(ProcessInfo.processes_count, 0) if( 0 == FileSystem.scard(ProcessInfo.processes) )
      true
    end

    # Iterates through the key, values
    #
    # Example
    #   each_key_reduced_value(pid)
    #
    # Returns nothing.
    def each_key_reduced_value
      map_keys.each do |key|
        reduce_values(key).each do |value|
          yield key, value
        end
      end
    end

    # Iterates through the key, values
    #
    # Example
    #   each_key_nonreduced_value(pid)
    #
    # Returns nothing.
    def each_key_nonreduced_value
      map_keys.each do |key|
        map_values(key).each do |value|
          yield key, value
        end
      end
    end

    def run( data_object, synchronous = false )
      update(:synchronous => synchronous)
      Master.mapreduce( self, data_object )
      true
    end

    # TODO:
    # Should also have some notion of whether the process is completed or not
    # since the master might not be working, but the process is not yet complete
    # so it is still running
    def running?
      Master.working?(@pid)
    end

    # Change the process state
    # if the process is not running and is not synchronous
    #
    # Examples
    #   process.next_state(pid)
    #
    # returns the state that the process switched to (or stays the same)
    def next_state
      if((not running?) and (not @synchronous))
        new_state = STATE_MACHINE[self.state]
        update(:state => new_state)
        method = "enslave_#{new_state}".to_sym
        Master.send(method, self) if( Master.respond_to?(method) )
        new_state
      end
    end

    ### The following functions deal with keys/values produced during the
    ### running of a process

    # Emissions, when we get map/reduce results back we emit these
    # to be stored in our file system (redis)
    #
    # key_value - The key, value
    #
    # Examples
    #   emit_intermediate(key, value)
    #   # =>
    #   emit_intermediate(rank, key, value)
    #
    # Returns the true on success.
    def emit_intermediate(*key_value)
      if( not @ordered )
        key, value = key_value
        FileSystem.sadd( ProcessInfo.keys(@pid), key )
        hashed_key = Helper.hash(key)
        FileSystem.rpush( ProcessInfo.map(@pid, hashed_key), value )
      else
        # if there's an order for the process then we should use a zset above
        # ordered process's map emits [rank, key, value]
        #
        rank, key, value = key_value
        FileSystem.zadd( ProcessInfo.keys(@pid), rank, key )
        hashed_key = Helper.hash(key)
        FileSystem.rpush( ProcessInfo.map(@pid, hashed_key), value )
      end
      raise "Key Collision: key:#{key}, #{key.class} => hashed key:#{hashed_key}" if key_collision?(hashed_key, key)
      true
    end

    def emit(key, reduce_val)
      hashed_key = Helper.hash(key)
      FileSystem.rpush( ProcessInfo.reduce(@pid, hashed_key), reduce_val )
    end

    # Two distinct keys colliding on the same hash is an error; setnx stores
    # the first key for a hash, and any later differing key for that hash
    # is a collision.
    def key_collision?(hashed_key, key)
      not ( FileSystem.setnx( ProcessInfo.hash_to_key(@pid, hashed_key), key ) ||
            FileSystem.get( ProcessInfo.hash_to_key(@pid, hashed_key) ) == key.to_s )
    end

    # Saves the result to the specified keyname, using the specified outputter
    #
    # Example
    #   (mapreduce:process:result:KEYNAME)
    #   OR
    #   process:pid:result
    #
    # The client must ensure the the result will not be affected when to_s is applied
    # since redis stores all values as strings
    #
    # Returns true on success.
    def save_result(result)
      res = @outputter.encode(result)
      FileSystem.save(ProcessInfo.result(@pid), res)
      FileSystem.save(ProcessInfo.result_cache(@keyname), res, @result_timeout) if @keyname
      true
    end

    def get_saved_result
      @outputter.decode(Process.get_saved_result(@keyname))
    end

    def delete_saved_result
      Process.delete_saved_result(@keyname)
    end

    # Keys that the map operation produced
    #
    # Examples
    #   map_keys
    #   # =>
    #
    # Returns the Keys.
    def map_keys
      if( not @ordered )
        FileSystem.smembers( ProcessInfo.keys(@pid) )
      else
        FileSystem.zrange( ProcessInfo.keys(@pid), 0, -1 )
      end
    end

    def num_values(key)
      hashed_key = Helper.hash(key)
      FileSystem.llen( ProcessInfo.map(@pid, hashed_key) )
    end

    # values that the map operation produced, for a key
    #
    # Examples
    #   map_values(key)
    #   # =>
    #
    # Returns the values.
    def map_values(key)
      hashed_key = Helper.hash(key)
      FileSystem.lrange( ProcessInfo.map(@pid, hashed_key), 0, -1 )
    end

    # values that the reduce operation produced, for a key
    #
    # Examples
    #   reduce_values(key)
    #   # =>
    #
    # Returns the values.
    def reduce_values(key)
      hashed_key = Helper.hash(key)
      FileSystem.lrange( ProcessInfo.reduce(@pid, hashed_key), 0, -1 )
    end

    # Map and Reduce are strings naming the Mapper and Reducer
    # classes we want to run our map reduce with.
    #
    # For instance
    #   Mapper = "Mapper"
    #   Reducer = "Reducer"
    #
    # Default finalizer
    #   "MapRedus::Finalizer"
    #
    # Returns the new process id.
    def self.create( *args )
      new_pid = get_available_pid

      spec = specification(*args)
      return nil unless spec

      Process.new(new_pid, spec).save
    end

    def self.specification(*args)
      raise ProcessSpecificationError
    end

    def self.info(pid)
      FileSystem.keys(ProcessInfo.pid(pid) + "*")
    end

    def self.open(pid)
      spec = Helper.decode( FileSystem.get(ProcessInfo.pid(pid)) )
      spec && Process.new( pid, spec )
    end

    # Find out what map reduce processes are out there
    #
    # Examples
    #   FileSystem::ps
    #
    # Returns a list of the map reduce process ids
    def self.ps
      FileSystem.smembers(ProcessInfo.processes)
    end

    # Find out what map reduce processes are out there
    #
    # Examples
    #   FileSystem::get_available_pid
    #
    # Returns an available pid.
    def self.get_available_pid
      FileSystem.incrby(ProcessInfo.processes_count, 1 + rand(20))
    end

    # Given a result keyname, fetch the cached result.
    #
    # BUGFIX: this class method was missing although the instance method
    # get_saved_result calls it (Process.get_saved_result(@keyname)), which
    # raised NoMethodError.  Reads the same result_cache key that
    # save_result writes and self.delete_saved_result deletes.
    #
    # Examples
    #   Process.get_saved_result(key)
    def self.get_saved_result(keyname)
      FileSystem.get( ProcessInfo.result_cache(keyname) )
    end

    # Given a result keyname, delete the result
    #
    # Examples
    #   Process.delete_saved_result(key)
    def self.delete_saved_result(keyname)
      FileSystem.del( ProcessInfo.result_cache(keyname) )
    end

    # Remove redis keys associated with this process if the Master isn't working.
    #
    # potentially is very expensive.
    #
    # Example
    #   Process::kill(pid)
    #   # => true
    #
    # Returns true on success.
    def self.kill(pid)
      num_killed = Master.emancipate(pid)
      proc = Process.open(pid)
      proc.delete if proc
      num_killed
    end

    def self.kill_all
      ps.each do |pid|
        kill(pid)
      end
      FileSystem.del(ProcessInfo.processes)
      FileSystem.del(ProcessInfo.processes_count)
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module MapRedus
  # Reduce is a function that takes in "all" the values for a single given key
  # and outputs a list of values or a single value that usually "reduces"
  # the initial given value set.
  #
  # The output of the reduce shall always be
  #   reduce(values) = [ reduced value, reduced value, ... ]
  # and it will often only be a single element array
  #
  # The input values and the output values of the reduce will always
  # be a string. As described in the paper, it is up to the client
  # to define how to deal with this restriction.
  #
  class Reducer < QueueProcess
    #
    # After a recoverable fail this describes how much time we shall wait before
    # readding the reducer back on to the queue.
    #
    DEFAULT_WAIT = 10 # seconds
    def self.wait; DEFAULT_WAIT; end

    # Subclasses must override; yields each reduced value for the key.
    def self.reduce(values); raise InvalidReducer; end

    # Doesn't handle redundant workers and fault tolerance
    #
    # TODO: Resque::AutoRetry might mess this up.
    def self.perform(pid, key)
      process = Process.open(pid)
      reduce(process.map_values(key)) do |reduce_val|
        process.emit( key, reduce_val )
      end
    rescue MapRedus::RecoverableFail
      Master.enslave_later_reduce(process, key)
    ensure
      Master.free_slave(pid)
      # BUGFIX: guard against a nil process (Process.open returns nil when
      # no spec exists, or may have raised before assignment); previously an
      # unconditional call here raised NoMethodError from the ensure block,
      # masking the original error.
      process.next_state if process
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module MapRedus
  module Support
    class MapRedusRunnerError < StandardError; end
    class DuplicateProcessDefinitionError < MapRedusRunnerError ; end

    # Runner is the per-instance dispatcher: mapreduce_process defines
    # class-namespaced instance methods on it, and method_missing lets
    # callers use the bare process name (runner.word_count => Foo_word_count).
    class Runner
      attr_reader :process

      def initialize(class_name)
        @class = class_name
      end

      def method_missing(method, *args, &block)
        mr_process = "#{@class}_#{method.to_s}"
        if self.respond_to?(mr_process)
          self.send(mr_process, *args, &block)
        else
          super(method, *args, &block)
        end
      end
    end

    # Memoized Runner keyed by the including class's name with non-word
    # characters replaced by underscores.
    def mapreduce
      @mapreduce_runner ||= Runner.new(self.class.to_s.gsub(/\W/,"_"))
    end

    module ClassMethods
      # Register a named mapredus process: defines <Class>_<name> and
      # <Class>_<name>_result instance methods on Runner, and a redis key
      # template for where the result is cached.
      def mapreduce_process( process_name, mapredus_process_class, result_store, opts = {})
        runner_self = Runner
        class_name = self.to_s.gsub(/\W/,"_")

        global_process_name = "#{class_name}_#{process_name.to_s}"

        # BUGFIX: the original checked Runner.methods (class-level methods,
        # returned as Symbols on Ruby >= 1.9) against a String, so duplicate
        # definitions were never detected — and the raise misspelled the
        # constant (DuplicateProcessDefintionError), which would NameError.
        # define_method below creates *instance* methods, so check those.
        if runner_self.instance_methods.map(&:to_s).include?(global_process_name)
          raise DuplicateProcessDefinitionError
        end

        keyname = "mapredus_key_#{global_process_name}"
        RedisSupport.redis_key( keyname, result_store )

        runner_self.send( :define_method, global_process_name ) do |data, *var|
          @process = mapredus_process_class.create
          @process.update(:keyname => RedisSupport::Keys.send( keyname, *var ))
          @process.run(data)
        end

        runner_self.send( :define_method, "#{global_process_name}_result" ) do |*outputter_args|
          @process.outputter.decode(@process.keyname, *outputter_args)
        end
      end
    end

    def self.included(model)
      model.extend ClassMethods
    end
  end
end
|
data/lib/mapredus.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
require 'redis'
require 'redis_support'
require 'resque'
require 'resque_scheduler'

module MapRedus
  include RedisSupport

  # Raised when a QueueProcess subclass fails to define perform.
  #
  # BUGFIX (two issues):
  # * InvalidProcess was defined twice in this file; the second definition
  #   silently reopened the class and replaced the message with
  #   "Process Creation Failed".  Consolidated to one definition.
  # * initialize took no arguments, but ProcessSpecificationError's
  #   initialize calls super("...") with a message, which raised
  #   ArgumentError instead of the intended error.  The message is now an
  #   optional parameter so subclasses can override it.
  class InvalidProcess < NotImplementedError
    def initialize(msg = "MapRedus QueueProcess: need to have perform method defined"); super(msg); end
  end

  class ProcessSpecificationError < InvalidProcess
    def initialize; super("MapRedus Process: need to have the specification defined");end
  end

  class InvalidMapper < NotImplementedError
    def initialize; super("MapRedus Mapper: need to have map method defined");end
  end

  class InvalidReducer < NotImplementedError
    def initialize; super("MapRedus Reducer: need to have reduce method defined");end
  end

  class InvalidInputStream < NotImplementedError
    def initialize; super("MapRedus InputStream: need to have scan method defined");end
  end

  class RecoverableFail < StandardError
    def initialize; super("MapRedus Operation Failed: but it is recoverable") ;end
  end

  # All Queue Processes should have a function called perform
  # ensuring that when the class is put on the resque queue it can perform its work
  #
  # Caution: defines redis, which is also defined in RedisSupport
  #
  class QueueProcess
    def self.queue; :mapredus; end
    def self.perform(*args); raise InvalidProcess; end
  end

  # TODO: When you send work to a worker using a mapper you define,
  # the worker won't have that class name defined, unless it was started up
  # with the class loaded
  #
  def register_reducer(klass); end;
  def register_mapper(klass); end;

  class Helper
    # resque helpers defines
    #   redis, encode, decode, classify, constantize
    #
    # This is extended here because we want to use the encode and decode function
    # when we interact with resque queues
    extend Resque::Helpers

    # Defines a hash by taking the absolute value of ruby's string
    # hash to rid the dashes since redis keys should not contain any.
    #
    # key - The key to be hashed.
    #
    # Examples
    #
    #   Support::hash( key )
    #   # => '8dd8hflf8dhod8doh9hef'
    #
    # Returns the hash.
    def self.hash( key )
      key.to_s.hash.abs.to_s(16)
    end

    # Returns the classname of the namespaced class.
    #
    # Examples
    #
    #   Support::class_get( Super::Long::Namespace::ClassName )
    #   # => 'ClassName'
    #
    # Returns the class name.
    def self.class_get(string)
      constantize(string)
    end
  end
end
|
96
|
+
|
97
|
+
require 'mapredus/keys'
|
98
|
+
require 'mapredus/process'
|
99
|
+
require 'mapredus/filesystem'
|
100
|
+
require 'mapredus/master'
|
101
|
+
require 'mapredus/mapper'
|
102
|
+
require 'mapredus/reducer'
|
103
|
+
require 'mapredus/finalizer'
|
104
|
+
require 'mapredus/support'
|
105
|
+
require 'mapredus/outputter'
|
106
|
+
require 'mapredus/inputter'
|
data/spec/helper.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'rubygems'
require 'spec'

dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(dir, '..', 'lib'))
$LOAD_PATH.unshift(dir)
require 'mapredus'

#
# make sure we can run redis
#
if !system("which redis-server")
  puts '', "** can't find `redis-server` in your path"
  abort ''
end

#
# start our own redis when the tests start,
# kill it when they end (redis is run as a daemon)
#
puts "Starting redis for testing at localhost:9736..."
`redis-server #{dir}/redis-test.conf`

at_exit do
  #
  # hope that no other processes have redis-test in the name...
  # TODO: fixme
  #
  pid = `ps -A -o pid,command | grep [r]edis-test`.split(" ")[0]
  puts "Killing test redis server..."
  `rm -f #{dir}/dump.rdb`
  # BUGFIX: when grep matches nothing, pid is nil and nil.to_i == 0 —
  # Process.kill("KILL", 0) would signal the entire current process group
  # (killing the test runner itself).  Only kill a real, positive pid.
  Process.kill("KILL", pid.to_i) if pid && pid.to_i > 0
end

#
# Set the redis server
#
MapRedus.redis = 'localhost:9736:0'
Resque.redis = MapRedus.redis
require 'resque/failure/redis'
Resque::Failure.backend = Resque::Failure::Redis

require 'helper_classes'

def work_off
  Resque::Worker.new("*").work(0)
end
|