mapredus 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README.md +227 -0
- data/lib/mapredus/filesystem.rb +43 -0
- data/lib/mapredus/finalizer.rb +33 -0
- data/lib/mapredus/inputter.rb +31 -0
- data/lib/mapredus/keys.rb +86 -0
- data/lib/mapredus/mapper.rb +27 -0
- data/lib/mapredus/master.rb +182 -0
- data/lib/mapredus/outputter.rb +42 -0
- data/lib/mapredus/process.rb +366 -0
- data/lib/mapredus/reducer.rb +39 -0
- data/lib/mapredus/support.rb +56 -0
- data/lib/mapredus.rb +106 -0
- data/spec/helper.rb +47 -0
- data/spec/helper_classes.rb +102 -0
- data/spec/mapredus_spec.rb +295 -0
- metadata +144 -0
@@ -0,0 +1,366 @@
|
|
1
|
+
module MapRedus
  # This is what keeps track of our map reduce processes
  #
  # We use a redis key to identify the id of map reduce process
  # the value of the redis object is a json object which contains:
  #
  # {
  #   mapper : mapclass,
  #   reducer : reduceclass,
  #   finalizer : finalizerclass,
  #   partitioner : <not supported>,
  #   combiner : <not supported>,
  #   ordered : true_or_false ## ensures ordering keys from the map output --> [ order, key, value ],
  #   synchronous : true_or_false ## runs the process synchronously or not (generally used for testing)
  #   result_timeout : length of time a result is saved ## 3600 * 24
  #   keyname : the location to the save the result of the process (cache location)
  #   state : the current state of the process (shouldn't be set by the process and starts off as nil)
  # }
  #
  # The user has the ability in subclassing this class to create extra features if needed
  #
  class Process
    # Public: Keep track of information that may show up as the redis json value
    # This is so we know exactly what might show up in the json hash
    READERS = [:pid]
    ATTRS = [:inputter, :mapper, :reducer, :finalizer, :outputter, :ordered, :synchronous, :result_timeout, :keyname, :state]
    READERS.each { |r| attr_reader r }
    ATTRS.each { |a| attr_accessor a }

    DEFAULT_TIME = 3600 * 24

    def initialize(pid, json_info)
      @pid = pid
      read(json_info)
    end

    # Populate the process attributes from a decoded json hash.
    #
    # json_info - Hash with String or Symbol keys (see class comment).
    #
    # Missing values fall back to defaults: result_timeout to DEFAULT_TIME,
    # state to NOT_STARTED, outputter to MapRedus::Outputter.
    def read(json_info)
      @inputter = Helper.class_get(json_helper(json_info, :inputter))
      @mapper = Helper.class_get(json_helper(json_info, :mapper))
      @reducer = Helper.class_get(json_helper(json_info, :reducer))
      @finalizer = Helper.class_get(json_helper(json_info, :finalizer))
      @ordered = json_helper(json_info, :ordered)
      @synchronous = json_helper(json_info, :synchronous)
      @result_timeout = json_helper(json_info, :result_timeout) || DEFAULT_TIME
      @keyname = json_helper(json_info, :keyname)
      @state = json_helper(json_info, :state) || NOT_STARTED
      @outputter = json_helper(json_info, :outputter)
      @outputter = @outputter ? Helper.class_get(@outputter) : MapRedus::Outputter
    end

    # Look up key in the hash under either its String or Symbol form,
    # since the hash may come from a json decode (string keys) or from
    # ruby code (symbol keys).
    def json_helper(json_info, key)
      json_info[key.to_s] || json_info[key.to_sym]
    end

    def to_s; to_json; end

    # Returns a Hash (symbol keys) of every attribute in ATTRS + READERS.
    def to_hash
      (ATTRS + READERS).inject({}) do |h, attr|
        h[attr] = send(attr)
        h
      end
    end

    def to_json
      Helper.encode(to_hash)
    end

    # Persist this process: register the pid in the process set and store
    # the json spec under the pid key. Returns self for chaining.
    def save
      FileSystem.sadd( ProcessInfo.processes, @pid )
      FileSystem.save( ProcessInfo.pid(@pid), to_json )
      self
    end

    # Assign the given attributes (via their writers) and persist.
    def update(attrs = {})
      attrs.each do |attr, val|
        send("#{attr}=", val)
      end
      save
    end

    # Re-read this process's spec from redis. Returns self.
    def reload
      read(Helper.decode(FileSystem.get(ProcessInfo.pid(@pid))))
      self
    end

    # This will not delete if the master is working
    # It can't get ahold of the files to shred while the master is working
    #
    # if safe is set to false, this will delete all the redis stores associated
    # with this process, but will not kill the process from the queue, if it is
    # on the queue. The process operations will fail to work when its data is deleted
    #
    # Examples
    #   delete(safe)
    #   # => true or false
    #
    # Returns true as long as the master is not working.
    def delete(safe = true)
      return false if (safe && Master.working?(@pid))
      FileSystem.keys("mapredus:process:#{@pid}*").each do |k|
        FileSystem.del(k)
      end
      FileSystem.srem(ProcessInfo.processes, @pid)
      # reset the pid counter once no processes remain, so pids don't grow forever
      FileSystem.set(ProcessInfo.processes_count, 0) if( 0 == FileSystem.scard(ProcessInfo.processes) )
      true
    end

    # Iterates through each map key and its reduced values.
    #
    # Example
    #   each_key_reduced_value(pid)
    #
    # Returns nothing.
    def each_key_reduced_value
      map_keys.each do |key|
        reduce_values(key).each do |value|
          yield key, value
        end
      end
    end

    # Iterates through each map key and its (non-reduced) map values.
    #
    # Example
    #   each_key_nonreduced_value(pid)
    #
    # Returns nothing.
    def each_key_nonreduced_value
      map_keys.each do |key|
        map_values(key).each do |value|
          yield key, value
        end
      end
    end

    # Kick off the map reduce process on the given data object.
    def run( data_object, synchronous = false )
      update(:synchronous => synchronous)
      Master.mapreduce( self, data_object )
      true
    end

    # TODO:
    # Should also have some notion of whether the process is completed or not
    # since the master might not be working, but the process is not yet complete
    # so it is still running
    def running?
      Master.working?(@pid)
    end

    # Change the process state
    # if the process is not running and is not synchronous
    #
    # Examples
    #   process.next_state(pid)
    #
    # returns the state that the process switched to (or stays the same)
    def next_state
      if((not running?) and (not @synchronous))
        new_state = STATE_MACHINE[self.state]
        update(:state => new_state)
        # enslave the master for the new state only when a handler exists
        method = "enslave_#{new_state}".to_sym
        Master.send(method, self) if( Master.respond_to?(method) )
        new_state
      end
    end

    ### The following functions deal with keys/values produced during the
    ### running of a process

    # Emissions, when we get map/reduce results back we emit these
    # to be stored in our file system (redis)
    #
    # key_value - The key, value
    #
    # Examples
    #   emit_intermediate(key, value)
    #   # =>
    #   emit_intermediate(rank, key, value)
    #
    # Returns true on success, raises on a hash collision between two
    # distinct keys.
    def emit_intermediate(*key_value)
      if( not @ordered )
        key, value = key_value
        FileSystem.sadd( ProcessInfo.keys(@pid), key )
        hashed_key = Helper.hash(key)
        FileSystem.rpush( ProcessInfo.map(@pid, hashed_key), value )
      else
        # if there's an order for the process then we should use a zset above
        # ordered process's map emits [rank, key, value]
        #
        rank, key, value = key_value
        FileSystem.zadd( ProcessInfo.keys(@pid), rank, key )
        hashed_key = Helper.hash(key)
        FileSystem.rpush( ProcessInfo.map(@pid, hashed_key), value )
      end
      raise "Key Collision: key:#{key}, #{key.class} => hashed key:#{hashed_key}" if key_collision?(hashed_key, key)
      true
    end

    # Push a reduced value onto the reduce list for the given key.
    def emit(key, reduce_val)
      hashed_key = Helper.hash(key)
      FileSystem.rpush( ProcessInfo.reduce(@pid, hashed_key), reduce_val )
    end

    # True when hashed_key is already claimed by a *different* key.
    # setnx claims the hash slot atomically; if the claim fails we compare
    # against the stored key (as a string, since redis stores strings).
    def key_collision?(hashed_key, key)
      not ( FileSystem.setnx( ProcessInfo.hash_to_key(@pid, hashed_key), key ) ||
            FileSystem.get( ProcessInfo.hash_to_key(@pid, hashed_key) ) == key.to_s )
    end

    # Saves the result to the specified keyname, using the specified outputter
    #
    # Example
    #   (mapreduce:process:result:KEYNAME)
    #   OR
    #   process:pid:result
    #
    # The client must ensure the the result will not be affected when to_s is applied
    # since redis stores all values as strings
    #
    # Returns true on success.
    def save_result(result)
      res = @outputter.encode(result)
      FileSystem.save(ProcessInfo.result(@pid), res)
      FileSystem.save(ProcessInfo.result_cache(@keyname), res, @result_timeout) if @keyname
      true
    end

    # Decode the cached result stored under this process's keyname.
    def get_saved_result
      @outputter.decode(Process.get_saved_result(@keyname))
    end

    def delete_saved_result
      Process.delete_saved_result(@keyname)
    end

    # Keys that the map operation produced
    #
    # Examples
    #   map_keys
    #   # =>
    #
    # Returns the Keys.
    def map_keys
      if( not @ordered )
        FileSystem.smembers( ProcessInfo.keys(@pid) )
      else
        FileSystem.zrange( ProcessInfo.keys(@pid), 0, -1 )
      end
    end

    # Number of map values emitted for the given key.
    def num_values(key)
      hashed_key = Helper.hash(key)
      FileSystem.llen( ProcessInfo.map(@pid, hashed_key) )
    end

    # values that the map operation produced, for a key
    #
    # Examples
    #   map_values(key)
    #   # =>
    #
    # Returns the values.
    def map_values(key)
      hashed_key = Helper.hash(key)
      FileSystem.lrange( ProcessInfo.map(@pid, hashed_key), 0, -1 )
    end

    # values that the reduce operation produced, for a key
    #
    # Examples
    #   reduce_values(key)
    #   # =>
    #
    # Returns the values.
    def reduce_values(key)
      hashed_key = Helper.hash(key)
      FileSystem.lrange( ProcessInfo.reduce(@pid, hashed_key), 0, -1 )
    end

    # Map and Reduce are strings naming the Mapper and Reducer
    # classes we want to run our map reduce with.
    #
    # For instance
    #   Mapper = "Mapper"
    #   Reducer = "Reducer"
    #
    # Default finalizer
    #   "MapRedus::Finalizer"
    #
    # Returns the new process id.
    def self.create( *args )
      new_pid = get_available_pid

      spec = specification(*args)
      return nil unless spec

      Process.new(new_pid, spec).save
    end

    # Subclasses must override this to return the json spec hash.
    def self.specification(*args)
      raise ProcessSpecificationError
    end

    def self.info(pid)
      FileSystem.keys(ProcessInfo.pid(pid) + "*")
    end

    # Load a saved process by pid. Returns nil when no spec is stored.
    def self.open(pid)
      spec = Helper.decode( FileSystem.get(ProcessInfo.pid(pid)) )
      spec && Process.new( pid, spec )
    end

    # Find out what map reduce processes are out there
    #
    # Examples
    #   FileSystem::ps
    #
    # Returns a list of the map reduce process ids
    def self.ps
      FileSystem.smembers(ProcessInfo.processes)
    end

    # Find out what map reduce processes are out there
    #
    # Examples
    #   FileSystem::get_available_pid
    #
    # Returns an available pid.
    def self.get_available_pid
      FileSystem.incrby(ProcessInfo.processes_count, 1 + rand(20))
    end

    # Given a result keyname, fetch the cached (encoded) result.
    #
    # FIX(review): this class method was missing — the instance method
    # get_saved_result above calls Process.get_saved_result(@keyname),
    # which previously raised NoMethodError.
    #
    # Examples
    #   Process.get_saved_result(key)
    #
    # Returns the raw encoded result string (or nil).
    def self.get_saved_result(keyname)
      FileSystem.get( ProcessInfo.result_cache(keyname) )
    end

    # Given a result keyname, delete the result
    #
    # Examples
    #   Process.delete_saved_result(key)
    def self.delete_saved_result(keyname)
      FileSystem.del( ProcessInfo.result_cache(keyname) )
    end

    # Remove redis keys associated with this process if the Master isn't working.
    #
    # potentially is very expensive.
    #
    # Example
    #   Process::kill(pid)
    #   # => 1
    #
    # Returns the number of killed slaves (from Master.emancipate).
    def self.kill(pid)
      num_killed = Master.emancipate(pid)
      proc = Process.open(pid)
      proc.delete if proc
      num_killed
    end

    def self.kill_all
      ps.each do |pid|
        kill(pid)
      end
      FileSystem.del(ProcessInfo.processes)
      FileSystem.del(ProcessInfo.processes_count)
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module MapRedus
  # Reduce is a function that takes in "all" the values for a single given key
  # and outputs a list of values or a single value that usually "reduces"
  # the initial given value set.
  #
  # The output of the reduce shall always be
  #   reduce(values) = [ reduced value, reduced value, ... ]
  # and it will often only be a single element array
  #
  # The input values and the output values of the reduce will always
  # be a string. As described in the paper, it is up to the client
  # to define how to deal with this restriction.
  #
  class Reducer < QueueProcess
    #
    # After a recoverable fail this describes how much time we shall wait before
    # readding the reducer back on to the queue.
    #
    DEFAULT_WAIT = 10 # seconds
    def self.wait; DEFAULT_WAIT; end

    # Subclasses implement reduce, yielding each reduced value.
    def self.reduce(values); raise InvalidReducer; end

    # Doesn't handle redundant workers and fault tolerance
    #
    # TODO: Resque::AutoRetry might mess this up.
    def self.perform(pid, key)
      process = Process.open(pid)
      reduce(process.map_values(key)) do |reduce_val|
        process.emit( key, reduce_val )
      end
    rescue MapRedus::RecoverableFail
      Master.enslave_later_reduce(process, key)
    ensure
      Master.free_slave(pid)
      # FIX(review): Process.open returns nil when no spec is stored for pid;
      # without this guard the ensure clause raised NoMethodError on nil.
      process.next_state if process
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module MapRedus
  module Support
    class MapRedusRunnerError < StandardError; end
    class DuplicateProcessDefinitionError < MapRedusRunnerError ; end

    # Runner dispatches `mapreduce.<process_name>` calls to the process
    # methods that mapreduce_process defines on this class.
    class Runner
      attr_reader :process
      def initialize(class_name)
        @class = class_name
      end

      # Route `runner.foo(...)` to the generated method `<class>_foo` when
      # one has been defined; otherwise fall through to NoMethodError.
      def method_missing(method, *args, &block)
        mr_process = "#{@class}_#{method.to_s}"
        if self.respond_to?(mr_process)
          self.send(mr_process, *args, &block)
        else
          super(method, *args, &block)
        end
      end
    end

    # Memoized per-instance runner, namespaced by the including class's name
    # (non-word characters replaced so the name is a valid method prefix).
    def mapreduce
      @mapreduce_runner ||= Runner.new(self.class.to_s.gsub(/\W/,"_"))
    end

    module ClassMethods
      # Define a named mapredus process for the including class.
      #
      # process_name           - Symbol/String name used as mapreduce.<name>.
      # mapredus_process_class - the MapRedus::Process subclass to create.
      # result_store           - redis key template for the result cache.
      # opts                   - reserved for future options (unused).
      #
      # Raises DuplicateProcessDefinitionError when the process was already
      # defined for this class.
      def mapreduce_process( process_name, mapredus_process_class, result_store, opts = {})
        runner_self = Runner
        class_name = self.to_s.gsub(/\W/,"_")

        global_process_name = "#{class_name}_#{process_name.to_s}"

        # FIX(review): was `runner_self.methods.include?(global_process_name)`,
        # which never matched — `methods` returns Symbols on 1.9+ (a String
        # never compares equal) and lists class methods, while define_method
        # below creates *instance* methods. method_defined? checks instance
        # methods and accepts Strings. Also fixed the misspelled
        # DuplicateProcessDefintionError constant, which raised NameError.
        if runner_self.method_defined?(global_process_name)
          raise DuplicateProcessDefinitionError
        end

        keyname = "mapredus_key_#{global_process_name}"
        RedisSupport.redis_key( keyname, result_store )

        runner_self.send( :define_method, global_process_name ) do |data, *var|
          @process = mapredus_process_class.create
          @process.update(:keyname => RedisSupport::Keys.send( keyname, *var ))
          @process.run(data)
        end

        runner_self.send( :define_method, "#{global_process_name}_result" ) do |*outputter_args|
          @process.outputter.decode(@process.keyname, *outputter_args)
        end
      end
    end

    def self.included(model)
      model.extend ClassMethods
    end
  end
end
|
data/lib/mapredus.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
require 'redis'
|
2
|
+
require 'redis_support'
|
3
|
+
require 'resque'
|
4
|
+
require 'resque_scheduler'
|
5
|
+
|
6
|
+
module MapRedus
  include RedisSupport

  # Raised when a QueueProcess is put on the queue without a perform method.
  class InvalidProcess < NotImplementedError
    def initialize
      super("MapRedus QueueProcess: need to have perform method defined")
    end
  end

  # Raised when a Process subclass never defines its specification.
  class ProcessSpecificationError < InvalidProcess
    def initialize
      super("MapRedus Process: need to have the specification defined")
    end
  end

  # Raised when a Mapper subclass never defines map.
  class InvalidMapper < NotImplementedError
    def initialize
      super("MapRedus Mapper: need to have map method defined")
    end
  end

  # Raised when a Reducer subclass never defines reduce.
  class InvalidReducer < NotImplementedError
    def initialize
      super("MapRedus Reducer: need to have reduce method defined")
    end
  end

  # Raised when an InputStream never defines scan.
  class InvalidInputStream < NotImplementedError
    def initialize
      super("MapRedus InputStream: need to have scan method defined")
    end
  end

  # NOTE(review): this reopens InvalidProcess (defined above) and replaces
  # its message, so the "perform method defined" message is lost. It looks
  # like this was meant to be a distinct class — confirm before renaming.
  class InvalidProcess < NotImplementedError
    def initialize
      super("MapRedus Process Creation Failed: Specifications were not specified")
    end
  end

  # Raised by operations that failed but may safely be retried.
  class RecoverableFail < StandardError
    def initialize
      super("MapRedus Operation Failed: but it is recoverable")
    end
  end

  # All Queue Processes should have a function called perform
  # ensuring that when the class is put on the resque queue it can perform its work
  #
  # Caution: defines redis, which is also defined in RedisSupport
  #
  class QueueProcess
    def self.queue
      :mapredus
    end

    def self.perform(*args)
      raise InvalidProcess
    end
  end

  # TODO: When you send work to a worker using a mapper you define,
  # the worker won't have that class name defined, unless it was started up
  # with the class loaded
  #
  def register_reducer(klass); end;
  def register_mapper(klass); end;

  class Helper
    # resque helpers defines
    #   redis, encode, decode, classify, constantize
    #
    # This is extended here because we want to use the encode and decode function
    # when we interact with resque queues
    extend Resque::Helpers

    # Defines a hash by taking the absolute value of ruby's string
    # hash to rid the dashes since redis keys should not contain any.
    #
    # key - The key to be hashed.
    #
    # Examples
    #
    #   Support::hash( key )
    #   # => '8dd8hflf8dhod8doh9hef'
    #
    # Returns the hash.
    def self.hash(key)
      key.to_s.hash.abs.to_s(16)
    end

    # Returns the classname of the namespaced class.
    #
    # Examples
    #
    #   Support::class_get( Super::Long::Namespace::ClassName )
    #   # => 'ClassName'
    #
    # Returns the class name.
    def self.class_get(string)
      constantize(string)
    end
  end
end
|
96
|
+
|
97
|
+
require 'mapredus/keys'
|
98
|
+
require 'mapredus/process'
|
99
|
+
require 'mapredus/filesystem'
|
100
|
+
require 'mapredus/master'
|
101
|
+
require 'mapredus/mapper'
|
102
|
+
require 'mapredus/reducer'
|
103
|
+
require 'mapredus/finalizer'
|
104
|
+
require 'mapredus/support'
|
105
|
+
require 'mapredus/outputter'
|
106
|
+
require 'mapredus/inputter'
|
data/spec/helper.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'rubygems'
require 'spec'

dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(dir, '..', 'lib'))
$LOAD_PATH.unshift(dir)
require 'mapredus'

#
# make sure we can run redis
#
if !system("which redis-server")
  puts '', "** can't find `redis-server` in your path"
  abort ''
end

#
# start our own redis when the tests start,
# kill it when they end (redis is run as a daemon)
#
puts "Starting redis for testing at localhost:9736..."
`redis-server #{dir}/redis-test.conf`

at_exit do
  #
  # hope that no other processes have redis-test in the name...
  # TODO: fixme
  #
  pid = `ps -A -o pid,command | grep [r]edis-test`.split(" ")[0]
  puts "Killing test redis server..."
  `rm -f #{dir}/dump.rdb`
  # FIX(review): when grep matches nothing, pid is nil and nil.to_i == 0 —
  # Process.kill("KILL", 0) signals our whole process group, killing the
  # test runner itself. Only kill when a pid was actually found.
  Process.kill("KILL", pid.to_i) if pid
end

#
# Set the redis server
#
MapRedus.redis = 'localhost:9736:0'
Resque.redis = MapRedus.redis
require 'resque/failure/redis'
Resque::Failure.backend = Resque::Failure::Redis

require 'helper_classes'

# Drain every queue once: run a non-forking worker until no jobs remain.
def work_off
  Resque::Worker.new("*").work(0)
end
|