scbi_mapreduce 0.0.37 → 0.0.38
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/lib/scbi_mapreduce/manager.rb +9 -6
- data/lib/scbi_mapreduce/work_manager.rb +208 -104
- data/lib/scbi_mapreduce/worker.rb +26 -4
- data/lib/scbi_mapreduce.rb +1 -1
- metadata +2 -2
data/History.txt
CHANGED
@@ -17,13 +17,12 @@ module ScbiMapreduce
|
|
17
17
|
|
18
18
|
class Manager
|
19
19
|
|
20
|
-
attr_accessor :checkpointing, :keep_order, :
|
20
|
+
attr_accessor :checkpointing, :keep_order, :retry_stuck_jobs, :exit_on_many_errors, :chunk_size
|
21
21
|
|
22
22
|
# initialize Manager
|
23
23
|
def initialize(server_ip, port, workers, work_manager_class,custom_worker_file,log_file=nil, init_env_file=nil)
|
24
24
|
@port=port
|
25
25
|
|
26
|
-
|
27
26
|
if log_file.nil?
|
28
27
|
log_file = File.join('logs','server_log.txt')
|
29
28
|
end
|
@@ -55,8 +54,9 @@ module ScbiMapreduce
|
|
55
54
|
|
56
55
|
@checkpointing=false
|
57
56
|
@keep_order=false
|
58
|
-
@
|
59
|
-
|
57
|
+
@retry_stuck_jobs=false
|
58
|
+
@exit_on_many_errors=true
|
59
|
+
|
60
60
|
@chunk_size=1
|
61
61
|
|
62
62
|
|
@@ -84,7 +84,6 @@ module ScbiMapreduce
|
|
84
84
|
|
85
85
|
@worker_launcher = WorkerLauncher.new(@ip,port,ip_list,@workers,custom_worker_file,log_file,init_env_file)
|
86
86
|
|
87
|
-
|
88
87
|
$SERVER_LOG.info("Local workers: #{@workers}")
|
89
88
|
$SERVER_LOG.info("Remote workers: #{@worker_names}")
|
90
89
|
|
@@ -101,11 +100,15 @@ module ScbiMapreduce
|
|
101
100
|
EM.error_handler{ |e|
|
102
101
|
$SERVER_LOG.error(e.message + ' => ' + e.backtrace.join("\n"))
|
103
102
|
}
|
103
|
+
|
104
|
+
# $SERVER_LOG.info("Installing INT and TERM traps in #{@work_manager_class}")
|
105
|
+
# Signal.trap("INT") { puts "TRAP INT";@work_manager_class.controlled_exit; EM.stop}
|
106
|
+
# Signal.trap("TERM") { puts "TRAP TERM";@work_manager_class.controlled_exit; EM.stop}
|
104
107
|
|
105
108
|
# start EM loop
|
106
109
|
EventMachine::run {
|
107
110
|
|
108
|
-
@work_manager_class.init_work_manager_internals(@checkpointing, @keep_order, @
|
111
|
+
@work_manager_class.init_work_manager_internals(@checkpointing, @keep_order, @retry_stuck_jobs,@exit_on_many_errors,@chunk_size)
|
109
112
|
|
110
113
|
evm=EventMachine::start_server @ip, @port, @work_manager_class
|
111
114
|
dir=Socket.unpack_sockaddr_in( EM.get_sockname( evm ))
|
@@ -14,27 +14,58 @@
|
|
14
14
|
|
15
15
|
module ScbiMapreduce
|
16
16
|
|
17
|
-
|
18
|
-
PENDING_TO_SAVE=100
|
19
17
|
|
18
|
+
PENDING_TO_SAVE=10
|
19
|
+
CHECKPOINT_FILE='scbi_mapreduce_checkpoint'
|
20
|
+
OLD_CHECKPOINT_FILE='old_scbi_mapreduce_checkpoint'
|
20
21
|
|
21
22
|
class WorkManagerData
|
22
23
|
|
23
24
|
@@job_id=1
|
25
|
+
@@longest_processing_time=0
|
24
26
|
|
25
27
|
attr_reader :job_identifier
|
26
|
-
attr_accessor :status, :data
|
28
|
+
attr_accessor :status, :data,:sent_time,:received_time
|
27
29
|
|
28
|
-
def initialize(
|
30
|
+
def initialize(objs)
|
29
31
|
|
30
32
|
@job_identifier=@@job_id
|
31
33
|
@@job_id+=1
|
32
|
-
@data=
|
34
|
+
@data=objs
|
35
|
+
|
36
|
+
sent!
|
37
|
+
@received_time=0
|
38
|
+
@processing_time=nil
|
39
|
+
end
|
40
|
+
|
41
|
+
def received!(objs)
|
42
|
+
@data=objs
|
43
|
+
@received_time=Time.now
|
44
|
+
@processing_time=@received_time-@sent_time
|
45
|
+
|
46
|
+
# save the longest processing time seen so far
|
47
|
+
@@longest_processing_time=[@@longest_processing_time,@processing_time].max
|
48
|
+
|
49
|
+
@status=:received
|
50
|
+
end
|
51
|
+
|
52
|
+
def sent!
|
33
53
|
@status=:running
|
54
|
+
@sent_time=Time.now
|
55
|
+
end
|
56
|
+
|
57
|
+
def stuck?
|
58
|
+
(@status==:running) && (@@longest_processing_time>0) && (processing_time>(@@longest_processing_time*2))
|
59
|
+
end
|
60
|
+
|
61
|
+
# return running or real processing time
|
62
|
+
def processing_time
|
63
|
+
return (@processing_time || (Time.now-@sent_time))
|
34
64
|
end
|
35
65
|
|
36
66
|
def inspect
|
37
|
-
|
67
|
+
time="; time: #{processing_time} seg"
|
68
|
+
return "WorkManagerData: #{@job_identifier} => #{@status} #{time}"
|
38
69
|
end
|
39
70
|
|
40
71
|
def self.job_id=(c)
|
@@ -53,7 +84,7 @@ module ScbiMapreduce
|
|
53
84
|
class WorkManager < EventMachine::Connection
|
54
85
|
|
55
86
|
include EM::P::ObjectProtocol
|
56
|
-
|
87
|
+
|
57
88
|
def self.init_work_manager
|
58
89
|
|
59
90
|
end
|
@@ -102,8 +133,9 @@ module ScbiMapreduce
|
|
102
133
|
|
103
134
|
############
|
104
135
|
|
105
|
-
def self.init_work_manager_internals(checkpointing, keep_order,
|
136
|
+
def self.init_work_manager_internals(checkpointing, keep_order, retry_stuck_jobs,exit_on_many_errors,chunk_size)
|
106
137
|
@@count = 0
|
138
|
+
@@want_to_exit=false
|
107
139
|
@@chunk_count = 0
|
108
140
|
@@workers = 0
|
109
141
|
@@max_workers = 0
|
@@ -113,13 +145,17 @@ module ScbiMapreduce
|
|
113
145
|
|
114
146
|
@@checkpointing=checkpointing
|
115
147
|
@@keep_order=keep_order
|
116
|
-
@@
|
148
|
+
@@retry_stuck_jobs=retry_stuck_jobs
|
117
149
|
@@exit_on_many_errors=exit_on_many_errors
|
118
150
|
|
119
151
|
# TODO - Implement a dynamic chunk_size
|
120
152
|
|
121
153
|
@@chunk_size=chunk_size
|
122
154
|
$SERVER_LOG.info "Processing in chunks of #{@@chunk_size} objects"
|
155
|
+
$SERVER_LOG.info "Checkpointing: #{@@checkpointing}"
|
156
|
+
$SERVER_LOG.info "Keeping output order: #{@@keep_order}"
|
157
|
+
$SERVER_LOG.info "Retrying stuck jobs: #{@@retry_stuck_jobs}"
|
158
|
+
$SERVER_LOG.info "Exiting on too many errors: #{@@exit_on_many_errors}"
|
123
159
|
|
124
160
|
@@checkpoint=0
|
125
161
|
if @@checkpointing
|
@@ -133,17 +169,28 @@ module ScbiMapreduce
|
|
133
169
|
return @@checkpoint
|
134
170
|
end
|
135
171
|
|
136
|
-
def
|
137
|
-
|
172
|
+
def remove_checkpoint
|
173
|
+
if File.exists?(CHECKPOINT_FILE)
|
174
|
+
checkpoint_file = FileUtils.mv(CHECKPOINT_FILE,OLD_CHECKPOINT_FILE)
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
138
178
|
|
179
|
+
def save_checkpoint
|
180
|
+
checkpoint_file = File.open(CHECKPOINT_FILE,'w')
|
181
|
+
|
139
182
|
if !@@running_jobs.empty?
|
140
|
-
|
183
|
+
checkpoint_value = @@running_jobs.first.job_identifier
|
141
184
|
else
|
142
|
-
|
185
|
+
checkpoint_value = WorkManagerData.job_id
|
143
186
|
end
|
144
|
-
|
187
|
+
|
188
|
+
$SERVER_LOG.info "Saving checkpoint: #{checkpoint_value}"
|
189
|
+
|
190
|
+
checkpoint_file.puts checkpoint_value
|
191
|
+
|
145
192
|
checkpoint_file.close
|
146
|
-
|
193
|
+
|
147
194
|
save_user_checkpoint
|
148
195
|
|
149
196
|
end
|
@@ -151,8 +198,8 @@ module ScbiMapreduce
|
|
151
198
|
def self.get_checkpoint
|
152
199
|
res = 0
|
153
200
|
begin
|
154
|
-
if File.exists?(
|
155
|
-
res=File.read(
|
201
|
+
if File.exists?(CHECKPOINT_FILE)
|
202
|
+
res=File.read(CHECKPOINT_FILE).chomp
|
156
203
|
# puts "read checkpoint #{res}"
|
157
204
|
|
158
205
|
res = res.to_i
|
@@ -176,42 +223,80 @@ module ScbiMapreduce
|
|
176
223
|
send_object(obj)
|
177
224
|
end
|
178
225
|
|
179
|
-
|
180
|
-
|
226
|
+
def print_running_jobs
|
227
|
+
jobs=@@running_jobs.map{|j| j.inspect}.join("\n")
|
228
|
+
$SERVER_LOG.debug("Running Jobs:\n#{jobs}")
|
229
|
+
end
|
181
230
|
|
182
|
-
|
231
|
+
def send_stuck_work
|
232
|
+
sent=false
|
183
233
|
|
184
|
-
@@
|
185
|
-
|
186
|
-
|
187
|
-
break
|
188
|
-
else
|
189
|
-
# add to obj array
|
190
|
-
objs << obj
|
191
|
-
end
|
192
|
-
end
|
234
|
+
if @@retry_stuck_jobs
|
235
|
+
# collect stuck jobs and re-send the first one
|
236
|
+
stuck_works=@@running_jobs.select{|job| job.stuck?}
|
193
237
|
|
238
|
+
if !stuck_works.empty?
|
239
|
+
jobs=stuck_works.map{|j| j.inspect}.join("\n")
|
240
|
+
$SERVER_LOG.info("Stuck Jobs:\n#{jobs}")
|
194
241
|
|
195
|
-
|
196
|
-
|
197
|
-
|
242
|
+
# send_object
|
243
|
+
send_object(stuck_works.first)
|
244
|
+
stuck_works.first.sent!
|
245
|
+
$SERVER_LOG.info("Sending stuck work #{stuck_works.first.inspect}")
|
246
|
+
sent=true
|
247
|
+
end
|
248
|
+
end
|
198
249
|
|
199
|
-
|
250
|
+
return sent
|
251
|
+
end
|
200
252
|
|
201
|
-
|
253
|
+
# send next work to worker
|
254
|
+
def send_next_work
|
202
255
|
|
203
|
-
|
204
|
-
|
205
|
-
|
256
|
+
# if we need to exit, send quit to workers
|
257
|
+
|
258
|
+
if @@want_to_exit
|
259
|
+
send_object(:quit)
|
260
|
+
|
261
|
+
elsif !send_stuck_work
|
262
|
+
|
263
|
+
# no stuck work was re-sent, so prepare a new chunk of work
|
264
|
+
objs=[]
|
265
|
+
|
266
|
+
# prepare new data
|
267
|
+
@@chunk_size.times do
|
268
|
+
obj=next_work
|
269
|
+
if obj.nil?
|
270
|
+
break
|
271
|
+
else
|
272
|
+
# add to obj array
|
273
|
+
objs << obj
|
274
|
+
end
|
206
275
|
end
|
207
|
-
else
|
208
276
|
|
209
|
-
|
210
|
-
|
277
|
+
# if new data was collected, send it
|
278
|
+
if objs.count>0
|
279
|
+
@@count += objs.count
|
280
|
+
@@chunk_count += 1
|
211
281
|
|
282
|
+
work_data=WorkManagerData.new(objs)
|
283
|
+
send_object(work_data)
|
212
284
|
|
285
|
+
# to keep order or retry failed job, we need job status
|
286
|
+
if @@keep_order || @@retry_stuck_jobs
|
287
|
+
# do not remove data, so that it can be sent again
|
288
|
+
# work_data.data=nil
|
289
|
+
@@running_jobs.push work_data
|
290
|
+
# print_running_jobs
|
291
|
+
end
|
292
|
+
else
|
293
|
+
# otherwise, send a quit value indicating no more data available
|
294
|
+
send_object(:quit)
|
295
|
+
end
|
296
|
+
end
|
213
297
|
end
|
214
298
|
|
299
|
+
# loads a checkpoint
|
215
300
|
def goto_checkpoint
|
216
301
|
if @@checkpoint>0
|
217
302
|
$SERVER_LOG.info "Skipping until checkpoint #{@@checkpoint}"
|
@@ -220,18 +305,13 @@ module ScbiMapreduce
|
|
220
305
|
|
221
306
|
# do an automatic checkpoint restore
|
222
307
|
if checkpoint==-1
|
223
|
-
@@checkpoint.times do |i|
|
224
|
-
|
225
|
-
|
308
|
+
(@@checkpoint - 1).times do |i|
|
309
|
+
$SERVER_LOG.info "Automatic trashing Chunk #{i+1}"
|
226
310
|
# get next work
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
#
|
231
|
-
# else
|
232
|
-
# @@count += 1
|
233
|
-
# end
|
234
|
-
# end
|
311
|
+
@@chunk_size.times do
|
312
|
+
obj=next_work
|
313
|
+
end
|
314
|
+
# trash_checkpointed_work
|
235
315
|
end
|
236
316
|
|
237
317
|
$SERVER_LOG.info "Automatic checkpoint finished"
|
@@ -240,8 +320,9 @@ module ScbiMapreduce
|
|
240
320
|
|
241
321
|
#user has done the checkpoint restoration
|
242
322
|
elsif checkpoint>0
|
243
|
-
|
323
|
+
|
244
324
|
WorkManagerData.job_id=checkpoint
|
325
|
+
|
245
326
|
elsif checkpoint==0
|
246
327
|
$SERVER_LOG.info "Automatic checkpoint not done"
|
247
328
|
end
|
@@ -271,6 +352,11 @@ module ScbiMapreduce
|
|
271
352
|
send_initial_config
|
272
353
|
send_next_work
|
273
354
|
end
|
355
|
+
|
356
|
+
def self.controlled_exit
|
357
|
+
$SERVER_LOG.info("Controlled exit. Workers will be noticed in next round")
|
358
|
+
@@want_to_exit=true
|
359
|
+
end
|
274
360
|
|
275
361
|
|
276
362
|
def receive_object(obj)
|
@@ -285,21 +371,23 @@ module ScbiMapreduce
|
|
285
371
|
|
286
372
|
# if there are too many errors
|
287
373
|
if (@@count>100) && (@@error_count >= @@count*0.8)
|
288
|
-
@@exit = @@exit_on_many_errors
|
289
374
|
|
290
375
|
# notice programmer
|
291
376
|
res=too_many_errors_received
|
292
377
|
|
293
378
|
# force exit if too_many_errors_received returns true
|
294
|
-
if res
|
295
|
-
|
379
|
+
if @@exit_on_many_errors || res
|
380
|
+
$SERVER_LOG.error("Want to exit due to too many errors")
|
381
|
+
self.controlled_exit
|
296
382
|
end
|
297
383
|
end
|
298
384
|
|
299
385
|
else
|
300
386
|
# if not using checkpointing
|
301
387
|
|
302
|
-
|
388
|
+
|
389
|
+
if @@checkpointing || @@keep_order || @@retry_stuck_jobs
|
390
|
+
# print_running_jobs
|
303
391
|
checkpointable_job_received(obj)
|
304
392
|
else
|
305
393
|
work_received(obj.data)
|
@@ -314,58 +402,71 @@ module ScbiMapreduce
|
|
314
402
|
|
315
403
|
|
316
404
|
def checkpointable_job_received(obj)
|
405
|
+
|
406
|
+
# find received object among sent jobs
|
317
407
|
received_job=@@running_jobs.find{|o| o.job_identifier==obj.job_identifier}
|
318
408
|
|
319
|
-
# save job
|
409
|
+
# save job if there was a valid work previously sent
|
320
410
|
if received_job
|
321
411
|
|
322
|
-
# change job's status to received
|
323
|
-
received_job.
|
324
|
-
|
412
|
+
# change this job's status to received
|
413
|
+
received_job.received!(obj.data)
|
414
|
+
|
415
|
+
|
325
416
|
|
326
|
-
# if there are sufficient jobs, count pending ones
|
327
|
-
if (@@running_jobs.count>=PENDING_TO_SAVE)
|
328
|
-
# count received objects pending to be written
|
329
|
-
pending=0
|
417
|
+
# # if there are sufficient jobs, count pending ones
|
418
|
+
# if (@@running_jobs.count>=PENDING_TO_SAVE)
|
330
419
|
|
420
|
+
# count received objects pending to be written, only until one that is still running is found
|
421
|
+
pending_to_save=0
|
422
|
+
@@running_jobs.each do |job|
|
423
|
+
if job.status==:received
|
424
|
+
pending_to_save += 1
|
425
|
+
else
|
426
|
+
break
|
427
|
+
end
|
428
|
+
end
|
429
|
+
|
430
|
+
# if enough works are pending to save, or all remaining works are pending, then save
|
431
|
+
if (pending_to_save>=PENDING_TO_SAVE) || (pending_to_save==@@running_jobs.count)
|
432
|
+
# save pending jobs and write to disk
|
433
|
+
to_remove = 0
|
434
|
+
|
435
|
+
if @@checkpointing
|
436
|
+
remove_checkpoint
|
437
|
+
end
|
438
|
+
|
331
439
|
@@running_jobs.each do |job|
|
332
440
|
if job.status==:received
|
333
|
-
|
441
|
+
# puts "Sent to save: #{job.inspect}"
|
442
|
+
work_received(job.data)
|
443
|
+
job.status=:saved
|
444
|
+
to_remove += 1
|
334
445
|
else
|
335
446
|
break
|
336
447
|
end
|
337
448
|
end
|
338
449
|
|
450
|
+
# if some objects were saved, remove them from the running_jobs
|
451
|
+
if to_remove > 0
|
452
|
+
to_remove.times do |i|
|
453
|
+
o=@@running_jobs.shift
|
339
454
|
|
340
|
-
|
341
|
-
|
342
|
-
to_remove = 0
|
343
|
-
|
344
|
-
@@running_jobs.each_with_index do |job,i|
|
345
|
-
if job.status==:received
|
346
|
-
# puts "Sent to save: #{job.inspect}"
|
347
|
-
work_received(job.data)
|
348
|
-
job.status=:saved
|
349
|
-
to_remove += 1
|
350
|
-
else
|
351
|
-
break
|
352
|
-
end
|
455
|
+
# puts "Job removed #{o.inspect}"
|
456
|
+
o=nil
|
353
457
|
end
|
354
458
|
|
355
|
-
#
|
356
|
-
|
357
|
-
|
358
|
-
o=@@running_jobs.shift
|
359
|
-
# puts "Job removed #{o.inspect}"
|
360
|
-
o=nil
|
361
|
-
end
|
459
|
+
# print_running_jobs
|
460
|
+
|
461
|
+
if @@checkpointing && !@@want_to_exit
|
362
462
|
|
363
463
|
save_checkpoint
|
364
464
|
end
|
365
465
|
end
|
366
466
|
end
|
467
|
+
# end
|
367
468
|
else
|
368
|
-
$SERVER_LOG.
|
469
|
+
$SERVER_LOG.warn "Job already processed #{obj.inspect}"
|
369
470
|
end
|
370
471
|
end
|
371
472
|
|
@@ -385,26 +486,29 @@ module ScbiMapreduce
|
|
385
486
|
# no more workers left, shutdown EM and stop server
|
386
487
|
if @@workers == 0
|
387
488
|
$SERVER_LOG.info "All workers finished"
|
388
|
-
|
389
|
-
$SERVER_LOG.info "Exiting server"
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
self.class.end_work_manager
|
394
|
-
|
395
|
-
@@total_seconds = Time.now-@@total_seconds
|
396
|
-
$SERVER_LOG.info "Total processed: #{@@count} objects in #{@@total_seconds} seconds"
|
397
|
-
$SERVER_LOG.info "Processing rate: #{"%.2f" % (@@count/@@total_seconds.to_f)} objects per second"
|
398
|
-
$SERVER_LOG.info "Connection rate: #{"%.2f" % (@@chunk_count/@@total_seconds.to_f)} connections per second"
|
399
|
-
|
400
|
-
$SERVER_LOG.info "Number of errors: #{@@error_count}"
|
401
|
-
$SERVER_LOG.info "Chunk size: #{@@chunk_size}"
|
402
|
-
$SERVER_LOG.info "Total connected workers: #{@@max_workers}"
|
403
|
-
|
404
|
-
|
405
|
-
|
489
|
+
stop_work_manager
|
406
490
|
end
|
407
491
|
end
|
492
|
+
|
493
|
+
def stop_work_manager
|
494
|
+
|
495
|
+
|
496
|
+
|
497
|
+
EM.stop
|
498
|
+
$SERVER_LOG.info "Exiting server"
|
499
|
+
|
500
|
+
self.class.end_work_manager
|
501
|
+
|
502
|
+
@@total_seconds = Time.now-@@total_seconds
|
503
|
+
$SERVER_LOG.info "Total processed: #{@@count} objects in #{@@total_seconds} seconds"
|
504
|
+
$SERVER_LOG.info "Processing rate: #{"%.2f" % (@@count/@@total_seconds.to_f)} objects per second"
|
505
|
+
$SERVER_LOG.info "Connection rate: #{"%.2f" % (@@chunk_count/@@total_seconds.to_f)} connections per second"
|
506
|
+
|
507
|
+
$SERVER_LOG.info "Number of errors: #{@@error_count}"
|
508
|
+
$SERVER_LOG.info "Chunk size: #{@@chunk_size}"
|
509
|
+
$SERVER_LOG.info "Total connected workers: #{@@max_workers}"
|
510
|
+
|
511
|
+
end
|
408
512
|
|
409
513
|
end
|
410
514
|
end
|
@@ -10,7 +10,7 @@ module ScbiMapreduce
|
|
10
10
|
class Worker < EventMachine::Connection
|
11
11
|
include EM::P::ObjectProtocol
|
12
12
|
|
13
|
-
|
13
|
+
@@want_to_exit_worker=false
|
14
14
|
|
15
15
|
def receive_initial_config(obj)
|
16
16
|
|
@@ -40,7 +40,6 @@ module ScbiMapreduce
|
|
40
40
|
|
41
41
|
def initialize(*args)
|
42
42
|
super
|
43
|
-
|
44
43
|
end
|
45
44
|
|
46
45
|
def post_init
|
@@ -67,8 +66,15 @@ module ScbiMapreduce
|
|
67
66
|
# At first iteration, start worker
|
68
67
|
starting_worker
|
69
68
|
else
|
70
|
-
|
71
|
-
|
69
|
+
$WORKER_LOG.info("received:"+obj.to_s)
|
70
|
+
|
71
|
+
if (obj == :quit) || @@want_to_exit_worker
|
72
|
+
$WORKER_LOG.info('Quit received')
|
73
|
+
|
74
|
+
stop_worker
|
75
|
+
|
76
|
+
elsif @@want_to_exit_worker
|
77
|
+
$WORKER_LOG.info('Want to exit worker')
|
72
78
|
stop_worker
|
73
79
|
else
|
74
80
|
@@count += 1
|
@@ -94,6 +100,10 @@ module ScbiMapreduce
|
|
94
100
|
modified_data=process_object(obj.data)
|
95
101
|
obj.data = modified_data
|
96
102
|
|
103
|
+
# if obj.job_identifier==3
|
104
|
+
# sleep 15
|
105
|
+
# end
|
106
|
+
|
97
107
|
send_object(obj)
|
98
108
|
|
99
109
|
rescue Exception => e
|
@@ -114,18 +124,30 @@ module ScbiMapreduce
|
|
114
124
|
end
|
115
125
|
|
116
126
|
def stop_worker
|
127
|
+
$WORKER_LOG.info "Closing connection with WORKER"
|
128
|
+
$WORKER_LOG.info("Worker processed #{@@count} chunks")
|
129
|
+
|
117
130
|
close_connection
|
118
131
|
EventMachine::stop_event_loop
|
119
132
|
closing_worker
|
120
133
|
end
|
134
|
+
|
135
|
+
def self.controlled_exit_worker
|
136
|
+
@@want_to_exit_worker=true
|
137
|
+
end
|
121
138
|
|
122
139
|
def self.start_worker(worker_id,ip,port,log_file=nil)
|
123
140
|
#puts "NEW WORKER - INIIIIIIIIIIIIIIIIIIIIT #{self}"
|
141
|
+
|
142
|
+
|
124
143
|
ip = ip
|
125
144
|
port = port
|
126
145
|
@@count = -1
|
127
146
|
|
128
147
|
@@worker_id=worker_id
|
148
|
+
|
149
|
+
# Signal.trap("INT") { puts "TRAP INT in worker #{@@worker_id}"; controlled_exit_worker; EM.stop}
|
150
|
+
# Signal.trap("TERM") { puts "TRAP TERM in worker #{@@worker_id}";controlled_exit_worker; EM.stop}
|
129
151
|
|
130
152
|
if log_file.nil?
|
131
153
|
log_file = 'logs/worker'+worker_id+'_'+`hostname`.chomp+'_log.txt'
|
data/lib/scbi_mapreduce.rb
CHANGED
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: scbi_mapreduce
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.
|
5
|
+
version: 0.0.38
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Dario Guerrero
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date:
|
13
|
+
date: 2012-04-13 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: eventmachine
|