scbi_mapreduce 0.0.37 → 0.0.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/lib/scbi_mapreduce/manager.rb +9 -6
- data/lib/scbi_mapreduce/work_manager.rb +208 -104
- data/lib/scbi_mapreduce/worker.rb +26 -4
- data/lib/scbi_mapreduce.rb +1 -1
- metadata +2 -2
data/History.txt
CHANGED
@@ -17,13 +17,12 @@ module ScbiMapreduce
|
|
17
17
|
|
18
18
|
class Manager
|
19
19
|
|
20
|
-
attr_accessor :checkpointing, :keep_order, :
|
20
|
+
attr_accessor :checkpointing, :keep_order, :retry_stuck_jobs, :exit_on_many_errors, :chunk_size
|
21
21
|
|
22
22
|
# initialize Manager
|
23
23
|
def initialize(server_ip, port, workers, work_manager_class,custom_worker_file,log_file=nil, init_env_file=nil)
|
24
24
|
@port=port
|
25
25
|
|
26
|
-
|
27
26
|
if log_file.nil?
|
28
27
|
log_file = File.join('logs','server_log.txt')
|
29
28
|
end
|
@@ -55,8 +54,9 @@ module ScbiMapreduce
|
|
55
54
|
|
56
55
|
@checkpointing=false
|
57
56
|
@keep_order=false
|
58
|
-
@
|
59
|
-
|
57
|
+
@retry_stuck_jobs=false
|
58
|
+
@exit_on_many_errors=true
|
59
|
+
|
60
60
|
@chunk_size=1
|
61
61
|
|
62
62
|
|
@@ -84,7 +84,6 @@ module ScbiMapreduce
|
|
84
84
|
|
85
85
|
@worker_launcher = WorkerLauncher.new(@ip,port,ip_list,@workers,custom_worker_file,log_file,init_env_file)
|
86
86
|
|
87
|
-
|
88
87
|
$SERVER_LOG.info("Local workers: #{@workers}")
|
89
88
|
$SERVER_LOG.info("Remote workers: #{@worker_names}")
|
90
89
|
|
@@ -101,11 +100,15 @@ module ScbiMapreduce
|
|
101
100
|
EM.error_handler{ |e|
|
102
101
|
$SERVER_LOG.error(e.message + ' => ' + e.backtrace.join("\n"))
|
103
102
|
}
|
103
|
+
|
104
|
+
# $SERVER_LOG.info("Installing INT and TERM traps in #{@work_manager_class}")
|
105
|
+
# Signal.trap("INT") { puts "TRAP INT";@work_manager_class.controlled_exit; EM.stop}
|
106
|
+
# Signal.trap("TERM") { puts "TRAP TERM";@work_manager_class.controlled_exit; EM.stop}
|
104
107
|
|
105
108
|
# start EM loop
|
106
109
|
EventMachine::run {
|
107
110
|
|
108
|
-
@work_manager_class.init_work_manager_internals(@checkpointing, @keep_order, @
|
111
|
+
@work_manager_class.init_work_manager_internals(@checkpointing, @keep_order, @retry_stuck_jobs,@exit_on_many_errors,@chunk_size)
|
109
112
|
|
110
113
|
evm=EventMachine::start_server @ip, @port, @work_manager_class
|
111
114
|
dir=Socket.unpack_sockaddr_in( EM.get_sockname( evm ))
|
@@ -14,27 +14,58 @@
|
|
14
14
|
|
15
15
|
module ScbiMapreduce
|
16
16
|
|
17
|
-
|
18
|
-
PENDING_TO_SAVE=100
|
19
17
|
|
18
|
+
PENDING_TO_SAVE=10
|
19
|
+
CHECKPOINT_FILE='scbi_mapreduce_checkpoint'
|
20
|
+
OLD_CHECKPOINT_FILE='old_scbi_mapreduce_checkpoint'
|
20
21
|
|
21
22
|
class WorkManagerData
|
22
23
|
|
23
24
|
@@job_id=1
|
25
|
+
@@longest_processing_time=0
|
24
26
|
|
25
27
|
attr_reader :job_identifier
|
26
|
-
attr_accessor :status, :data
|
28
|
+
attr_accessor :status, :data,:sent_time,:received_time
|
27
29
|
|
28
|
-
def initialize(
|
30
|
+
def initialize(objs)
|
29
31
|
|
30
32
|
@job_identifier=@@job_id
|
31
33
|
@@job_id+=1
|
32
|
-
@data=
|
34
|
+
@data=objs
|
35
|
+
|
36
|
+
sent!
|
37
|
+
@received_time=0
|
38
|
+
@processing_time=nil
|
39
|
+
end
|
40
|
+
|
41
|
+
def received!(objs)
|
42
|
+
@data=objs
|
43
|
+
@received_time=Time.now
|
44
|
+
@processing_time=@received_time-@sent_time
|
45
|
+
|
46
|
+
# save the longest processing time seen so far
|
47
|
+
@@longest_processing_time=[@@longest_processing_time,@processing_time].max
|
48
|
+
|
49
|
+
@status=:received
|
50
|
+
end
|
51
|
+
|
52
|
+
def sent!
|
33
53
|
@status=:running
|
54
|
+
@sent_time=Time.now
|
55
|
+
end
|
56
|
+
|
57
|
+
def stuck?
|
58
|
+
(@status==:running) && (@@longest_processing_time>0) && (processing_time>(@@longest_processing_time*2))
|
59
|
+
end
|
60
|
+
|
61
|
+
# return running or real processing time
|
62
|
+
def processing_time
|
63
|
+
return (@processing_time || (Time.now-@sent_time))
|
34
64
|
end
|
35
65
|
|
36
66
|
def inspect
|
37
|
-
|
67
|
+
time="; time: #{processing_time} seg"
|
68
|
+
return "WorkManagerData: #{@job_identifier} => #{@status} #{time}"
|
38
69
|
end
|
39
70
|
|
40
71
|
def self.job_id=(c)
|
@@ -53,7 +84,7 @@ module ScbiMapreduce
|
|
53
84
|
class WorkManager < EventMachine::Connection
|
54
85
|
|
55
86
|
include EM::P::ObjectProtocol
|
56
|
-
|
87
|
+
|
57
88
|
def self.init_work_manager
|
58
89
|
|
59
90
|
end
|
@@ -102,8 +133,9 @@ module ScbiMapreduce
|
|
102
133
|
|
103
134
|
############
|
104
135
|
|
105
|
-
def self.init_work_manager_internals(checkpointing, keep_order,
|
136
|
+
def self.init_work_manager_internals(checkpointing, keep_order, retry_stuck_jobs,exit_on_many_errors,chunk_size)
|
106
137
|
@@count = 0
|
138
|
+
@@want_to_exit=false
|
107
139
|
@@chunk_count = 0
|
108
140
|
@@workers = 0
|
109
141
|
@@max_workers = 0
|
@@ -113,13 +145,17 @@ module ScbiMapreduce
|
|
113
145
|
|
114
146
|
@@checkpointing=checkpointing
|
115
147
|
@@keep_order=keep_order
|
116
|
-
@@
|
148
|
+
@@retry_stuck_jobs=retry_stuck_jobs
|
117
149
|
@@exit_on_many_errors=exit_on_many_errors
|
118
150
|
|
119
151
|
# TODO - Implement a dynamic chunk_size
|
120
152
|
|
121
153
|
@@chunk_size=chunk_size
|
122
154
|
$SERVER_LOG.info "Processing in chunks of #{@@chunk_size} objects"
|
155
|
+
$SERVER_LOG.info "Checkpointing: #{@@checkpointing}"
|
156
|
+
$SERVER_LOG.info "Keeping output order: #{@@keep_order}"
|
157
|
+
$SERVER_LOG.info "Retrying stuck jobs: #{@@retry_stuck_jobs}"
|
158
|
+
$SERVER_LOG.info "Exiting on too many errors: #{@@exit_on_many_errors}"
|
123
159
|
|
124
160
|
@@checkpoint=0
|
125
161
|
if @@checkpointing
|
@@ -133,17 +169,28 @@ module ScbiMapreduce
|
|
133
169
|
return @@checkpoint
|
134
170
|
end
|
135
171
|
|
136
|
-
def
|
137
|
-
|
172
|
+
def remove_checkpoint
|
173
|
+
if File.exists?(CHECKPOINT_FILE)
|
174
|
+
checkpoint_file = FileUtils.mv(CHECKPOINT_FILE,OLD_CHECKPOINT_FILE)
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
138
178
|
|
179
|
+
def save_checkpoint
|
180
|
+
checkpoint_file = File.open(CHECKPOINT_FILE,'w')
|
181
|
+
|
139
182
|
if !@@running_jobs.empty?
|
140
|
-
|
183
|
+
checkpoint_value = @@running_jobs.first.job_identifier
|
141
184
|
else
|
142
|
-
|
185
|
+
checkpoint_value = WorkManagerData.job_id
|
143
186
|
end
|
144
|
-
|
187
|
+
|
188
|
+
$SERVER_LOG.info "Saving checkpoint: #{checkpoint_value}"
|
189
|
+
|
190
|
+
checkpoint_file.puts checkpoint_value
|
191
|
+
|
145
192
|
checkpoint_file.close
|
146
|
-
|
193
|
+
|
147
194
|
save_user_checkpoint
|
148
195
|
|
149
196
|
end
|
@@ -151,8 +198,8 @@ module ScbiMapreduce
|
|
151
198
|
def self.get_checkpoint
|
152
199
|
res = 0
|
153
200
|
begin
|
154
|
-
if File.exists?(
|
155
|
-
res=File.read(
|
201
|
+
if File.exists?(CHECKPOINT_FILE)
|
202
|
+
res=File.read(CHECKPOINT_FILE).chomp
|
156
203
|
# puts "read checkpoint #{res}"
|
157
204
|
|
158
205
|
res = res.to_i
|
@@ -176,42 +223,80 @@ module ScbiMapreduce
|
|
176
223
|
send_object(obj)
|
177
224
|
end
|
178
225
|
|
179
|
-
|
180
|
-
|
226
|
+
def print_running_jobs
|
227
|
+
jobs=@@running_jobs.map{|j| j.inspect}.join("\n")
|
228
|
+
$SERVER_LOG.debug("Running Jobs:\n#{jobs}")
|
229
|
+
end
|
181
230
|
|
182
|
-
|
231
|
+
def send_stuck_work
|
232
|
+
sent=false
|
183
233
|
|
184
|
-
@@
|
185
|
-
|
186
|
-
|
187
|
-
break
|
188
|
-
else
|
189
|
-
# add to obj array
|
190
|
-
objs << obj
|
191
|
-
end
|
192
|
-
end
|
234
|
+
if @@retry_stuck_jobs
|
235
|
+
# count stuck jobs and re-send the first one
|
236
|
+
stuck_works=@@running_jobs.select{|job| job.stuck?}
|
193
237
|
|
238
|
+
if !stuck_works.empty?
|
239
|
+
jobs=stuck_works.map{|j| j.inspect}.join("\n")
|
240
|
+
$SERVER_LOG.info("Stuck Jobs:\n#{jobs}")
|
194
241
|
|
195
|
-
|
196
|
-
|
197
|
-
|
242
|
+
# send_object
|
243
|
+
send_object(stuck_works.first)
|
244
|
+
stuck_works.first.sent!
|
245
|
+
$SERVER_LOG.info("Sending stuck work #{stuck_works.first.inspect}")
|
246
|
+
sent=true
|
247
|
+
end
|
248
|
+
end
|
198
249
|
|
199
|
-
|
250
|
+
return sent
|
251
|
+
end
|
200
252
|
|
201
|
-
|
253
|
+
# send next work to worker
|
254
|
+
def send_next_work
|
202
255
|
|
203
|
-
|
204
|
-
|
205
|
-
|
256
|
+
# if we need to exit, send quit to workers
|
257
|
+
|
258
|
+
if @@want_to_exit
|
259
|
+
send_object(:quit)
|
260
|
+
|
261
|
+
elsif !send_stuck_work
|
262
|
+
|
263
|
+
# no stuck work was re-sent, so send new work
|
264
|
+
objs=[]
|
265
|
+
|
266
|
+
# prepare new data
|
267
|
+
@@chunk_size.times do
|
268
|
+
obj=next_work
|
269
|
+
if obj.nil?
|
270
|
+
break
|
271
|
+
else
|
272
|
+
# add to obj array
|
273
|
+
objs << obj
|
274
|
+
end
|
206
275
|
end
|
207
|
-
else
|
208
276
|
|
209
|
-
|
210
|
-
|
277
|
+
# if new data was collected, send it
|
278
|
+
if objs.count>0
|
279
|
+
@@count += objs.count
|
280
|
+
@@chunk_count += 1
|
211
281
|
|
282
|
+
work_data=WorkManagerData.new(objs)
|
283
|
+
send_object(work_data)
|
212
284
|
|
285
|
+
# to keep order or retry failed job, we need job status
|
286
|
+
if @@keep_order || @@retry_stuck_jobs
|
287
|
+
# do not remove data, to be able to send it again
|
288
|
+
# work_data.data=nil
|
289
|
+
@@running_jobs.push work_data
|
290
|
+
# print_running_jobs
|
291
|
+
end
|
292
|
+
else
|
293
|
+
# otherwise, send a quit value indicating no more data available
|
294
|
+
send_object(:quit)
|
295
|
+
end
|
296
|
+
end
|
213
297
|
end
|
214
298
|
|
299
|
+
# loads a checkpoint
|
215
300
|
def goto_checkpoint
|
216
301
|
if @@checkpoint>0
|
217
302
|
$SERVER_LOG.info "Skipping until checkpoint #{@@checkpoint}"
|
@@ -220,18 +305,13 @@ module ScbiMapreduce
|
|
220
305
|
|
221
306
|
# do an automatic checkpoint restore
|
222
307
|
if checkpoint==-1
|
223
|
-
@@checkpoint.times do |i|
|
224
|
-
|
225
|
-
|
308
|
+
(@@checkpoint - 1).times do |i|
|
309
|
+
$SERVER_LOG.info "Automatic trashing Chunk #{i+1}"
|
226
310
|
# get next work
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
#
|
231
|
-
# else
|
232
|
-
# @@count += 1
|
233
|
-
# end
|
234
|
-
# end
|
311
|
+
@@chunk_size.times do
|
312
|
+
obj=next_work
|
313
|
+
end
|
314
|
+
# trash_checkpointed_work
|
235
315
|
end
|
236
316
|
|
237
317
|
$SERVER_LOG.info "Automatic checkpoint finished"
|
@@ -240,8 +320,9 @@ module ScbiMapreduce
|
|
240
320
|
|
241
321
|
#user has done the checkpoint restoration
|
242
322
|
elsif checkpoint>0
|
243
|
-
|
323
|
+
|
244
324
|
WorkManagerData.job_id=checkpoint
|
325
|
+
|
245
326
|
elsif checkpoint==0
|
246
327
|
$SERVER_LOG.info "Automatic checkpoint not done"
|
247
328
|
end
|
@@ -271,6 +352,11 @@ module ScbiMapreduce
|
|
271
352
|
send_initial_config
|
272
353
|
send_next_work
|
273
354
|
end
|
355
|
+
|
356
|
+
def self.controlled_exit
|
357
|
+
$SERVER_LOG.info("Controlled exit. Workers will be noticed in next round")
|
358
|
+
@@want_to_exit=true
|
359
|
+
end
|
274
360
|
|
275
361
|
|
276
362
|
def receive_object(obj)
|
@@ -285,21 +371,23 @@ module ScbiMapreduce
|
|
285
371
|
|
286
372
|
# if there are too many errors
|
287
373
|
if (@@count>100) && (@@error_count >= @@count*0.8)
|
288
|
-
@@exit = @@exit_on_many_errors
|
289
374
|
|
290
375
|
# notice programmer
|
291
376
|
res=too_many_errors_received
|
292
377
|
|
293
378
|
# force exit if too_many_errors_received returns true
|
294
|
-
if res
|
295
|
-
|
379
|
+
if @@exit_on_many_errors || res
|
380
|
+
$SERVER_LOG.error("Want to exit due to too many errors")
|
381
|
+
self.controlled_exit
|
296
382
|
end
|
297
383
|
end
|
298
384
|
|
299
385
|
else
|
300
386
|
# if not using checkpointing
|
301
387
|
|
302
|
-
|
388
|
+
|
389
|
+
if @@checkpointing || @@keep_order || @@retry_stuck_jobs
|
390
|
+
# print_running_jobs
|
303
391
|
checkpointable_job_received(obj)
|
304
392
|
else
|
305
393
|
work_received(obj.data)
|
@@ -314,58 +402,71 @@ module ScbiMapreduce
|
|
314
402
|
|
315
403
|
|
316
404
|
def checkpointable_job_received(obj)
|
405
|
+
|
406
|
+
# find received object among sent jobs
|
317
407
|
received_job=@@running_jobs.find{|o| o.job_identifier==obj.job_identifier}
|
318
408
|
|
319
|
-
# save job
|
409
|
+
# save job if there was a valid work previously sent
|
320
410
|
if received_job
|
321
411
|
|
322
|
-
# change job's status to received
|
323
|
-
received_job.
|
324
|
-
|
412
|
+
# change this job's status to received
|
413
|
+
received_job.received!(obj.data)
|
414
|
+
|
415
|
+
|
325
416
|
|
326
|
-
# if there are sufficient jobs, count pending ones
|
327
|
-
if (@@running_jobs.count>=PENDING_TO_SAVE)
|
328
|
-
# count received objects pending to be written
|
329
|
-
pending=0
|
417
|
+
# # if there are sufficient jobs, count pending ones
|
418
|
+
# if (@@running_jobs.count>=PENDING_TO_SAVE)
|
330
419
|
|
420
|
+
# count received objects pending to be written, only until one that is still running is found
|
421
|
+
pending_to_save=0
|
422
|
+
@@running_jobs.each do |job|
|
423
|
+
if job.status==:received
|
424
|
+
pending_to_save += 1
|
425
|
+
else
|
426
|
+
break
|
427
|
+
end
|
428
|
+
end
|
429
|
+
|
430
|
+
# if there are a few pending to save works, or all remaining works are pending, then save
|
431
|
+
if (pending_to_save>=PENDING_TO_SAVE) || (pending_to_save==@@running_jobs.count)
|
432
|
+
# save pending jobs and write to disk
|
433
|
+
to_remove = 0
|
434
|
+
|
435
|
+
if @@checkpointing
|
436
|
+
remove_checkpoint
|
437
|
+
end
|
438
|
+
|
331
439
|
@@running_jobs.each do |job|
|
332
440
|
if job.status==:received
|
333
|
-
|
441
|
+
# puts "Sent to save: #{job.inspect}"
|
442
|
+
work_received(job.data)
|
443
|
+
job.status=:saved
|
444
|
+
to_remove += 1
|
334
445
|
else
|
335
446
|
break
|
336
447
|
end
|
337
448
|
end
|
338
449
|
|
450
|
+
# if some objects were saved, remove them from the running_jobs
|
451
|
+
if to_remove > 0
|
452
|
+
to_remove.times do |i|
|
453
|
+
o=@@running_jobs.shift
|
339
454
|
|
340
|
-
|
341
|
-
|
342
|
-
to_remove = 0
|
343
|
-
|
344
|
-
@@running_jobs.each_with_index do |job,i|
|
345
|
-
if job.status==:received
|
346
|
-
# puts "Sent to save: #{job.inspect}"
|
347
|
-
work_received(job.data)
|
348
|
-
job.status=:saved
|
349
|
-
to_remove += 1
|
350
|
-
else
|
351
|
-
break
|
352
|
-
end
|
455
|
+
# puts "Job removed #{o.inspect}"
|
456
|
+
o=nil
|
353
457
|
end
|
354
458
|
|
355
|
-
#
|
356
|
-
|
357
|
-
|
358
|
-
o=@@running_jobs.shift
|
359
|
-
# puts "Job removed #{o.inspect}"
|
360
|
-
o=nil
|
361
|
-
end
|
459
|
+
# print_running_jobs
|
460
|
+
|
461
|
+
if @@checkpointing && !@@want_to_exit
|
362
462
|
|
363
463
|
save_checkpoint
|
364
464
|
end
|
365
465
|
end
|
366
466
|
end
|
467
|
+
# end
|
367
468
|
else
|
368
|
-
$SERVER_LOG.
|
469
|
+
$SERVER_LOG.warn "Job already processed #{obj.inspect}"
|
369
470
|
end
|
370
471
|
end
|
371
472
|
|
@@ -385,26 +486,29 @@ module ScbiMapreduce
|
|
385
486
|
# no more workers left, shutdown EM and stop server
|
386
487
|
if @@workers == 0
|
387
488
|
$SERVER_LOG.info "All workers finished"
|
388
|
-
|
389
|
-
$SERVER_LOG.info "Exiting server"
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
self.class.end_work_manager
|
394
|
-
|
395
|
-
@@total_seconds = Time.now-@@total_seconds
|
396
|
-
$SERVER_LOG.info "Total processed: #{@@count} objects in #{@@total_seconds} seconds"
|
397
|
-
$SERVER_LOG.info "Processing rate: #{"%.2f" % (@@count/@@total_seconds.to_f)} objects per second"
|
398
|
-
$SERVER_LOG.info "Connection rate: #{"%.2f" % (@@chunk_count/@@total_seconds.to_f)} connections per second"
|
399
|
-
|
400
|
-
$SERVER_LOG.info "Number of errors: #{@@error_count}"
|
401
|
-
$SERVER_LOG.info "Chunk size: #{@@chunk_size}"
|
402
|
-
$SERVER_LOG.info "Total connected workers: #{@@max_workers}"
|
403
|
-
|
404
|
-
|
405
|
-
|
489
|
+
stop_work_manager
|
406
490
|
end
|
407
491
|
end
|
492
|
+
|
493
|
+
def stop_work_manager
|
494
|
+
|
495
|
+
|
496
|
+
|
497
|
+
EM.stop
|
498
|
+
$SERVER_LOG.info "Exiting server"
|
499
|
+
|
500
|
+
self.class.end_work_manager
|
501
|
+
|
502
|
+
@@total_seconds = Time.now-@@total_seconds
|
503
|
+
$SERVER_LOG.info "Total processed: #{@@count} objects in #{@@total_seconds} seconds"
|
504
|
+
$SERVER_LOG.info "Processing rate: #{"%.2f" % (@@count/@@total_seconds.to_f)} objects per second"
|
505
|
+
$SERVER_LOG.info "Connection rate: #{"%.2f" % (@@chunk_count/@@total_seconds.to_f)} connections per second"
|
506
|
+
|
507
|
+
$SERVER_LOG.info "Number of errors: #{@@error_count}"
|
508
|
+
$SERVER_LOG.info "Chunk size: #{@@chunk_size}"
|
509
|
+
$SERVER_LOG.info "Total connected workers: #{@@max_workers}"
|
510
|
+
|
511
|
+
end
|
408
512
|
|
409
513
|
end
|
410
514
|
end
|
@@ -10,7 +10,7 @@ module ScbiMapreduce
|
|
10
10
|
class Worker < EventMachine::Connection
|
11
11
|
include EM::P::ObjectProtocol
|
12
12
|
|
13
|
-
|
13
|
+
@@want_to_exit_worker=false
|
14
14
|
|
15
15
|
def receive_initial_config(obj)
|
16
16
|
|
@@ -40,7 +40,6 @@ module ScbiMapreduce
|
|
40
40
|
|
41
41
|
def initialize(*args)
|
42
42
|
super
|
43
|
-
|
44
43
|
end
|
45
44
|
|
46
45
|
def post_init
|
@@ -67,8 +66,15 @@ module ScbiMapreduce
|
|
67
66
|
# At first iteration, start worker
|
68
67
|
starting_worker
|
69
68
|
else
|
70
|
-
|
71
|
-
|
69
|
+
$WORKER_LOG.info("received:"+obj.to_s)
|
70
|
+
|
71
|
+
if (obj == :quit) || @@want_to_exit_worker
|
72
|
+
$WORKER_LOG.info('Quit received')
|
73
|
+
|
74
|
+
stop_worker
|
75
|
+
|
76
|
+
elsif @@want_to_exit_worker
|
77
|
+
$WORKER_LOG.info('Want to exit worker')
|
72
78
|
stop_worker
|
73
79
|
else
|
74
80
|
@@count += 1
|
@@ -94,6 +100,10 @@ module ScbiMapreduce
|
|
94
100
|
modified_data=process_object(obj.data)
|
95
101
|
obj.data = modified_data
|
96
102
|
|
103
|
+
# if obj.job_identifier==3
|
104
|
+
# sleep 15
|
105
|
+
# end
|
106
|
+
|
97
107
|
send_object(obj)
|
98
108
|
|
99
109
|
rescue Exception => e
|
@@ -114,18 +124,30 @@ module ScbiMapreduce
|
|
114
124
|
end
|
115
125
|
|
116
126
|
def stop_worker
|
127
|
+
$WORKER_LOG.info "Closing connection with WORKER"
|
128
|
+
$WORKER_LOG.info("Worker processed #{@@count} chunks")
|
129
|
+
|
117
130
|
close_connection
|
118
131
|
EventMachine::stop_event_loop
|
119
132
|
closing_worker
|
120
133
|
end
|
134
|
+
|
135
|
+
def self.controlled_exit_worker
|
136
|
+
@@want_to_exit_worker=true
|
137
|
+
end
|
121
138
|
|
122
139
|
def self.start_worker(worker_id,ip,port,log_file=nil)
|
123
140
|
#puts "NEW WORKER - INIIIIIIIIIIIIIIIIIIIIT #{self}"
|
141
|
+
|
142
|
+
|
124
143
|
ip = ip
|
125
144
|
port = port
|
126
145
|
@@count = -1
|
127
146
|
|
128
147
|
@@worker_id=worker_id
|
148
|
+
|
149
|
+
# Signal.trap("INT") { puts "TRAP INT in worker #{@@worker_id}"; controlled_exit_worker; EM.stop}
|
150
|
+
# Signal.trap("TERM") { puts "TRAP TERM in worker #{@@worker_id}";controlled_exit_worker; EM.stop}
|
129
151
|
|
130
152
|
if log_file.nil?
|
131
153
|
log_file = 'logs/worker'+worker_id+'_'+`hostname`.chomp+'_log.txt'
|
data/lib/scbi_mapreduce.rb
CHANGED
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: scbi_mapreduce
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.
|
5
|
+
version: 0.0.38
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Dario Guerrero
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date:
|
13
|
+
date: 2012-04-13 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: eventmachine
|