pampa 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/lib/pampa.rb +736 -0
  3. data/worker.rb +147 -0
  4. metadata +163 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 0faa4143e265bf804e49240a15eb7c27c6f209af2bfa93a6cd69a5e7506a431a
4
+ data.tar.gz: 2914d64ada1d628d6c21555680aaf0bea0ce141bffeafbc2f8df17a2b746d85c
5
+ SHA512:
6
+ metadata.gz: deb9bc56fd8eb40cbbec9f72b9ffe7f7cdee8f605ab1a1680d850c4980d706880f8920ec9eb6e409378b91c5745cfa29fda2d7fc320ae366193b84b0ed5249f4
7
+ data.tar.gz: 4025a6fd3f142347f9a95310a6e2ac9f9916195fff9cee484c8a67beba148cc67573aa04f7ca3704e220077711f8419add81ba81472feb6eecc4119deafe922b
data/lib/pampa.rb ADDED
@@ -0,0 +1,736 @@
1
+ require 'sequel'
2
+ require 'blackstack-core'
3
+ require 'blackstack-nodes'
4
+ require 'simple_command_line_parser'
5
+ require 'simple_cloud_logging'
6
+
7
+ module BlackStack
8
+ module Pampa
9
+ # arrays of nodes and jobs. Workers are held by their nodes.
10
+ @@nodes = []
11
+ @@jobs = []
12
+ # logger configuration
13
+ @@log_filename = nil
14
+ @@logger = BlackStack::DummyLogger.new(nil)
15
+ # Connection string to the database. Example: mysql2://user:password@localhost:3306/database
16
+ @@connection_string = nil
17
+
18
+ # define a filename for the log file.
19
# Remember the log filename and replace the module logger with a
# BlackStack::LocalLogger created for that filename.
#
# s - filename of the log file.
def self.set_log_filename(s)
  @@log_filename = s
  @@logger = BlackStack::LocalLogger.new(@@log_filename)
end
23
+
24
+ # return the logger.
25
# Logger currently used by the module.
def self.logger
  return @@logger
end
28
+
29
+ # return the log filename.
30
# Filename given to set_log_filename, or nil if it was never called.
def self.log_filename
  return @@log_filename
end
33
+
34
+ # define a connection string to the database.
35
# Store the database connection string.
#
# s - connection string. Example: mysql2://user:password@localhost:3306/database
def self.set_connection_string(s)
  return @@connection_string = s
end
38
+
39
+ # return connection string to the database. Example: mysql2://user:password@localhost:3306/database
40
# Database connection string stored by set_connection_string (nil by default).
def self.connection_string
  return @@connection_string
end
43
+
44
+ # add a node to the cluster.
45
# Build a node from its hash descriptor and append it to the cluster.
#
# h - hash descriptor handed to BlackStack::Pampa::Node.new.
#
# Returns the array of registered nodes.
def self.add_node(h)
  node = BlackStack::Pampa::Node.new(h)
  @@nodes << node
end
48
+
49
+ # add an array of nodes to the cluster.
50
# Register every node descriptor of an array, in order.
#
# a - array of hash descriptors; each one is passed to add_node.
#
# Raises RuntimeError if a is not an Array.
def self.add_nodes(a)
  if !a.is_a?(Array)
    raise "The parameter a is not an array"
  end
  a.each { |descriptor| self.add_node(descriptor) }
end
59
+
60
+ # return the array of nodes.
61
# Array of nodes registered so far.
def self.nodes
  return @@nodes
end
64
+
65
+ # return the array of all workers, belonging to all nodes.
66
# Flat array with the workers of every registered node, in node order.
def self.workers
  @@nodes.flat_map { |node| node.workers }
end
69
+
70
+ # add a job to the cluster.
71
# Build a job from its hash descriptor and append it to the cluster.
#
# h - hash descriptor handed to BlackStack::Pampa::Job.new.
#
# Returns the array of registered jobs.
def self.add_job(h)
  job = BlackStack::Pampa::Job.new(h)
  @@jobs << job
end
74
+
75
+ # add an array of jobs to the cluster.
76
# Register every job descriptor of an array, in order.
#
# a - array of hash descriptors; each one is passed to add_job.
#
# Raises RuntimeError if a is not an Array.
def self.add_jobs(a)
  if !a.is_a?(Array)
    raise "The parameter a is not an array"
  end
  a.each { |descriptor| self.add_job(descriptor) }
end
85
+
86
+ # return the array of jobs.
87
# Array of jobs registered so far.
def self.jobs
  return @@jobs
end
90
+
91
+ =begin
92
+ # return a hash descriptor of the whole configuration of the cluster.
93
+ def self.to_hash()
94
+ ret = {
95
+ :log_filename => self.log_filename,
96
+ :connection_string => self.connection_string,
97
+ }
98
+ #ret[:workers] = []
99
+ #@@workers.each do |w|
100
+ # ret[:workers] << w.to_hash
101
+ #end
102
+ ret[:nodes] = []
103
+ @@nodes.each do |n|
104
+ ret[:nodes] << n.to_hash
105
+ end
106
+ ret[:jobs] = []
107
+ @@jobs.each do |j|
108
+ ret[:jobs] << j.to_hash
109
+ end
110
+ ret
111
+ end # def self.to_hash()
112
+
113
+ # setup from a whole hash descriptor
114
+ def self.initialize(h)
115
+ # TODO
116
+ end
117
+ =end
118
+
119
+ # get attached and unassigned workers.
120
+ # assign and unassign workers to jobs.
121
+ #
122
+ # Parameters:
123
+ # - config: relative path of the configuration file. Example: '../config.rb'
124
+ # - worker: relative path of the worker.rb file. Example: '../worker.rb'
125
+ #
126
# Rebalance the assignment of workers to jobs. Takes no parameters.
#
# For each registered job:
# - if it has no pending tasks, every worker assigned to it is released;
# - if it has fewer pending tasks than :max_pending_tasks and more than one
#   assigned worker, one worker is released;
# - otherwise, unless it already holds :max_assigned_workers workers (and at
#   least one), one free worker is assigned to it, if any is available.
#
# Released workers join the pool of free workers and can be taken by the
# jobs that follow in the list.
#
# Raises RuntimeError if the connection string is nil, empty or blank.
def self.stretch()
  # validate: the connection string is not nil, empty or blank
  raise "The connection string is nil" if @@connection_string.nil?
  raise "The connection string is empty" if @@connection_string.empty?
  raise "The connection string is blank" if @@connection_string.strip.empty?
  # getting logger
  l = self.logger()
  # pool of attached workers that are free to be assigned
  l.logs "Getting attached and unassigned workers... "
  workers = BlackStack::Pampa.workers.select { |worker| worker.attached && worker.assigned_job.nil? }
  l.logf "done (#{workers.size.to_s})"
  # Every job is visited even when the pool is empty: stopping early would
  # prevent jobs without pending tasks from ever releasing their workers once
  # all the workers are assigned.
  BlackStack::Pampa.jobs.each { |job|
    l.logs("job:#{job.name}... ")
    l.logs("Gettting assigned workers... ")
    assigned = BlackStack::Pampa.workers.select { |worker| worker.attached && worker.assigned_job.to_s == job.name.to_s }
    l.logf("done (#{assigned.size.to_s})")

    l.logs("Getting total pending tasks... ")
    pendings = job.selecting(job.max_pending_tasks)
    l.logf("done (#{pendings.size.to_s})")

    l.logs("Has 0 tasks?.... ")
    if pendings.size == 0
      l.logf("yes")

      l.logs("Unassigning all assigned workers... ")
      assigned.each { |w|
        l.logs("Unassigning worker #{w.id}... ")
        w.assigned_job = nil
        l.done

        l.logs("Adding worker #{w.id} to the list of unassigned... ")
        workers << w
        l.done
      }
      l.done
    else
      l.logf("no")

      l.logs("Reached :max_pending_tasks (#{job.max_pending_tasks}) and more than 1 assigned workers ?... ")
      if pendings.size < job.max_pending_tasks && assigned.size > 1
        l.logf("no")

        l.logs("Unassigning worker... ")
        w = assigned.first # TODO: find a worker with no pending tasks
        w.assigned_job = nil
        l.done

        l.logs("Adding worker from the list of unassigned... ")
        workers << w
        l.done
      else
        l.logf("yes")

        l.logs("Reached :max_assigned_workers (#{job.max_assigned_workers}) and more than 0 assigned workers?... ")
        if assigned.size >= job.max_assigned_workers && assigned.size > 0
          l.logf("yes")
        else
          l.logf("no")

          l.logs("Assigning worker... ")
          w = workers.first
          if w.nil?
            # the pool is empty: this job waits for a later call.
            l.logf("No more workers to assign.")
          else
            w.assigned_job = job.name.to_sym
            l.done

            l.logs("Removing worker from the list of unassigned... ")
            workers.delete(w)
            l.done
          end
        end
      end
    end
    l.done
  }
end
208
+
209
+ # iterate the jobs.
210
+ # for each job, get all the tasks to relaunch.
211
+ # for each task to relaunch, relaunch it.
212
+ #
213
+ # Parameters:
214
+ # - config: relative path of the configuration file. Example: '../config.rb'
215
+ # - worker: relative path of the worker.rb file. Example: '../worker.rb'
216
+ #
217
# For every registered job, fetch the tasks to relaunch and relaunch each
# one with Job#relaunch. Takes no parameters.
#
# Raises RuntimeError if the connection string is nil, empty or blank.
def self.relaunch()
  if @@connection_string.nil?
    raise "The connection string is nil"
  elsif @@connection_string.empty?
    raise "The connection string is empty"
  elsif @@connection_string.strip.empty?
    raise "The connection string is blank"
  end
  log = self.logger()
  BlackStack::Pampa.jobs.each do |job|
    log.logs("job:#{job.name}... ")
    log.logs("Gettting tasks to relaunch (max #{job.queue_size.to_s})... ")
    failed = job.relaunching(job.queue_size+1)
    log.logf("done (#{failed.size.to_s})")

    failed.each do |task|
      log.logs("Relaunching task #{task[job.field_primary_key.to_sym]}... ")
      job.relaunch(task)
      log.done
    end

    log.done
  end
end
242
+
243
+ # iterate the workers.
244
+ # for each worker, iterate the job.
245
+ #
246
+ # Parameters:
247
+ # - config: relative path of the configuration file. Example: '../config.rb'
248
+ # - worker: relative path of the worker.rb file. Example: '../worker.rb'
249
+ #
250
# For every attached worker that has an assigned job, run that job's
# dispatching for the worker (Job#run_dispatch) and log its result.
# Detached workers, unassigned workers and workers whose assigned job is not
# registered are only logged. Takes no parameters.
#
# Raises RuntimeError if the connection string is nil, empty or blank.
def self.dispatch()
  # validate: the connection string is not nil, empty or blank
  raise "The connection string is nil" if @@connection_string.nil?
  raise "The connection string is empty" if @@connection_string.empty?
  raise "The connection string is blank" if @@connection_string.strip.empty?
  # getting logger
  l = self.logger()
  # iterate the workers
  BlackStack::Pampa.workers.each { |worker|
    l.logs("worker:#{worker.id}... ")
    if !worker.attached
      l.logf("detached")
    elsif worker.assigned_job.nil?
      l.logf("unassigned")
    else
      # get the job this worker is assigned to
      job = BlackStack::Pampa.jobs.find { |j| j.name.to_s == worker.assigned_job.to_s }
      if job.nil?
        # job is nil here, so the name is taken from the worker.
        l.logf("job #{worker.assigned_job} not found")
      else
        l.logf("done (#{job.run_dispatch(worker).to_s})")
      end
    end
  }
end
279
+
280
+ # connect the nodes via ssh.
281
+ # kill all Ruby processes except this one.
282
+ # rename any existing folder ~/pampa to ~/pampa.<current timestamp>.
283
+ # create a new folder ~/pampa.
284
+ # build the file ~/pampa/config.rb in the remote node.
285
+ # copy the file ~/pampa/worker.rb to the remote node.
286
+ # run the number of workers specified in the configuration of the Pampa module.
287
+ # return an array with the IDs of the workers.
288
+ #
289
+ # Parameters:
290
+ # - config: relative path of the configuration file. Example: '../config.rb'
291
+ # - worker: relative path of the worker.rb file. Example: '../worker.rb'
292
+ #
293
# Deploy and start the workers on every registered node. For each node:
# connect, kill the running Ruby processes, rename any existing ~/pampa
# folder to ~/pampa.<timestamp>, create ~/pampa, write config.rb and
# worker.rb into it, launch one worker process per worker of the node, and
# disconnect.
#
# config_filename - local path of the configuration file to upload.
# worker_filename - local path of the worker script to upload.
#
# Raises RuntimeError if the connection string is nil, empty or blank.
# Raises the File.read errors if any of the two files cannot be read.
def self.deploy(config_filename='./config.rb', worker_filename='./worker.rb')
  # only this method needs shell escaping.
  require 'shellwords'
  # validate: the connection string is not nil, empty or blank
  raise "The connection string is nil" if @@connection_string.nil?
  raise "The connection string is empty" if @@connection_string.empty?
  raise "The connection string is blank" if @@connection_string.strip.empty?
  # getting logger
  l = self.logger()
  # read both files before touching any node, so an unreadable file aborts
  # the deploy before any remote process is killed.
  config_source = File.read(config_filename)
  worker_source = File.read(worker_filename)
  # iterate the nodes
  @@nodes.each { |node|
    l.logs("node:#{node.name()}... ")
    # connect the node
    l.logs("Connecting... ")
    node.connect()
    l.done
    # kill all ruby processes
    # NOTE(review): Process.pid is the pid of the local process, not of a
    # process on the remote node - confirm the `grep -v` is still wanted.
    l.logs("Killing all Ruby processes except this one... ")
    node.exec("ps ax | grep ruby | grep -v grep | grep -v #{Process.pid} | cut -b3-7 | xargs -t kill;", false)
    l.done
    # rename any existing folder ~/pampa to ~/pampa.<current timestamp>.
    l.logs("Renaming old folder... ")
    node.exec('mv ~/pampa ~/pampa.'+Time.now().to_i.to_s, false)
    l.done
    # create a new folder ~/pampa.
    l.logs("Creating new folder... ")
    node.exec('mkdir ~/pampa', false)
    l.done
    # build the file ~/pampa/config.rb in the remote node. The content is
    # shell-escaped, so quotes, `$` and backticks reach the file untouched.
    l.logs("Building config file... ")
    node.exec("printf '%s\\n' #{Shellwords.escape(config_source)} > ~/pampa/config.rb", false)
    l.done
    # copy the worker script to ~/pampa/worker.rb, escaped the same way.
    l.logs("Copying worker file... ")
    node.exec("printf '%s\\n' #{Shellwords.escape(worker_source)} > ~/pampa/worker.rb", false)
    l.done
    # run one process per worker of the node.
    node.workers.each { |worker|
      # add these parameters for debug: debug=yes pampa=~/code/pampa/lib/pampa.rb
      l.logs "Running worker #{worker.id}... "
      s = [
        "",
        "source /home/#{node.ssh_username}/.rvm/scripts/rvm >/dev/null 2>&1;",
        "rvm install 3.1.2 >/dev/null 2>&1;",
        "rvm --default use 3.1.2 >/dev/null 2>&1;",
        "cd /home/#{node.ssh_username}/pampa >/dev/null 2>&1;",
        "export RUBYLIB=/home/#{node.ssh_username}/pampa >/dev/null 2>&1;",
        "nohup ruby worker.rb id=#{worker.id} config=~/pampa/config.rb >/dev/null 2>&1 &",
        "",
      ].join("\n")
      node.exec(s, false)
      l.done
    }
    # disconnect the node
    l.logs("Disconnecting... ")
    node.disconnect()
    l.done
    l.done
  }
end
354
+
355
+ # connect the nodes via ssh.
356
+ # kill all Ruby processes except this one.
357
+ #
358
+ # Parameters:
359
+ # - config: relative path of the configuration file. Example: '../config.rb'
360
+ #
361
# Connect to every registered node, kill its Ruby processes and disconnect.
#
# config_filename - not used by this method.
#
# Raises RuntimeError if the connection string is nil, empty or blank.
def self.stop(config_filename='./config.rb')
  if @@connection_string.nil?
    raise "The connection string is nil"
  elsif @@connection_string.empty?
    raise "The connection string is empty"
  elsif @@connection_string.strip.empty?
    raise "The connection string is blank"
  end
  log = self.logger()
  @@nodes.each do |node|
    log.logs("node:#{node.name()}... ")

    log.logs("Connecting... ")
    node.connect()
    log.done

    log.logs("Killing all Ruby processes except this one... ")
    kill_command = "ps ax | grep ruby | grep -v grep | grep -v #{Process.pid} | cut -b3-7 | xargs -t kill;"
    node.exec(kill_command, false)
    log.done

    log.logs("Disconnecting... ")
    node.disconnect()
    log.done

    log.done
  end
end
388
+
389
+ # stub worker class
390
# A worker of the cluster: an identifier, the name of the job it is assigned
# to (nil when free) and a flag telling whether it is attached.
class Worker
  # id: unique name of the worker.
  # assigned_job: name of the job assigned to the worker, or nil.
  # attached: true if the worker is attached.
  attr_accessor :id, :assigned_job, :attached

  # Return an array with the errors found in the descriptor of the worker.
  # The descriptor must be a hash with a non-blank :id.
  def self.descriptor_errors(h)
    errors = []
    if !h.is_a?(Hash)
      errors << "The worker descriptor must be a hash"
    elsif h[:id].to_s.strip.empty?
      errors << "The key :id is missing"
    end
    errors.uniq
  end

  # Create an attached, unassigned worker from its hash descriptor.
  #
  # h - hash with the key :id. Other keys are ignored.
  #
  # Raises RuntimeError if the descriptor is not valid.
  def initialize(h)
    errors = BlackStack::Pampa::Worker.descriptor_errors(h)
    raise "The worker descriptor is not valid: #{errors.uniq.join(".\n")}" if errors.length > 0
    self.id = h[:id]
    self.assigned_job = nil
    self.attached = true
  end

  # Return a hash descriptor of the worker. It only carries the :id.
  def to_hash()
    {
      :id => self.id,
    }
  end

  # Flag the worker as attached.
  def attach()
    self.attached = true
  end

  # Flag the worker as detached.
  def detach()
    self.attached = false
  end
end
422
+
423
+ # stub node class
424
+ # stub node class is already defined in the blackstack-nodes gem: https://github.com/leandrosardi/blackstack-nodes
425
+ # we inherit from it to add some extra methods and attributes
426
# A node of the cluster. It mixes in BlackStack::Infrastructure::NodeModule
# (blackstack-nodes gem: https://github.com/leandrosardi/blackstack-nodes)
# and adds the workers hosted by the node.
class Node
  include BlackStack::Infrastructure::NodeModule
  # number of workers to create for this node
  attr_accessor :max_workers
  # array of workers belonging to this node
  attr_accessor :workers

  # Return the errors found in the node descriptor: the ones reported by
  # NodeModule.descriptor_errors plus the validation of :max_workers, which
  # must exist and be an integer.
  def self.descriptor_errors(h)
    errors = BlackStack::Infrastructure::NodeModule.descriptor_errors(h)
    # report only one problem about :max_workers.
    if h[:max_workers].nil?
      errors << "The key :max_workers is missing"
    elsif !h[:max_workers].is_a?(Integer)
      errors << "The key :max_workers must be an integer"
    end
    # return list of errors
    errors.uniq
  end

  # Initialize the node and create its :max_workers workers, with the ids
  # "<node name>.1", "<node name>.2", ...
  #
  # h        - hash descriptor of the node.
  # i_logger - forwarded as is to the initializer of NodeModule.
  #
  # Raises RuntimeError if the descriptor is not valid.
  def initialize(h, i_logger=nil)
    errors = BlackStack::Pampa::Node.descriptor_errors(h)
    raise "The node descriptor is not valid: #{errors.uniq.join(".\n")}" if errors.length > 0
    super(h, i_logger)
    self.max_workers = h[:max_workers]
    self.workers = []
    self.max_workers.times do |i|
      self.workers << BlackStack::Pampa::Worker.new({:id => "#{self.name}.#{(i+1).to_s}", :node => self.to_hash})
    end
  end

  # Return a hash descriptor of the node: the hash built by NodeModule plus
  # :max_workers and the descriptors of the workers.
  def to_hash()
    ret = super()
    ret[:max_workers] = self.max_workers
    ret[:workers] = []
    self.workers.each do |worker|
      ret[:workers] << worker.to_hash
    end
    ret
  end
end # class Node
464
+
465
+ # stub job class
466
+ class Job
467
+ attr_accessor :name
468
+ # database information
469
+ # :field_times, :field_start_time and :field_end_time maybe nil
470
+ attr_accessor :table
471
+ attr_accessor :field_primary_key
472
+ attr_accessor :field_id
473
+ attr_accessor :field_time
474
+ attr_accessor :field_times
475
+ attr_accessor :field_start_time
476
+ attr_accessor :field_end_time
477
+ attr_accessor :field_success
478
+ attr_accessor :field_error_description
479
+ # max number of records assigned to a worker that have not started (:start_time field is nil)
480
+ attr_accessor :queue_size
481
+ # max number of minutes that a job should take to process. if :end_time keep nil x minutes
482
+ # after :start_time, that's considered as the job has failed or interrumped
483
+ attr_accessor :max_job_duration_minutes
484
+ # max number of times that a record can start to process & fail (:start_time field is not nil,
485
+ # but :end_time field is still nil after :max_job_duration_minutes)
486
+ attr_accessor :max_try_times
487
+ # additional function to returns an array of tasks pending to be processed by a worker.
488
+ # it should returns an array
489
+ # keep it nil if you want to run the default function
490
+ attr_accessor :occupied_function
491
+ # additional function to decide if the worker can dispatch or not
492
+ # example: use this function when you want to decide based on the remaining credits of the client
493
+ # it should returns true or false
494
+ # keep it nil if you want it returns always true
495
+ attr_accessor :allowing_function
496
+ # additional function to choose the records to launch
497
+ # it should returns an array of IDs
498
+ # keep this parameter nil if you want to use the default algorithm
499
+ attr_accessor :selecting_function
500
+ # additional function to choose the records to retry
501
+ # keep this parameter nil if you want to use the default algorithm
502
+ attr_accessor :relaunching_function
503
+ # additional function to perform the update on a record to retry
504
+ # keep this parameter nil if you want to use the default algorithm
505
+ attr_accessor :relauncher_function
506
+ # additional function to perform the update on a record to flag the starting of the job
507
+ # by default this function will set the :field_start_time field with the current datetime, and it will increase the :field_times counter
508
+ # keep this parameter nil if you want to use the default algorithm
509
+ attr_accessor :starter_function
510
+ # additional function to perform the update on a record to flag the finishing of the job
511
+ # by default this function will set the :field_end_time field with the current datetime
512
+ # keep this parameter nil if you want to use the default algorithm
513
+ attr_accessor :finisher_function
514
+ # Function to execute for each task.
515
+ attr_accessor :processing_function
516
+ # stretch assignation/unassignation of workers
517
+ attr_accessor :max_pending_tasks
518
+ attr_accessor :max_assigned_workers
519
+
520
+ # return a hash descriptor of the job
521
# Return a hash descriptor of the job. Plain attributes are copied as they
# are; the *_function attributes are converted with to_s.
def to_hash()
  descriptor = {}
  [
    :name, :table, :field_primary_key, :field_id, :field_time, :field_times,
    :field_start_time, :field_end_time, :field_success, :field_error_description,
    :queue_size, :max_job_duration_minutes, :max_try_times,
  ].each { |key| descriptor[key] = self.send(key) }
  [
    :occupied_function, :allowing_function, :selecting_function, :relaunching_function,
    :relauncher_function, :starter_function, :finisher_function, :processing_function,
  ].each { |key| descriptor[key] = self.send(key).to_s }
  [:max_pending_tasks, :max_assigned_workers].each { |key| descriptor[key] = self.send(key) }
  descriptor
end
548
+
549
+ # return an array with the errors found in the description of the job
550
# Return an array with the errors found in the descriptor of the job.
# No validation is implemented yet, so the array is always empty.
def self.descriptor_errors(h)
  # TODO: Code Me!
  found = Array.new
  return found.uniq
end
555
+
556
+ # setup dispatcher configuration here
557
# Set up the job from its hash descriptor. Every attribute of the job is
# read from the key with the same name; missing keys leave the attribute nil.
#
# h - hash descriptor of the job.
#
# Raises RuntimeError if the descriptor is not valid.
def initialize(h)
  errors = BlackStack::Pampa::Job.descriptor_errors(h)
  raise "The job descriptor is not valid: #{errors.uniq.join(".\n")}" if errors.length > 0
  self.name = h[:name]
  self.table = h[:table]
  self.field_primary_key = h[:field_primary_key]
  self.field_id = h[:field_id]
  self.field_time = h[:field_time]
  self.field_times = h[:field_times]
  self.field_start_time = h[:field_start_time]
  self.field_end_time = h[:field_end_time]
  self.field_success = h[:field_success]
  self.field_error_description = h[:field_error_description]
  self.queue_size = h[:queue_size]
  self.max_job_duration_minutes = h[:max_job_duration_minutes]
  self.max_try_times = h[:max_try_times]
  self.occupied_function = h[:occupied_function]
  self.allowing_function = h[:allowing_function]
  self.selecting_function = h[:selecting_function]
  self.relaunching_function = h[:relaunching_function]
  self.relauncher_function = h[:relauncher_function]
  # without these two, custom starters/finishers were silently ignored.
  self.starter_function = h[:starter_function]
  self.finisher_function = h[:finisher_function]
  self.processing_function = h[:processing_function]
  self.max_pending_tasks = h[:max_pending_tasks]
  self.max_assigned_workers = h[:max_assigned_workers]
end
582
+
583
+ # returns an array of tasks pending to be processed by the worker.
584
+ # it will select the records with :reservation_id == worker.id, and :start_time == nil
585
# Return the tasks reserved by the worker that are waiting to be processed.
# With no :occupied_function, these are the records whose :field_id is the
# id of the worker and, when :field_start_time is defined, whose start time
# is null. Otherwise, the result of :occupied_function is returned.
def occupied_slots(worker)
  custom = self.occupied_function
  # TODO: validate the value returned by the custom function
  return custom.call(worker, self) unless custom.nil?
  filters = { self.field_id.to_sym => worker.id }
  filters[self.field_start_time.to_sym] = nil unless self.field_start_time.nil?
  DB[self.table.to_sym].where(filters).all
end
594
+
595
+ # returns the number of free slots in the procesing queue of this worker
596
# Return the number of free slots in the processing queue of the worker:
# :queue_size minus the occupied slots, never below zero.
def available_slots(worker)
  taken = self.occupied_slots(worker).size
  capacity = self.queue_size
  taken > capacity ? 0 : capacity - taken
end
605
+
606
+ # decide if the worker can dispatch or not
607
+ # example: use this function when you want to decide based on the remaining credits of the client
608
+ # returns always true
609
# Decide if the worker can dispatch or not. Returns true when no
# :allowing_function is defined; otherwise returns what that function returns.
def allowing(worker)
  custom = self.allowing_function
  # TODO: validate that the custom function returns true or false
  custom.nil? ? true : custom.call(worker, self)
end
617
+
618
+ # returns an array of available tasks for dispatching.
619
# Default selection of tasks available for dispatching: up to n records
# whose :field_id is null, whose :field_end_time is null (when that field is
# defined) and whose :field_times, taken as 0 when null, is one of
# 0...max_try_times (when that field is defined).
def selecting_dataset(n)
  ds = DB[self.table.to_sym].where(self.field_id.to_sym => nil)
  unless self.field_end_time.nil?
    ds = ds.filter(self.field_end_time.to_sym => nil)
  end
  unless self.field_times.nil?
    tries = Sequel.function(:coalesce, self.field_times.to_sym, 0)
    ds = ds.filter(tries => self.max_try_times.times.to_a)
  end
  ds.limit(n).all
end # selecting_dataset
625
+
626
+ # returns an array of available tasks for dispatching.
627
# Return the tasks available for dispatching: the default selection when no
# :selecting_function is defined, the result of that function otherwise.
def selecting(n)
  custom = self.selecting_function
  return self.selecting_dataset(n) if custom.nil?
  # TODO: validate the value returned by the custom function
  custom.call(n, self)
end
635
+
636
+ # returns an array of failed tasks for restarting.
637
# Default selection of tasks to restart: up to n records that were reserved
# (:field_id and :field_time are not null) more than
# :max_job_duration_minutes minutes ago and, when :field_end_time is defined,
# have no end time.
#
# n - max number of records to return.
def relaunching_dataset(n)
  conditions = [
    "#{self.field_time.to_s} IS NOT NULL",
    "#{self.field_time.to_s} < CURRENT_TIMESTAMP() - INTERVAL '#{self.max_job_duration_minutes.to_i} minutes'",
    "#{self.field_id.to_s} IS NOT NULL",
  ]
  # :field_end_time may be nil; an unconditional clause would produce the
  # invalid SQL "AND  IS NULL".
  conditions << "#{self.field_end_time.to_s} IS NULL" unless self.field_end_time.nil?
  q = "
    SELECT *
    FROM #{self.table.to_s}
    WHERE #{conditions.join("\n    AND ")}
    LIMIT #{n.to_i}
  "
  DB[q].all
end
652
+
653
+ # returns an array of failed tasks for restarting.
654
# Return the tasks to restart: the default selection when no
# :relaunching_function is defined, the result of that function otherwise.
def relaunching(n)
  custom = self.relaunching_function
  return self.relaunching_dataset(n) if custom.nil?
  # TODO: validate the value returned by the custom function
  custom.call(n, self)
end
662
+
663
# Release a task so it can be dispatched again: set :field_id and
# :field_time (and :field_start_time and :field_end_time, when defined) to
# nil in the record o, and update the row with the same primary key.
def relaunch(o)
  columns = [self.field_id.to_sym, self.field_time.to_sym]
  columns << self.field_start_time.to_sym unless self.field_start_time.nil?
  columns << self.field_end_time.to_sym unless self.field_end_time.nil?
  columns.each { |column| o[column] = nil }
  pk = self.field_primary_key.to_sym
  DB[self.table.to_sym].where(pk => o[pk]).update(o)
end
670
+
671
# Flag the task o as started. With no :starter_function, set
# :field_start_time (when defined) to the current time of the database,
# increase the :field_times counter (when defined) and update the row with
# the same primary key. Otherwise, delegate to :starter_function.
def start(o)
  return self.starter_function.call(o, self) unless self.starter_function.nil?
  unless self.field_start_time.nil?
    # IMPORTANT: use DB location to get current time.
    o[self.field_start_time.to_sym] = DB["SELECT CURRENT_TIMESTAMP() AS dt"].first[:dt]
  end
  # :field_times may be nil, like :field_start_time.
  unless self.field_times.nil?
    o[self.field_times.to_sym] = o[self.field_times.to_sym].to_i + 1
  end
  pk = self.field_primary_key.to_sym
  DB[self.table.to_sym].where(pk => o[pk]).update(o)
end
680
+
681
# Flag the task o as finished. With no :finisher_function: when there is no
# error, set :field_end_time (when defined) to the current time of the
# database; store in :field_success whether e is nil; store the description
# of the error in :field_error_description when e is given; then update the
# row with the same primary key. Otherwise, delegate to :finisher_function.
#
# o - record of the task.
# e - exception raised while processing the task, or nil.
def finish(o, e=nil)
  unless self.finisher_function.nil?
    return self.finisher_function.call(o, e, self)
  end
  succeeded = e.nil?
  if !self.field_end_time.nil? && succeeded
    # IMPORTANT: use DB location to get current time.
    o[self.field_end_time.to_sym] = DB["SELECT CURRENT_TIMESTAMP() AS dt"].first[:dt]
  end
  o[self.field_success.to_sym] = succeeded
  o[self.field_error_description.to_sym] = e.to_console unless succeeded
  pk = self.field_primary_key.to_sym
  DB[self.table.to_sym].where(pk => o[pk]).update(o)
end
691
+
692
+ # relaunch records
693
# Relaunch the failed records of the job: each record returned by
# relaunching is passed to relaunch or, when defined, to
# :relauncher_function. The database connection is released after each one.
def run_relaunch()
  # relaunching requires the max number of records to fetch; use the same
  # batch size as BlackStack::Pampa.relaunch.
  self.relaunching(self.queue_size + 1).each { |o|
    if self.relauncher_function.nil?
      self.relaunch(o)
    else
      self.relauncher_function.call(o)
    end
    # release resources
    DB.disconnect
    GC.start
  }
end # def run_relaunch
706
+
707
+ # dispatch records
708
+ # returns the # of records dispatched
709
# Dispatch pending records to the worker, up to its number of available
# slots: each selected record gets :field_id set to the id of the worker,
# :field_time set to the current time of the database, and its start/end
# times (when those fields are defined) set to nil.
#
# Returns the number of records dispatched.
def run_dispatch(worker)
  free = self.available_slots(worker)
  dispatched = 0
  return dispatched unless free > 0

  self.selecting(free).each do |o|
    dispatched += 1
    o[self.field_id.to_sym] = worker.id
    # IMPORTANT: use DB location to get current time.
    o[self.field_time.to_sym] = DB["SELECT CURRENT_TIMESTAMP() AS dt"].first[:dt]
    o[self.field_start_time.to_sym] = nil unless self.field_start_time.nil?
    o[self.field_end_time.to_sym] = nil unless self.field_end_time.nil?
    pk = self.field_primary_key.to_sym
    DB[self.table.to_sym].where(pk => o[pk]).update(o)
    # release resources
    DB.disconnect
    GC.start
  end

  dispatched
end
734
+ end # class Job
735
+ end # module Pampa
736
+ end # module BlackStack
data/worker.rb ADDED
@@ -0,0 +1,147 @@
1
+ # require the gem simple_command_line_parser for parsing command line parameters.
2
+ require 'simple_command_line_parser'
3
+ # require the gem simple_cloud_logging for writing logfiles.
4
+ require 'simple_cloud_logging'
5
+ # require the gem sequel for connecting to the database and handle ORM classes.
6
+ require 'sequel'
7
+
8
+ # parse command line parameters
9
# Worker process. It parses the command line, loads pampa and the
# configuration file, finds the worker with the given id and then loops
# forever: on each loop, for every job, it processes the tasks returned by
# Job#occupied_slots for this worker, flagging each one as started and as
# finished (with the error, if any). Loops are separated by at least `delay`
# seconds.

# parse command line parameters
PARSER = BlackStack::SimpleCommandLineParser.new(
  :description => 'This script starts an infinite loop. Each loop will look for a task to perform. Must be a delay between each loop.',
  :configuration => [{
    :name=>'delay',
    :mandatory=>false,
    :default=>30,
    :description=>'Minimum delay between loops. A minimum of 10 seconds is recommended, in order to don\'t hard the database server. Default is 30 seconds.',
    :type=>BlackStack::SimpleCommandLineParser::INT,
  }, {
    :name=>'debug',
    :mandatory=>false,
    :default=>false,
    :description=>'Activate this flag if you want to require the `pampa.rb` file from the same Pampa project folder, insetad to require the gem as usual.',
    :type=>BlackStack::SimpleCommandLineParser::BOOL,
  }, {
    :name=>'pampa',
    :mandatory=>false,
    :default=>'./lib/pampa.rb',
    :description=>'Ruby file to require where `debug` is activated.',
    :type=>BlackStack::SimpleCommandLineParser::STRING,
  }, {
    :name=>'config',
    :mandatory=>false,
    :default=>'./config.rb',
    :description=>'Ruby file where is defined the connection-string and jobs.',
    :type=>BlackStack::SimpleCommandLineParser::STRING,
  }, {
    :name=>'id',
    :mandatory=>true,
    :description=>'Write here a unique identifier for the worker.',
    :type=>BlackStack::SimpleCommandLineParser::STRING,
  }]
)

# creating logfile
l = BlackStack::LocalLogger.new('worker.'+PARSER.value('id').to_s+'.log')

begin
  l.log 'STARTING WORKER'

  # show the parameters
  # TODO: replace this hardcoded array for method `PARSER.params`.
  # reference: https://github.com/leandrosardi/simple_command_line_parser/issues/7
  #['id','delay','debug','pampa','config'].each { |param| l.log param + ': ' + PARSER.value(param).to_s }

  # require the pampa library: the gem, or the given file in debug mode.
  l.logs 'Requiring pampa (debug='+(PARSER.value('debug') ? 'true' : 'false')+', pampa='+PARSER.value('pampa')+')... '
  require 'pampa' if !PARSER.value('debug')
  require PARSER.value('pampa') if PARSER.value('debug')
  l.done

  # require the config.rb file where the jobs are defined.
  l.logs 'Requiring config (config='+PARSER.value('config')+')... '
  require PARSER.value('config')
  l.done

  # getting the worker object
  worker = BlackStack::Pampa.workers.select { |w| w.id == PARSER.value('id') }.first
  raise 'Worker '+PARSER.value('id')+' not found.' if worker.nil?

  # start the loop
  while true
    # get the start loop time
    l.logs 'Starting loop... '
    start = Time.now()
    l.done

    BlackStack::Pampa.jobs.each { |job|
      l.logs 'Processing job '+job.name+'... '
      tasks = job.occupied_slots(worker)
      l.logf tasks.size.to_s+' tasks in queue.'

      tasks.each { |task|
        l.logs 'Flag task '+job.name+'.'+task[job.field_primary_key.to_sym].to_s+' started... '
        job.start(task)
        l.done

        begin
          l.logs 'Processing task '+task[job.field_primary_key.to_sym].to_s+'... '
          job.processing_function.call(task, l, job, worker)
          l.done

          l.logs 'Flag task '+job.name+'.'+task[job.field_primary_key.to_sym].to_s+' finished... '
          job.finish(task)
          l.done

        # note: this catches the CTRL+C signal.
        # note: this catches the `kill` command, ONLY if it has not the `-9` option.
        rescue SignalException, SystemExit, Interrupt => e
          l.logs 'Flag task '+job.name+'.'+task[job.field_primary_key.to_sym].to_s+' interrumpted... '
          job.finish(task, e)
          l.done

          # `l` is the logger of this script; there is no `log` variable.
          l.logf 'Bye!'

          raise e

        rescue => e
          l.logs 'Flag task '+job.name+'.'+task[job.field_primary_key.to_sym].to_s+' failed... '
          job.finish(task, e)
          l.done

          l.logf 'Error: '+e.to_console
        end
      }
    }

    # get the end loop time
    l.logs 'Ending loop... '
    finish = Time.now()
    l.done

    # get the difference in seconds between start and finish
    l.logs 'Calculating loop duration... '
    diff = finish - start
    l.logf 'done ('+diff.to_s+')'

    if diff < PARSER.value('delay')
      # sleep for the remaining part of the delay
      n = PARSER.value('delay')-diff

      l.logs 'Sleeping for '+n.to_label+' seconds... '
      sleep n
      l.done
    else
      l.log 'No sleeping. The loop took '+diff.to_label+' seconds.'
    end
  end # while true
rescue SignalException, SystemExit, Interrupt
  # note: this catches the CTRL+C signal.
  # note: this catches the `kill` command, ONLY if it has not the `-9` option.
  l.logf 'Process Interrumpted.'
rescue => e
  # a bare `rescue` after this clause would be unreachable: both rescue StandardError.
  l.logf 'Fatal Error: '+e.to_console
end
metadata ADDED
@@ -0,0 +1,163 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pampa
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Leandro Daniel Sardi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-09-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sequel
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 5.56.0
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 5.56.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: 5.56.0
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 5.56.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: blackstack_core
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 1.2.3
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 1.2.3
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: 1.2.3
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 1.2.3
53
+ - !ruby/object:Gem::Dependency
54
+ name: blackstack_nodes
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: 1.2.10
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 1.2.10
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: 1.2.10
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 1.2.10
73
+ - !ruby/object:Gem::Dependency
74
+ name: simple_command_line_parser
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - "~>"
78
+ - !ruby/object:Gem::Version
79
+ version: 1.1.2
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 1.1.2
83
+ type: :runtime
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 1.1.2
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: 1.1.2
93
+ - !ruby/object:Gem::Dependency
94
+ name: simple_cloud_logging
95
+ requirement: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - "~>"
98
+ - !ruby/object:Gem::Version
99
+ version: 1.2.2
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: 1.2.2
103
+ type: :runtime
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: 1.2.2
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: 1.2.2
113
+ description: |
114
+ Pampa is a Ruby library for async & distributed computing providing the following features:
115
+
116
+ - cluster-management with dynamic reconfiguration (joining and leaving nodes);
117
+ - distribution of the computation jobs to the (active) nodes;
118
+ - error handling, job-retry and fault tolerance;
119
+ - fast (non-direct) communication to ensure realtime capabilities.
120
+
121
+ The Pampa framework may be widely used for:
122
+
123
+ - large scale web scraping with what we call a "bot-farm";
124
+ - payments processing for large-scale ecommerce websites;
125
+ - reports generation for high demanded SaaS platforms;
126
+ - heavy mathematical model computing;
127
+
128
+ and any other task that requires a virtually infinite amount of CPU computing and memory resources.
129
+
130
+ Find documentation here: https://github.com/leandrosardi/pampa
131
+ email: leandro.sardi@expandedventure.com
132
+ executables: []
133
+ extensions: []
134
+ extra_rdoc_files: []
135
+ files:
136
+ - lib/pampa.rb
137
+ - worker.rb
138
+ homepage: https://rubygems.org/gems/pampa
139
+ licenses:
140
+ - MIT
141
+ metadata: {}
142
+ post_install_message:
143
+ rdoc_options: []
144
+ require_paths:
145
+ - lib
146
+ required_ruby_version: !ruby/object:Gem::Requirement
147
+ requirements:
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ version: '0'
151
+ required_rubygems_version: !ruby/object:Gem::Requirement
152
+ requirements:
153
+ - - ">="
154
+ - !ruby/object:Gem::Version
155
+ version: '0'
156
+ requirements: []
157
+ rubygems_version: 3.3.7
158
+ signing_key:
159
+ specification_version: 4
160
+ summary: Ruby library for async & distributed computing, supporting dynamic reconfiguration,
161
+ distribution of the computation jobs, error handling, job-retry and fault tolerance,
162
+ and fast (non-direct) communication to ensure real-time capabilities.
163
+ test_files: []