pampa 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/lib/pampa.rb +736 -0
  3. data/worker.rb +147 -0
  4. metadata +163 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 0faa4143e265bf804e49240a15eb7c27c6f209af2bfa93a6cd69a5e7506a431a
4
+ data.tar.gz: 2914d64ada1d628d6c21555680aaf0bea0ce141bffeafbc2f8df17a2b746d85c
5
+ SHA512:
6
+ metadata.gz: deb9bc56fd8eb40cbbec9f72b9ffe7f7cdee8f605ab1a1680d850c4980d706880f8920ec9eb6e409378b91c5745cfa29fda2d7fc320ae366193b84b0ed5249f4
7
+ data.tar.gz: 4025a6fd3f142347f9a95310a6e2ac9f9916195fff9cee484c8a67beba148cc67573aa04f7ca3704e220077711f8419add81ba81472feb6eecc4119deafe922b
data/lib/pampa.rb ADDED
@@ -0,0 +1,736 @@
1
+ require 'sequel'
2
+ require 'blackstack-core'
3
+ require 'blackstack-nodes'
4
+ require 'simple_command_line_parser'
5
+ require 'simple_cloud_logging'
6
+
7
+ module BlackStack
8
+ module Pampa
9
+ # arrays of workers, nodes, and jobs.
10
+ @@nodes = []
11
+ @@jobs = []
12
+ # logger configuration
13
+ @@log_filename = nil
14
+ @@logger = BlackStack::DummyLogger.new(nil)
15
+ # Connection string to the database. Example: mysql2://user:password@localhost:3306/database
16
+ @@connection_string = nil
17
+
18
+ # define a filename for the log file.
19
def self.set_log_filename(s)
  # Remember the path first, then replace the current logger with a
  # LocalLogger built from that same path.
  @@log_filename = s
  @@logger = BlackStack::LocalLogger.new(@@log_filename)
end
23
+
24
+ # return the logger.
25
def self.logger
  # A DummyLogger until set_log_filename installs a LocalLogger.
  @@logger
end
28
+
29
+ # return the log filename.
30
def self.log_filename
  # nil until set_log_filename has been called.
  @@log_filename
end
33
+
34
+ # define a connection string to the database.
35
def self.set_connection_string(s)
  # Stored verbatim; it is only checked (nil / empty / blank) by the
  # methods that need it, such as stretch, relaunch, dispatch, deploy and stop.
  @@connection_string = s
end
38
+
39
+ # return connection string to the database. Example: mysql2://user:password@localhost:3306/database
40
def self.connection_string
  # nil until set_connection_string has been called.
  @@connection_string
end
43
+
44
+ # add a node to the cluster.
45
def self.add_node(h)
  # Build the node from its hash descriptor (Node#initialize raises on an
  # invalid descriptor) and append it to the cluster list.
  node = BlackStack::Pampa::Node.new(h)
  @@nodes << node
end # def self.add_node(h)
48
+
49
+ # add an array of nodes to the cluster.
50
def self.add_nodes(a)
  # Reject anything that is not an Array before touching the cluster state.
  raise "The parameter a is not an array" if !a.is_a?(Array)
  # Register the descriptors one by one, in the order given.
  a.each { |descriptor| self.add_node(descriptor) }
end # def self.add_nodes(a)
59
+
60
+ # return the array of nodes.
61
def self.nodes
  # The live array (not a copy) of nodes registered through add_node.
  @@nodes
end
64
+
65
+ # return the array of all workers, belonging to all nodes.
66
def self.workers
  # Collect the per-node worker lists, then flatten them into a single array.
  per_node = @@nodes.map(&:workers)
  per_node.flatten
end
69
+
70
+ # add a job to the cluster.
71
def self.add_job(h)
  # Build the job from its hash descriptor and append it to the cluster list.
  job = BlackStack::Pampa::Job.new(h)
  @@jobs << job
end # def self.add_job(h)
74
+
75
+ # add an array of jobs to the cluster.
76
def self.add_jobs(a)
  # Reject anything that is not an Array before touching the cluster state.
  raise "The parameter a is not an array" if !a.is_a?(Array)
  # Register the descriptors one by one, in the order given.
  a.each { |descriptor| self.add_job(descriptor) }
end # def self.add_jobs(a)
85
+
86
+ # return the array of jobs.
87
def self.jobs
  # The live array (not a copy) of jobs registered through add_job.
  @@jobs
end
90
+
91
+ =begin
92
+ # return a hash descriptor of the whole configuration of the cluster.
93
+ def self.to_hash()
94
+ ret = {
95
+ :log_filename => self.log_filename,
96
+ :connection_string => self.connection_string,
97
+ }
98
+ #ret[:workers] = []
99
+ #@@workers.each do |w|
100
+ # ret[:workers] << w.to_hash
101
+ #end
102
+ ret[:nodes] = []
103
+ @@nodes.each do |n|
104
+ ret[:nodes] << n.to_hash
105
+ end
106
+ ret[:jobs] = []
107
+ @@jobs.each do |j|
108
+ ret[:jobs] << j.to_hash
109
+ end
110
+ ret
111
+ end # def self.to_hash()
112
+
113
+ # setup from a whole hash descriptor
114
+ def self.initialize(h)
115
+ # TODO
116
+ end
117
+ =end
118
+
119
+ # get attached and unassigned workers.
120
+ # assign and unassign workers to jobs.
121
+ #
122
+ # Parameters:
123
+ # - config: relative path of the configuration file. Example: '../config.rb'
124
+ # - worker: relative path of the worker.rb file. Example: '../worker.rb'
125
+ #
126
def self.stretch()
  # The connection string must be present and non-blank before touching any job.
  raise "The connection string is nil" if @@connection_string.nil?
  raise "The connection string is empty" if @@connection_string.empty?
  raise "The connection string is blank" if @@connection_string.strip.empty?

  l = self.logger()

  # Pool of workers that are attached and not assigned to any job.
  l.logs "Getting attached and unassigned workers... "
  workers = BlackStack::Pampa.workers.select { |candidate| candidate.attached && candidate.assigned_job.nil? }
  l.logf "done (#{workers.size})"

  # Walk the jobs in registration order, growing or shrinking each job's
  # set of workers according to its pending tasks.
  BlackStack::Pampa.jobs.each do |job|
    # Stop as soon as the pool is empty at the start of an iteration.
    if workers.empty?
      l.logf "No more workers to assign."
      break
    end

    l.logs("job:#{job.name}... ")

    l.logs("Gettting assigned workers... ")
    assigned = BlackStack::Pampa.workers.select { |candidate| candidate.attached && candidate.assigned_job.to_s == job.name.to_s }
    l.logf("done (#{assigned.size})")

    l.logs("Getting total pending tasks... ")
    pendings = job.selecting(job.max_pending_tasks)
    l.logf("done (#{pendings.size})")

    l.logs("Has 0 tasks?.... ")
    if pendings.size.zero?
      l.logf("yes")

      # Nothing to do for this job: release every worker it holds.
      l.logs("Unassigning all assigned workers... ")
      assigned.each do |w|
        l.logs("Unassigning worker #{w.id}... ")
        w.assigned_job = nil
        l.done

        l.logs("Adding worker #{w.id} to the list of unassigned... ")
        workers << w
        l.done
      end
      l.done
    else
      l.logf("no")

      l.logs("Reached :max_pending_tasks (#{job.max_pending_tasks}) and more than 1 assigned workers ?... ")
      if pendings.size < job.max_pending_tasks && assigned.size > 1
        l.logf("no")

        # Backlog is below the threshold and the job has spare workers:
        # give one back to the pool.
        l.logs("Unassigning worker... ")
        w = assigned.first # TODO: find a worker with no pending tasks
        w.assigned_job = nil
        l.done

        l.logs("Adding worker from the list of unassigned... ")
        workers << w
        l.done
      else
        l.logf("yes")

        l.logs("Reached :max_assigned_workers (#{job.max_assigned_workers}) and more than 0 assigned workers?... ")
        if assigned.size < job.max_assigned_workers || assigned.size <= 0
          l.logf("no")

          # Room for one more worker: take the first one from the pool.
          l.logs("Assigning worker... ")
          w = workers.first
          w.assigned_job = job.name.to_sym
          l.done

          l.logs("Removing worker from the list of unassigned... ")
          workers.delete(w)
          l.done
        else
          l.logf("yes")
        end
      end
    end
    l.done
  end
end
208
+
209
+ # iterate the jobs.
210
+ # for each job, get all the tasks to relaunch.
211
+ # for each task to relaunch, relaunch it.
212
+ #
213
+ # Parameters:
214
+ # - config: relative path of the configuration file. Example: '../config.rb'
215
+ # - worker: relative path of the worker.rb file. Example: '../worker.rb'
216
+ #
217
def self.relaunch()
  # The connection string must be present and non-blank before touching any job.
  raise "The connection string is nil" if @@connection_string.nil?
  raise "The connection string is empty" if @@connection_string.empty?
  raise "The connection string is blank" if @@connection_string.strip.empty?

  l = self.logger()

  # For every job, fetch the tasks that qualify for a relaunch and reset them.
  BlackStack::Pampa.jobs.each do |job|
    l.logs("job:#{job.name}... ")

    l.logs("Gettting tasks to relaunch (max #{job.queue_size})... ")
    # Note: the number requested is queue_size + 1, one more than the logged max.
    tasks = job.relaunching(job.queue_size+1)
    l.logf("done (#{tasks.size})")

    pk = job.field_primary_key.to_sym unless tasks.empty?
    tasks.each do |task|
      l.logs("Relaunching task #{task[pk]}... ")
      job.relaunch(task)
      l.done
    end

    l.done
  end
end
242
+
243
+ # iterate the workers.
244
+ # for each worker, iterate the job.
245
+ #
246
+ # Parameters:
247
+ # - config: relative path of the configuration file. Example: '../config.rb'
248
+ # - worker: relative path of the worker.rb file. Example: '../worker.rb'
249
+ #
250
def self.dispatch()
  # Runs one dispatch pass: every attached worker that has an assigned job
  # gets that job's run_dispatch executed for it.
  #
  # Raises RuntimeError when the connection string is nil, empty or blank.

  # validate: the connection string is not nil
  raise "The connection string is nil" if @@connection_string.nil?
  # validate: the connection string is not empty
  raise "The connection string is empty" if @@connection_string.empty?
  # validate: the connection string is not blank
  raise "The connection string is blank" if @@connection_string.strip.empty?
  # getting logger
  l = self.logger()
  # iterate the workers
  BlackStack::Pampa.workers.each { |worker|
    l.logs("worker:#{worker.id}... ")
    if !worker.attached
      l.logf("detached")
    elsif worker.assigned_job.nil?
      l.logf("unassigned")
    else
      # get the job this worker is assigned to
      job = BlackStack::Pampa.jobs.find { |j| j.name.to_s == worker.assigned_job.to_s }
      if job.nil?
        # `job` is nil on this branch, so the name has to come from the
        # worker; interpolating job.name here raised NoMethodError.
        l.logf("job #{worker.assigned_job} not found")
      else
        # run_dispatch returns the number of records dispatched.
        l.logf("done (#{job.run_dispatch(worker).to_s})")
      end
    end
  } # BlackStack::Pampa.workers.each
end
279
+
280
+ # connect the nodes via ssh.
281
+ # kill all Ruby processes except this one.
282
+ # rename any existing folder ~/pampa to ~/pampa.<current timestamp>.
283
+ # create a new folder ~/pampa.
284
+ # build the file ~/pampa/config.rb in the remote node.
285
+ # copy the file ~/pampa/worker.rb to the remote node.
286
+ # run the number of workers specified in the configuration of the Pampa module.
287
+ # return an array with the IDs of the workers.
288
+ #
289
+ # Parameters:
290
+ # - config: relative path of the configuration file. Example: '../config.rb'
291
+ # - worker: relative path of the worker.rb file. Example: '../worker.rb'
292
+ #
293
def self.deploy(config_filename='./config.rb', worker_filename='./worker.rb')
  # Deploys the workers to every registered node: connects, kills running
  # Ruby processes, rotates ~/pampa, uploads config.rb and worker.rb, and
  # starts one worker process per configured worker.
  #
  # Parameters:
  # - config_filename: local path of the configuration file to upload.
  # - worker_filename: local path of the worker.rb file to upload.
  #
  # Raises RuntimeError when the connection string is nil, empty or blank.

  # Local require: Shellwords is only needed here, to quote uploaded file contents.
  require 'shellwords'

  # validate: the connection string is not nil
  raise "The connection string is nil" if @@connection_string.nil?
  # validate: the connection string is not empty
  raise "The connection string is empty" if @@connection_string.empty?
  # validate: the connection string is not blank
  raise "The connection string is blank" if @@connection_string.strip.empty?
  # getting logger
  l = self.logger()
  # iterate the nodes
  @@nodes.each { |node|
    l.logs("node:#{node.name()}... ")
    # connect the node
    l.logs("Connecting... ")
    node.connect()
    l.done
    # kill all ruby processes except this one
    # NOTE(review): Process.pid is the PID of the local process running deploy,
    # while the pipeline runs on the remote node - confirm this filter is intended.
    l.logs("Killing all Ruby processes except this one... ")
    node.exec("ps ax | grep ruby | grep -v grep | grep -v #{Process.pid} | cut -b3-7 | xargs -t kill;", false)
    l.done
    # rename any existing folder ~/pampa to ~/pampa.<current timestamp>.
    l.logs("Renaming old folder... ")
    node.exec('mv ~/pampa ~/pampa.'+Time.now().to_i.to_s, false)
    l.done
    # create a new folder ~/pampa.
    l.logs("Creating new folder... ")
    node.exec('mkdir ~/pampa', false)
    l.done
    # build the file ~/pampa/config.rb in the remote node.
    # The content is shell-escaped, so quotes, `$`, backticks and backslashes in
    # the file reach the remote file unchanged instead of being interpreted by
    # the remote shell (the previous `echo "<content>"` form broke on them).
    l.logs("Building config file... ")
    s = "printf '%s\\n' #{Shellwords.escape(File.read(config_filename))} > ~/pampa/config.rb"
    node.exec(s, false)
    l.done
    # copy the worker file to ~/pampa/worker.rb in the remote node (same escaping).
    l.logs("Copying worker file... ")
    s = "printf '%s\\n' #{Shellwords.escape(File.read(worker_filename))} > ~/pampa/worker.rb"
    node.exec(s, false)
    l.done
    # run the number of workers specified in the configuration of the Pampa module.
    node.workers.each { |worker|
      # run the worker
      # add these parameters for debug: debug=yes pampa=~/code/pampa/lib/pampa.rb
      l.logs "Running worker #{worker.id}... "
      s = "
        source /home/#{node.ssh_username}/.rvm/scripts/rvm >/dev/null 2>&1;
        rvm install 3.1.2 >/dev/null 2>&1;
        rvm --default use 3.1.2 >/dev/null 2>&1;
        cd /home/#{node.ssh_username}/pampa >/dev/null 2>&1;
        export RUBYLIB=/home/#{node.ssh_username}/pampa >/dev/null 2>&1;
        nohup ruby worker.rb id=#{worker.id} config=~/pampa/config.rb >/dev/null 2>&1 &
      "
      node.exec(s, false)
      l.done
    }
    # disconnect the node
    l.logs("Disconnecting... ")
    node.disconnect()
    l.done
    l.done
  } # @@nodes.each do |node|
end
354
+
355
+ # connect the nodes via ssh.
356
+ # kill all Ruby processes except this one.
357
+ #
358
+ # Parameters:
359
+ # - config: relative path of the configuration file. Example: '../config.rb'
360
+ #
361
def self.stop(config_filename='./config.rb')
  # Note: config_filename is accepted but never read by this method.

  # The connection string must be present and non-blank.
  raise "The connection string is nil" if @@connection_string.nil?
  raise "The connection string is empty" if @@connection_string.empty?
  raise "The connection string is blank" if @@connection_string.strip.empty?

  l = self.logger()

  # Visit every node: connect, kill its Ruby processes, disconnect.
  @@nodes.each do |node|
    l.logs("node:#{node.name()}... ")

    l.logs("Connecting... ")
    node.connect()
    l.done

    l.logs("Killing all Ruby processes except this one... ")
    kill_command = "ps ax | grep ruby | grep -v grep | grep -v #{Process.pid} | cut -b3-7 | xargs -t kill;"
    node.exec(kill_command, false)
    l.done

    l.logs("Disconnecting... ")
    node.disconnect()
    l.done

    l.done
  end # @@nodes.each do |node|
end
388
+
389
+ # stub worker class
390
class Worker
  # id:           name that uniquely identifies the worker
  # assigned_job: name of the job the worker is assigned to, or nil
  # attached:     whether the worker is attached (new workers start attached)
  attr_accessor :id, :assigned_job, :attached

  # Returns an array with the errors found in the worker descriptor.
  # Currently a stub: no validation is performed and the array is always empty.
  def self.descriptor_errors(h)
    # TODO: Code Me!
    [].uniq
  end

  # Builds the worker from its hash descriptor; only h[:id] is read.
  # Raises RuntimeError when the descriptor has errors.
  def initialize(h)
    problems = BlackStack::Pampa::Worker.descriptor_errors(h)
    raise "The worker descriptor is not valid: #{problems.uniq.join(".\n")}" unless problems.empty?
    self.id = h[:id]
    self.assigned_job = nil
    self.attached = true
  end

  # Hash descriptor of the worker (only the id).
  def to_hash()
    descriptor = {}
    descriptor[:id] = self.id
    descriptor
  end

  # Flags the worker as attached.
  def attach()
    self.attached = true
  end

  # Flags the worker as detached.
  def detach()
    self.attached = false
  end
end
422
+
423
+ # stub node class
424
+ # stub node class is already defined in the blackstack-nodes gem: https://github.com/leandrosardi/blackstack-nodes
425
+ # we inherit from it to add some extra methods and attributes
426
class Node
  # The node behavior comes from the blackstack-nodes gem
  # (https://github.com/leandrosardi/blackstack-nodes); this class mixes it in
  # and adds the worker-related attributes.
  include BlackStack::Infrastructure::NodeModule

  # max_workers: number of workers created for this node
  # workers:     array of Worker objects belonging to this node
  attr_accessor :max_workers, :workers

  # Errors reported by NodeModule for the descriptor, plus the checks on :max_workers.
  def self.descriptor_errors(h)
    found = BlackStack::Infrastructure::NodeModule.descriptor_errors(h)
    limit = h[:max_workers]
    # validate: the key :max_workers exists and is an integer
    found << "The key :max_workers is missing" if limit.nil?
    found << "The key :max_workers must be an integer" if !limit.is_a?(Integer)
    # return list of errors
    found.uniq
  end

  # Validates the descriptor, initializes the node through NodeModule, and
  # creates max_workers workers with ids "<node name>.1", "<node name>.2", ...
  def initialize(h, i_logger=nil)
    problems = BlackStack::Pampa::Node.descriptor_errors(h)
    raise "The node descriptor is not valid: #{problems.uniq.join(".\n")}" unless problems.empty?
    super(h, i_logger)
    self.max_workers = h[:max_workers]
    self.workers = []
    1.upto(self.max_workers) do |number|
      descriptor = { :id => "#{self.name}.#{number}", :node => self.to_hash }
      self.workers << BlackStack::Pampa::Worker.new(descriptor)
    end
  end # def initialize(h, i_logger=nil)

  # Hash descriptor of the node: the NodeModule hash plus :max_workers and
  # the hash descriptor of each worker.
  def to_hash()
    ret = super()
    ret[:max_workers] = self.max_workers
    ret[:workers] = self.workers.map { |worker| worker.to_hash }
    ret
  end
end # class Node
464
+
465
+ # stub job class
466
class Job
  # name of the job; Worker#assigned_job is compared against it
  attr_accessor :name
  # database information
  # :field_times, :field_start_time and :field_end_time may be nil
  attr_accessor :table
  attr_accessor :field_primary_key
  attr_accessor :field_id
  attr_accessor :field_time
  attr_accessor :field_times
  attr_accessor :field_start_time
  attr_accessor :field_end_time
  attr_accessor :field_success
  attr_accessor :field_error_description
  # max number of records assigned to a worker that have not started (:start_time field is nil)
  attr_accessor :queue_size
  # max number of minutes that a job should take to process. if :end_time stays nil x minutes
  # after :start_time, the job is considered failed or interrupted
  attr_accessor :max_job_duration_minutes
  # max number of times that a record can start to process & fail (:start_time field is not nil,
  # but :end_time field is still nil after :max_job_duration_minutes)
  attr_accessor :max_try_times
  # additional function that returns an array of tasks pending to be processed by a worker.
  # it should return an array
  # keep it nil if you want to run the default function
  attr_accessor :occupied_function
  # additional function to decide if the worker can dispatch or not
  # example: use this function when you want to decide based on the remaining credits of the client
  # it should return true or false
  # keep it nil if you want it to always return true
  attr_accessor :allowing_function
  # additional function to choose the records to launch
  # it should return an array of IDs
  # keep this parameter nil if you want to use the default algorithm
  attr_accessor :selecting_function
  # additional function to choose the records to retry
  # keep this parameter nil if you want to use the default algorithm
  attr_accessor :relaunching_function
  # additional function to perform the update on a record to retry
  # keep this parameter nil if you want to use the default algorithm
  attr_accessor :relauncher_function
  # additional function to perform the update on a record to flag the starting of the job
  # by default this function will set the :field_start_time field with the current datetime, and it will increase the :field_times counter
  # keep this parameter nil if you want to use the default algorithm
  attr_accessor :starter_function
  # additional function to perform the update on a record to flag the finishing of the job
  # by default this function will set the :field_end_time field with the current datetime
  # keep this parameter nil if you want to use the default algorithm
  attr_accessor :finisher_function
  # Function to execute for each task.
  attr_accessor :processing_function
  # stretch assignation/unassignation of workers
  attr_accessor :max_pending_tasks
  attr_accessor :max_assigned_workers

  # return a hash descriptor of the job (the function attributes are rendered with to_s)
  def to_hash()
    {
      :name => self.name,
      :table => self.table,
      :field_primary_key => self.field_primary_key,
      :field_id => self.field_id,
      :field_time => self.field_time,
      :field_times => self.field_times,
      :field_start_time => self.field_start_time,
      :field_end_time => self.field_end_time,
      :field_success => self.field_success,
      :field_error_description => self.field_error_description,
      :queue_size => self.queue_size,
      :max_job_duration_minutes => self.max_job_duration_minutes,
      :max_try_times => self.max_try_times,
      :occupied_function => self.occupied_function.to_s,
      :allowing_function => self.allowing_function.to_s,
      :selecting_function => self.selecting_function.to_s,
      :relaunching_function => self.relaunching_function.to_s,
      :relauncher_function => self.relauncher_function.to_s,
      :starter_function => self.starter_function.to_s,
      :finisher_function => self.finisher_function.to_s,
      :processing_function => self.processing_function.to_s,
      :max_pending_tasks => self.max_pending_tasks,
      :max_assigned_workers => self.max_assigned_workers,
    }
  end

  # return an array with the errors found in the description of the job.
  # Currently a stub: no validation is performed and the array is always empty.
  def self.descriptor_errors(h)
    errors = []
    # TODO: Code Me!
    errors.uniq
  end

  # setup the job from its hash descriptor; every attribute is read from the
  # key with the same name. Raises RuntimeError if the descriptor has errors.
  def initialize(h)
    errors = BlackStack::Pampa::Job.descriptor_errors(h)
    raise "The job descriptor is not valid: #{errors.uniq.join(".\n")}" if errors.length > 0
    self.name = h[:name]
    self.table = h[:table]
    self.field_primary_key = h[:field_primary_key]
    self.field_id = h[:field_id]
    self.field_time = h[:field_time]
    self.field_times = h[:field_times]
    self.field_start_time = h[:field_start_time]
    self.field_end_time = h[:field_end_time]
    self.field_success = h[:field_success]
    self.field_error_description = h[:field_error_description]
    self.queue_size = h[:queue_size]
    self.max_job_duration_minutes = h[:max_job_duration_minutes]
    self.max_try_times = h[:max_try_times]
    self.occupied_function = h[:occupied_function]
    self.allowing_function = h[:allowing_function]
    self.selecting_function = h[:selecting_function]
    self.relaunching_function = h[:relaunching_function]
    self.relauncher_function = h[:relauncher_function]
    # These two hooks were never read from the descriptor, so a custom
    # starter/finisher could not be configured.
    self.starter_function = h[:starter_function]
    self.finisher_function = h[:finisher_function]
    self.processing_function = h[:processing_function]
    self.max_pending_tasks = h[:max_pending_tasks]
    self.max_assigned_workers = h[:max_assigned_workers]
  end

  # returns an array of tasks pending to be processed by the worker.
  # by default it selects the records whose :field_id equals worker.id and,
  # when :field_start_time is configured, whose start time is still nil.
  def occupied_slots(worker)
    if self.occupied_function.nil?
      return DB[self.table.to_sym].where(self.field_id.to_sym => worker.id, self.field_start_time.to_sym => nil).all if !self.field_start_time.nil?
      return DB[self.table.to_sym].where(self.field_id.to_sym => worker.id).all if self.field_start_time.nil?
    else
      # TODO: validate the returned value (callers use it as an array of tasks)
      return self.occupied_function.call(worker, self)
    end
  end

  # returns the number of free slots in the processing queue of this worker
  # (never negative).
  def available_slots(worker)
    occupied = self.occupied_slots(worker).size
    allowed = self.queue_size
    if occupied > allowed
      return 0
    else
      return allowed - occupied
    end
  end

  # decide if the worker can dispatch or not
  # example: use this function when you want to decide based on the remaining credits of the client
  # returns true when no :allowing_function is configured
  def allowing(worker)
    if self.allowing_function.nil?
      return true
    else
      # TODO: validate that it returns true or false
      return self.allowing_function.call(worker, self)
    end
  end

  # returns an array of up to n tasks available for dispatching: records not
  # reserved by any worker, not finished (when :field_end_time is configured)
  # and tried fewer than :max_try_times times (when :field_times is configured).
  def selecting_dataset(n)
    ds = DB[self.table.to_sym].where(self.field_id.to_sym => nil)
    ds = ds.filter(self.field_end_time.to_sym => nil) if !self.field_end_time.nil?
    ds = ds.filter(Sequel.function(:coalesce, self.field_times.to_sym, 0)=>self.max_try_times.times.to_a) if !self.field_times.nil?
    ds.limit(n).all
  end # selecting_dataset

  # returns an array of available tasks for dispatching, from the custom
  # :selecting_function when configured, or from selecting_dataset otherwise.
  def selecting(n)
    if self.selecting_function.nil?
      return self.selecting_dataset(n)
    else
      # TODO: validate that it returns an array
      return self.selecting_function.call(n, self)
    end
  end

  # returns an array of up to n tasks for restarting: records reserved more
  # than :max_job_duration_minutes ago and not finished yet.
  def relaunching_dataset(n)
    # :field_end_time may be nil; in that case the end-time condition is left
    # out (interpolating nil produced the invalid SQL "AND  IS NULL").
    end_time_filter = self.field_end_time.nil? ? '' : "AND #{self.field_end_time.to_s} IS NULL"
    q = "
      SELECT *
      FROM #{self.table.to_s}
      WHERE #{self.field_time.to_s} IS NOT NULL
      AND #{self.field_time.to_s} < CURRENT_TIMESTAMP() - INTERVAL '#{self.max_job_duration_minutes.to_i} minutes'
      AND #{self.field_id.to_s} IS NOT NULL
      #{end_time_filter}
      LIMIT #{n.to_i}
    "
    DB[q].all
  end

  # returns an array of tasks for restarting, from the custom
  # :relaunching_function when configured, or from relaunching_dataset otherwise.
  def relaunching(n)
    if self.relaunching_function.nil?
      return self.relaunching_dataset(n)
    else
      # TODO: validate that it returns an array
      return self.relaunching_function.call(n, self)
    end
  end

  # release the record o: clear its reservation and timestamps and write it back.
  def relaunch(o)
    o[self.field_id.to_sym] = nil
    o[self.field_time.to_sym] = nil
    o[self.field_start_time.to_sym] = nil if !self.field_start_time.nil?
    o[self.field_end_time.to_sym] = nil if !self.field_end_time.nil?
    DB[self.table.to_sym].where(self.field_primary_key.to_sym => o[self.field_primary_key.to_sym]).update(o)
  end

  # flag the record o as started, through :starter_function when configured.
  def start(o)
    if self.starter_function.nil?
      o[self.field_start_time.to_sym] = DB["SELECT CURRENT_TIMESTAMP() AS dt"].first[:dt] if !self.field_start_time.nil? # IMPORTANT: use DB location to get current time.
      # :field_times may be nil; only count the attempt when it is configured.
      o[self.field_times.to_sym] = o[self.field_times.to_sym].to_i + 1 if !self.field_times.nil?
      DB[self.table.to_sym].where(self.field_primary_key.to_sym => o[self.field_primary_key.to_sym]).update(o)
    else
      self.starter_function.call(o, self)
    end
  end

  # flag the record o as finished, through :finisher_function when configured.
  # e is the exception raised while processing, or nil on success; the end
  # time is only written on success.
  def finish(o, e=nil)
    if self.finisher_function.nil?
      o[self.field_end_time.to_sym] = DB["SELECT CURRENT_TIMESTAMP() AS dt"].first[:dt] if !self.field_end_time.nil? && e.nil? # IMPORTANT: use DB location to get current time.
      o[self.field_success.to_sym] = e.nil?
      o[self.field_error_description.to_sym] = e.to_console if !e.nil?
      DB[self.table.to_sym].where(self.field_primary_key.to_sym => o[self.field_primary_key.to_sym]).update(o)
    else
      self.finisher_function.call(o, e, self)
    end
  end

  # relaunch records.
  # n is the max number of records to fetch; it defaults to queue_size + 1,
  # the same amount BlackStack::Pampa.relaunch requests. (The call used to
  # omit the argument that `relaunching` requires and raised ArgumentError.)
  def run_relaunch(n = self.queue_size + 1)
    # relaunch failed records
    self.relaunching(n).each { |o|
      if self.relauncher_function.nil?
        self.relaunch(o)
      else
        self.relauncher_function.call(o)
      end
      # release resources
      DB.disconnect
      GC.start
    }
  end # def run_relaunch

  # dispatch records to the worker, up to its number of available slots.
  # returns the # of records dispatched
  def run_dispatch(worker)
    # get # of available slots
    n = self.available_slots(worker)

    # dispatching n pending records
    i = 0
    if n>0
      self.selecting(n).each { |o|
        # count the # of dispatched
        i += 1
        # dispatch records: reserve the record for the worker and reset its timestamps
        o[self.field_id.to_sym] = worker.id
        o[self.field_time.to_sym] = DB["SELECT CURRENT_TIMESTAMP() AS dt"].first[:dt] # IMPORTANT: use DB location to get current time.
        o[self.field_start_time.to_sym] = nil if !self.field_start_time.nil?
        o[self.field_end_time.to_sym] = nil if !self.field_end_time.nil?
        DB[self.table.to_sym].where(self.field_primary_key.to_sym => o[self.field_primary_key.to_sym]).update(o)
        # release resources
        DB.disconnect
        GC.start
      }
    end

    #
    return i
  end
end # class Job
735
+ end # module Pampa
736
+ end # module BlackStack
data/worker.rb ADDED
@@ -0,0 +1,147 @@
1
+ # require the gem simple_command_line_parser for parsing command line parameters.
2
+ require 'simple_command_line_parser'
3
+ # require the gem simple_cloud_logging for writing logfiles.
4
+ require 'simple_cloud_logging'
5
+ # require the gem sequel for connecting to the database and handle ORM classes.
6
+ require 'sequel'
7
+
8
+ # parse command line parameters
9
PARSER = BlackStack::SimpleCommandLineParser.new(
  :description => 'This script starts an infinite loop. Each loop will look for a task to perform. Must be a delay between each loop.',
  :configuration => [{
    :name=>'delay',
    :mandatory=>false,
    :default=>30,
    :description=>'Minimum delay between loops. A minimum of 10 seconds is recommended, in order to don\'t hard the database server. Default is 30 seconds.',
    :type=>BlackStack::SimpleCommandLineParser::INT,
  }, {
    :name=>'debug',
    :mandatory=>false,
    :default=>false,
    :description=>'Activate this flag if you want to require the `pampa.rb` file from the same Pampa project folder, insetad to require the gem as usual.',
    :type=>BlackStack::SimpleCommandLineParser::BOOL,
  }, {
    :name=>'pampa',
    :mandatory=>false,
    :default=>'./lib/pampa.rb',
    :description=>'Ruby file to require where `debug` is activated.',
    :type=>BlackStack::SimpleCommandLineParser::STRING,
  }, {
    :name=>'config',
    :mandatory=>false,
    :default=>'./config.rb',
    :description=>'Ruby file where is defined the connection-string and jobs.',
    :type=>BlackStack::SimpleCommandLineParser::STRING,
  }, {
    :name=>'id',
    :mandatory=>true,
    :description=>'Write here a unique identifier for the worker.',
    :type=>BlackStack::SimpleCommandLineParser::STRING,
  }]
)

# creating logfile: worker.<id>.log
l = BlackStack::LocalLogger.new('worker.'+PARSER.value('id').to_s+'.log')

begin
  # log the startup
  l.log 'STARTING WORKER'

  # show the parameters
  # TODO: replace this hardcoded array for method `PARSER.params`.
  # reference: https://github.com/leandrosardi/simple_command_line_parser/issues/7
  #['id','delay','debug','pampa','config'].each { |param| l.log param + ': ' + PARSER.value(param).to_s }

  # require the pampa library: the installed gem, or the file given in `pampa` when `debug` is on
  l.logs 'Requiring pampa (debug='+(PARSER.value('debug') ? 'true' : 'false')+', pampa='+PARSER.value('pampa')+')... '
  require 'pampa' if !PARSER.value('debug')
  require PARSER.value('pampa') if PARSER.value('debug')
  l.done

  # require the config.rb file where the jobs are defined.
  l.logs 'Requiring config (config='+PARSER.value('config')+')... '
  require PARSER.value('config')
  l.done

  # getting the worker object whose id matches the `id` parameter
  worker = BlackStack::Pampa.workers.select { |w| w.id == PARSER.value('id') }.first
  raise 'Worker '+PARSER.value('id')+' not found.' if worker.nil?

  # start the loop
  while true
    # get the start loop time
    l.logs 'Starting loop... '
    start = Time.now()
    l.done

    BlackStack::Pampa.jobs.each { |job|
      l.logs 'Processing job '+job.name+'... '
      tasks = job.occupied_slots(worker)
      l.logf tasks.size.to_s+' tasks in queue.'

      tasks.each { |task|
        l.logs 'Flag task '+job.name+'.'+task[job.field_primary_key.to_sym].to_s+' started... '
        job.start(task)
        l.done

        begin
          l.logs 'Processing task '+task[job.field_primary_key.to_sym].to_s+'... '
          job.processing_function.call(task, l, job, worker)
          l.done

          l.logs 'Flag task '+job.name+'.'+task[job.field_primary_key.to_sym].to_s+' finished... '
          job.finish(task)
          l.done

        # note: this catches the CTRL+C signal.
        # note: this catches the `kill` command, ONLY if it has not the `-9` option.
        rescue SignalException, SystemExit, Interrupt => e
          l.logs 'Flag task '+job.name+'.'+task[job.field_primary_key.to_sym].to_s+' interrumpted... '
          job.finish(task, e)
          l.done

          # the logger is `l`; there is no local named `log`, so the previous
          # `log.logf` raised NameError here and hid the original signal.
          l.logf 'Bye!'

          # re-raise so the outer handler ends the process
          raise e

        rescue => e
          # a failed task is flagged with its error and the loop carries on
          l.logs 'Flag task '+job.name+'.'+task[job.field_primary_key.to_sym].to_s+' failed... '
          job.finish(task, e)
          l.done

          l.logf 'Error: '+e.to_console
        end
      }
    }

    # get the end loop time
    l.logs 'Ending loop... '
    finish = Time.now()
    l.done

    # get the difference in seconds between start and finish
    l.logs 'Calculating loop duration... '
    diff = finish - start
    l.logf 'done ('+diff.to_s+')'

    if diff < PARSER.value('delay')
      # sleep for the remainder of the configured delay
      n = PARSER.value('delay')-diff

      l.logs 'Sleeping for '+n.to_label+' seconds... '
      sleep n
      l.done
    else
      l.log 'No sleeping. The loop took '+diff.to_label+' seconds.'
    end
  end # while true
rescue SignalException, SystemExit, Interrupt
  # note: this catches the CTRL+C signal.
  # note: this catches the `kill` command, ONLY if it has not the `-9` option.
  l.logf 'Process Interrumpted.'
rescue => e
  # `rescue => e` already covers every StandardError, so the bare `rescue`
  # that used to follow it could never run and has been dropped.
  l.logf 'Fatal Error: '+e.to_console
end
metadata ADDED
@@ -0,0 +1,163 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pampa
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Leandro Daniel Sardi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-09-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sequel
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 5.56.0
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 5.56.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: 5.56.0
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 5.56.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: blackstack_core
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 1.2.3
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 1.2.3
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: 1.2.3
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 1.2.3
53
+ - !ruby/object:Gem::Dependency
54
+ name: blackstack_nodes
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: 1.2.10
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 1.2.10
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: 1.2.10
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 1.2.10
73
+ - !ruby/object:Gem::Dependency
74
+ name: simple_command_line_parser
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - "~>"
78
+ - !ruby/object:Gem::Version
79
+ version: 1.1.2
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 1.1.2
83
+ type: :runtime
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 1.1.2
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: 1.1.2
93
+ - !ruby/object:Gem::Dependency
94
+ name: simple_cloud_logging
95
+ requirement: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - "~>"
98
+ - !ruby/object:Gem::Version
99
+ version: 1.2.2
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: 1.2.2
103
+ type: :runtime
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: 1.2.2
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: 1.2.2
113
+ description: |
114
+ Pampa is a Ruby library for async & distributed computing, providing the following features:
115
+
116
+ - cluster-management with dynamic reconfiguration (joining and leaving nodes);
117
+ - distribution of the computation jobs to the (active) nodes;
118
+ - error handling, job-retry and fault tolerance;
119
+ - fast (non-direct) communication to ensure realtime capabilities.
120
+
121
+ The Pampa framework may be widely used for:
122
+
123
+ - large scale web scraping with what we call a "bot-farm";
124
+ - payments processing for large-scale ecommerce websites;
125
+ - report generation for high-demand SaaS platforms;
126
+ - heavy mathematical model computing;
127
+
128
+ and any other tasks that require a virtually infinite amount of CPU computing and memory resources.
129
+
130
+ Find documentation here: https://github.com/leandrosardi/pampa
131
+ email: leandro.sardi@expandedventure.com
132
+ executables: []
133
+ extensions: []
134
+ extra_rdoc_files: []
135
+ files:
136
+ - lib/pampa.rb
137
+ - worker.rb
138
+ homepage: https://rubygems.org/gems/pampa
139
+ licenses:
140
+ - MIT
141
+ metadata: {}
142
+ post_install_message:
143
+ rdoc_options: []
144
+ require_paths:
145
+ - lib
146
+ required_ruby_version: !ruby/object:Gem::Requirement
147
+ requirements:
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ version: '0'
151
+ required_rubygems_version: !ruby/object:Gem::Requirement
152
+ requirements:
153
+ - - ">="
154
+ - !ruby/object:Gem::Version
155
+ version: '0'
156
+ requirements: []
157
+ rubygems_version: 3.3.7
158
+ signing_key:
159
+ specification_version: 4
160
+ summary: Ruby library for async & distributed computing, supporting dynamic reconfiguration,
161
+ distribution of the computation jobs, error handling, job-retry and fault tolerance,
162
+ and fast (non-direct) communication to ensure real-time capabilities.
163
+ test_files: []