batches_task_processor 0.2.0 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +18 -26
- data/lib/batches_task_processor/model.rb +33 -3
- data/lib/batches_task_processor/processor.rb +28 -28
- data/lib/batches_task_processor/processor_job.rb +11 -0
- data/lib/batches_task_processor/version.rb +1 -1
- data/lib/batches_task_processor.rb +1 -0
- data/lib/db/migrate/20220727101904_add_batches_task_processor.rb +9 -1
- data/lib/tasks/batches_task_processor_tasks.rake +2 -13
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e3ca93ca15247aafcea0afc6873b11bc26d8519e7b5ef477ceefd9174552888
|
4
|
+
data.tar.gz: e2669c5ed9c3f8afdefa0d5e59ee7750437560d95b78792dfa5c43da2edbbf1e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 140fa98d409be395a422f2687d64d0690b1b65ae1ded1cd7e00e2b0d3217680c65b8629b7aea8da5b38dd6b62fb79acd8631ae250bfe262121b1107466b65f16
|
7
|
+
data.tar.gz: e594672fb7e890d0e475f7480f6b503c34d64fc4525c50c6bfe47909b1c4b573ab6af62db13140d847cd6a5ce49332e42d7b3ea8db75d5931d27b284dea41b99
|
data/README.md
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# BatchesTaskProcessor
|
2
|
-
Gem that allows to process huge amount of any kind of tasks in parallel using batches.
|
2
|
+
Ruby gem that processes a huge number of tasks of any kind in parallel using batches, with the ability to cancel at any time and rerun later (already processed items are skipped when rerunning), which reduces the processing time dramatically.
|
3
|
+
The jobs created can be processed in background (via background jobs) or in the foreground (inline).
|
3
4
|
|
4
5
|
## Installation
|
5
6
|
Add this line to your application's Gemfile:
|
@@ -7,36 +8,31 @@ Add this line to your application's Gemfile:
|
|
7
8
|
```ruby
|
8
9
|
gem "batches_task_processor"
|
9
10
|
```
|
10
|
-
And then execute: `bundle install`
|
11
|
-
|
11
|
+
And then execute: `bundle install && bundle exec rake db:migrate`
|
12
12
|
|
13
13
|
## Usage
|
14
|
-
- Register a new task:
|
14
|
+
- Register a new task:
|
15
|
+
The following will process 200k items with 10 parallel jobs, each one in charge of 20k items (`preload_job_items` is recommended for performance reasons):
|
15
16
|
```ruby
|
16
17
|
task = BatchesTaskProcessor::Model.create!(
|
17
18
|
key: 'my_process',
|
18
|
-
data:
|
19
|
-
qty_jobs: 10,
|
20
|
-
process_item: 'puts "my item: #{item}"'
|
21
|
-
)
|
22
|
-
```
|
23
|
-
Activerecord sample (recommended `preload_job_items` for performance reasons):
|
24
|
-
```ruby
|
25
|
-
task = BatchesTaskProcessor::Model.create!(
|
26
|
-
key: 'my_process',
|
27
|
-
data: Article.all.pluck(:id),
|
19
|
+
data: Article.all.limit(200000).pluck(:id),
|
28
20
|
qty_jobs: 10,
|
29
21
|
preload_job_items: 'Article.where(id: items)',
|
30
|
-
process_item: 'puts "my article: #{item.id}"'
|
22
|
+
process_item: 'puts "my article ID: #{item.id}"'
|
31
23
|
)
|
24
|
+
task.start!
|
32
25
|
```
|
33
|
-
|
34
|
-
- Run the corresponding rake task:
|
35
|
-
Copy the `task.id` from step one and use it in the following code:
|
36
|
-
`RUNNER_MODEL_ID=<id-here> rake batches_task_processor:call`
|
37
|
-
|
38
26
|
![Photo](./img.png)
|
39
27
|
|
28
|
+
## Task api
|
29
|
+
- `task.start!` starts the task (initializes the jobs)
|
30
|
+
- `task.cancel` cancels the task at any time and stops processing the items
|
31
|
+
- `task.export` exports the items that were processed to a CSV file
|
32
|
+
- `task.status` prints the current status of the task
|
33
|
+
- `task.items` returns the items that were processed so far
|
34
|
+
Each item includes the following attributes: `# { key: 'value from items', result: "value returned from the process_item callback", error_details: "error message from the process_item callback if failed" }`
|
35
|
+
|
40
36
|
## TODO
|
41
37
|
- update tests
|
42
38
|
|
@@ -44,15 +40,11 @@ And then execute: `bundle install`
|
|
44
40
|
Settings:
|
45
41
|
- `data` (Array<Integer|String>) Array of whole items to be processed.
|
46
42
|
- `key` (Mandatory) key to be used to identify the task.
|
47
|
-
- `
|
43
|
+
- `queue_name` (String, default `default`) name of the background queue to be used (If `nil`, will run the process inline).
|
44
|
+
- `qty_jobs` (Optional) number of jobs to be created (all `data` items will be distributed across this qty of jobs). Default: `10`
|
48
45
|
- `process_item` (Mandatory) callback to be called to process each item, where the `item` variable holds the current item value. Sample: `'Article.find(item).update_column(:title, "changed")'`
|
49
46
|
- `preload_job_items` (Optional) callback that allows preloading the items list and/or associations, where the `items` variable holds the current chunk of items to be processed (by default returns the same list). Sample: `Article.where(id: items)`
|
50
47
|
|
51
|
-
Tasks (requires `RUNNER_MODEL_ID` env variable):
|
52
|
-
- `rake batches_task_processor:call` Starts the processing of jobs (Skips already processed ones when rerunning after cancel).
|
53
|
-
- `rake batches_task_processor:status` Prints the process status.
|
54
|
-
- `rake batches_task_processor:cancel` Marks as cancelled the process and stops processing jobs (Change into `pending` to rerun again).
|
55
|
-
|
56
48
|
## Contributing
|
57
49
|
Contribution directions go here.
|
58
50
|
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'csv'
|
3
4
|
module BatchesTaskProcessor
|
4
5
|
class Model < ActiveRecord::Base
|
5
6
|
self.table_name = 'batches_task_processors'
|
@@ -7,6 +8,7 @@ module BatchesTaskProcessor
|
|
7
8
|
validates :process_item, presence: true
|
8
9
|
validates :key, presence: true
|
9
10
|
before_create :apply_data_uniqueness
|
11
|
+
before_create :check_qty_jobs
|
10
12
|
# state: :pending, :processing, :finished, :canceled
|
11
13
|
|
12
14
|
def qty_items_job
|
@@ -17,18 +19,46 @@ module BatchesTaskProcessor
|
|
17
19
|
update!(state: :finished, finished_at: Time.current)
|
18
20
|
end
|
19
21
|
|
20
|
-
def
|
22
|
+
def all_processed?
|
23
|
+
items.count == data.count
|
24
|
+
end
|
25
|
+
|
26
|
+
# Text data columns support (Mysql only)
|
27
|
+
def data
|
28
|
+
self[:data].is_a?(String) ? JSON.parse(self[:data] || '[]') : self[:data]
|
29
|
+
end
|
30
|
+
|
31
|
+
# ********* user methods
|
32
|
+
def start!
|
33
|
+
Processor.new(id).call
|
34
|
+
end
|
35
|
+
|
36
|
+
def cancel
|
21
37
|
update!(state: :canceled)
|
22
38
|
end
|
23
39
|
|
24
|
-
def
|
25
|
-
items.count
|
40
|
+
def status
|
41
|
+
Rails.logger.info "Process status: #{task_model.items.count}/#{task_model.data.count}"
|
26
42
|
end
|
27
43
|
|
44
|
+
def export
|
45
|
+
path = Rails.root.join('tmp/batches_task_processor_result.csv')
|
46
|
+
data = items.pluck(:key, :result, :error_details)
|
47
|
+
data = [['Key', 'Result', 'Error details']] + data
|
48
|
+
File.write(path, data.map(&:to_csv).join)
|
49
|
+
Rails.logger.info "Exported to #{path}"
|
50
|
+
end
|
51
|
+
# ********* end user methods
|
52
|
+
|
28
53
|
private
|
29
54
|
|
30
55
|
def apply_data_uniqueness
|
31
56
|
self.data = data.uniq
|
32
57
|
end
|
58
|
+
|
59
|
+
# Fix: at least 1 item per job
|
60
|
+
def check_qty_jobs
|
61
|
+
self.qty_jobs = data.count if data.count < qty_jobs
|
62
|
+
end
|
33
63
|
end
|
34
64
|
end
|
@@ -3,11 +3,10 @@
|
|
3
3
|
require 'active_support/all'
|
4
4
|
module BatchesTaskProcessor
|
5
5
|
class Processor
|
6
|
-
|
7
|
-
attr_reader :model_id
|
6
|
+
attr_reader :task_id
|
8
7
|
|
9
|
-
def initialize(
|
10
|
-
@
|
8
|
+
def initialize(task_id = nil)
|
9
|
+
@task_id = task_id || ENV['RUNNER_TASK_ID']
|
11
10
|
end
|
12
11
|
|
13
12
|
def call
|
@@ -18,37 +17,38 @@ module BatchesTaskProcessor
|
|
18
17
|
run_job(job_no.to_i)
|
19
18
|
end
|
20
19
|
|
21
|
-
def status
|
22
|
-
log "Process status: #{process_model.items.count}/#{process_model.data.count}"
|
23
|
-
end
|
24
|
-
|
25
|
-
def cancel
|
26
|
-
process_model.cancel!
|
27
|
-
end
|
28
|
-
|
29
20
|
private
|
30
21
|
|
31
22
|
# @example item.perform_my_action
|
32
23
|
def process_item(item)
|
33
|
-
instance_eval(
|
24
|
+
instance_eval(task_model.process_item)
|
34
25
|
end
|
35
26
|
|
36
27
|
# @example Article.where(no: items)
|
37
28
|
def preload_job_items(items)
|
38
|
-
instance_eval(
|
29
|
+
instance_eval(task_model.preload_job_items || 'items')
|
39
30
|
end
|
40
31
|
|
41
32
|
def init_jobs
|
42
|
-
jobs =
|
33
|
+
jobs = task_model.qty_jobs
|
43
34
|
log "Initializing #{jobs} jobs..."
|
44
35
|
jobs.times.each do |index|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
36
|
+
if task_model.queue_name
|
37
|
+
log "Scheduling ##{index} job..."
|
38
|
+
BatchesTaskProcessor::ProcessorJob.set(queue: task_model.queue_name).perform_later(task_id, index)
|
39
|
+
else
|
40
|
+
start_inline_job(index)
|
41
|
+
end
|
49
42
|
end
|
50
43
|
end
|
51
44
|
|
45
|
+
def start_inline_job(job_no)
|
46
|
+
log "Starting ##{job_no} job..."
|
47
|
+
env_vars = "RUNNER_JOB_NO=#{job_no} RUNNER_TASK_ID=#{task_id}"
|
48
|
+
pid = Process.spawn("#{env_vars} rake batches_task_processor:process_job &")
|
49
|
+
Process.detach(pid)
|
50
|
+
end
|
51
|
+
|
52
52
|
def run_job(job)
|
53
53
|
log "Running ##{job} job..."
|
54
54
|
items = job_items(job)
|
@@ -61,38 +61,38 @@ module BatchesTaskProcessor
|
|
61
61
|
end
|
62
62
|
|
63
63
|
log "Finished #{job} job..."
|
64
|
-
|
64
|
+
task_model.finish! if task_model.all_processed?
|
65
65
|
end
|
66
66
|
|
67
67
|
def job_items(job)
|
68
|
-
res =
|
68
|
+
res = task_model.data.each_slice(task_model.qty_items_job).to_a[job]
|
69
69
|
preload_job_items(res)
|
70
70
|
end
|
71
71
|
|
72
72
|
def start_process_item(item, job, key, index)
|
73
|
-
log "Processing #{job
|
73
|
+
log "Processing key: #{key}, job: #{job}, counter: #{index}/#{task_model.qty_items_job}"
|
74
74
|
result = process_item(item)
|
75
|
-
|
75
|
+
task_model.items.create!(key: key, result: result.to_s[0..255])
|
76
76
|
rescue => e
|
77
|
-
|
77
|
+
task_model.items.create!(key: key, error_details: e.message)
|
78
78
|
log "Process failed #{job}/#{key}: #{e.message}"
|
79
79
|
end
|
80
80
|
|
81
81
|
def already_processed?(key)
|
82
|
-
|
82
|
+
task_model.items.where(key: key).exists?
|
83
83
|
end
|
84
84
|
|
85
85
|
def process_cancelled?
|
86
|
-
|
86
|
+
task_model.state == 'cancelled'
|
87
87
|
end
|
88
88
|
|
89
89
|
def log(msg)
|
90
90
|
puts "BatchesTaskProcessor => #{msg}"
|
91
91
|
end
|
92
92
|
|
93
|
-
def
|
93
|
+
def task_model
|
94
94
|
klass = BatchesTaskProcessor::Model.all
|
95
|
-
|
95
|
+
task_id ? klass.find(task_id) : klass.last
|
96
96
|
end
|
97
97
|
end
|
98
98
|
end
|
@@ -5,11 +5,13 @@ class AddBatchesTaskProcessor < ActiveRecord::Migration[5.0]
|
|
5
5
|
create_table :batches_task_processors do |t|
|
6
6
|
t.string :key
|
7
7
|
t.string :state, default: :pending
|
8
|
-
t.json :data, default: []
|
8
|
+
t.json :data, default: [] if support_json?
|
9
|
+
t.text :data unless support_json?
|
9
10
|
t.integer :qty_jobs, default: 10
|
10
11
|
t.datetime :finished_at
|
11
12
|
t.text :preload_job_items
|
12
13
|
t.text :process_item, null: false
|
14
|
+
t.string :queue_name, default: :default
|
13
15
|
t.timestamps
|
14
16
|
end
|
15
17
|
|
@@ -21,4 +23,10 @@ class AddBatchesTaskProcessor < ActiveRecord::Migration[5.0]
|
|
21
23
|
t.timestamps
|
22
24
|
end
|
23
25
|
end
|
26
|
+
|
27
|
+
def support_json?
|
28
|
+
connector_name = ActiveRecord::Base.connection.adapter_name.downcase
|
29
|
+
no_json = connector_name.include?('mysql') || connector_name.include?('sqlite')
|
30
|
+
!no_json
|
31
|
+
end
|
24
32
|
end
|
@@ -3,22 +3,11 @@
|
|
3
3
|
namespace :batches_task_processor do
|
4
4
|
desc 'Starts the Batches Task Processor'
|
5
5
|
task call: :environment do
|
6
|
-
BatchesTaskProcessor::Processor.new(ENV['
|
6
|
+
BatchesTaskProcessor::Processor.new(ENV['RUNNER_TASK_ID']).call
|
7
7
|
end
|
8
8
|
|
9
9
|
desc 'Starts the Batches Task Processor'
|
10
10
|
task process_job: :environment do
|
11
|
-
BatchesTaskProcessor::Processor.new(ENV['
|
12
|
-
end
|
13
|
-
|
14
|
-
|
15
|
-
desc 'Prints the status of the Task Processor'
|
16
|
-
task status: :environment do
|
17
|
-
BatchesTaskProcessor::Processor.new(ENV['RUNNER_MODEL_ID']).status
|
18
|
-
end
|
19
|
-
|
20
|
-
desc 'Cancels the Batches Task Processor'
|
21
|
-
task cancel: :environment do
|
22
|
-
BatchesTaskProcessor::Processor.new(ENV['RUNNER_MODEL_ID']).cancel
|
11
|
+
BatchesTaskProcessor::Processor.new(ENV['RUNNER_TASK_ID']).process_job(ENV['RUNNER_JOB_NO'])
|
23
12
|
end
|
24
13
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: batches_task_processor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Owen Peredo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-08-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -37,6 +37,7 @@ files:
|
|
37
37
|
- lib/batches_task_processor/model.rb
|
38
38
|
- lib/batches_task_processor/model_item.rb
|
39
39
|
- lib/batches_task_processor/processor.rb
|
40
|
+
- lib/batches_task_processor/processor_job.rb
|
40
41
|
- lib/batches_task_processor/railtie.rb
|
41
42
|
- lib/batches_task_processor/version.rb
|
42
43
|
- lib/db/migrate/20220727101904_add_batches_task_processor.rb
|