batches_task_processor 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -25
- data/lib/batches_task_processor/model.rb +22 -3
- data/lib/batches_task_processor/processor.rb +28 -28
- data/lib/batches_task_processor/processor_job.rb +11 -0
- data/lib/batches_task_processor/version.rb +1 -1
- data/lib/batches_task_processor.rb +1 -0
- data/lib/db/migrate/20220727101904_add_batches_task_processor.rb +1 -0
- data/lib/tasks/batches_task_processor_tasks.rake +2 -13
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 66cd035ce5d4863e28c7052efc51b79d2cfcc3cae0b0ab84e14a8a3921da9dee
|
4
|
+
data.tar.gz: b38a41a2489cbc422e3d18c108fd61c00f46304e17b179e6ec109b2b29e956a0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1618b2a8460a30828df870b559a299d75283ab960d9d74c51743da5f1ab3992c61f9e7ed0f1eefc6af3a89e18a5a719387003497c55def0fdd668e3a5902689b
|
7
|
+
data.tar.gz: 3f822048a4ee3efa3244416348e680ba38cfd779d5e3de8cff2d66add02c9cd465272e78d12c5a127d7d6caaf738f8ca9af6b2d6b9892db759d48cd7ca830529
|
data/README.md
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# BatchesTaskProcessor
|
2
|
-
Gem that allows to process huge amount of any kind of tasks in parallel using batches.
|
2
|
+
Ruby Gem that allows processing a huge number of tasks of any kind in parallel using batches, with the ability to cancel at any time.
|
3
|
+
The jobs created can be processed in background or in the foreground (inline) with the ability to rerun/retry later (excludes the already processed ones).
|
3
4
|
|
4
5
|
## Installation
|
5
6
|
Add this line to your application's Gemfile:
|
@@ -7,36 +8,30 @@ Add this line to your application's Gemfile:
|
|
7
8
|
```ruby
|
8
9
|
gem "batches_task_processor"
|
9
10
|
```
|
10
|
-
And then execute: `bundle install`
|
11
|
-
|
11
|
+
And then execute: `bundle install && bundle exec rake db:migrate`
|
12
12
|
|
13
13
|
## Usage
|
14
|
-
- Register a new task:
|
14
|
+
- Register a new task:
|
15
|
+
The following will process 200 items with 10 jobs in parallel, each one in charge of 20 items (`preload_job_items` is recommended for performance reasons):
|
15
16
|
```ruby
|
16
17
|
task = BatchesTaskProcessor::Model.create!(
|
17
18
|
key: 'my_process',
|
18
|
-
data:
|
19
|
-
qty_jobs: 10,
|
20
|
-
process_item: 'puts "my item: #{item}"'
|
21
|
-
)
|
22
|
-
```
|
23
|
-
Activerecord sample (recommended `preload_job_items` for performance reasons):
|
24
|
-
```ruby
|
25
|
-
task = BatchesTaskProcessor::Model.create!(
|
26
|
-
key: 'my_process',
|
27
|
-
data: Article.all.pluck(:id),
|
19
|
+
data: Article.all.limit(200).pluck(:id),
|
28
20
|
qty_jobs: 10,
|
29
21
|
preload_job_items: 'Article.where(id: items)',
|
30
22
|
process_item: 'puts "my article: #{item.id}"'
|
31
23
|
)
|
24
|
+
task.start!
|
32
25
|
```
|
33
|
-
|
34
|
-
- Run the corresponding rake task:
|
35
|
-
Copy the `task.id` from step one and use it in the following code:
|
36
|
-
`RUNNER_MODEL_ID=<id-here> rake batches_task_processor:call`
|
37
|
-
|
38
26
|

|
39
27
|
|
28
|
+
## Task api
|
29
|
+
- `task.start!` starts the task (initializes the jobs)
|
30
|
+
- `task.cancel` cancels the task and stops processing the items
|
31
|
+
- `task.export` exports the items that were processed in a csv file
|
32
|
+
- `task.items` returns the items that were processed
|
33
|
+
Each item includes the following attributes: `# { result: "value returned from the process_item callback", error_details: "error message from the process_item callback if failed" }`
|
34
|
+
|
40
35
|
## TODO
|
41
36
|
- update tests
|
42
37
|
|
@@ -44,15 +39,11 @@ And then execute: `bundle install`
|
|
44
39
|
Settings:
|
45
40
|
- `data` (Array<Integer|String>) Array of whole items to be processed.
|
46
41
|
- `key` (Mandatory) key to be used to identify the task.
|
47
|
-
- `
|
42
|
+
- `queue_name` (String, default `default`) name of the background queue to be used (If `nil`, will run the process inline).
|
43
|
+
- `qty_jobs` (Optional) number of jobs to be created (all `data` items will be distributed across this qty of jobs). Default: `10`
|
48
44
|
- `process_item` (Mandatory) callback to be called to process each item where `item` variable holds the current item value. Sample: `'Article.find(item).update_column(:title, "changed")'`
|
49
45
|
- `preload_job_items` (Optional) callback that allows to preload items list and/or associations where `items` variable holds the current chunk of items to be processed (by default returns the same list). Sample: `Article.where(id: items)`
|
50
46
|
|
51
|
-
Tasks (requires `RUNNER_MODEL_ID` env variable):
|
52
|
-
- `rake batches_task_processor:call` Starts the processing of jobs (Skips already processed ones when rerunning after cancel).
|
53
|
-
- `rake batches_task_processor:status` Prints the process status.
|
54
|
-
- `rake batches_task_processor:cancel` Marks as cancelled the process and stops processing jobs (Change into `pending` to rerun again).
|
55
|
-
|
56
47
|
## Contributing
|
57
48
|
Contribution directions go here.
|
58
49
|
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'csv'
|
3
4
|
module BatchesTaskProcessor
|
4
5
|
class Model < ActiveRecord::Base
|
5
6
|
self.table_name = 'batches_task_processors'
|
@@ -17,13 +18,31 @@ module BatchesTaskProcessor
|
|
17
18
|
update!(state: :finished, finished_at: Time.current)
|
18
19
|
end
|
19
20
|
|
20
|
-
def
|
21
|
+
def all_processed?
|
22
|
+
items.count == data.count
|
23
|
+
end
|
24
|
+
|
25
|
+
# ********* user methods
|
26
|
+
def start!
|
27
|
+
Processor.new(id).call
|
28
|
+
end
|
29
|
+
|
30
|
+
def cancel
|
21
31
|
update!(state: :canceled)
|
22
32
|
end
|
23
33
|
|
24
|
-
def
|
25
|
-
items.count
|
34
|
+
# Logs the task progress as "<processed items>/<total items>" through
# Rails.logger.
#
# The counts are read from this record's own `items` and `data`, the same
# pair that `all_processed?` compares. The previous body called
# `task_model`, which is a private helper of the Processor class and is not
# defined in the visible part of this model.
def status
  Rails.logger.info "Process status: #{items.count}/#{data.count}"
end
|
37
|
+
|
38
|
+
def export
|
39
|
+
path = Rails.root.join('tmp/batches_task_processor_result.csv')
|
40
|
+
data = items.pluck(:key, :result, :error_details)
|
41
|
+
data = [['Key', 'Result', 'Error details']] + data
|
42
|
+
File.write(path, data.map(&:to_csv).join)
|
43
|
+
Rails.logger.info "Exported to #{path}"
|
26
44
|
end
|
45
|
+
# ********* end user methods
|
27
46
|
|
28
47
|
private
|
29
48
|
|
@@ -3,11 +3,10 @@
|
|
3
3
|
require 'active_support/all'
|
4
4
|
module BatchesTaskProcessor
|
5
5
|
class Processor
|
6
|
-
|
7
|
-
attr_reader :model_id
|
6
|
+
attr_reader :task_id
|
8
7
|
|
9
|
-
def initialize(
|
10
|
-
@
|
8
|
+
def initialize(task_id = nil)
|
9
|
+
@task_id = task_id || ENV['RUNNER_TASK_ID']
|
11
10
|
end
|
12
11
|
|
13
12
|
def call
|
@@ -18,37 +17,38 @@ module BatchesTaskProcessor
|
|
18
17
|
run_job(job_no.to_i)
|
19
18
|
end
|
20
19
|
|
21
|
-
def status
|
22
|
-
log "Process status: #{process_model.items.count}/#{process_model.data.count}"
|
23
|
-
end
|
24
|
-
|
25
|
-
def cancel
|
26
|
-
process_model.cancel!
|
27
|
-
end
|
28
|
-
|
29
20
|
private
|
30
21
|
|
31
22
|
# @example item.perform_my_action
|
32
23
|
def process_item(item)
|
33
|
-
instance_eval(
|
24
|
+
instance_eval(task_model.process_item)
|
34
25
|
end
|
35
26
|
|
36
27
|
# @example Article.where(no: items)
|
37
28
|
def preload_job_items(items)
|
38
|
-
instance_eval(
|
29
|
+
instance_eval(task_model.preload_job_items || 'items')
|
39
30
|
end
|
40
31
|
|
41
32
|
def init_jobs
|
42
|
-
jobs =
|
33
|
+
jobs = task_model.qty_jobs
|
43
34
|
log "Initializing #{jobs} jobs..."
|
44
35
|
jobs.times.each do |index|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
36
|
+
if task_model.queue_name
|
37
|
+
log "Scheduling ##{index} job..."
|
38
|
+
BatchesTaskProcessor::ProcessorJob.set(queue: task_model.queue_name).perform_later(task_id, index)
|
39
|
+
else
|
40
|
+
start_inline_job(index)
|
41
|
+
end
|
49
42
|
end
|
50
43
|
end
|
51
44
|
|
45
|
+
# Launches the rake task `batches_task_processor:process_job` in a separate
# process for the given job number and detaches from it.
#
# The job number and task id are handed to the child through the
# RUNNER_JOB_NO / RUNNER_TASK_ID environment variables, which is what the
# `process_job` rake task reads.
#
# The command is passed to Process.spawn as an env hash plus an argument
# list instead of one interpolated shell string: no shell is involved, so
# `task_id` (which may originate from ENV) cannot be interpreted as shell
# syntax, and the pid handed to Process.detach is the rake process itself
# rather than a short-lived wrapper shell. Process.spawn does not wait for
# the child, so the trailing `&` is not needed.
#
# @param job_no [Integer] index of the job to run
def start_inline_job(job_no)
  log "Starting ##{job_no} job..."
  env_vars = { 'RUNNER_JOB_NO' => job_no.to_s, 'RUNNER_TASK_ID' => task_id.to_s }
  pid = Process.spawn(env_vars, 'rake', 'batches_task_processor:process_job')
  Process.detach(pid)
end
|
51
|
+
|
52
52
|
def run_job(job)
|
53
53
|
log "Running ##{job} job..."
|
54
54
|
items = job_items(job)
|
@@ -61,38 +61,38 @@ module BatchesTaskProcessor
|
|
61
61
|
end
|
62
62
|
|
63
63
|
log "Finished #{job} job..."
|
64
|
-
|
64
|
+
task_model.finish! if task_model.all_processed?
|
65
65
|
end
|
66
66
|
|
67
67
|
# Returns the chunk of task data assigned to the given job, passed through
# the `preload_job_items` callback.
#
# The task data is cut into consecutive slices of `qty_items_job` elements
# and the slice at index `job` is selected. When there are fewer slices than
# jobs, the index is out of range; an empty list is used in that case so the
# preload callback never receives nil.
#
# `task_model` is looked up once and reused because it is not memoized.
#
# @param job [Integer] zero-based job index
def job_items(job)
  task = task_model
  slices = task.data.each_slice(task.qty_items_job).to_a
  preload_job_items(slices[job] || [])
end
|
71
71
|
|
72
72
|
def start_process_item(item, job, key, index)
|
73
|
-
log "Processing #{job}/#{key}: #{index}/#{
|
73
|
+
log "Processing #{job}/#{key}: #{index}/#{task_model.qty_items_job}"
|
74
74
|
result = process_item(item)
|
75
|
-
|
75
|
+
task_model.items.create!(key: key, result: result.to_s[0..255])
|
76
76
|
rescue => e
|
77
|
-
|
77
|
+
task_model.items.create!(key: key, error_details: e.message)
|
78
78
|
log "Process failed #{job}/#{key}: #{e.message}"
|
79
79
|
end
|
80
80
|
|
81
81
|
def already_processed?(key)
|
82
|
-
|
82
|
+
task_model.items.where(key: key).exists?
|
83
83
|
end
|
84
84
|
|
85
85
|
# Tells whether the task's state marks it as cancelled.
#
# Model#cancel stores the state as `:canceled` (single "l"), so comparing
# only against 'cancelled' never matched a task cancelled through that
# method. Both spellings are accepted, and the state is compared as a
# string so a symbol value matches as well.
def process_cancelled?
  %w[canceled cancelled].include?(task_model.state.to_s)
end
|
88
88
|
|
89
89
|
def log(msg)
|
90
90
|
puts "BatchesTaskProcessor => #{msg}"
|
91
91
|
end
|
92
92
|
|
93
|
-
def
|
93
|
+
def task_model
|
94
94
|
klass = BatchesTaskProcessor::Model.all
|
95
|
-
|
95
|
+
task_id ? klass.find(task_id) : klass.last
|
96
96
|
end
|
97
97
|
end
|
98
98
|
end
|
@@ -3,22 +3,11 @@
|
|
3
3
|
namespace :batches_task_processor do
|
4
4
|
desc 'Starts the Batches Task Processor'
|
5
5
|
task call: :environment do
|
6
|
-
BatchesTaskProcessor::Processor.new(ENV['
|
6
|
+
BatchesTaskProcessor::Processor.new(ENV['RUNNER_TASK_ID']).call
|
7
7
|
end
|
8
8
|
|
9
9
|
desc 'Starts the Batches Task Processor'
|
10
10
|
task process_job: :environment do
|
11
|
-
BatchesTaskProcessor::Processor.new(ENV['
|
12
|
-
end
|
13
|
-
|
14
|
-
|
15
|
-
desc 'Prints the status of the Task Processor'
|
16
|
-
task status: :environment do
|
17
|
-
BatchesTaskProcessor::Processor.new(ENV['RUNNER_MODEL_ID']).status
|
18
|
-
end
|
19
|
-
|
20
|
-
desc 'Cancels the Batches Task Processor'
|
21
|
-
task cancel: :environment do
|
22
|
-
BatchesTaskProcessor::Processor.new(ENV['RUNNER_MODEL_ID']).cancel
|
11
|
+
BatchesTaskProcessor::Processor.new(ENV['RUNNER_TASK_ID']).process_job(ENV['RUNNER_JOB_NO'])
|
23
12
|
end
|
24
13
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: batches_task_processor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Owen Peredo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-08-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -37,6 +37,7 @@ files:
|
|
37
37
|
- lib/batches_task_processor/model.rb
|
38
38
|
- lib/batches_task_processor/model_item.rb
|
39
39
|
- lib/batches_task_processor/processor.rb
|
40
|
+
- lib/batches_task_processor/processor_job.rb
|
40
41
|
- lib/batches_task_processor/railtie.rb
|
41
42
|
- lib/batches_task_processor/version.rb
|
42
43
|
- lib/db/migrate/20220727101904_add_batches_task_processor.rb
|