batches_task_processor 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +16 -25
- data/lib/batches_task_processor/model.rb +22 -3
- data/lib/batches_task_processor/processor.rb +28 -28
- data/lib/batches_task_processor/processor_job.rb +11 -0
- data/lib/batches_task_processor/version.rb +1 -1
- data/lib/batches_task_processor.rb +1 -0
- data/lib/db/migrate/20220727101904_add_batches_task_processor.rb +1 -0
- data/lib/tasks/batches_task_processor_tasks.rake +2 -13
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 66cd035ce5d4863e28c7052efc51b79d2cfcc3cae0b0ab84e14a8a3921da9dee
|
4
|
+
data.tar.gz: b38a41a2489cbc422e3d18c108fd61c00f46304e17b179e6ec109b2b29e956a0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1618b2a8460a30828df870b559a299d75283ab960d9d74c51743da5f1ab3992c61f9e7ed0f1eefc6af3a89e18a5a719387003497c55def0fdd668e3a5902689b
|
7
|
+
data.tar.gz: 3f822048a4ee3efa3244416348e680ba38cfd779d5e3de8cff2d66add02c9cd465272e78d12c5a127d7d6caaf738f8ca9af6b2d6b9892db759d48cd7ca830529
|
data/README.md
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# BatchesTaskProcessor
|
2
|
-
Gem that allows to process huge amount of any kind of tasks in parallel using batches.
|
2
|
+
Ruby gem that lets you process a huge number of tasks of any kind in parallel using batches, with the ability to cancel at any time.
|
3
|
+
The created jobs can be processed in the background or in the foreground (inline), and can be rerun/retried later (already processed items are skipped).
|
3
4
|
|
4
5
|
## Installation
|
5
6
|
Add this line to your application's Gemfile:
|
@@ -7,36 +8,30 @@ Add this line to your application's Gemfile:
|
|
7
8
|
```ruby
|
8
9
|
gem "batches_task_processor"
|
9
10
|
```
|
10
|
-
And then execute: `bundle install`
|
11
|
-
|
11
|
+
And then execute: `bundle install && bundle exec rake db:migrate`
|
12
12
|
|
13
13
|
## Usage
|
14
|
-
- Register a new task:
|
14
|
+
- Register a new task:
|
15
|
+
The following will process 200 items using 10 parallel jobs, each one in charge of 20 items (`preload_job_items` is recommended for performance reasons):
|
15
16
|
```ruby
|
16
17
|
task = BatchesTaskProcessor::Model.create!(
|
17
18
|
key: 'my_process',
|
18
|
-
data:
|
19
|
-
qty_jobs: 10,
|
20
|
-
process_item: 'puts "my item: #{item}"'
|
21
|
-
)
|
22
|
-
```
|
23
|
-
Activerecord sample (recommended `preload_job_items` for performance reasons):
|
24
|
-
```ruby
|
25
|
-
task = BatchesTaskProcessor::Model.create!(
|
26
|
-
key: 'my_process',
|
27
|
-
data: Article.all.pluck(:id),
|
19
|
+
data: Article.all.limit(200).pluck(:id),
|
28
20
|
qty_jobs: 10,
|
29
21
|
preload_job_items: 'Article.where(id: items)',
|
30
22
|
process_item: 'puts "my article: #{item.id}"'
|
31
23
|
)
|
24
|
+
task.start!
|
32
25
|
```
|
33
|
-
|
34
|
-
- Run the corresponding rake task:
|
35
|
-
Copy the `task.id` from step one and use it in the following code:
|
36
|
-
`RUNNER_MODEL_ID=<id-here> rake batches_task_processor:call`
|
37
|
-
|
38
26
|
![Photo](./img.png)
|
39
27
|
|
28
|
+
## Task API
|
29
|
+
- `task.start!` starts the task (initializes the jobs)
|
30
|
+
- `task.cancel` cancels the task and stops processing the items
|
31
|
+
- `task.export` exports the processed items to a CSV file
|
32
|
+
- `task.items` returns the items that were processed
|
33
|
+
Each item includes the following attributes: `{ result: "value returned from the process_item callback", error_details: "error message from the process_item callback if it failed" }`
|
34
|
+
|
40
35
|
## TODO
|
41
36
|
- update tests
|
42
37
|
|
@@ -44,15 +39,11 @@ And then execute: `bundle install`
|
|
44
39
|
Settings:
|
45
40
|
- `data` (Array<Integer|String>) Array of all the items to be processed.
|
46
41
|
- `key` (Mandatory) key to be used to identify the task.
|
47
|
-
- `
|
42
|
+
- `queue_name` (String, default `default`) name of the background queue to be used (If `nil`, will run the process inline).
|
43
|
+
- `qty_jobs` (Optional) number of jobs to be created (all `data` items will be distributed across this qty of jobs). Default: `10`
|
48
44
|
- `process_item` (Mandatory) callback invoked to process each item, where the `item` variable holds the current item value. Sample: `'Article.find(item).update_column(:title, "changed")'`
|
49
45
|
- `preload_job_items` (Optional) callback that allows preloading the item list and/or associations, where the `items` variable holds the current chunk of items to be processed (by default, returns the same list). Sample: `Article.where(id: items)`
|
50
46
|
|
51
|
-
Tasks (requires `RUNNER_MODEL_ID` env variable):
|
52
|
-
- `rake batches_task_processor:call` Starts the processing of jobs (Skips already processed ones when rerunning after cancel).
|
53
|
-
- `rake batches_task_processor:status` Prints the process status.
|
54
|
-
- `rake batches_task_processor:cancel` Marks as cancelled the process and stops processing jobs (Change into `pending` to rerun again).
|
55
|
-
|
56
47
|
## Contributing
|
57
48
|
Contribution directions go here.
|
58
49
|
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'csv'
|
3
4
|
module BatchesTaskProcessor
|
4
5
|
class Model < ActiveRecord::Base
|
5
6
|
self.table_name = 'batches_task_processors'
|
@@ -17,13 +18,31 @@ module BatchesTaskProcessor
|
|
17
18
|
update!(state: :finished, finished_at: Time.current)
|
18
19
|
end
|
19
20
|
|
20
|
-
def
|
21
|
+
def all_processed?
|
22
|
+
items.count == data.count
|
23
|
+
end
|
24
|
+
|
25
|
+
# ********* user methods
|
26
|
+
def start!
|
27
|
+
Processor.new(id).call
|
28
|
+
end
|
29
|
+
|
30
|
+
def cancel
|
21
31
|
update!(state: :canceled)
|
22
32
|
end
|
23
33
|
|
24
|
-
def
|
25
|
-
items.count
|
34
|
+
def status
|
35
|
+
Rails.logger.info "Process status: #{task_model.items.count}/#{task_model.data.count}"
|
36
|
+
end
|
37
|
+
|
38
|
+
def export
|
39
|
+
path = Rails.root.join('tmp/batches_task_processor_result.csv')
|
40
|
+
data = items.pluck(:key, :result, :error_details)
|
41
|
+
data = [['Key', 'Result', 'Error details']] + data
|
42
|
+
File.write(path, data.map(&:to_csv).join)
|
43
|
+
Rails.logger.info "Exported to #{path}"
|
26
44
|
end
|
45
|
+
# ********* end user methods
|
27
46
|
|
28
47
|
private
|
29
48
|
|
@@ -3,11 +3,10 @@
|
|
3
3
|
require 'active_support/all'
|
4
4
|
module BatchesTaskProcessor
|
5
5
|
class Processor
|
6
|
-
|
7
|
-
attr_reader :model_id
|
6
|
+
attr_reader :task_id
|
8
7
|
|
9
|
-
def initialize(
|
10
|
-
@
|
8
|
+
def initialize(task_id = nil)
|
9
|
+
@task_id = task_id || ENV['RUNNER_TASK_ID']
|
11
10
|
end
|
12
11
|
|
13
12
|
def call
|
@@ -18,37 +17,38 @@ module BatchesTaskProcessor
|
|
18
17
|
run_job(job_no.to_i)
|
19
18
|
end
|
20
19
|
|
21
|
-
def status
|
22
|
-
log "Process status: #{process_model.items.count}/#{process_model.data.count}"
|
23
|
-
end
|
24
|
-
|
25
|
-
def cancel
|
26
|
-
process_model.cancel!
|
27
|
-
end
|
28
|
-
|
29
20
|
private
|
30
21
|
|
31
22
|
# @example item.perform_my_action
|
32
23
|
def process_item(item)
|
33
|
-
instance_eval(
|
24
|
+
instance_eval(task_model.process_item)
|
34
25
|
end
|
35
26
|
|
36
27
|
# @example Article.where(no: items)
|
37
28
|
def preload_job_items(items)
|
38
|
-
instance_eval(
|
29
|
+
instance_eval(task_model.preload_job_items || 'items')
|
39
30
|
end
|
40
31
|
|
41
32
|
def init_jobs
|
42
|
-
jobs =
|
33
|
+
jobs = task_model.qty_jobs
|
43
34
|
log "Initializing #{jobs} jobs..."
|
44
35
|
jobs.times.each do |index|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
36
|
+
if task_model.queue_name
|
37
|
+
log "Scheduling ##{index} job..."
|
38
|
+
BatchesTaskProcessor::ProcessorJob.set(queue: task_model.queue_name).perform_later(task_id, index)
|
39
|
+
else
|
40
|
+
start_inline_job(index)
|
41
|
+
end
|
49
42
|
end
|
50
43
|
end
|
51
44
|
|
45
|
+
def start_inline_job(job_no)
|
46
|
+
log "Starting ##{job_no} job..."
|
47
|
+
env_vars = "RUNNER_JOB_NO=#{job_no} RUNNER_TASK_ID=#{task_id}"
|
48
|
+
pid = Process.spawn("#{env_vars} rake batches_task_processor:process_job &")
|
49
|
+
Process.detach(pid)
|
50
|
+
end
|
51
|
+
|
52
52
|
def run_job(job)
|
53
53
|
log "Running ##{job} job..."
|
54
54
|
items = job_items(job)
|
@@ -61,38 +61,38 @@ module BatchesTaskProcessor
|
|
61
61
|
end
|
62
62
|
|
63
63
|
log "Finished #{job} job..."
|
64
|
-
|
64
|
+
task_model.finish! if task_model.all_processed?
|
65
65
|
end
|
66
66
|
|
67
67
|
def job_items(job)
|
68
|
-
res =
|
68
|
+
res = task_model.data.each_slice(task_model.qty_items_job).to_a[job]
|
69
69
|
preload_job_items(res)
|
70
70
|
end
|
71
71
|
|
72
72
|
def start_process_item(item, job, key, index)
|
73
|
-
log "Processing #{job}/#{key}: #{index}/#{
|
73
|
+
log "Processing #{job}/#{key}: #{index}/#{task_model.qty_items_job}"
|
74
74
|
result = process_item(item)
|
75
|
-
|
75
|
+
task_model.items.create!(key: key, result: result.to_s[0..255])
|
76
76
|
rescue => e
|
77
|
-
|
77
|
+
task_model.items.create!(key: key, error_details: e.message)
|
78
78
|
log "Process failed #{job}/#{key}: #{e.message}"
|
79
79
|
end
|
80
80
|
|
81
81
|
def already_processed?(key)
|
82
|
-
|
82
|
+
task_model.items.where(key: key).exists?
|
83
83
|
end
|
84
84
|
|
85
85
|
def process_cancelled?
|
86
|
-
|
86
|
+
task_model.state == 'cancelled'
|
87
87
|
end
|
88
88
|
|
89
89
|
def log(msg)
|
90
90
|
puts "BatchesTaskProcessor => #{msg}"
|
91
91
|
end
|
92
92
|
|
93
|
-
def
|
93
|
+
def task_model
|
94
94
|
klass = BatchesTaskProcessor::Model.all
|
95
|
-
|
95
|
+
task_id ? klass.find(task_id) : klass.last
|
96
96
|
end
|
97
97
|
end
|
98
98
|
end
|
@@ -3,22 +3,11 @@
|
|
3
3
|
namespace :batches_task_processor do
|
4
4
|
desc 'Starts the Batches Task Processor'
|
5
5
|
task call: :environment do
|
6
|
-
BatchesTaskProcessor::Processor.new(ENV['
|
6
|
+
BatchesTaskProcessor::Processor.new(ENV['RUNNER_TASK_ID']).call
|
7
7
|
end
|
8
8
|
|
9
9
|
desc 'Starts the Batches Task Processor'
|
10
10
|
task process_job: :environment do
|
11
|
-
BatchesTaskProcessor::Processor.new(ENV['
|
12
|
-
end
|
13
|
-
|
14
|
-
|
15
|
-
desc 'Prints the status of the Task Processor'
|
16
|
-
task status: :environment do
|
17
|
-
BatchesTaskProcessor::Processor.new(ENV['RUNNER_MODEL_ID']).status
|
18
|
-
end
|
19
|
-
|
20
|
-
desc 'Cancels the Batches Task Processor'
|
21
|
-
task cancel: :environment do
|
22
|
-
BatchesTaskProcessor::Processor.new(ENV['RUNNER_MODEL_ID']).cancel
|
11
|
+
BatchesTaskProcessor::Processor.new(ENV['RUNNER_TASK_ID']).process_job(ENV['RUNNER_JOB_NO'])
|
23
12
|
end
|
24
13
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: batches_task_processor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Owen Peredo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-08-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -37,6 +37,7 @@ files:
|
|
37
37
|
- lib/batches_task_processor/model.rb
|
38
38
|
- lib/batches_task_processor/model_item.rb
|
39
39
|
- lib/batches_task_processor/processor.rb
|
40
|
+
- lib/batches_task_processor/processor_job.rb
|
40
41
|
- lib/batches_task_processor/railtie.rb
|
41
42
|
- lib/batches_task_processor/version.rb
|
42
43
|
- lib/db/migrate/20220727101904_add_batches_task_processor.rb
|