batches_task_processor 0.1.0 → 0.2.0
- checksums.yaml +4 -4
- data/README.md +37 -32
- data/lib/batches_task_processor/model.rb +34 -0
- data/lib/batches_task_processor/model_item.rb +8 -0
- data/lib/batches_task_processor/processor.rb +33 -79
- data/lib/batches_task_processor/railtie.rb +4 -0
- data/lib/batches_task_processor/version.rb +1 -1
- data/lib/batches_task_processor.rb +4 -12
- data/lib/db/migrate/20220727101904_add_batches_task_processor.rb +24 -0
- data/lib/tasks/batches_task_processor_tasks.rake +4 -13
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a9292ab75aea73468e3c48bf668388c3767e6e55aa36872adcd044014b20c7b1
+  data.tar.gz: 6955b56074ea63f120010518807a03a58f053b23f57b7dc3064e9c8705b84d14
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f88cecfa026896d758f24c260cc9b2f6a2516a83e80df80bd8862b44fe650ceedd07cb867fdbb6a723bf0235b79cdc612090c8b0cf361a5af3471af2572c4358
+  data.tar.gz: 7fc18b9188e50f2ee84c61a67bbd4b4e6769c39aa2658c0a0de8b92ff44542a3c2d1b2560468f2ae6e829d415cc03af387dd4b85f07d1199ac90bb3ae0a8dd88
data/README.md
CHANGED
@@ -1,6 +1,5 @@
 # BatchesTaskProcessor
-Gem that allows to process huge amount of tasks in parallel using batches
-This gem depends on `Rails.cache` to save results of processing (In the future: use a database table instead).
+Gem that allows to process huge amount of any kind of tasks in parallel using batches.
 
 ## Installation
 Add this line to your application's Gemfile:
@@ -11,42 +10,48 @@ gem "batches_task_processor"
 And then execute: `bundle install`
 
 
-## Usage
-
-Sample Array:
+## Usage
+- Register a new task:
 ```ruby
-
-
-
-
-
-
-end
+task = BatchesTaskProcessor::Model.create!(
+  key: 'my_process',
+  data: [1, 2, 3],
+  qty_jobs: 10,
+  process_item: 'puts "my item: #{item}"'
+)
 ```
-
-
-
-
-
-
-
-
-
+Activerecord sample (recommended `preload_job_items` for performance reasons):
+```ruby
+task = BatchesTaskProcessor::Model.create!(
+  key: 'my_process',
+  data: Article.all.pluck(:id),
+  qty_jobs: 10,
+  preload_job_items: 'Article.where(id: items)',
+  process_item: 'puts "my article: #{item.id}"'
+)
+```
+
+- Run the corresponding rake task:
+Copy the `task.id` from step one and use it in the following code:
+`RUNNER_MODEL_ID=<id-here> rake batches_task_processor:call`
+
+![Photo](./img.png)
+
+## TODO
+- update tests
 
 ## Api
 Settings:
-- `
-- `
-- `
-- `
-
-
-
-- `rake batches_task_processor:
+- `data` (Array<Integer|String>) Array of whole items to be processed.
+- `key` (Mandatory) key to be used to identify the task.
+- `qty_jobs` (Optional) number of jobs to be created. Default: `10`
+- `process_item` (Mandatory) callback to be called to perform each item where `item` variable holds the current item value. Sample: `'Article.find(item).update_column(:title, "changed")'`
+- `preload_job_items` (Optional) callback that allows to preload items list and/or associations where `items` variable holds the current chunk of items to be processed (by default returns the same list). Sample: `Article.where(id: items)`
+
+Tasks (requires `RUNNER_MODEL_ID` env variable):
+- `rake batches_task_processor:call` Starts the processing of jobs (Skips already processed ones when rerunning after cancel).
 - `rake batches_task_processor:status` Prints the process status.
-- `rake batches_task_processor:cancel` Marks as cancelled the process and stops processing jobs.
-- `rake batches_task_processor:clear` Removes all process logs or tmp data.
-
+- `rake batches_task_processor:cancel` Marks as cancelled the process and stops processing jobs (Change into `pending` to rerun again).
 
 ## Contributing
 Contribution directions go here.
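Read together, the README changes describe a two-step flow: persist a `BatchesTaskProcessor::Model` record, then trigger the runner with its id. A minimal sketch of that flow from a Rails console, assuming the 0.2.0 migration has been run; the `Article` relation, the `key` value, and the `process_item` snippet are illustrative:

```ruby
# Register the task: `data` holds the ids to process; `process_item` and
# `preload_job_items` are Ruby snippets evaluated by the gem, with `item`
# and `items` in scope respectively.
task = BatchesTaskProcessor::Model.create!(
  key: 'relabel_articles',
  data: Article.all.pluck(:id),
  qty_jobs: 10,
  preload_job_items: 'Article.where(id: items)',
  process_item: 'item.update_column(:title, "changed")'
)

# Same effect as `RUNNER_MODEL_ID=<task.id> rake batches_task_processor:call`.
BatchesTaskProcessor::Processor.new(task.id).call
```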
data/lib/batches_task_processor/model.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+module BatchesTaskProcessor
+  class Model < ActiveRecord::Base
+    self.table_name = 'batches_task_processors'
+    has_many :items, class_name: 'BatchesTaskProcessor::ModelItem', dependent: :destroy, foreign_key: :batches_task_processors_id
+    validates :process_item, presence: true
+    validates :key, presence: true
+    before_create :apply_data_uniqueness
+    # state: :pending, :processing, :finished, :canceled
+
+    def qty_items_job
+      @qty_items_job ||= (data.count.to_f / qty_jobs).ceil
+    end
+
+    def finish!
+      update!(state: :finished, finished_at: Time.current)
+    end
+
+    def cancel!
+      update!(state: :canceled)
+    end
+
+    def all_processed?
+      items.count == data.count
+    end
+
+    private
+
+    def apply_data_uniqueness
+      self.data = data.uniq
+    end
+  end
+end
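For orientation, `qty_items_job` above is the slice size handed to each job: `ceil(data.count / qty_jobs)`, and the processor later cuts `data` into slices of that size with `each_slice`. A small illustration with made-up numbers (it assumes the gem's table exists so the model can be instantiated):

```ruby
task = BatchesTaskProcessor::Model.new(data: (1..95).to_a, qty_jobs: 10)

task.qty_items_job                                   # => 10, i.e. (95 / 10.0).ceil
slices = task.data.each_slice(task.qty_items_job).to_a
slices.length                                        # => 10 slices; the last one holds only 5 items
slices[0]                                            # => [1, 2, ..., 10], the chunk handed to job 0
```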
data/lib/batches_task_processor/processor.rb
@@ -4,141 +4,95 @@ require 'active_support/all'
 module BatchesTaskProcessor
   class Processor
     RUNNER_JOB_KEY = 'RUNNER_JOB_KEY'
+    attr_reader :model_id
+
+    def initialize(model_id = nil)
+      @model_id = model_id || ENV['RUNNER_MODEL_ID']
+    end
 
     def call
-      init_cache
       init_jobs
     end
 
     def process_job(job_no)
-      run_job(job_no.to_i
-    end
-
-    def retry
-      init_jobs
+      run_job(job_no.to_i)
     end
 
     def status
-
-      res[:jobs] = res[:jobs].times.map { |i| job_registry(i)[:items].count }
-      puts "Process status: #{res.inspect}"
+      log "Process status: #{process_model.items.count}/#{process_model.data.count}"
     end
 
     def cancel
-
-      data[:cancelled] = true
-      Rails.cache.write(RUNNER_JOB_KEY, data)
-    end
-
-    def clear
-      res = Rails.cache.read(RUNNER_JOB_KEY)
-      res[:jobs].times.each { |i| job_registry(i, :delete) }
-      Rails.cache.delete(RUNNER_JOB_KEY)
+      process_model.cancel!
     end
 
     private
 
-    # ****** customizations
-    # @example ['article_id1', 'article_id2', 'article_id3']
-    # @example Article.where(created_at: 1.month_ago..Time.current)
-    def calculate_items
-      instance_exec(&BatchesTaskProcessor::Config.calculate_items)
-    end
-
     # @example item.perform_my_action
     def process_item(item)
-
-    end
-
-    def per_page
-      BatchesTaskProcessor::Config.per_page
+      instance_eval(process_model.process_item)
     end
 
     # @example Article.where(no: items)
     def preload_job_items(items)
-
-    end
-    # ****** end customizations
-
-    def init_cache
-      items = calculate_items
-      jobs = (items.count.to_f / per_page).ceil
-      data = { jobs: jobs, count: items.count, date: Time.current, finished_jobs: [], cancelled: false }
-      main_registry(data)
+      instance_eval(process_model.preload_job_items || 'items')
    end
 
     def init_jobs
-      jobs =
+      jobs = process_model.qty_jobs
       log "Initializing #{jobs} jobs..."
       jobs.times.each do |index|
         log "Starting ##{index} job..."
-
+        env_vars = "RUNNER_JOB_NO=#{index} RUNNER_MODEL_ID=#{model_id}"
+        pid = Process.spawn("#{env_vars} rake batches_task_processor:process_job &")
         Process.detach(pid)
       end
     end
 
-    def run_job(job
+    def run_job(job)
       log "Running ##{job} job..."
-
+      items = job_items(job)
+      (items.try(:find_each) || items.each).with_index do |item, index|
         key = item.try(:id) || item
         break log('Process cancelled') if process_cancelled?
-        next log("Skipping #{key}...") if already_processed?(
+        next log("Skipping #{key}...") if already_processed?(key)
 
         start_process_item(item, job, key, index)
       end
 
-      mark_finished_job(job)
       log "Finished #{job} job..."
+      process_model.finish! if process_model.all_processed?
     end
 
-    def job_items(
-
+    def job_items(job)
+      res = process_model.data.each_slice(process_model.qty_items_job).to_a[job]
+      preload_job_items(res)
     end
 
     def start_process_item(item, job, key, index)
-      log "Processing #{job}/#{key}: #{index}/#{
-      process_item(item)
-
+      log "Processing #{job}/#{key}: #{index}/#{process_model.qty_items_job}"
+      result = process_item(item)
+      process_model.items.create!(key: key, result: result.to_s[0..255])
     rescue => e
-
+      process_model.items.create!(key: key, error_details: e.message)
       log "Process failed #{job}/#{key}: #{e.message}"
     end
 
-    def
-
-      new_data || Rails.cache.read(RUNNER_JOB_KEY)
-    end
-
-    def mark_finished_job(job)
-      main_registry(main_registry.merge(finished_jobs: main_registry[:finished_jobs] + [job]))
-    end
-
-    def job_registry(job, new_data = nil)
-      key = "#{RUNNER_JOB_KEY}/#{job}"
-      default_data = { items: [], errors: [] }
-      Rails.cache.write(key, default_data, expires_in: 1.week) unless Rails.cache.read(key)
-      Rails.cache.write(key, new_data, expires_in: 1.week) if new_data
-      Rails.cache.delete(key) if new_data == :delete
-      new_data || Rails.cache.read(key)
-    end
-
-    def update_job_cache(job, value, error = nil)
-      data = job_registry(job)
-      data[:items] << value
-      data[:errors] << [value, error] if error
-      job_registry(job, data)
-    end
-
-    def already_processed?(job, value)
-      job_registry(job)[:items].include?(value)
+    def already_processed?(key)
+      process_model.items.where(key: key).exists?
     end
 
     def process_cancelled?
-
+      process_model.state == 'cancelled'
    end
 
     def log(msg)
       puts "BatchesTaskProcessor => #{msg}"
     end
+
+    def process_model
+      klass = BatchesTaskProcessor::Model.all
+      model_id ? klass.find(model_id) : klass.last
+    end
   end
 end
data/lib/batches_task_processor/railtie.rb
@@ -6,5 +6,9 @@ module BatchesTaskProcessor
     rake_tasks do
       load 'tasks/batches_task_processor_tasks.rake'
     end
+    initializer :append_migrations do |app|
+      path = File.join(File.expand_path('../../', __FILE__), 'db/migrate')
+      app.config.paths["db/migrate"] << path
+    end
   end
 end
data/lib/batches_task_processor.rb
@@ -1,18 +1,10 @@
+# frozen_string_literal: true
+
 require "batches_task_processor/version"
 require "batches_task_processor/railtie"
 require "batches_task_processor/processor"
-
+require "batches_task_processor/model"
+require "batches_task_processor/model_item"
 
 module BatchesTaskProcessor
-  class Config
-    cattr_accessor(:per_page) { 5000 }
-    cattr_accessor(:calculate_items) { -> { raise('Implement calculate_items method') } }
-    cattr_accessor(:process_item) { -> (_item) { raise('Implement calculate_items method') } }
-    cattr_accessor(:preload_job_items) { -> (items) { items } }
-
-
-    def self.configure
-      yield self
-    end
-  end
 end
data/lib/db/migrate/20220727101904_add_batches_task_processor.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+class AddBatchesTaskProcessor < ActiveRecord::Migration[5.0]
+  def change
+    create_table :batches_task_processors do |t|
+      t.string :key
+      t.string :state, default: :pending
+      t.json :data, default: []
+      t.integer :qty_jobs, default: 10
+      t.datetime :finished_at
+      t.text :preload_job_items
+      t.text :process_item, null: false
+      t.timestamps
+    end
+
+    create_table :batches_task_processor_items do |t|
+      t.belongs_to :batches_task_processors, foreign_key: true, index: { name: 'index_batches_task_processors_parent_id' }
+      t.string :key
+      t.text :result
+      t.text :error_details
+      t.timestamps
+    end
+  end
+end
data/lib/tasks/batches_task_processor_tasks.rake
@@ -3,31 +3,22 @@
 namespace :batches_task_processor do
   desc 'Starts the Batches Task Processor'
   task call: :environment do
-    BatchesTaskProcessor::Processor.new.call
+    BatchesTaskProcessor::Processor.new(ENV['RUNNER_MODEL_ID']).call
   end
 
   desc 'Starts the Batches Task Processor'
   task process_job: :environment do
-    BatchesTaskProcessor::Processor.new.process_job(ENV['RUNNER_JOB_NO'])
+    BatchesTaskProcessor::Processor.new(ENV['RUNNER_MODEL_ID']).process_job(ENV['RUNNER_JOB_NO'])
   end
 
-  desc 'Retries the Batches Task Processor'
-  task retry: :environment do
-    BatchesTaskProcessor::Processor.new.retry
-  end
 
   desc 'Prints the status of the Task Processor'
   task status: :environment do
-    BatchesTaskProcessor::Processor.new.status
+    BatchesTaskProcessor::Processor.new(ENV['RUNNER_MODEL_ID']).status
   end
 
   desc 'Cancels the Batches Task Processor'
   task cancel: :environment do
-    BatchesTaskProcessor::Processor.new.cancel
-  end
-
-  desc 'Clears the Batches Task Processor cache'
-  task clear: :environment do
-    BatchesTaskProcessor::Processor.new.clear
+    BatchesTaskProcessor::Processor.new(ENV['RUNNER_MODEL_ID']).cancel
   end
 end
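The rake tasks above are thin wrappers around `Processor`: `call` is the parent that spawns one detached `process_job` child per `qty_jobs`, and every process re-resolves the task through `RUNNER_MODEL_ID`. A sketch of the same calls without rake; the id 42 is illustrative:

```ruby
processor = BatchesTaskProcessor::Processor.new(42)

processor.call     # spawns, for each job index i:
                   #   RUNNER_JOB_NO=i RUNNER_MODEL_ID=42 rake batches_task_processor:process_job &
processor.status   # logs "Process status: <processed items>/<total items>"
processor.cancel   # marks the task cancelled; the per-item loop checks process_cancelled?

# Each spawned child effectively runs:
BatchesTaskProcessor::Processor.new(42).process_job(ENV['RUNNER_JOB_NO'])
```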
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: batches_task_processor
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.2.0
 platform: ruby
 authors:
 - Owen Peredo
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-07-
+date: 2022-07-31 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rails
@@ -34,9 +34,12 @@ files:
 - README.md
 - Rakefile
 - lib/batches_task_processor.rb
+- lib/batches_task_processor/model.rb
+- lib/batches_task_processor/model_item.rb
 - lib/batches_task_processor/processor.rb
 - lib/batches_task_processor/railtie.rb
 - lib/batches_task_processor/version.rb
+- lib/db/migrate/20220727101904_add_batches_task_processor.rb
 - lib/tasks/batches_task_processor_tasks.rake
 homepage: https://github.com/owen2345/batches-task-processor
 licenses: []