batches_task_processor 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +37 -32
- data/lib/batches_task_processor/model.rb +34 -0
- data/lib/batches_task_processor/model_item.rb +8 -0
- data/lib/batches_task_processor/processor.rb +33 -79
- data/lib/batches_task_processor/railtie.rb +4 -0
- data/lib/batches_task_processor/version.rb +1 -1
- data/lib/batches_task_processor.rb +4 -12
- data/lib/db/migrate/20220727101904_add_batches_task_processor.rb +24 -0
- data/lib/tasks/batches_task_processor_tasks.rake +4 -13
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9292ab75aea73468e3c48bf668388c3767e6e55aa36872adcd044014b20c7b1
|
4
|
+
data.tar.gz: 6955b56074ea63f120010518807a03a58f053b23f57b7dc3064e9c8705b84d14
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f88cecfa026896d758f24c260cc9b2f6a2516a83e80df80bd8862b44fe650ceedd07cb867fdbb6a723bf0235b79cdc612090c8b0cf361a5af3471af2572c4358
|
7
|
+
data.tar.gz: 7fc18b9188e50f2ee84c61a67bbd4b4e6769c39aa2658c0a0de8b92ff44542a3c2d1b2560468f2ae6e829d415cc03af387dd4b85f07d1199ac90bb3ae0a8dd88
|
data/README.md
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
# BatchesTaskProcessor
|
2
|
-
Gem that allows to process huge amount of tasks in parallel using batches
|
3
|
-
This gem depends on `Rails.cache` to save results of processing (In the future: use a database table instead).
|
2
|
+
Gem that allows processing a huge number of tasks of any kind in parallel using batches.
|
4
3
|
|
5
4
|
## Installation
|
6
5
|
Add this line to your application's Gemfile:
|
@@ -11,42 +10,48 @@ gem "batches_task_processor"
|
|
11
10
|
And then execute: `bundle install`
|
12
11
|
|
13
12
|
|
14
|
-
## Usage
|
15
|
-
-
|
16
|
-
Sample Array:
|
13
|
+
## Usage
|
14
|
+
- Register a new task:
|
17
15
|
```ruby
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
end
|
16
|
+
task = BatchesTaskProcessor::Model.create!(
|
17
|
+
key: 'my_process',
|
18
|
+
data: [1, 2, 3],
|
19
|
+
qty_jobs: 10,
|
20
|
+
process_item: 'puts "my item: #{item}"'
|
21
|
+
)
|
25
22
|
```
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
23
|
+
ActiveRecord sample (`preload_job_items` is recommended for performance reasons):
|
24
|
+
```ruby
|
25
|
+
task = BatchesTaskProcessor::Model.create!(
|
26
|
+
key: 'my_process',
|
27
|
+
data: Article.all.pluck(:id),
|
28
|
+
qty_jobs: 10,
|
29
|
+
preload_job_items: 'Article.where(id: items)',
|
30
|
+
process_item: 'puts "my article: #{item.id}"'
|
31
|
+
)
|
32
|
+
```
|
33
|
+
|
34
|
+
- Run the corresponding rake task:
|
35
|
+
Copy the `task.id` from step one and use it in the following code:
|
36
|
+
`RUNNER_MODEL_ID=<id-here> rake batches_task_processor:call`
|
37
|
+
|
38
|
+

|
39
|
+
|
40
|
+
## TODO
|
41
|
+
- update tests
|
35
42
|
|
36
43
|
## Api
|
37
44
|
Settings:
|
38
|
-
- `
|
39
|
-
- `
|
40
|
-
- `
|
41
|
-
- `
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
- `rake batches_task_processor:
|
45
|
+
- `data` (Array<Integer|String>) Array of whole items to be processed.
|
46
|
+
- `key` (Mandatory) key to be used to identify the task.
|
47
|
+
- `qty_jobs` (Optional) number of jobs to be created. Default: `10`
|
48
|
+
- `process_item` (Mandatory) callback called to process each item; the `item` variable holds the current item value. Sample: `'Article.find(item).update_column(:title, "changed")'`
|
49
|
+
- `preload_job_items` (Optional) callback that preloads the item list and/or associations; the `items` variable holds the current chunk of items to be processed (by default returns the same list). Sample: `Article.where(id: items)`
|
50
|
+
|
51
|
+
Tasks (requires `RUNNER_MODEL_ID` env variable):
|
52
|
+
- `rake batches_task_processor:call` Starts the processing of jobs (Skips already processed ones when rerunning after cancel).
|
46
53
|
- `rake batches_task_processor:status` Prints the process status.
|
47
|
-
- `rake batches_task_processor:cancel` Marks as cancelled the process and stops processing jobs.
|
48
|
-
- `rake batches_task_processor:clear` Removes all process logs or tmp data.
|
49
|
-
|
54
|
+
- `rake batches_task_processor:cancel` Marks the process as cancelled and stops processing jobs (change the state back to `pending` to rerun it).
|
50
55
|
|
51
56
|
## Contributing
|
52
57
|
Contribution directions go here.
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module BatchesTaskProcessor
|
4
|
+
class Model < ActiveRecord::Base
|
5
|
+
self.table_name = 'batches_task_processors'
|
6
|
+
has_many :items, class_name: 'BatchesTaskProcessor::ModelItem', dependent: :destroy, foreign_key: :batches_task_processors_id
|
7
|
+
validates :process_item, presence: true
|
8
|
+
validates :key, presence: true
|
9
|
+
before_create :apply_data_uniqueness
|
10
|
+
# state: :pending, :processing, :finished, :canceled
|
11
|
+
|
12
|
+
def qty_items_job
|
13
|
+
@qty_items_job ||= (data.count.to_f / qty_jobs).ceil
|
14
|
+
end
|
15
|
+
|
16
|
+
def finish!
|
17
|
+
update!(state: :finished, finished_at: Time.current)
|
18
|
+
end
|
19
|
+
|
20
|
+
def cancel!
|
21
|
+
update!(state: :canceled)
|
22
|
+
end
|
23
|
+
|
24
|
+
def all_processed?
|
25
|
+
items.count == data.count
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def apply_data_uniqueness
|
31
|
+
self.data = data.uniq
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -4,141 +4,95 @@ require 'active_support/all'
|
|
4
4
|
module BatchesTaskProcessor
|
5
5
|
class Processor
|
6
6
|
RUNNER_JOB_KEY = 'RUNNER_JOB_KEY'
|
7
|
+
attr_reader :model_id
|
8
|
+
|
9
|
+
def initialize(model_id = nil)
|
10
|
+
@model_id = model_id || ENV['RUNNER_MODEL_ID']
|
11
|
+
end
|
7
12
|
|
8
13
|
def call
|
9
|
-
init_cache
|
10
14
|
init_jobs
|
11
15
|
end
|
12
16
|
|
13
17
|
def process_job(job_no)
|
14
|
-
run_job(job_no.to_i
|
15
|
-
end
|
16
|
-
|
17
|
-
def retry
|
18
|
-
init_jobs
|
18
|
+
run_job(job_no.to_i)
|
19
19
|
end
|
20
20
|
|
21
21
|
def status
|
22
|
-
|
23
|
-
res[:jobs] = res[:jobs].times.map { |i| job_registry(i)[:items].count }
|
24
|
-
puts "Process status: #{res.inspect}"
|
22
|
+
log "Process status: #{process_model.items.count}/#{process_model.data.count}"
|
25
23
|
end
|
26
24
|
|
27
25
|
def cancel
|
28
|
-
|
29
|
-
data[:cancelled] = true
|
30
|
-
Rails.cache.write(RUNNER_JOB_KEY, data)
|
31
|
-
end
|
32
|
-
|
33
|
-
def clear
|
34
|
-
res = Rails.cache.read(RUNNER_JOB_KEY)
|
35
|
-
res[:jobs].times.each { |i| job_registry(i, :delete) }
|
36
|
-
Rails.cache.delete(RUNNER_JOB_KEY)
|
26
|
+
process_model.cancel!
|
37
27
|
end
|
38
28
|
|
39
29
|
private
|
40
30
|
|
41
|
-
# ****** customizations
|
42
|
-
# @example ['article_id1', 'article_id2', 'article_id3']
|
43
|
-
# @example Article.where(created_at: 1.month_ago..Time.current)
|
44
|
-
def calculate_items
|
45
|
-
instance_exec(&BatchesTaskProcessor::Config.calculate_items)
|
46
|
-
end
|
47
|
-
|
48
31
|
# @example item.perform_my_action
|
49
32
|
def process_item(item)
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
def per_page
|
54
|
-
BatchesTaskProcessor::Config.per_page
|
33
|
+
instance_eval(process_model.process_item)
|
55
34
|
end
|
56
35
|
|
57
36
|
# @example Article.where(no: items)
|
58
37
|
def preload_job_items(items)
|
59
|
-
|
60
|
-
end
|
61
|
-
# ****** end customizations
|
62
|
-
|
63
|
-
def init_cache
|
64
|
-
items = calculate_items
|
65
|
-
jobs = (items.count.to_f / per_page).ceil
|
66
|
-
data = { jobs: jobs, count: items.count, date: Time.current, finished_jobs: [], cancelled: false }
|
67
|
-
main_registry(data)
|
38
|
+
instance_eval(process_model.preload_job_items || 'items')
|
68
39
|
end
|
69
40
|
|
70
41
|
def init_jobs
|
71
|
-
jobs =
|
42
|
+
jobs = process_model.qty_jobs
|
72
43
|
log "Initializing #{jobs} jobs..."
|
73
44
|
jobs.times.each do |index|
|
74
45
|
log "Starting ##{index} job..."
|
75
|
-
|
46
|
+
env_vars = "RUNNER_JOB_NO=#{index} RUNNER_MODEL_ID=#{model_id}"
|
47
|
+
pid = Process.spawn("#{env_vars} rake batches_task_processor:process_job &")
|
76
48
|
Process.detach(pid)
|
77
49
|
end
|
78
50
|
end
|
79
51
|
|
80
|
-
def run_job(job
|
52
|
+
def run_job(job)
|
81
53
|
log "Running ##{job} job..."
|
82
|
-
|
54
|
+
items = job_items(job)
|
55
|
+
(items.try(:find_each) || items.each).with_index do |item, index|
|
83
56
|
key = item.try(:id) || item
|
84
57
|
break log('Process cancelled') if process_cancelled?
|
85
|
-
next log("Skipping #{key}...") if already_processed?(
|
58
|
+
next log("Skipping #{key}...") if already_processed?(key)
|
86
59
|
|
87
60
|
start_process_item(item, job, key, index)
|
88
61
|
end
|
89
62
|
|
90
|
-
mark_finished_job(job)
|
91
63
|
log "Finished #{job} job..."
|
64
|
+
process_model.finish! if process_model.all_processed?
|
92
65
|
end
|
93
66
|
|
94
|
-
def job_items(
|
95
|
-
|
67
|
+
def job_items(job)
|
68
|
+
res = process_model.data.each_slice(process_model.qty_items_job).to_a[job] || [] # a job index past the last slice gets an empty list instead of nil
|
69
|
+
preload_job_items(res)
|
96
70
|
end
|
97
71
|
|
98
72
|
def start_process_item(item, job, key, index)
|
99
|
-
log "Processing #{job}/#{key}: #{index}/#{
|
100
|
-
process_item(item)
|
101
|
-
|
73
|
+
log "Processing #{job}/#{key}: #{index}/#{process_model.qty_items_job}"
|
74
|
+
result = process_item(item)
|
75
|
+
process_model.items.create!(key: key, result: result.to_s[0..255])
|
102
76
|
rescue => e
|
103
|
-
|
77
|
+
process_model.items.create!(key: key, error_details: e.message)
|
104
78
|
log "Process failed #{job}/#{key}: #{e.message}"
|
105
79
|
end
|
106
80
|
|
107
|
-
def
|
108
|
-
|
109
|
-
new_data || Rails.cache.read(RUNNER_JOB_KEY)
|
110
|
-
end
|
111
|
-
|
112
|
-
def mark_finished_job(job)
|
113
|
-
main_registry(main_registry.merge(finished_jobs: main_registry[:finished_jobs] + [job]))
|
114
|
-
end
|
115
|
-
|
116
|
-
def job_registry(job, new_data = nil)
|
117
|
-
key = "#{RUNNER_JOB_KEY}/#{job}"
|
118
|
-
default_data = { items: [], errors: [] }
|
119
|
-
Rails.cache.write(key, default_data, expires_in: 1.week) unless Rails.cache.read(key)
|
120
|
-
Rails.cache.write(key, new_data, expires_in: 1.week) if new_data
|
121
|
-
Rails.cache.delete(key) if new_data == :delete
|
122
|
-
new_data || Rails.cache.read(key)
|
123
|
-
end
|
124
|
-
|
125
|
-
def update_job_cache(job, value, error = nil)
|
126
|
-
data = job_registry(job)
|
127
|
-
data[:items] << value
|
128
|
-
data[:errors] << [value, error] if error
|
129
|
-
job_registry(job, data)
|
130
|
-
end
|
131
|
-
|
132
|
-
def already_processed?(job, value)
|
133
|
-
job_registry(job)[:items].include?(value)
|
81
|
+
def already_processed?(key)
|
82
|
+
process_model.items.where(key: key).exists?
|
134
83
|
end
|
135
84
|
|
136
85
|
def process_cancelled?
|
137
|
-
|
86
|
+
process_model.state == 'canceled' # spelling must match the :canceled state written by Model#cancel!
|
138
87
|
end
|
139
88
|
|
140
89
|
def log(msg)
|
141
90
|
puts "BatchesTaskProcessor => #{msg}"
|
142
91
|
end
|
92
|
+
|
93
|
+
def process_model
|
94
|
+
klass = BatchesTaskProcessor::Model.all
|
95
|
+
model_id ? klass.find(model_id) : klass.last
|
96
|
+
end
|
143
97
|
end
|
144
98
|
end
|
@@ -6,5 +6,9 @@ module BatchesTaskProcessor
|
|
6
6
|
rake_tasks do
|
7
7
|
load 'tasks/batches_task_processor_tasks.rake'
|
8
8
|
end
|
9
|
+
initializer :append_migrations do |app|
|
10
|
+
path = File.join(File.expand_path('../../', __FILE__), 'db/migrate')
|
11
|
+
app.config.paths["db/migrate"] << path
|
12
|
+
end
|
9
13
|
end
|
10
14
|
end
|
@@ -1,18 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "batches_task_processor/version"
|
2
4
|
require "batches_task_processor/railtie"
|
3
5
|
require "batches_task_processor/processor"
|
4
|
-
|
6
|
+
require "batches_task_processor/model"
|
7
|
+
require "batches_task_processor/model_item"
|
5
8
|
|
6
9
|
module BatchesTaskProcessor
|
7
|
-
class Config
|
8
|
-
cattr_accessor(:per_page) { 5000 }
|
9
|
-
cattr_accessor(:calculate_items) { -> { raise('Implement calculate_items method') } }
|
10
|
-
cattr_accessor(:process_item) { -> (_item) { raise('Implement calculate_items method') } }
|
11
|
-
cattr_accessor(:preload_job_items) { -> (items) { items } }
|
12
|
-
|
13
|
-
|
14
|
-
def self.configure
|
15
|
-
yield self
|
16
|
-
end
|
17
|
-
end
|
18
10
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class AddBatchesTaskProcessor < ActiveRecord::Migration[5.0]
|
4
|
+
def change
|
5
|
+
create_table :batches_task_processors do |t|
|
6
|
+
t.string :key
|
7
|
+
t.string :state, default: :pending
|
8
|
+
t.json :data, default: []
|
9
|
+
t.integer :qty_jobs, default: 10
|
10
|
+
t.datetime :finished_at
|
11
|
+
t.text :preload_job_items
|
12
|
+
t.text :process_item, null: false
|
13
|
+
t.timestamps
|
14
|
+
end
|
15
|
+
|
16
|
+
create_table :batches_task_processor_items do |t|
|
17
|
+
t.belongs_to :batches_task_processors, foreign_key: true, index: { name: 'index_batches_task_processors_parent_id' }
|
18
|
+
t.string :key
|
19
|
+
t.text :result
|
20
|
+
t.text :error_details
|
21
|
+
t.timestamps
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -3,31 +3,22 @@
|
|
3
3
|
namespace :batches_task_processor do
|
4
4
|
desc 'Starts the Batches Task Processor'
|
5
5
|
task call: :environment do
|
6
|
-
BatchesTaskProcessor::Processor.new.call
|
6
|
+
BatchesTaskProcessor::Processor.new(ENV['RUNNER_MODEL_ID']).call
|
7
7
|
end
|
8
8
|
|
9
9
|
desc 'Starts the Batches Task Processor'
|
10
10
|
task process_job: :environment do
|
11
|
-
BatchesTaskProcessor::Processor.new.process_job(ENV['RUNNER_JOB_NO'])
|
11
|
+
BatchesTaskProcessor::Processor.new(ENV['RUNNER_MODEL_ID']).process_job(ENV['RUNNER_JOB_NO'])
|
12
12
|
end
|
13
13
|
|
14
|
-
desc 'Retries the Batches Task Processor'
|
15
|
-
task retry: :environment do
|
16
|
-
BatchesTaskProcessor::Processor.new.retry
|
17
|
-
end
|
18
14
|
|
19
15
|
desc 'Prints the status of the Task Processor'
|
20
16
|
task status: :environment do
|
21
|
-
BatchesTaskProcessor::Processor.new.status
|
17
|
+
BatchesTaskProcessor::Processor.new(ENV['RUNNER_MODEL_ID']).status
|
22
18
|
end
|
23
19
|
|
24
20
|
desc 'Cancels the Batches Task Processor'
|
25
21
|
task cancel: :environment do
|
26
|
-
BatchesTaskProcessor::Processor.new.cancel
|
27
|
-
end
|
28
|
-
|
29
|
-
desc 'Clears the Batches Task Processor cache'
|
30
|
-
task clear: :environment do
|
31
|
-
BatchesTaskProcessor::Processor.new.clear
|
22
|
+
BatchesTaskProcessor::Processor.new(ENV['RUNNER_MODEL_ID']).cancel
|
32
23
|
end
|
33
24
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: batches_task_processor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Owen Peredo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-07-
|
11
|
+
date: 2022-07-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -34,9 +34,12 @@ files:
|
|
34
34
|
- README.md
|
35
35
|
- Rakefile
|
36
36
|
- lib/batches_task_processor.rb
|
37
|
+
- lib/batches_task_processor/model.rb
|
38
|
+
- lib/batches_task_processor/model_item.rb
|
37
39
|
- lib/batches_task_processor/processor.rb
|
38
40
|
- lib/batches_task_processor/railtie.rb
|
39
41
|
- lib/batches_task_processor/version.rb
|
42
|
+
- lib/db/migrate/20220727101904_add_batches_task_processor.rb
|
40
43
|
- lib/tasks/batches_task_processor_tasks.rake
|
41
44
|
homepage: https://github.com/owen2345/batches-task-processor
|
42
45
|
licenses: []
|