batches_task_processor 0.1.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +32 -35
- data/lib/batches_task_processor/model.rb +53 -0
- data/lib/batches_task_processor/model_item.rb +8 -0
- data/lib/batches_task_processor/processor.rb +42 -88
- data/lib/batches_task_processor/processor_job.rb +11 -0
- data/lib/batches_task_processor/railtie.rb +4 -0
- data/lib/batches_task_processor/version.rb +1 -1
- data/lib/batches_task_processor.rb +5 -12
- data/lib/db/migrate/20220727101904_add_batches_task_processor.rb +25 -0
- data/lib/tasks/batches_task_processor_tasks.rake +2 -22
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6b83bdfebcd837e7ee632a01502d5796ad29ad096e63486cd4d9513dd5882908
|
4
|
+
data.tar.gz: a8e2fbfafe6c76ca0ddab0810b24797dbc5ce3b6eceb84f15c2b587078a8d558
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a2aca1fb9e37d1c3c7e7af5e51cc593bcbbf1d83f37654bb81f9ef0cc45740a5d33467f3ead66a20f93fedf25f5ac1b81568a49019673ee0f47bf61bf3fd2629
|
7
|
+
data.tar.gz: 43d671b910781c69ac56bee5c363213c7590c162e5f17cf48882e31ea5dcc20ae7311186b30c5d9b9fdbc037a3e342097543b46d084112477c794057ccc89612
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# BatchesTaskProcessor
|
2
|
-
Gem that allows to process huge amount of tasks in parallel using batches
|
3
|
-
|
2
|
+
Ruby gem that allows processing a huge number of tasks of any kind in parallel using batches, with the ability to cancel at any time.
|
3
|
+
The created jobs can be processed in the background or in the foreground (inline), with the ability to rerun/retry later (already processed items are skipped).
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
Add this line to your application's Gemfile:
|
@@ -8,45 +8,42 @@ Add this line to your application's Gemfile:
|
|
8
8
|
```ruby
|
9
9
|
gem "batches_task_processor"
|
10
10
|
```
|
11
|
-
And then execute: `bundle install`
|
11
|
+
And then execute: `bundle install && bundle exec rake db:migrate`
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
Sample Array:
|
13
|
+
## Usage
|
14
|
+
- Register a new task:
|
15
|
+
The following will process 200k items with 10 parallel jobs, each one in charge of 20k items (using `preload_job_items` is recommended for performance reasons):
|
17
16
|
```ruby
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
17
|
+
task = BatchesTaskProcessor::Model.create!(
|
18
|
+
key: 'my_process',
|
19
|
+
data: Article.all.limit(200000).pluck(:id),
|
20
|
+
qty_jobs: 10,
|
21
|
+
preload_job_items: 'Article.where(id: items)',
|
22
|
+
process_item: 'puts "my article ID: #{item.id}"'
|
23
|
+
)
|
24
|
+
task.start!
|
25
25
|
```
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
26
|
+
![Photo](./img.png)
|
27
|
+
|
28
|
+
## Task api
|
29
|
+
- `task.start!` starts the task (initializes the jobs)
|
30
|
+
- `task.cancel` cancels the task at any time and stops processing the items
|
31
|
+
- `task.export` exports the items that were processed to a CSV file
|
32
|
+
- `task.status` prints the current status of the task
|
33
|
+
- `task.items` returns the items that were processed so far
|
34
|
+
Each item includes the following attributes: `# { key: 'value from items', result: "value returned from the process_item callback", error_details: "error message from the process_item callback if it failed" }`
|
35
|
+
|
36
|
+
## TODO
|
37
|
+
- update tests
|
35
38
|
|
36
39
|
## Api
|
37
40
|
Settings:
|
38
|
-
- `
|
39
|
-
- `
|
40
|
-
- `
|
41
|
-
- `
|
42
|
-
|
43
|
-
- `
|
44
|
-
- `rake batches_task_processor:process_job` (Only for internal usage).
|
45
|
-
- `rake batches_task_processor:retry` Retries the processing of all jobs (ignores already processed).
|
46
|
-
- `rake batches_task_processor:status` Prints the process status.
|
47
|
-
- `rake batches_task_processor:cancel` Marks as cancelled the process and stops processing jobs.
|
48
|
-
- `rake batches_task_processor:clear` Removes all process logs or tmp data.
|
49
|
-
|
41
|
+
- `data` (Array<Integer|String>) Array of all the items to be processed.
|
42
|
+
- `key` (Mandatory) key to be used to identify the task.
|
43
|
+
- `queue_name` (String, default `default`) name of the background queue to be used (if `nil`, the process runs inline).
|
44
|
+
- `qty_jobs` (Optional) number of jobs to be created (all `data` items will be distributed across this qty of jobs). Default: `10`
|
45
|
+
- `process_item` (Mandatory) callback invoked to process each item; the `item` variable holds the current item value. Sample: `'Article.find(item).update_column(:title, "changed")'`
|
46
|
+
- `preload_job_items` (Optional) callback that allows preloading the items list and/or associations; the `items` variable holds the current chunk of items to be processed (by default it returns the same list). Sample: `Article.where(id: items)`
|
50
47
|
|
51
48
|
## Contributing
|
52
49
|
Contribution directions go here.
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
module BatchesTaskProcessor
|
5
|
+
class Model < ActiveRecord::Base
|
6
|
+
self.table_name = 'batches_task_processors'
|
7
|
+
has_many :items, class_name: 'BatchesTaskProcessor::ModelItem', dependent: :destroy, foreign_key: :batches_task_processors_id
|
8
|
+
validates :process_item, presence: true
|
9
|
+
validates :key, presence: true
|
10
|
+
before_create :apply_data_uniqueness
|
11
|
+
# state: :pending, :processing, :finished, :canceled
|
12
|
+
|
13
|
+
def qty_items_job
|
14
|
+
@qty_items_job ||= (data.count.to_f / qty_jobs).ceil
|
15
|
+
end
|
16
|
+
|
17
|
+
def finish!
|
18
|
+
update!(state: :finished, finished_at: Time.current)
|
19
|
+
end
|
20
|
+
|
21
|
+
def all_processed?
|
22
|
+
items.count == data.count
|
23
|
+
end
|
24
|
+
|
25
|
+
# ********* user methods
|
26
|
+
def start!
|
27
|
+
Processor.new(id).call
|
28
|
+
end
|
29
|
+
|
30
|
+
def cancel
|
31
|
+
update!(state: :canceled)
|
32
|
+
end
|
33
|
+
|
34
|
+
def status
|
35
|
+
Rails.logger.info "Process status: #{task_model.items.count}/#{task_model.data.count}"
|
36
|
+
end
|
37
|
+
|
38
|
+
def export
|
39
|
+
path = Rails.root.join('tmp/batches_task_processor_result.csv')
|
40
|
+
data = items.pluck(:key, :result, :error_details)
|
41
|
+
data = [['Key', 'Result', 'Error details']] + data
|
42
|
+
File.write(path, data.map(&:to_csv).join)
|
43
|
+
Rails.logger.info "Exported to #{path}"
|
44
|
+
end
|
45
|
+
# ********* end user methods
|
46
|
+
|
47
|
+
private
|
48
|
+
|
49
|
+
def apply_data_uniqueness
|
50
|
+
self.data = data.uniq
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
@@ -3,142 +3,96 @@
|
|
3
3
|
require 'active_support/all'
|
4
4
|
module BatchesTaskProcessor
|
5
5
|
class Processor
|
6
|
-
|
6
|
+
attr_reader :task_id
|
7
7
|
|
8
|
-
def
|
9
|
-
|
10
|
-
init_jobs
|
11
|
-
end
|
12
|
-
|
13
|
-
def process_job(job_no)
|
14
|
-
run_job(job_no.to_i, calculate_items)
|
8
|
+
def initialize(task_id = nil)
|
9
|
+
@task_id = task_id || ENV['RUNNER_TASK_ID']
|
15
10
|
end
|
16
11
|
|
17
|
-
def
|
12
|
+
def call
|
18
13
|
init_jobs
|
19
14
|
end
|
20
15
|
|
21
|
-
def
|
22
|
-
|
23
|
-
res[:jobs] = res[:jobs].times.map { |i| job_registry(i)[:items].count }
|
24
|
-
puts "Process status: #{res.inspect}"
|
25
|
-
end
|
26
|
-
|
27
|
-
def cancel
|
28
|
-
data = Rails.cache.read(RUNNER_JOB_KEY)
|
29
|
-
data[:cancelled] = true
|
30
|
-
Rails.cache.write(RUNNER_JOB_KEY, data)
|
31
|
-
end
|
32
|
-
|
33
|
-
def clear
|
34
|
-
res = Rails.cache.read(RUNNER_JOB_KEY)
|
35
|
-
res[:jobs].times.each { |i| job_registry(i, :delete) }
|
36
|
-
Rails.cache.delete(RUNNER_JOB_KEY)
|
16
|
+
def process_job(job_no)
|
17
|
+
run_job(job_no.to_i)
|
37
18
|
end
|
38
19
|
|
39
20
|
private
|
40
21
|
|
41
|
-
# ****** customizations
|
42
|
-
# @example ['article_id1', 'article_id2', 'article_id3']
|
43
|
-
# @example Article.where(created_at: 1.month_ago..Time.current)
|
44
|
-
def calculate_items
|
45
|
-
instance_exec(&BatchesTaskProcessor::Config.calculate_items)
|
46
|
-
end
|
47
|
-
|
48
22
|
# @example item.perform_my_action
|
49
23
|
def process_item(item)
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
def per_page
|
54
|
-
BatchesTaskProcessor::Config.per_page
|
24
|
+
instance_eval(task_model.process_item)
|
55
25
|
end
|
56
26
|
|
57
27
|
# @example Article.where(no: items)
|
58
28
|
def preload_job_items(items)
|
59
|
-
|
60
|
-
end
|
61
|
-
# ****** end customizations
|
62
|
-
|
63
|
-
def init_cache
|
64
|
-
items = calculate_items
|
65
|
-
jobs = (items.count.to_f / per_page).ceil
|
66
|
-
data = { jobs: jobs, count: items.count, date: Time.current, finished_jobs: [], cancelled: false }
|
67
|
-
main_registry(data)
|
29
|
+
instance_eval(task_model.preload_job_items || 'items')
|
68
30
|
end
|
69
31
|
|
70
32
|
def init_jobs
|
71
|
-
jobs =
|
33
|
+
jobs = task_model.qty_jobs
|
72
34
|
log "Initializing #{jobs} jobs..."
|
73
35
|
jobs.times.each do |index|
|
74
|
-
|
75
|
-
|
76
|
-
|
36
|
+
if task_model.queue_name
|
37
|
+
log "Scheduling ##{index} job..."
|
38
|
+
BatchesTaskProcessor::ProcessorJob.set(queue: task_model.queue_name).perform_later(task_id, index)
|
39
|
+
else
|
40
|
+
start_inline_job(index)
|
41
|
+
end
|
77
42
|
end
|
78
43
|
end
|
79
44
|
|
80
|
-
def
|
45
|
+
def start_inline_job(job_no)
|
46
|
+
log "Starting ##{job_no} job..."
|
47
|
+
env_vars = "RUNNER_JOB_NO=#{job_no} RUNNER_TASK_ID=#{task_id}"
|
48
|
+
pid = Process.spawn("#{env_vars} rake batches_task_processor:process_job &")
|
49
|
+
Process.detach(pid)
|
50
|
+
end
|
51
|
+
|
52
|
+
def run_job(job)
|
81
53
|
log "Running ##{job} job..."
|
82
|
-
|
54
|
+
items = job_items(job)
|
55
|
+
(items.try(:find_each) || items.each).with_index do |item, index|
|
83
56
|
key = item.try(:id) || item
|
84
57
|
break log('Process cancelled') if process_cancelled?
|
85
|
-
next log("Skipping #{key}...") if already_processed?(
|
58
|
+
next log("Skipping #{key}...") if already_processed?(key)
|
86
59
|
|
87
60
|
start_process_item(item, job, key, index)
|
88
61
|
end
|
89
62
|
|
90
|
-
mark_finished_job(job)
|
91
63
|
log "Finished #{job} job..."
|
64
|
+
task_model.finish! if task_model.all_processed?
|
92
65
|
end
|
93
66
|
|
94
|
-
def job_items(
|
95
|
-
|
67
|
+
def job_items(job)
|
68
|
+
res = task_model.data.each_slice(task_model.qty_items_job).to_a[job]
|
69
|
+
preload_job_items(res)
|
96
70
|
end
|
97
71
|
|
98
72
|
def start_process_item(item, job, key, index)
|
99
|
-
log "Processing #{job
|
100
|
-
process_item(item)
|
101
|
-
|
73
|
+
log "Processing key: #{key}, job: #{job}, counter: #{index}/#{task_model.qty_items_job}"
|
74
|
+
result = process_item(item)
|
75
|
+
task_model.items.create!(key: key, result: result.to_s[0..255])
|
102
76
|
rescue => e
|
103
|
-
|
77
|
+
task_model.items.create!(key: key, error_details: e.message)
|
104
78
|
log "Process failed #{job}/#{key}: #{e.message}"
|
105
79
|
end
|
106
80
|
|
107
|
-
def
|
108
|
-
|
109
|
-
new_data || Rails.cache.read(RUNNER_JOB_KEY)
|
110
|
-
end
|
111
|
-
|
112
|
-
def mark_finished_job(job)
|
113
|
-
main_registry(main_registry.merge(finished_jobs: main_registry[:finished_jobs] + [job]))
|
114
|
-
end
|
115
|
-
|
116
|
-
def job_registry(job, new_data = nil)
|
117
|
-
key = "#{RUNNER_JOB_KEY}/#{job}"
|
118
|
-
default_data = { items: [], errors: [] }
|
119
|
-
Rails.cache.write(key, default_data, expires_in: 1.week) unless Rails.cache.read(key)
|
120
|
-
Rails.cache.write(key, new_data, expires_in: 1.week) if new_data
|
121
|
-
Rails.cache.delete(key) if new_data == :delete
|
122
|
-
new_data || Rails.cache.read(key)
|
123
|
-
end
|
124
|
-
|
125
|
-
def update_job_cache(job, value, error = nil)
|
126
|
-
data = job_registry(job)
|
127
|
-
data[:items] << value
|
128
|
-
data[:errors] << [value, error] if error
|
129
|
-
job_registry(job, data)
|
130
|
-
end
|
131
|
-
|
132
|
-
def already_processed?(job, value)
|
133
|
-
job_registry(job)[:items].include?(value)
|
81
|
+
def already_processed?(key)
|
82
|
+
task_model.items.where(key: key).exists?
|
134
83
|
end
|
135
84
|
|
136
85
|
def process_cancelled?
|
137
|
-
|
86
|
+
task_model.state == 'cancelled'
|
138
87
|
end
|
139
88
|
|
140
89
|
def log(msg)
|
141
90
|
puts "BatchesTaskProcessor => #{msg}"
|
142
91
|
end
|
92
|
+
|
93
|
+
def task_model
|
94
|
+
klass = BatchesTaskProcessor::Model.all
|
95
|
+
task_id ? klass.find(task_id) : klass.last
|
96
|
+
end
|
143
97
|
end
|
144
98
|
end
|
@@ -6,5 +6,9 @@ module BatchesTaskProcessor
|
|
6
6
|
rake_tasks do
|
7
7
|
load 'tasks/batches_task_processor_tasks.rake'
|
8
8
|
end
|
9
|
+
initializer :append_migrations do |app|
|
10
|
+
path = File.join(File.expand_path('../../', __FILE__), 'db/migrate')
|
11
|
+
app.config.paths["db/migrate"] << path
|
12
|
+
end
|
9
13
|
end
|
10
14
|
end
|
@@ -1,18 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "batches_task_processor/version"
|
2
4
|
require "batches_task_processor/railtie"
|
3
5
|
require "batches_task_processor/processor"
|
4
|
-
|
6
|
+
require "batches_task_processor/processor_job"
|
7
|
+
require "batches_task_processor/model"
|
8
|
+
require "batches_task_processor/model_item"
|
5
9
|
|
6
10
|
module BatchesTaskProcessor
|
7
|
-
class Config
|
8
|
-
cattr_accessor(:per_page) { 5000 }
|
9
|
-
cattr_accessor(:calculate_items) { -> { raise('Implement calculate_items method') } }
|
10
|
-
cattr_accessor(:process_item) { -> (_item) { raise('Implement calculate_items method') } }
|
11
|
-
cattr_accessor(:preload_job_items) { -> (items) { items } }
|
12
|
-
|
13
|
-
|
14
|
-
def self.configure
|
15
|
-
yield self
|
16
|
-
end
|
17
|
-
end
|
18
11
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class AddBatchesTaskProcessor < ActiveRecord::Migration[5.0]
|
4
|
+
def change
|
5
|
+
create_table :batches_task_processors do |t|
|
6
|
+
t.string :key
|
7
|
+
t.string :state, default: :pending
|
8
|
+
t.json :data, default: []
|
9
|
+
t.integer :qty_jobs, default: 10
|
10
|
+
t.datetime :finished_at
|
11
|
+
t.text :preload_job_items
|
12
|
+
t.text :process_item, null: false
|
13
|
+
t.string :queue_name, default: :default
|
14
|
+
t.timestamps
|
15
|
+
end
|
16
|
+
|
17
|
+
create_table :batches_task_processor_items do |t|
|
18
|
+
t.belongs_to :batches_task_processors, foreign_key: true, index: { name: 'index_batches_task_processors_parent_id' }
|
19
|
+
t.string :key
|
20
|
+
t.text :result
|
21
|
+
t.text :error_details
|
22
|
+
t.timestamps
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -3,31 +3,11 @@
|
|
3
3
|
namespace :batches_task_processor do
|
4
4
|
desc 'Starts the Batches Task Processor'
|
5
5
|
task call: :environment do
|
6
|
-
BatchesTaskProcessor::Processor.new.call
|
6
|
+
BatchesTaskProcessor::Processor.new(ENV['RUNNER_TASK_ID']).call
|
7
7
|
end
|
8
8
|
|
9
9
|
desc 'Starts the Batches Task Processor'
|
10
10
|
task process_job: :environment do
|
11
|
-
BatchesTaskProcessor::Processor.new.process_job(ENV['RUNNER_JOB_NO'])
|
12
|
-
end
|
13
|
-
|
14
|
-
desc 'Retries the Batches Task Processor'
|
15
|
-
task retry: :environment do
|
16
|
-
BatchesTaskProcessor::Processor.new.retry
|
17
|
-
end
|
18
|
-
|
19
|
-
desc 'Prints the status of the Task Processor'
|
20
|
-
task status: :environment do
|
21
|
-
BatchesTaskProcessor::Processor.new.status
|
22
|
-
end
|
23
|
-
|
24
|
-
desc 'Cancels the Batches Task Processor'
|
25
|
-
task cancel: :environment do
|
26
|
-
BatchesTaskProcessor::Processor.new.cancel
|
27
|
-
end
|
28
|
-
|
29
|
-
desc 'Clears the Batches Task Processor cache'
|
30
|
-
task clear: :environment do
|
31
|
-
BatchesTaskProcessor::Processor.new.clear
|
11
|
+
BatchesTaskProcessor::Processor.new(ENV['RUNNER_TASK_ID']).process_job(ENV['RUNNER_JOB_NO'])
|
32
12
|
end
|
33
13
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: batches_task_processor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Owen Peredo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-08-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -34,9 +34,13 @@ files:
|
|
34
34
|
- README.md
|
35
35
|
- Rakefile
|
36
36
|
- lib/batches_task_processor.rb
|
37
|
+
- lib/batches_task_processor/model.rb
|
38
|
+
- lib/batches_task_processor/model_item.rb
|
37
39
|
- lib/batches_task_processor/processor.rb
|
40
|
+
- lib/batches_task_processor/processor_job.rb
|
38
41
|
- lib/batches_task_processor/railtie.rb
|
39
42
|
- lib/batches_task_processor/version.rb
|
43
|
+
- lib/db/migrate/20220727101904_add_batches_task_processor.rb
|
40
44
|
- lib/tasks/batches_task_processor_tasks.rake
|
41
45
|
homepage: https://github.com/owen2345/batches-task-processor
|
42
46
|
licenses: []
|