job-iteration 1.1.12 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +16 -0
- data/.github/workflows/ci.yml +1 -3
- data/.gitignore +0 -1
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +19 -1
- data/Gemfile +0 -1
- data/Gemfile.lock +123 -0
- data/README.md +24 -3
- data/bin/test +32 -0
- data/dev.yml +38 -2
- data/job-iteration.gemspec +3 -2
- data/lib/job-iteration/active_record_batch_enumerator.rb +117 -0
- data/lib/job-iteration/active_record_cursor.rb +1 -1
- data/lib/job-iteration/active_record_enumerator.rb +1 -0
- data/lib/job-iteration/enumerator_builder.rb +19 -1
- data/lib/job-iteration/iteration.rb +14 -16
- data/lib/job-iteration/throttle_enumerator.rb +1 -0
- data/lib/job-iteration/version.rb +1 -1
- data/lib/job-iteration.rb +8 -8
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9b36e89b6fb57143e002ec9185093367f97b1e8db3b4e585ef2a5ca25a147008
|
4
|
+
data.tar.gz: 616a27066510613ce74de5cd76d9920188006377fbcfb2b56b1fdd8d045ba1a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 643cd18e4ed677c44bb5e709f286ffd20bd93602064967fa28cbf5db865290da7b4ada2619103ae3f34f9b8e6048684c652465e72d34513526515953dc1b1401
|
7
|
+
data.tar.gz: cdec5872bfd3096782ff954434c0ffaa723c9e7abf3d3136d8eb9e5486be20b067133cc9b5fcc162a5e460f38e80290036b67c4a6710779ed34410e99fb33996
|
@@ -0,0 +1,16 @@
|
|
1
|
+
version: 2
|
2
|
+
|
3
|
+
updates:
|
4
|
+
|
5
|
+
- package-ecosystem: bundler
|
6
|
+
directory: '/'
|
7
|
+
versioning-strategy: increase
|
8
|
+
open-pull-requests-limit: 100
|
9
|
+
insecure-external-code-execution: allow
|
10
|
+
schedule:
|
11
|
+
interval: weekly
|
12
|
+
|
13
|
+
- package-ecosystem: github-actions
|
14
|
+
directory: '/'
|
15
|
+
schedule:
|
16
|
+
interval: daily
|
data/.github/workflows/ci.yml
CHANGED
@@ -13,11 +13,9 @@ jobs:
|
|
13
13
|
- 6379:6379
|
14
14
|
strategy:
|
15
15
|
matrix:
|
16
|
-
ruby: [2.
|
16
|
+
ruby: [2.6, 2.7, 3.0]
|
17
17
|
gemfile: [rails_5_2, rails_6_0, rails_edge]
|
18
18
|
exclude:
|
19
|
-
- ruby: 2.5
|
20
|
-
gemfile: rails_edge
|
21
19
|
- ruby: 2.6
|
22
20
|
gemfile: rails_edge
|
23
21
|
- ruby: 3.0
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,8 +1,26 @@
|
|
1
1
|
### Master (unreleased)
|
2
2
|
|
3
|
-
|
3
|
+
## v1.3.0 (Oct 7, 2021)
|
4
|
+
- [133](https://github.com/Shopify/job-iteration/pull/133) - Moves attributes out of JobIteration::Iteration included block
|
5
|
+
|
6
|
+
|
7
|
+
## v1.2.0 (Sept 21, 2021)
|
8
|
+
- [107](https://github.com/Shopify/job-iteration/pull/107) - Remove broken links from README
|
9
|
+
- [108](https://github.com/Shopify/job-iteration/pull/108) - Drop support for ruby 2.5
|
10
|
+
- [110](https://github.com/Shopify/job-iteration/pull/110) - Update rubocop TargetRubyVersion
|
11
|
+
|
12
|
+
## v1.1.14 (May 28, 2021)
|
4
13
|
|
5
14
|
#### Bug fix
|
15
|
+
- [84](https://github.com/Shopify/job-iteration/pull/84) - Call adjust_total_time before running on_complete callbacks
|
16
|
+
- [94](https://github.com/Shopify/job-iteration/pull/94) - Remove unnecessary break
|
17
|
+
- [95](https://github.com/Shopify/job-iteration/pull/95) - ActiveRecordBatchEnumerator#each should rewind at the end
|
18
|
+
- [97](https://github.com/Shopify/job-iteration/pull/97) - Batch enumerator size returns the number of batches, not records
|
19
|
+
|
20
|
+
## v1.1.13 (May 20, 2021)
|
21
|
+
|
22
|
+
#### New feature
|
23
|
+
- [91](https://github.com/Shopify/job-iteration/pull/91) - Add enumerator yielding batches as Active Record Relations
|
6
24
|
|
7
25
|
## v1.1.12 (April 19, 2021)
|
8
26
|
|
data/Gemfile
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
job-iteration (1.3.0)
|
5
|
+
activejob (>= 5.2)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
activejob (6.1.4.1)
|
11
|
+
activesupport (= 6.1.4.1)
|
12
|
+
globalid (>= 0.3.6)
|
13
|
+
activemodel (6.1.4.1)
|
14
|
+
activesupport (= 6.1.4.1)
|
15
|
+
activerecord (6.1.4.1)
|
16
|
+
activemodel (= 6.1.4.1)
|
17
|
+
activesupport (= 6.1.4.1)
|
18
|
+
activesupport (6.1.4.1)
|
19
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
20
|
+
i18n (>= 1.6, < 2)
|
21
|
+
minitest (>= 5.1)
|
22
|
+
tzinfo (~> 2.0)
|
23
|
+
zeitwerk (~> 2.3)
|
24
|
+
ast (2.4.2)
|
25
|
+
coderay (1.1.3)
|
26
|
+
concurrent-ruby (1.1.9)
|
27
|
+
connection_pool (2.2.5)
|
28
|
+
database_cleaner (2.0.1)
|
29
|
+
database_cleaner-active_record (~> 2.0.0)
|
30
|
+
database_cleaner-active_record (2.0.1)
|
31
|
+
activerecord (>= 5.a)
|
32
|
+
database_cleaner-core (~> 2.0.0)
|
33
|
+
database_cleaner-core (2.0.1)
|
34
|
+
globalid (0.5.2)
|
35
|
+
activesupport (>= 5.0)
|
36
|
+
i18n (1.8.10)
|
37
|
+
concurrent-ruby (~> 1.0)
|
38
|
+
method_source (1.0.0)
|
39
|
+
minitest (5.14.4)
|
40
|
+
mocha (1.13.0)
|
41
|
+
mono_logger (1.1.1)
|
42
|
+
multi_json (1.15.0)
|
43
|
+
mustermann (1.1.1)
|
44
|
+
ruby2_keywords (~> 0.0.1)
|
45
|
+
mysql2 (0.5.3)
|
46
|
+
parallel (1.21.0)
|
47
|
+
parser (3.0.2.0)
|
48
|
+
ast (~> 2.4.1)
|
49
|
+
pry (0.14.1)
|
50
|
+
coderay (~> 1.1)
|
51
|
+
method_source (~> 1.0)
|
52
|
+
rack (2.2.3)
|
53
|
+
rack-protection (2.1.0)
|
54
|
+
rack
|
55
|
+
rainbow (3.0.0)
|
56
|
+
rake (13.0.6)
|
57
|
+
redis (4.4.0)
|
58
|
+
redis-namespace (1.8.1)
|
59
|
+
redis (>= 3.0.4)
|
60
|
+
regexp_parser (2.1.1)
|
61
|
+
resque (2.1.0)
|
62
|
+
mono_logger (~> 1.0)
|
63
|
+
multi_json (~> 1.0)
|
64
|
+
redis-namespace (~> 1.6)
|
65
|
+
sinatra (>= 0.9.2)
|
66
|
+
vegas (~> 0.1.2)
|
67
|
+
rexml (3.2.5)
|
68
|
+
rubocop (1.22.1)
|
69
|
+
parallel (~> 1.10)
|
70
|
+
parser (>= 3.0.0.0)
|
71
|
+
rainbow (>= 2.2.2, < 4.0)
|
72
|
+
regexp_parser (>= 1.8, < 3.0)
|
73
|
+
rexml
|
74
|
+
rubocop-ast (>= 1.12.0, < 2.0)
|
75
|
+
ruby-progressbar (~> 1.7)
|
76
|
+
unicode-display_width (>= 1.4.0, < 3.0)
|
77
|
+
rubocop-ast (1.12.0)
|
78
|
+
parser (>= 3.0.1.1)
|
79
|
+
rubocop-shopify (2.3.0)
|
80
|
+
rubocop (~> 1.22)
|
81
|
+
ruby-progressbar (1.11.0)
|
82
|
+
ruby2_keywords (0.0.5)
|
83
|
+
sidekiq (6.2.2)
|
84
|
+
connection_pool (>= 2.2.2)
|
85
|
+
rack (~> 2.0)
|
86
|
+
redis (>= 4.2.0)
|
87
|
+
sinatra (2.1.0)
|
88
|
+
mustermann (~> 1.0)
|
89
|
+
rack (~> 2.2)
|
90
|
+
rack-protection (= 2.1.0)
|
91
|
+
tilt (~> 2.0)
|
92
|
+
sorbet-runtime (0.5.9158)
|
93
|
+
tilt (2.0.10)
|
94
|
+
tzinfo (2.0.4)
|
95
|
+
concurrent-ruby (~> 1.0)
|
96
|
+
unicode-display_width (2.1.0)
|
97
|
+
vegas (0.1.11)
|
98
|
+
rack (>= 1.0.0)
|
99
|
+
yard (0.9.26)
|
100
|
+
zeitwerk (2.4.2)
|
101
|
+
|
102
|
+
PLATFORMS
|
103
|
+
ruby
|
104
|
+
|
105
|
+
DEPENDENCIES
|
106
|
+
activerecord
|
107
|
+
database_cleaner
|
108
|
+
globalid
|
109
|
+
i18n
|
110
|
+
job-iteration!
|
111
|
+
mocha
|
112
|
+
mysql2 (~> 0.5)
|
113
|
+
pry
|
114
|
+
rake
|
115
|
+
redis
|
116
|
+
resque
|
117
|
+
rubocop-shopify
|
118
|
+
sidekiq
|
119
|
+
sorbet-runtime
|
120
|
+
yard
|
121
|
+
|
122
|
+
BUNDLED WITH
|
123
|
+
2.2.20
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Job Iteration API
|
2
2
|
|
3
|
-
[![
|
3
|
+
[![CI](https://github.com/Shopify/job-iteration/actions/workflows/ci.yml/badge.svg)](https://github.com/Shopify/job-iteration/actions/workflows/ci.yml)
|
4
4
|
|
5
5
|
Meet Iteration, an extension for [ActiveJob](https://github.com/rails/rails/tree/master/activejob) that makes your jobs interruptible and resumable, saving all progress that the job has made (aka checkpoint for jobs).
|
6
6
|
|
@@ -77,7 +77,28 @@ class BatchesJob < ApplicationJob
|
|
77
77
|
|
78
78
|
def each_iteration(batch_of_comments, product_id)
|
79
79
|
# batch_of_comments will contain batches of 100 records
|
80
|
-
|
80
|
+
batch_of_comments.each do |comment|
|
81
|
+
DeleteCommentJob.perform_later(comment)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
```
|
86
|
+
|
87
|
+
```ruby
|
88
|
+
class BatchesAsRelationJob < ApplicationJob
|
89
|
+
include JobIteration::Iteration
|
90
|
+
|
91
|
+
def build_enumerator(product_id, cursor:)
|
92
|
+
enumerator_builder.active_record_on_batch_relations(
|
93
|
+
Product.find(product_id).comments,
|
94
|
+
cursor: cursor,
|
95
|
+
batch_size: 100,
|
96
|
+
)
|
97
|
+
end
|
98
|
+
|
99
|
+
def each_iteration(batch_of_comments, product_id)
|
100
|
+
# batch_of_comments will be a Comment::ActiveRecord_Relation
|
101
|
+
batch_of_comments.update_all(deleted: true)
|
81
102
|
end
|
82
103
|
end
|
83
104
|
```
|
@@ -150,7 +171,7 @@ There a few configuration assumptions that are required for Iteration to work wi
|
|
150
171
|
|
151
172
|
**Why is it important that `each_iteration` takes less than 30 seconds?** When the job worker is scheduled for restart or shutdown, it gets a notice to finish the remaining unit of work. To guarantee that no progress is lost we need to make sure that `each_iteration` completes within a reasonable amount of time.
|
152
173
|
|
153
|
-
**What do I do if each iteration takes a long time, because it's doing nested operations?** If your `each_iteration` is complex, we recommend enqueuing another job, which will run your nested business logic. We may expose primitives in the future to do this more effectively, but this is not terribly common today.
|
174
|
+
**What do I do if each iteration takes a long time, because it's doing nested operations?** If your `each_iteration` is complex, we recommend enqueuing another job, which will run your nested business logic. We may expose primitives in the future to do this more effectively, but this is not terribly common today.
|
154
175
|
|
155
176
|
**Why do I have to use this ugly helper in `build_enumerator`? Why can't you automatically infer it?** This is how the first version of the API worked. We checked the type of object returned by `build_enumerable`, and whether it was ActiveRecord Relation or an Array, we used the matching adapter. This caused opaque type branching in Iteration internals and it didn’t allow developers to craft their own Enumerators and control the cursor value. We made a decision to _always_ return Enumerator instance from `build_enumerator`. Now we provide explicit helpers to convert ActiveRecord Relation or an Array to Enumerator, and for more complex iteration flows developers can build their own `Enumerator` objects.
|
156
177
|
|
data/bin/test
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
def main
|
5
|
+
begin
|
6
|
+
command = create_command
|
7
|
+
rescue ArgumentError => e
|
8
|
+
abort(e.message)
|
9
|
+
end
|
10
|
+
puts "Running #{command.join(" ")}"
|
11
|
+
system(*command)
|
12
|
+
end
|
13
|
+
|
14
|
+
def create_command
|
15
|
+
case ARGV.length
|
16
|
+
when 0
|
17
|
+
["bundle", "exec", "rake", "test"]
|
18
|
+
when 1
|
19
|
+
filename = ARGV[0]
|
20
|
+
["bundle", "exec", "rake", "test", "TEST=#{filename}"]
|
21
|
+
when 2
|
22
|
+
filename = ARGV[0]
|
23
|
+
test_name = ARGV[1]
|
24
|
+
test_name_with_underscores = test_name.tr(" ", "_")
|
25
|
+
test_name_pattern = "/#{Regexp.escape(test_name_with_underscores)}/"
|
26
|
+
["bundle", "exec", "rake", "test", "TEST=#{filename}", "TESTOPTS=\"--name=#{test_name_pattern} -v\""]
|
27
|
+
else
|
28
|
+
raise ArgumentError, "Too many arguments. Did you forget to put the test name in quotes?"
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
main
|
data/dev.yml
CHANGED
@@ -13,7 +13,43 @@ up:
|
|
13
13
|
- custom:
|
14
14
|
name: Create Job Iteration database
|
15
15
|
meet: mysql -uroot -h job-iteration.railgun -e "CREATE DATABASE job_iteration_test"
|
16
|
-
met?: mysql -uroot -h job-iteration.railgun job_iteration_test -e "SELECT 1"
|
16
|
+
met?: mysql -uroot -h job-iteration.railgun job_iteration_test -e "SELECT 1" &> /dev/null
|
17
17
|
|
18
18
|
commands:
|
19
|
-
test:
|
19
|
+
test:
|
20
|
+
run: bin/test "$@"
|
21
|
+
syntax:
|
22
|
+
optional: filename testnamepattern
|
23
|
+
aliases: [t]
|
24
|
+
desc: run tests
|
25
|
+
long_desc: |
|
26
|
+
{{bold:Default}}
|
27
|
+
=======
|
28
|
+
Run the entire test suite.
|
29
|
+
|
30
|
+
Examples:
|
31
|
+
{{command:dev test}}
|
32
|
+
{{command:dev t}}
|
33
|
+
|
34
|
+
{{bold:Run all tests in a file}}
|
35
|
+
========================
|
36
|
+
Include the file path.
|
37
|
+
|
38
|
+
Example:
|
39
|
+
{{command:dev test test/unit/iteration_test.rb}}
|
40
|
+
|
41
|
+
{{bold:Run a single test in a given file}}
|
42
|
+
========================
|
43
|
+
Include the file path and the name of the test you'd like to run.
|
44
|
+
|
45
|
+
Example:
|
46
|
+
{{command:dev test test/unit/iteration_test.rb test_that_it_has_a_version_number}}
|
47
|
+
|
48
|
+
{{bold:Run all tests in a given file whose name contains a string}}
|
49
|
+
========================
|
50
|
+
Include the file path and the string that the test names should contain.
|
51
|
+
|
52
|
+
Example:
|
53
|
+
{{command:dev test test/unit/iteration_test.rb version_number}}
|
54
|
+
style:
|
55
|
+
run: bundle exec rubocop -a
|
data/job-iteration.gemspec
CHANGED
@@ -5,9 +5,10 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
5
5
|
require "job-iteration/version"
|
6
6
|
|
7
7
|
Gem::Specification.new do |spec|
|
8
|
+
spec.required_ruby_version = ">= 2.6"
|
8
9
|
spec.name = "job-iteration"
|
9
10
|
spec.version = JobIteration::VERSION
|
10
|
-
spec.authors =
|
11
|
+
spec.authors = ["Shopify"]
|
11
12
|
spec.email = ["ops-accounts+shipit@shopify.com"]
|
12
13
|
|
13
14
|
spec.summary = "Makes your background jobs interruptible and resumable."
|
@@ -20,7 +21,7 @@ Gem::Specification.new do |spec|
|
|
20
21
|
end
|
21
22
|
spec.bindir = "exe"
|
22
23
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
23
|
-
spec.require_paths =
|
24
|
+
spec.require_paths = ["lib"]
|
24
25
|
|
25
26
|
spec.metadata["changelog_uri"] = "https://github.com/Shopify/job-iteration/blob/master/CHANGELOG.md"
|
26
27
|
spec.metadata["allowed_push_host"] = "https://rubygems.org"
|
@@ -0,0 +1,117 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JobIteration
|
4
|
+
# Builds Batch Enumerator based on ActiveRecord Relation.
|
5
|
+
# @see EnumeratorBuilder
|
6
|
+
class ActiveRecordBatchEnumerator
|
7
|
+
include Enumerable
|
8
|
+
|
9
|
+
SQL_DATETIME_WITH_NSEC = "%Y-%m-%d %H:%M:%S.%N"
|
10
|
+
|
11
|
+
def initialize(relation, columns: nil, batch_size: 100, cursor: nil)
|
12
|
+
@batch_size = batch_size
|
13
|
+
@primary_key = "#{relation.table_name}.#{relation.primary_key}"
|
14
|
+
@columns = Array(columns&.map(&:to_s) || @primary_key)
|
15
|
+
@primary_key_index = @columns.index(@primary_key) || @columns.index(relation.primary_key)
|
16
|
+
@pluck_columns = if @primary_key_index
|
17
|
+
@columns
|
18
|
+
else
|
19
|
+
@columns.dup << @primary_key
|
20
|
+
end
|
21
|
+
@cursor = Array.wrap(cursor)
|
22
|
+
@initial_cursor = @cursor
|
23
|
+
raise ArgumentError, "Must specify at least one column" if @columns.empty?
|
24
|
+
if relation.joins_values.present? && !@columns.all? { |column| column.to_s.include?(".") }
|
25
|
+
raise ArgumentError, "You need to specify fully-qualified columns if you join a table"
|
26
|
+
end
|
27
|
+
|
28
|
+
if relation.arel.orders.present? || relation.arel.taken.present?
|
29
|
+
raise ConditionNotSupportedError
|
30
|
+
end
|
31
|
+
|
32
|
+
@base_relation = relation.reorder(@columns.join(","))
|
33
|
+
end
|
34
|
+
|
35
|
+
def each
|
36
|
+
return to_enum { size } unless block_given?
|
37
|
+
while (relation = next_batch)
|
38
|
+
yield relation, cursor_value
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def size
|
43
|
+
(@base_relation.count + @batch_size - 1) / @batch_size # ceiling division
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
|
48
|
+
def next_batch
|
49
|
+
relation = @base_relation.limit(@batch_size)
|
50
|
+
if conditions.any?
|
51
|
+
relation = relation.where(*conditions)
|
52
|
+
end
|
53
|
+
|
54
|
+
cursor_values, ids = relation.uncached do
|
55
|
+
pluck_columns(relation)
|
56
|
+
end
|
57
|
+
|
58
|
+
cursor = cursor_values.last
|
59
|
+
unless cursor.present?
|
60
|
+
@cursor = @initial_cursor
|
61
|
+
return
|
62
|
+
end
|
63
|
+
# The primary key was plucked, but original cursor did not include it, so we should remove it
|
64
|
+
cursor.pop unless @primary_key_index
|
65
|
+
@cursor = Array.wrap(cursor)
|
66
|
+
|
67
|
+
# Yields relations by selecting the primary keys of records in the batch.
|
68
|
+
# Post.where(published: nil) results in an enumerator of relations like: Post.where(ids: batch_of_ids)
|
69
|
+
@base_relation.where(@primary_key => ids)
|
70
|
+
end
|
71
|
+
|
72
|
+
def pluck_columns(relation)
|
73
|
+
if @pluck_columns.size == 1 # only the primary key
|
74
|
+
column_values = relation.pluck(*@pluck_columns)
|
75
|
+
return [column_values, column_values]
|
76
|
+
end
|
77
|
+
|
78
|
+
column_values = relation.pluck(*@pluck_columns)
|
79
|
+
primary_key_index = @primary_key_index || -1
|
80
|
+
primary_key_values = column_values.map { |values| values[primary_key_index] }
|
81
|
+
|
82
|
+
serialize_column_values!(column_values)
|
83
|
+
[column_values, primary_key_values]
|
84
|
+
end
|
85
|
+
|
86
|
+
def cursor_value
|
87
|
+
return @cursor.first if @cursor.size == 1
|
88
|
+
@cursor
|
89
|
+
end
|
90
|
+
|
91
|
+
def conditions
|
92
|
+
column_index = @cursor.size - 1
|
93
|
+
column = @columns[column_index]
|
94
|
+
where_clause = if @columns.size == @cursor.size
|
95
|
+
"#{column} > ?"
|
96
|
+
else
|
97
|
+
"#{column} >= ?"
|
98
|
+
end
|
99
|
+
while column_index > 0
|
100
|
+
column_index -= 1
|
101
|
+
column = @columns[column_index]
|
102
|
+
where_clause = "#{column} > ? OR (#{column} = ? AND (#{where_clause}))"
|
103
|
+
end
|
104
|
+
ret = @cursor.reduce([where_clause]) { |params, value| params << value << value }
|
105
|
+
ret.pop
|
106
|
+
ret
|
107
|
+
end
|
108
|
+
|
109
|
+
def serialize_column_values!(column_values)
|
110
|
+
column_values.map! { |values| values.map! { |value| column_value(value) } }
|
111
|
+
end
|
112
|
+
|
113
|
+
def column_value(value)
|
114
|
+
value.is_a?(Time) ? value.strftime(SQL_DATETIME_WITH_NSEC) : value
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
@@ -13,7 +13,7 @@ module JobIteration
|
|
13
13
|
def initialize
|
14
14
|
super(
|
15
15
|
"The relation cannot use ORDER BY or LIMIT due to the way how iteration with a cursor is designed. " \
|
16
|
-
|
16
|
+
"You can use other ways to limit the number of rows, e.g. a WHERE condition on the primary key column."
|
17
17
|
)
|
18
18
|
end
|
19
19
|
end
|
@@ -1,4 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "./active_record_batch_enumerator"
|
2
4
|
require_relative "./active_record_enumerator"
|
3
5
|
require_relative "./csv_enumerator"
|
4
6
|
require_relative "./throttle_enumerator"
|
@@ -86,6 +88,11 @@ module JobIteration
|
|
86
88
|
# WHERE (created_at > '$LAST_CREATED_AT_CURSOR'
|
87
89
|
# OR (created_at = '$LAST_CREATED_AT_CURSOR' AND (id > '$LAST_ID_CURSOR')))
|
88
90
|
# ORDER BY created_at, id LIMIT 100
|
91
|
+
#
|
92
|
+
# As a result of this query pattern, if the values in these columns change for the records in scope during
|
93
|
+
# iteration, they may be skipped or yielded multiple times depending on the nature of the update and the
|
94
|
+
# cursor's value. If the value gets updated to a greater value than the cursor's value, it will get yielded
|
95
|
+
# again. Similarly, if the value gets updated to a lesser value than the cursor's value, it will get skipped.
|
89
96
|
def build_active_record_enumerator_on_records(scope, cursor:, **args)
|
90
97
|
enum = build_active_record_enumerator(
|
91
98
|
scope,
|
@@ -95,7 +102,7 @@ module JobIteration
|
|
95
102
|
wrap(self, enum)
|
96
103
|
end
|
97
104
|
|
98
|
-
# Builds Enumerator from Active Record Relation and enumerates on batches.
|
105
|
+
# Builds Enumerator from Active Record Relation and enumerates on batches of records.
|
99
106
|
# Each Enumerator tick moves the cursor +batch_size+ rows forward.
|
100
107
|
#
|
101
108
|
# +batch_size:+ sets how many records will be fetched in one batch. Defaults to 100.
|
@@ -110,6 +117,16 @@ module JobIteration
|
|
110
117
|
wrap(self, enum)
|
111
118
|
end
|
112
119
|
|
120
|
+
# Builds Enumerator from Active Record Relation and enumerates on batches, yielding Active Record Relations.
|
121
|
+
# See documentation for #build_active_record_enumerator_on_batches.
|
122
|
+
def build_active_record_enumerator_on_batch_relations(scope, cursor:, **args)
|
123
|
+
JobIteration::ActiveRecordBatchEnumerator.new(
|
124
|
+
scope,
|
125
|
+
cursor: cursor,
|
126
|
+
**args
|
127
|
+
).each
|
128
|
+
end
|
129
|
+
|
113
130
|
def build_throttle_enumerator(enum, throttle_on:, backoff:)
|
114
131
|
JobIteration::ThrottleEnumerator.new(
|
115
132
|
enum,
|
@@ -124,6 +141,7 @@ module JobIteration
|
|
124
141
|
alias_method :array, :build_array_enumerator
|
125
142
|
alias_method :active_record_on_records, :build_active_record_enumerator_on_records
|
126
143
|
alias_method :active_record_on_batches, :build_active_record_enumerator_on_batches
|
144
|
+
alias_method :active_record_on_batch_relations, :build_active_record_enumerator_on_batch_relations
|
127
145
|
alias_method :throttle, :build_throttle_enumerator
|
128
146
|
|
129
147
|
private
|
@@ -6,6 +6,13 @@ module JobIteration
|
|
6
6
|
module Iteration
|
7
7
|
extend ActiveSupport::Concern
|
8
8
|
|
9
|
+
attr_accessor(
|
10
|
+
:cursor_position,
|
11
|
+
:start_time,
|
12
|
+
:times_interrupted,
|
13
|
+
:total_time,
|
14
|
+
)
|
15
|
+
|
9
16
|
class CursorError < ArgumentError
|
10
17
|
attr_reader :cursor
|
11
18
|
|
@@ -29,13 +36,6 @@ module JobIteration
|
|
29
36
|
end
|
30
37
|
|
31
38
|
included do |_base|
|
32
|
-
attr_accessor(
|
33
|
-
:cursor_position,
|
34
|
-
:start_time,
|
35
|
-
:times_interrupted,
|
36
|
-
:total_time,
|
37
|
-
)
|
38
|
-
|
39
39
|
define_callbacks :start
|
40
40
|
define_callbacks :shutdown
|
41
41
|
define_callbacks :complete
|
@@ -159,16 +159,16 @@ module JobIteration
|
|
159
159
|
|
160
160
|
logger.info(
|
161
161
|
"[JobIteration::Iteration] Enumerator found nothing to iterate! " \
|
162
|
-
|
162
|
+
"times_interrupted=#{times_interrupted} cursor_position=#{cursor_position}"
|
163
163
|
) unless found_record
|
164
164
|
|
165
|
+
adjust_total_time
|
166
|
+
|
165
167
|
true
|
166
168
|
end
|
167
169
|
|
168
|
-
def record_unit_of_work
|
169
|
-
ActiveSupport::Notifications.instrument("each_iteration.iteration", iteration_instrumentation_tags)
|
170
|
-
yield
|
171
|
-
end
|
170
|
+
def record_unit_of_work(&block)
|
171
|
+
ActiveSupport::Notifications.instrument("each_iteration.iteration", iteration_instrumentation_tags, &block)
|
172
172
|
end
|
173
173
|
|
174
174
|
def reenqueue_iteration_job
|
@@ -208,7 +208,7 @@ module JobIteration
|
|
208
208
|
|
209
209
|
raise CursorError.new(
|
210
210
|
"Cursor must be composed of objects capable of built-in (de)serialization: " \
|
211
|
-
|
211
|
+
"Strings, Integers, Floats, Arrays, Hashes, true, false, or nil.",
|
212
212
|
cursor: cursor,
|
213
213
|
)
|
214
214
|
end
|
@@ -225,7 +225,7 @@ module JobIteration
|
|
225
225
|
parameters = method_parameters(:build_enumerator)
|
226
226
|
unless valid_cursor_parameter?(parameters)
|
227
227
|
raise ArgumentError, "Iteration job (#{self.class}) #build_enumerator " \
|
228
|
-
|
228
|
+
"expects the keyword argument `cursor`"
|
229
229
|
end
|
230
230
|
else
|
231
231
|
raise ArgumentError, "Iteration job (#{self.class}) must implement #build_enumerator " \
|
@@ -249,8 +249,6 @@ module JobIteration
|
|
249
249
|
end
|
250
250
|
|
251
251
|
def output_interrupt_summary
|
252
|
-
adjust_total_time
|
253
|
-
|
254
252
|
message = "[JobIteration::Iteration] Completed iterating. times_interrupted=%d total_time=%.3f"
|
255
253
|
logger.info(Kernel.format(message, times_interrupted, total_time))
|
256
254
|
end
|
data/lib/job-iteration.rb
CHANGED
@@ -22,6 +22,7 @@ module JobIteration
|
|
22
22
|
|
23
23
|
# Used internally for hooking into job processing frameworks like Sidekiq and Resque.
|
24
24
|
attr_accessor :interruption_adapter
|
25
|
+
|
25
26
|
self.interruption_adapter = -> { false }
|
26
27
|
|
27
28
|
# Set if you want to use your own enumerator builder instead of default EnumeratorBuilder.
|
@@ -33,21 +34,20 @@ module JobIteration
|
|
33
34
|
#
|
34
35
|
# JobIteration.enumerator_builder = MyOwnBuilder
|
35
36
|
attr_accessor :enumerator_builder
|
37
|
+
|
36
38
|
self.enumerator_builder = JobIteration::EnumeratorBuilder
|
37
39
|
|
38
40
|
def load_integrations
|
39
41
|
loaded = nil
|
40
42
|
INTEGRATIONS.each do |integration|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
"#{loaded} integration has already been loaded, but #{integration} is also available. " \
|
43
|
+
load_integration(integration)
|
44
|
+
if loaded
|
45
|
+
raise IntegrationLoadError,
|
46
|
+
"#{loaded} integration has already been loaded, but #{integration} is also available. " \
|
46
47
|
"Iteration will only work with one integration."
|
47
|
-
end
|
48
|
-
loaded = integration
|
49
|
-
rescue LoadError
|
50
48
|
end
|
49
|
+
loaded = integration
|
50
|
+
rescue LoadError
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: job-iteration
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-10-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -45,6 +45,7 @@ executables: []
|
|
45
45
|
extensions: []
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
|
+
- ".github/dependabot.yml"
|
48
49
|
- ".github/workflows/ci.yml"
|
49
50
|
- ".gitignore"
|
50
51
|
- ".rubocop.yml"
|
@@ -52,10 +53,12 @@ files:
|
|
52
53
|
- CHANGELOG.md
|
53
54
|
- CODE_OF_CONDUCT.md
|
54
55
|
- Gemfile
|
56
|
+
- Gemfile.lock
|
55
57
|
- LICENSE.txt
|
56
58
|
- README.md
|
57
59
|
- Rakefile
|
58
60
|
- bin/setup
|
61
|
+
- bin/test
|
59
62
|
- dev.yml
|
60
63
|
- gemfiles/rails_5_2.gemfile
|
61
64
|
- gemfiles/rails_6_0.gemfile
|
@@ -66,6 +69,7 @@ files:
|
|
66
69
|
- guides/throttling.md
|
67
70
|
- job-iteration.gemspec
|
68
71
|
- lib/job-iteration.rb
|
72
|
+
- lib/job-iteration/active_record_batch_enumerator.rb
|
69
73
|
- lib/job-iteration/active_record_cursor.rb
|
70
74
|
- lib/job-iteration/active_record_enumerator.rb
|
71
75
|
- lib/job-iteration/csv_enumerator.rb
|
@@ -91,14 +95,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
91
95
|
requirements:
|
92
96
|
- - ">="
|
93
97
|
- !ruby/object:Gem::Version
|
94
|
-
version: '
|
98
|
+
version: '2.6'
|
95
99
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
100
|
requirements:
|
97
101
|
- - ">="
|
98
102
|
- !ruby/object:Gem::Version
|
99
103
|
version: '0'
|
100
104
|
requirements: []
|
101
|
-
rubygems_version: 3.
|
105
|
+
rubygems_version: 3.2.20
|
102
106
|
signing_key:
|
103
107
|
specification_version: 4
|
104
108
|
summary: Makes your background jobs interruptible and resumable.
|