job-iteration 1.1.3 → 1.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of job-iteration might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/.travis.yml +4 -2
- data/CHANGELOG.md +22 -0
- data/Gemfile +5 -2
- data/README.md +1 -0
- data/dev.yml +1 -1
- data/gemfiles/rails_6_0.gemfile +6 -0
- data/guides/custom-enumerator.md +1 -1
- data/guides/throttling.md +46 -0
- data/job-iteration.gemspec +1 -0
- data/lib/job-iteration/active_record_cursor.rb +2 -0
- data/lib/job-iteration/csv_enumerator.rb +9 -5
- data/lib/job-iteration/enumerator_builder.rb +11 -10
- data/lib/job-iteration/iteration.rb +21 -2
- data/lib/job-iteration/throttle_enumerator.rb +46 -0
- data/lib/job-iteration/version.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 99e26756140f1346b646a3931dc6cc8de1181a67a27916d9a0d85deb1d4dddeb
|
4
|
+
data.tar.gz: '058d65ec35e8cebbf74ba37c2227aa63cb1b5fbf183c81ee4c1b4f1cc3a9d033'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86a6a0f8ef55ab7502215fc4efcd6bc6e7b7a0b4737decf470db964d2302705d42ab1d16eff904e0d2c914f7ac30ca66cd2b99fffcb6aeaf1db904619fd89418
|
7
|
+
data.tar.gz: 7d9c9d5023f56365e2a1040fc7ccbb57f01c73f50c7addeaaedcf64edc2dab5cf2383d56cda46b832cdc6acce367abf87757dbd1113971335f51d675883e4141
|
data/.rubocop.yml
CHANGED
data/.travis.yml
CHANGED
@@ -3,8 +3,9 @@ services:
|
|
3
3
|
- redis-server
|
4
4
|
language: ruby
|
5
5
|
rvm:
|
6
|
-
- 2.5
|
7
|
-
- 2.6
|
6
|
+
- 2.5
|
7
|
+
- 2.6
|
8
|
+
- 2.7
|
8
9
|
before_install:
|
9
10
|
- mysql -e 'CREATE DATABASE job_iteration_test;'
|
10
11
|
script:
|
@@ -14,4 +15,5 @@ script:
|
|
14
15
|
|
15
16
|
gemfile:
|
16
17
|
- 'gemfiles/rails_5_2.gemfile'
|
18
|
+
- 'gemfiles/rails_6_0.gemfile'
|
17
19
|
- 'gemfiles/rails_edge.gemfile'
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,28 @@
|
|
4
4
|
|
5
5
|
#### Bug fix
|
6
6
|
|
7
|
+
## v1.1.8 (June 8, 2020)
|
8
|
+
|
9
|
+
- Preserve ruby2_keywords tags in arguments on Ruby 2.7
|
10
|
+
|
11
|
+
## v1.1.7 (June 4, 2020)
|
12
|
+
|
13
|
+
- [54](https://github.com/Shopify/job-iteration/pull/54) - Fix warnings on Ruby 2.7
|
14
|
+
|
15
|
+
## v1.1.6 (May 22, 2020)
|
16
|
+
|
17
|
+
- [49](https://github.com/Shopify/job-iteration/pull/49) - Log when enumerator has nothing to iterate
|
18
|
+
- [52](https://github.com/Shopify/job-iteration/pull/52) - Fix CSVEnumerator cursor to properly remove already processed rows
|
19
|
+
|
20
|
+
## v1.1.5 (February 27, 2020)
|
21
|
+
|
22
|
+
- [47](https://github.com/Shopify/job-iteration/pull/47) - Optional `sorbet-runtime` support for `JobIteration::Iteration` interface validation
|
23
|
+
|
24
|
+
## v1.1.4 (December 13, 2019)
|
25
|
+
|
26
|
+
- [45](https://github.com/Shopify/job-iteration/pull/45) - Add Throttle enumerator
|
27
|
+
|
28
|
+
|
7
29
|
### v1.1.3 (August 20, 2019)
|
8
30
|
|
9
31
|
- [36](https://github.com/shopify/job-iteration/pull/39) - Check method validation at job initialization step
|
data/Gemfile
CHANGED
@@ -11,7 +11,7 @@ gemspec
|
|
11
11
|
gem 'sidekiq'
|
12
12
|
gem 'resque'
|
13
13
|
|
14
|
-
gem 'mysql2', '~> 0.
|
14
|
+
gem 'mysql2', '~> 0.5'
|
15
15
|
gem 'globalid'
|
16
16
|
gem 'i18n'
|
17
17
|
gem 'redis'
|
@@ -20,6 +20,9 @@ gem 'database_cleaner'
|
|
20
20
|
gem 'pry'
|
21
21
|
gem 'mocha'
|
22
22
|
|
23
|
-
gem 'rubocop'
|
23
|
+
gem 'rubocop', '~> 0.77.0'
|
24
24
|
gem 'yard'
|
25
25
|
gem 'rake'
|
26
|
+
|
27
|
+
# for unit testing optional sorbet support
|
28
|
+
gem 'sorbet-runtime'
|
data/README.md
CHANGED
@@ -112,6 +112,7 @@ Iteration hooks into Sidekiq and Resque out of the box to support graceful inter
|
|
112
112
|
* [Iteration: how it works](guides/iteration-how-it-works.md)
|
113
113
|
* [Best practices](guides/best-practices.md)
|
114
114
|
* [Writing custom enumerator](guides/custom-enumerator.md)
|
115
|
+
* [Throttling](guides/throttling.md)
|
115
116
|
|
116
117
|
For more detailed documentation, see [rubydoc](https://www.rubydoc.info/github/Shopify/job-iteration).
|
117
118
|
|
data/dev.yml
CHANGED
data/guides/custom-enumerator.md
CHANGED
@@ -0,0 +1,46 @@
|
|
1
|
+
Iteration comes with a special wrapper enumerator that allows you to throttle iterations based on an external signal (e.g. database health).
|
2
|
+
|
3
|
+
Consider this example:
|
4
|
+
|
5
|
+
```ruby
|
6
|
+
class InactiveAccountDeleteJob < ActiveJob::Base
|
7
|
+
include JobIteration::Iteration
|
8
|
+
|
9
|
+
def build_enumerator(_params, cursor:)
|
10
|
+
enumerator_builder.active_record_on_batches(
|
11
|
+
Account.inactive,
|
12
|
+
cursor: cursor
|
13
|
+
)
|
14
|
+
end
|
15
|
+
|
16
|
+
def each_iteration(batch, _params)
|
17
|
+
Account.where(id: batch.map(&:id)).delete_all
|
18
|
+
end
|
19
|
+
end
|
20
|
+
```
|
21
|
+
|
22
|
+
For an app that keeps track of customer accounts, it's typical to purge old data that's no longer relevant for storage.
|
23
|
+
|
24
|
+
At the same time, if you've got a lot of DB writes to perform, this can cause extra load on the database and slow down other parts of your service.
|
25
|
+
|
26
|
+
You can change `build_enumerator` to wrap the enumeration over DB rows in a throttle enumerator, which takes a signal as a proc and re-enqueues the job for later if the proc returns `true`.
|
27
|
+
|
28
|
+
```ruby
|
29
|
+
def build_enumerator(_params, cursor:)
|
30
|
+
enumerator_builder.build_throttle_enumerator(
|
31
|
+
enumerator_builder.active_record_on_batches(
|
32
|
+
Account.inactive,
|
33
|
+
cursor: cursor
|
34
|
+
),
|
35
|
+
throttle_on: -> { DatabaseStatus.unhealthy? },
|
36
|
+
backoff: 30.seconds
|
37
|
+
)
|
38
|
+
end
|
39
|
+
```
|
40
|
+
|
41
|
+
Note that it's up to you to implement `DatabaseStatus.unhealthy?` in a way that works for your database of choice. At Shopify, a helper like `DatabaseStatus` checks the following MySQL metrics:
|
42
|
+
|
43
|
+
* Replication lag across all regions
|
44
|
+
* DB threads
|
45
|
+
* DB is available for writes (otherwise indicates a failover happening)
|
46
|
+
* [Semian](https://github.com/shopify/semian) open circuits
|
data/job-iteration.gemspec
CHANGED
@@ -23,6 +23,7 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.require_paths = %w(lib)
|
24
24
|
|
25
25
|
spec.metadata["changelog_uri"] = "https://github.com/Shopify/job-iteration/blob/master/CHANGELOG.md"
|
26
|
+
spec.metadata["allowed_push_host"] = "https://rubygems.org"
|
26
27
|
|
27
28
|
spec.add_development_dependency("activerecord")
|
28
29
|
spec.add_dependency("activejob", ">= 5.2")
|
@@ -32,8 +32,8 @@ module JobIteration
|
|
32
32
|
def rows(cursor:)
|
33
33
|
@csv.lazy
|
34
34
|
.each_with_index
|
35
|
-
.drop(cursor
|
36
|
-
.to_enum {
|
35
|
+
.drop(count_of_processed_rows(cursor))
|
36
|
+
.to_enum { count_of_rows_in_file }
|
37
37
|
end
|
38
38
|
|
39
39
|
# Constructs an enumerator on batches of CSV rows
|
@@ -42,13 +42,13 @@ module JobIteration
|
|
42
42
|
@csv.lazy
|
43
43
|
.each_slice(batch_size)
|
44
44
|
.each_with_index
|
45
|
-
.drop(cursor
|
46
|
-
.to_enum { (
|
45
|
+
.drop(count_of_processed_rows(cursor))
|
46
|
+
.to_enum { (count_of_rows_in_file.to_f / batch_size).ceil }
|
47
47
|
end
|
48
48
|
|
49
49
|
private
|
50
50
|
|
51
|
-
def
|
51
|
+
def count_of_rows_in_file
|
52
52
|
# TODO: Remove rescue for NoMethodError when Ruby 2.6 is no longer supported.
|
53
53
|
begin
|
54
54
|
filepath = @csv.path
|
@@ -63,5 +63,9 @@ module JobIteration
|
|
63
63
|
count -= 1 if @csv.headers
|
64
64
|
count
|
65
65
|
end
|
66
|
+
|
67
|
+
def count_of_processed_rows(cursor)
|
68
|
+
cursor.nil? ? 0 : cursor + 1
|
69
|
+
end
|
66
70
|
end
|
67
71
|
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require_relative "./active_record_enumerator"
|
3
3
|
require_relative "./csv_enumerator"
|
4
|
+
require_relative "./throttle_enumerator"
|
4
5
|
require "forwardable"
|
5
6
|
|
6
7
|
module JobIteration
|
@@ -61,16 +62,6 @@ module JobIteration
|
|
61
62
|
wrap(self, enumerable.each_with_index.drop(drop).to_enum { enumerable.size })
|
62
63
|
end
|
63
64
|
|
64
|
-
# Builds Enumerator from a lock queue instance that belongs to a job.
|
65
|
-
# The helper is only to be used from jobs that use LockQueue module.
|
66
|
-
def build_lock_queue_enumerator(lock_queue, at_most_once:)
|
67
|
-
unless lock_queue.is_a?(BackgroundQueue::LockQueue::RedisQueue) ||
|
68
|
-
lock_queue.is_a?(BackgroundQueue::LockQueue::RolloutRedisQueue)
|
69
|
-
raise ArgumentError, "an argument to #build_lock_queue_enumerator must be a LockQueue"
|
70
|
-
end
|
71
|
-
wrap(self, BackgroundQueue::LockQueueEnumerator.new(lock_queue, at_most_once: at_most_once).to_enum)
|
72
|
-
end
|
73
|
-
|
74
65
|
# Builds Enumerator from Active Record Relation. Each Enumerator tick moves the cursor one row forward.
|
75
66
|
#
|
76
67
|
# +columns:+ argument is used to build the actual query for iteration. +columns+: defaults to primary key:
|
@@ -119,11 +110,21 @@ module JobIteration
|
|
119
110
|
wrap(self, enum)
|
120
111
|
end
|
121
112
|
|
113
|
+
def build_throttle_enumerator(enum, throttle_on:, backoff:)
|
114
|
+
JobIteration::ThrottleEnumerator.new(
|
115
|
+
enum,
|
116
|
+
@job,
|
117
|
+
throttle_on: throttle_on,
|
118
|
+
backoff: backoff
|
119
|
+
).to_enum
|
120
|
+
end
|
121
|
+
|
122
122
|
alias_method :once, :build_once_enumerator
|
123
123
|
alias_method :times, :build_times_enumerator
|
124
124
|
alias_method :array, :build_array_enumerator
|
125
125
|
alias_method :active_record_on_records, :build_active_record_enumerator_on_records
|
126
126
|
alias_method :active_record_on_batches, :build_active_record_enumerator_on_batches
|
127
|
+
alias_method :throttle, :build_throttle_enumerator
|
127
128
|
|
128
129
|
private
|
129
130
|
|
@@ -49,6 +49,7 @@ module JobIteration
|
|
49
49
|
self.total_time = 0.0
|
50
50
|
assert_implements_methods!
|
51
51
|
end
|
52
|
+
ruby2_keywords(:initialize) if respond_to?(:ruby2_keywords, true)
|
52
53
|
|
53
54
|
def serialize # @private
|
54
55
|
super.merge(
|
@@ -69,7 +70,7 @@ module JobIteration
|
|
69
70
|
interruptible_perform(*params)
|
70
71
|
end
|
71
72
|
|
72
|
-
def retry_job(
|
73
|
+
def retry_job(*, **)
|
73
74
|
super unless defined?(@retried) && @retried
|
74
75
|
@retried = true
|
75
76
|
end
|
@@ -116,8 +117,10 @@ module JobIteration
|
|
116
117
|
|
117
118
|
def iterate_with_enumerator(enumerator, arguments)
|
118
119
|
arguments = arguments.dup.freeze
|
120
|
+
found_record = false
|
119
121
|
enumerator.each do |object_from_enumerator, index|
|
120
122
|
record_unit_of_work do
|
123
|
+
found_record = true
|
121
124
|
each_iteration(object_from_enumerator, *arguments)
|
122
125
|
self.cursor_position = index
|
123
126
|
end
|
@@ -128,6 +131,11 @@ module JobIteration
|
|
128
131
|
return false
|
129
132
|
end
|
130
133
|
|
134
|
+
logger.info(
|
135
|
+
"[JobIteration::Iteration] Enumerator found nothing to iterate! " \
|
136
|
+
"times_interrupted=#{times_interrupted} cursor_position=#{cursor_position}"
|
137
|
+
) unless found_record
|
138
|
+
|
131
139
|
true
|
132
140
|
end
|
133
141
|
|
@@ -176,7 +184,7 @@ module JobIteration
|
|
176
184
|
end
|
177
185
|
|
178
186
|
if respond_to?(:build_enumerator, true)
|
179
|
-
parameters =
|
187
|
+
parameters = method_parameters(:build_enumerator)
|
180
188
|
unless valid_cursor_parameter?(parameters)
|
181
189
|
raise ArgumentError, "Iteration job (#{self.class}) #build_enumerator " \
|
182
190
|
"expects the keyword argument `cursor`"
|
@@ -187,6 +195,17 @@ module JobIteration
|
|
187
195
|
end
|
188
196
|
end
|
189
197
|
|
198
|
+
def method_parameters(method_name)
|
199
|
+
method = method(method_name)
|
200
|
+
|
201
|
+
if defined?(T::Private::Methods)
|
202
|
+
signature = T::Private::Methods.signature_for_method(method)
|
203
|
+
method = signature.method if signature
|
204
|
+
end
|
205
|
+
|
206
|
+
method.parameters
|
207
|
+
end
|
208
|
+
|
190
209
|
def iteration_instrumentation_tags
|
191
210
|
{ job_class: self.class.name }
|
192
211
|
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
module JobIteration
|
4
|
+
# ThrottleEnumerator allows you to throttle iterations
|
5
|
+
# based on an external signal (e.g. database health).
|
6
|
+
# @example
|
7
|
+
# def build_enumerator(_params, cursor:)
|
8
|
+
# enumerator_builder.build_throttle_enumerator(
|
9
|
+
# enumerator_builder.active_record_on_batches(
|
10
|
+
# Account.inactive,
|
11
|
+
# cursor: cursor
|
12
|
+
# ),
|
13
|
+
# throttle_on: -> { DatabaseStatus.unhealthy? },
|
14
|
+
# backoff: 30.seconds
|
15
|
+
# )
|
16
|
+
# end
|
17
|
+
# The enumerator from above will mimic +active_record_on_batches+,
|
18
|
+
# except when +DatabaseStatus.unhealthy?+ starts to return true.
|
19
|
+
# In that case, it will re-enqueue the job with a specified backoff.
|
20
|
+
class ThrottleEnumerator
|
21
|
+
def initialize(enum, job, throttle_on:, backoff:)
|
22
|
+
@enum = enum
|
23
|
+
@job = job
|
24
|
+
@throttle_on = throttle_on
|
25
|
+
@backoff = backoff
|
26
|
+
end
|
27
|
+
|
28
|
+
def to_enum
|
29
|
+
Enumerator.new(-> { @enum.size }) do |yielder|
|
30
|
+
@enum.each do |*val|
|
31
|
+
if should_throttle?
|
32
|
+
ActiveSupport::Notifications.instrument("throttled.iteration", job_class: @job.class.name)
|
33
|
+
@job.retry_job(wait: @backoff)
|
34
|
+
throw(:abort, :skip_complete_callbacks)
|
35
|
+
end
|
36
|
+
|
37
|
+
yielder.yield(*val)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def should_throttle?
|
43
|
+
@throttle_on.call
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: job-iteration
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.3
|
4
|
+
version: 1.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-06-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -58,10 +58,12 @@ files:
|
|
58
58
|
- bin/setup
|
59
59
|
- dev.yml
|
60
60
|
- gemfiles/rails_5_2.gemfile
|
61
|
+
- gemfiles/rails_6_0.gemfile
|
61
62
|
- gemfiles/rails_edge.gemfile
|
62
63
|
- guides/best-practices.md
|
63
64
|
- guides/custom-enumerator.md
|
64
65
|
- guides/iteration-how-it-works.md
|
66
|
+
- guides/throttling.md
|
65
67
|
- job-iteration.gemspec
|
66
68
|
- lib/job-iteration.rb
|
67
69
|
- lib/job-iteration/active_record_cursor.rb
|
@@ -72,6 +74,7 @@ files:
|
|
72
74
|
- lib/job-iteration/integrations/sidekiq.rb
|
73
75
|
- lib/job-iteration/iteration.rb
|
74
76
|
- lib/job-iteration/test_helper.rb
|
77
|
+
- lib/job-iteration/throttle_enumerator.rb
|
75
78
|
- lib/job-iteration/version.rb
|
76
79
|
- railgun.yml
|
77
80
|
homepage: https://github.com/shopify/job-iteration
|
@@ -79,6 +82,7 @@ licenses:
|
|
79
82
|
- MIT
|
80
83
|
metadata:
|
81
84
|
changelog_uri: https://github.com/Shopify/job-iteration/blob/master/CHANGELOG.md
|
85
|
+
allowed_push_host: https://rubygems.org
|
82
86
|
post_install_message:
|
83
87
|
rdoc_options: []
|
84
88
|
require_paths:
|