job-iteration 1.10.0 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/job-iteration.gemspec +2 -2
- data/lib/job-iteration/csv_enumerator.rb +6 -10
- data/lib/job-iteration/version.rb +1 -1
- data/lib/tapioca/dsl/compilers/job_iteration.rb +2 -1
- metadata +3 -23
- data/.github/dependabot.yml +0 -16
- data/.github/workflows/ci.yml +0 -77
- data/.github/workflows/cla.yml +0 -22
- data/.gitignore +0 -11
- data/.rubocop.yml +0 -16
- data/.ruby-version +0 -1
- data/.yardopts +0 -3
- data/CODE_OF_CONDUCT.md +0 -74
- data/Gemfile +0 -45
- data/Gemfile.lock +0 -266
- data/Rakefile +0 -12
- data/bin/setup +0 -23
- data/bin/test +0 -32
- data/dev.yml +0 -54
- data/gemfiles/rails_gems.gemfile +0 -18
- data/guides/argument-semantics.md +0 -128
- data/guides/best-practices.md +0 -108
- data/guides/custom-enumerator.md +0 -140
- data/guides/iteration-how-it-works.md +0 -51
- data/guides/throttling.md +0 -68
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 61c40af6f75909b71f400462b9d64f994530f0fb68367ddbe6479f1b5fe8d831
|
4
|
+
data.tar.gz: a293c68062cd9714d522a2c3c5e81292bcd5a9f8172cbf3f537f0676e0de5731
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a773c72a419db3fe0677ba323ec0a2bdca6c02e34f322e7519ff080752c66e8ce7c973ca3b9031e84e2b70e27eb61dcf5875dfd38ef0b4421983991202256c71
|
7
|
+
data.tar.gz: 2abedd098be50d634f68eade3e681042aef00a87c8a3b0fb7cc88a91143cafade08bc18435f7479399c2ccbb660563ff781c1b4e9248b8bff7cf22190504383e
|
data/CHANGELOG.md
CHANGED
@@ -16,6 +16,18 @@ nil
|
|
16
16
|
|
17
17
|
nil
|
18
18
|
|
19
|
+
## v1.11.0 (Jul 14, 2025)
|
20
|
+
|
21
|
+
### Security fixes
|
22
|
+
|
23
|
+
- [595](https://github.com/Shopify/job-iteration/pull/595) [CVE-2025-53623] Fixes a security issue in the `CSVEnumerator` where the filename was directly interpolated into a bash command.
|
24
|
+
|
25
|
+
### Bug fixes
|
26
|
+
|
27
|
+
- [590](https://github.com/Shopify/job-iteration/pull/590) Fix a compatibility issue between the Sorbet DSL compiler and the latest Tapioca.
|
28
|
+
- [593](https://github.com/Shopify/job-iteration/pull/593) Properly support required and optional positional arguments in the Sorbet DSL compiler.
|
29
|
+
- [594](https://github.com/Shopify/job-iteration/pull/594) Clean up the size calculation in the `CSVEnumerator`.
|
30
|
+
|
19
31
|
## v1.10.0 (Mar 20, 2025)
|
20
32
|
|
21
33
|
### Breaking Changes
|
data/job-iteration.gemspec
CHANGED
@@ -16,8 +16,8 @@ Gem::Specification.new do |spec|
|
|
16
16
|
spec.homepage = "https://github.com/shopify/job-iteration"
|
17
17
|
spec.license = "MIT"
|
18
18
|
|
19
|
-
spec.files
|
20
|
-
f.match(%r{^(test|spec|features)/})
|
19
|
+
spec.files = ["CHANGELOG.md", "LICENSE.txt", "README.md", "job-iteration.gemspec"] + Dir.glob("{lib,exe}/**/*", File::FNM_DOTMATCH).reject do |f|
|
20
|
+
File.directory?(f) || f.match(%r{^(test|spec|features)/})
|
21
21
|
end
|
22
22
|
spec.bindir = "exe"
|
23
23
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
@@ -43,23 +43,19 @@ module JobIteration
|
|
43
43
|
.each_slice(batch_size)
|
44
44
|
.with_index
|
45
45
|
.drop(count_of_processed_rows(cursor))
|
46
|
-
.to_enum
|
46
|
+
.to_enum do
|
47
|
+
num_rows = count_of_rows_in_file
|
48
|
+
num_rows.nil? ? nil : (num_rows.to_f / batch_size).ceil
|
49
|
+
end
|
47
50
|
end
|
48
51
|
|
49
52
|
private
|
50
53
|
|
51
54
|
def count_of_rows_in_file
|
52
|
-
|
53
|
-
begin
|
54
|
-
filepath = @csv.path
|
55
|
-
rescue NoMethodError
|
56
|
-
return
|
57
|
-
end
|
58
|
-
|
59
|
-
# Behaviour of CSV#path changed in Ruby 2.6.3 (returns nil instead of raising NoMethodError)
|
55
|
+
filepath = @csv.path
|
60
56
|
return unless filepath
|
61
57
|
|
62
|
-
count =
|
58
|
+
count = File.foreach(filepath).count
|
63
59
|
count -= 1 if @csv.headers
|
64
60
|
count
|
65
61
|
end
|
@@ -8,10 +8,11 @@ module Tapioca
|
|
8
8
|
module Compilers
|
9
9
|
class JobIteration < Compiler
|
10
10
|
extend T::Sig
|
11
|
+
extend T::Generic
|
11
12
|
|
12
13
|
ConstantType = type_member { { fixed: T.class_of(::JobIteration::Iteration) } }
|
13
14
|
PARAM_TYPES_IN_ORDER = [
|
14
|
-
RBI::
|
15
|
+
RBI::ReqParam,
|
15
16
|
RBI::OptParam,
|
16
17
|
RBI::RestParam,
|
17
18
|
RBI::KwParam,
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: job-iteration
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.10.0
|
4
|
+
version: 1.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: activejob
|
@@ -30,29 +30,9 @@ executables: []
|
|
30
30
|
extensions: []
|
31
31
|
extra_rdoc_files: []
|
32
32
|
files:
|
33
|
-
- ".github/dependabot.yml"
|
34
|
-
- ".github/workflows/ci.yml"
|
35
|
-
- ".github/workflows/cla.yml"
|
36
|
-
- ".gitignore"
|
37
|
-
- ".rubocop.yml"
|
38
|
-
- ".ruby-version"
|
39
|
-
- ".yardopts"
|
40
33
|
- CHANGELOG.md
|
41
|
-
- CODE_OF_CONDUCT.md
|
42
|
-
- Gemfile
|
43
|
-
- Gemfile.lock
|
44
34
|
- LICENSE.txt
|
45
35
|
- README.md
|
46
|
-
- Rakefile
|
47
|
-
- bin/setup
|
48
|
-
- bin/test
|
49
|
-
- dev.yml
|
50
|
-
- gemfiles/rails_gems.gemfile
|
51
|
-
- guides/argument-semantics.md
|
52
|
-
- guides/best-practices.md
|
53
|
-
- guides/custom-enumerator.md
|
54
|
-
- guides/iteration-how-it-works.md
|
55
|
-
- guides/throttling.md
|
56
36
|
- job-iteration.gemspec
|
57
37
|
- lib/job-iteration.rb
|
58
38
|
- lib/job-iteration/active_record_batch_enumerator.rb
|
@@ -96,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
96
76
|
- !ruby/object:Gem::Version
|
97
77
|
version: '0'
|
98
78
|
requirements: []
|
99
|
-
rubygems_version: 3.6.
|
79
|
+
rubygems_version: 3.6.9
|
100
80
|
specification_version: 4
|
101
81
|
summary: Makes your background jobs interruptible and resumable.
|
102
82
|
test_files: []
|
data/.github/dependabot.yml
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
version: 2
|
2
|
-
|
3
|
-
updates:
|
4
|
-
|
5
|
-
- package-ecosystem: bundler
|
6
|
-
directory: '/'
|
7
|
-
versioning-strategy: increase
|
8
|
-
open-pull-requests-limit: 100
|
9
|
-
insecure-external-code-execution: allow
|
10
|
-
schedule:
|
11
|
-
interval: weekly
|
12
|
-
|
13
|
-
- package-ecosystem: github-actions
|
14
|
-
directory: '/'
|
15
|
-
schedule:
|
16
|
-
interval: daily
|
data/.github/workflows/ci.yml
DELETED
@@ -1,77 +0,0 @@
|
|
1
|
-
name: CI
|
2
|
-
|
3
|
-
on: [push, pull_request]
|
4
|
-
|
5
|
-
jobs:
|
6
|
-
build:
|
7
|
-
runs-on: ubuntu-latest
|
8
|
-
name: Ruby ${{ matrix.ruby }} | Rails ${{ matrix.rails }} | Gemfile ${{ matrix.gemfile }}
|
9
|
-
continue-on-error: ${{ matrix.rails == 'edge' }}
|
10
|
-
services:
|
11
|
-
redis:
|
12
|
-
image: redis
|
13
|
-
ports:
|
14
|
-
- 6379:6379
|
15
|
-
strategy:
|
16
|
-
fail-fast: false
|
17
|
-
matrix:
|
18
|
-
ruby: ["3.0", "3.1", "3.2", "3.3", "3.4"]
|
19
|
-
rails: ["6.1", "7.0", "7.1", "7.2", "8.0", "edge"]
|
20
|
-
gemfile: [rails_gems]
|
21
|
-
exclude:
|
22
|
-
- ruby: "3.0"
|
23
|
-
rails: "7.1"
|
24
|
-
- ruby: "3.0"
|
25
|
-
rails: "7.2"
|
26
|
-
- ruby: "3.0"
|
27
|
-
rails: "8.0"
|
28
|
-
- ruby: "3.0"
|
29
|
-
rails: "edge"
|
30
|
-
- ruby: "3.1"
|
31
|
-
rails: "8.0"
|
32
|
-
- ruby: "3.1"
|
33
|
-
rails: "edge"
|
34
|
-
- ruby: "3.2"
|
35
|
-
rails: "6.1"
|
36
|
-
- ruby: "3.3"
|
37
|
-
rails: "6.1"
|
38
|
-
- ruby: "3.4"
|
39
|
-
rails: "6.1"
|
40
|
-
- ruby: "3.4"
|
41
|
-
rails: "7.0"
|
42
|
-
include:
|
43
|
-
- ruby: head
|
44
|
-
rails: "edge"
|
45
|
-
gemfile: rails_gems
|
46
|
-
env:
|
47
|
-
BUNDLE_GEMFILE: gemfiles/${{ matrix.gemfile }}.gemfile
|
48
|
-
RAILS_VERSION: ${{ matrix.rails }}
|
49
|
-
steps:
|
50
|
-
- name: Check out code
|
51
|
-
uses: actions/checkout@v4
|
52
|
-
- name: Set up Ruby ${{ matrix.ruby }}
|
53
|
-
uses: ruby/setup-ruby@v1
|
54
|
-
with:
|
55
|
-
ruby-version: ${{ matrix.ruby }}
|
56
|
-
bundler-cache: true
|
57
|
-
- name: Start MySQL and create DB
|
58
|
-
run: |
|
59
|
-
sudo systemctl start mysql.service
|
60
|
-
mysql -uroot -h localhost -proot -e "CREATE DATABASE job_iteration_test;"
|
61
|
-
- name: Ruby tests
|
62
|
-
run: bundle exec rake test
|
63
|
-
|
64
|
-
lint:
|
65
|
-
runs-on: ubuntu-latest
|
66
|
-
name: Lint
|
67
|
-
steps:
|
68
|
-
- name: Check out code
|
69
|
-
uses: actions/checkout@v4
|
70
|
-
- name: Set up Ruby
|
71
|
-
uses: ruby/setup-ruby@v1
|
72
|
-
with:
|
73
|
-
bundler-cache: true
|
74
|
-
- name: Rubocop
|
75
|
-
run: bundle exec rubocop
|
76
|
-
- name: Documentation correctly written
|
77
|
-
run: bundle exec yardoc --no-output --no-save --no-stats --fail-on-warning
|
data/.github/workflows/cla.yml
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
name: Contributor License Agreement (CLA)
|
2
|
-
|
3
|
-
on:
|
4
|
-
pull_request_target:
|
5
|
-
types: [opened, synchronize]
|
6
|
-
issue_comment:
|
7
|
-
types: [created]
|
8
|
-
|
9
|
-
jobs:
|
10
|
-
cla:
|
11
|
-
runs-on: ubuntu-latest
|
12
|
-
if: |
|
13
|
-
(github.event.issue.pull_request
|
14
|
-
&& !github.event.issue.pull_request.merged_at
|
15
|
-
&& contains(github.event.comment.body, 'signed')
|
16
|
-
)
|
17
|
-
|| (github.event.pull_request && !github.event.pull_request.merged)
|
18
|
-
steps:
|
19
|
-
- uses: Shopify/shopify-cla-action@v1
|
20
|
-
with:
|
21
|
-
github-token: ${{ secrets.GITHUB_TOKEN }}
|
22
|
-
cla-token: ${{ secrets.CLA_TOKEN }}
|
data/.gitignore
DELETED
data/.rubocop.yml
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
inherit_gem:
|
2
|
-
rubocop-shopify: rubocop.yml
|
3
|
-
|
4
|
-
inherit_mode:
|
5
|
-
merge:
|
6
|
-
- Include
|
7
|
-
|
8
|
-
AllCops:
|
9
|
-
Include:
|
10
|
-
- '**/*.gemfile'
|
11
|
-
Lint/SuppressedException:
|
12
|
-
Exclude:
|
13
|
-
- lib/job-iteration.rb
|
14
|
-
Naming/FileName:
|
15
|
-
Exclude:
|
16
|
-
- lib/job-iteration.rb
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
3.4.2
|
data/.yardopts
DELETED
data/CODE_OF_CONDUCT.md
DELETED
@@ -1,74 +0,0 @@
|
|
1
|
-
# Contributor Covenant Code of Conduct
|
2
|
-
|
3
|
-
## Our Pledge
|
4
|
-
|
5
|
-
In the interest of fostering an open and welcoming environment, we as
|
6
|
-
contributors and maintainers pledge to making participation in our project and
|
7
|
-
our community a harassment-free experience for everyone, regardless of age, body
|
8
|
-
size, disability, ethnicity, gender identity and expression, level of experience,
|
9
|
-
nationality, personal appearance, race, religion, or sexual identity and
|
10
|
-
orientation.
|
11
|
-
|
12
|
-
## Our Standards
|
13
|
-
|
14
|
-
Examples of behavior that contributes to creating a positive environment
|
15
|
-
include:
|
16
|
-
|
17
|
-
* Using welcoming and inclusive language
|
18
|
-
* Being respectful of differing viewpoints and experiences
|
19
|
-
* Gracefully accepting constructive criticism
|
20
|
-
* Focusing on what is best for the community
|
21
|
-
* Showing empathy towards other community members
|
22
|
-
|
23
|
-
Examples of unacceptable behavior by participants include:
|
24
|
-
|
25
|
-
* The use of sexualized language or imagery and unwelcome sexual attention or
|
26
|
-
advances
|
27
|
-
* Trolling, insulting/derogatory comments, and personal or political attacks
|
28
|
-
* Public or private harassment
|
29
|
-
* Publishing others' private information, such as a physical or electronic
|
30
|
-
address, without explicit permission
|
31
|
-
* Other conduct which could reasonably be considered inappropriate in a
|
32
|
-
professional setting
|
33
|
-
|
34
|
-
## Our Responsibilities
|
35
|
-
|
36
|
-
Project maintainers are responsible for clarifying the standards of acceptable
|
37
|
-
behavior and are expected to take appropriate and fair corrective action in
|
38
|
-
response to any instances of unacceptable behavior.
|
39
|
-
|
40
|
-
Project maintainers have the right and responsibility to remove, edit, or
|
41
|
-
reject comments, commits, code, wiki edits, issues, and other contributions
|
42
|
-
that are not aligned to this Code of Conduct, or to ban temporarily or
|
43
|
-
permanently any contributor for other behaviors that they deem inappropriate,
|
44
|
-
threatening, offensive, or harmful.
|
45
|
-
|
46
|
-
## Scope
|
47
|
-
|
48
|
-
This Code of Conduct applies both within project spaces and in public spaces
|
49
|
-
when an individual is representing the project or its community. Examples of
|
50
|
-
representing a project or community include using an official project e-mail
|
51
|
-
address, posting via an official social media account, or acting as an appointed
|
52
|
-
representative at an online or offline event. Representation of a project may be
|
53
|
-
further defined and clarified by project maintainers.
|
54
|
-
|
55
|
-
## Enforcement
|
56
|
-
|
57
|
-
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
58
|
-
reported by contacting the project team at shatrov@me.com. All
|
59
|
-
complaints will be reviewed and investigated and will result in a response that
|
60
|
-
is deemed necessary and appropriate to the circumstances. The project team is
|
61
|
-
obligated to maintain confidentiality with regard to the reporter of an incident.
|
62
|
-
Further details of specific enforcement policies may be posted separately.
|
63
|
-
|
64
|
-
Project maintainers who do not follow or enforce the Code of Conduct in good
|
65
|
-
faith may face temporary or permanent repercussions as determined by other
|
66
|
-
members of the project's leadership.
|
67
|
-
|
68
|
-
## Attribution
|
69
|
-
|
70
|
-
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
|
71
|
-
available at [http://contributor-covenant.org/version/1/4][version]
|
72
|
-
|
73
|
-
[homepage]: http://contributor-covenant.org
|
74
|
-
[version]: http://contributor-covenant.org/version/1/4/
|
data/Gemfile
DELETED
@@ -1,45 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
source "https://rubygems.org"
|
4
|
-
|
5
|
-
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
6
|
-
|
7
|
-
# Specify your gem's dependencies in job-iteration.gemspec
|
8
|
-
gemspec
|
9
|
-
|
10
|
-
# for integration testing
|
11
|
-
gem "sidekiq"
|
12
|
-
gem "resque"
|
13
|
-
gem "delayed_job"
|
14
|
-
|
15
|
-
if defined?(@rails_gems_requirements) && @rails_gems_requirements
|
16
|
-
# We avoid the `gem "..."` syntax here so Dependabot doesn't try to update these gems.
|
17
|
-
[
|
18
|
-
"activejob",
|
19
|
-
"activerecord",
|
20
|
-
"railties",
|
21
|
-
].each { |name| gem name, @rails_gems_requirements }
|
22
|
-
else
|
23
|
-
# gem "activejob" # Set in gemspec
|
24
|
-
gem "activerecord"
|
25
|
-
gem "railties"
|
26
|
-
end
|
27
|
-
|
28
|
-
gem "mysql2", github: "brianmario/mysql2"
|
29
|
-
gem "globalid"
|
30
|
-
gem "i18n"
|
31
|
-
gem "redis"
|
32
|
-
|
33
|
-
gem "pry"
|
34
|
-
gem "mocha"
|
35
|
-
|
36
|
-
gem "rubocop-shopify", require: false
|
37
|
-
gem "yard"
|
38
|
-
gem "rake"
|
39
|
-
gem "csv" # required for Ruby 3.4+
|
40
|
-
|
41
|
-
# for unit testing optional sorbet support
|
42
|
-
gem "sorbet-runtime"
|
43
|
-
gem "tapioca"
|
44
|
-
|
45
|
-
gem "logger"
|
data/Gemfile.lock
DELETED
@@ -1,266 +0,0 @@
|
|
1
|
-
GIT
|
2
|
-
remote: https://github.com/brianmario/mysql2
|
3
|
-
revision: 57b8df188c963ae0e4d4e1123d3e9de2bbcab637
|
4
|
-
specs:
|
5
|
-
mysql2 (0.5.6)
|
6
|
-
bigdecimal
|
7
|
-
|
8
|
-
PATH
|
9
|
-
remote: .
|
10
|
-
specs:
|
11
|
-
job-iteration (1.10.0)
|
12
|
-
activejob (>= 6.1)
|
13
|
-
|
14
|
-
GEM
|
15
|
-
remote: https://rubygems.org/
|
16
|
-
specs:
|
17
|
-
actionpack (8.0.1)
|
18
|
-
actionview (= 8.0.1)
|
19
|
-
activesupport (= 8.0.1)
|
20
|
-
nokogiri (>= 1.8.5)
|
21
|
-
rack (>= 2.2.4)
|
22
|
-
rack-session (>= 1.0.1)
|
23
|
-
rack-test (>= 0.6.3)
|
24
|
-
rails-dom-testing (~> 2.2)
|
25
|
-
rails-html-sanitizer (~> 1.6)
|
26
|
-
useragent (~> 0.16)
|
27
|
-
actionview (8.0.1)
|
28
|
-
activesupport (= 8.0.1)
|
29
|
-
builder (~> 3.1)
|
30
|
-
erubi (~> 1.11)
|
31
|
-
rails-dom-testing (~> 2.2)
|
32
|
-
rails-html-sanitizer (~> 1.6)
|
33
|
-
activejob (8.0.1)
|
34
|
-
activesupport (= 8.0.1)
|
35
|
-
globalid (>= 0.3.6)
|
36
|
-
activemodel (8.0.1)
|
37
|
-
activesupport (= 8.0.1)
|
38
|
-
activerecord (8.0.1)
|
39
|
-
activemodel (= 8.0.1)
|
40
|
-
activesupport (= 8.0.1)
|
41
|
-
timeout (>= 0.4.0)
|
42
|
-
activesupport (8.0.1)
|
43
|
-
base64
|
44
|
-
benchmark (>= 0.3)
|
45
|
-
bigdecimal
|
46
|
-
concurrent-ruby (~> 1.0, >= 1.3.1)
|
47
|
-
connection_pool (>= 2.2.5)
|
48
|
-
drb
|
49
|
-
i18n (>= 1.6, < 2)
|
50
|
-
logger (>= 1.4.2)
|
51
|
-
minitest (>= 5.1)
|
52
|
-
securerandom (>= 0.3)
|
53
|
-
tzinfo (~> 2.0, >= 2.0.5)
|
54
|
-
uri (>= 0.13.1)
|
55
|
-
ast (2.4.2)
|
56
|
-
base64 (0.2.0)
|
57
|
-
benchmark (0.4.0)
|
58
|
-
bigdecimal (3.1.9)
|
59
|
-
builder (3.3.0)
|
60
|
-
coderay (1.1.3)
|
61
|
-
concurrent-ruby (1.3.5)
|
62
|
-
connection_pool (2.5.0)
|
63
|
-
crass (1.0.6)
|
64
|
-
csv (3.3.2)
|
65
|
-
date (3.4.1)
|
66
|
-
delayed_job (4.1.13)
|
67
|
-
activesupport (>= 3.0, < 9.0)
|
68
|
-
drb (2.2.1)
|
69
|
-
erubi (1.13.1)
|
70
|
-
globalid (1.2.1)
|
71
|
-
activesupport (>= 6.1)
|
72
|
-
i18n (1.14.7)
|
73
|
-
concurrent-ruby (~> 1.0)
|
74
|
-
io-console (0.8.0)
|
75
|
-
irb (1.15.1)
|
76
|
-
pp (>= 0.6.0)
|
77
|
-
rdoc (>= 4.0.0)
|
78
|
-
reline (>= 0.4.2)
|
79
|
-
json (2.10.1)
|
80
|
-
language_server-protocol (3.17.0.4)
|
81
|
-
lint_roller (1.1.0)
|
82
|
-
logger (1.6.6)
|
83
|
-
loofah (2.24.0)
|
84
|
-
crass (~> 1.0.2)
|
85
|
-
nokogiri (>= 1.12.0)
|
86
|
-
method_source (1.1.0)
|
87
|
-
minitest (5.25.4)
|
88
|
-
mocha (2.7.1)
|
89
|
-
ruby2_keywords (>= 0.0.5)
|
90
|
-
mono_logger (1.1.2)
|
91
|
-
multi_json (1.15.0)
|
92
|
-
mustermann (3.0.3)
|
93
|
-
ruby2_keywords (~> 0.0.1)
|
94
|
-
netrc (0.11.0)
|
95
|
-
nokogiri (1.18.3-arm64-darwin)
|
96
|
-
racc (~> 1.4)
|
97
|
-
nokogiri (1.18.3-x86_64-darwin)
|
98
|
-
racc (~> 1.4)
|
99
|
-
nokogiri (1.18.3-x86_64-linux-gnu)
|
100
|
-
racc (~> 1.4)
|
101
|
-
parallel (1.26.3)
|
102
|
-
parser (3.3.7.1)
|
103
|
-
ast (~> 2.4.1)
|
104
|
-
racc
|
105
|
-
pp (0.6.2)
|
106
|
-
prettyprint
|
107
|
-
prettyprint (0.2.0)
|
108
|
-
prism (1.3.0)
|
109
|
-
pry (0.15.2)
|
110
|
-
coderay (~> 1.1)
|
111
|
-
method_source (~> 1.0)
|
112
|
-
psych (5.2.3)
|
113
|
-
date
|
114
|
-
stringio
|
115
|
-
racc (1.8.1)
|
116
|
-
rack (3.1.12)
|
117
|
-
rack-protection (4.1.1)
|
118
|
-
base64 (>= 0.1.0)
|
119
|
-
logger (>= 1.6.0)
|
120
|
-
rack (>= 3.0.0, < 4)
|
121
|
-
rack-session (2.1.0)
|
122
|
-
base64 (>= 0.1.0)
|
123
|
-
rack (>= 3.0.0)
|
124
|
-
rack-test (2.2.0)
|
125
|
-
rack (>= 1.3)
|
126
|
-
rackup (2.2.1)
|
127
|
-
rack (>= 3)
|
128
|
-
rails-dom-testing (2.2.0)
|
129
|
-
activesupport (>= 5.0.0)
|
130
|
-
minitest
|
131
|
-
nokogiri (>= 1.6)
|
132
|
-
rails-html-sanitizer (1.6.2)
|
133
|
-
loofah (~> 2.21)
|
134
|
-
nokogiri (>= 1.15.7, != 1.16.7, != 1.16.6, != 1.16.5, != 1.16.4, != 1.16.3, != 1.16.2, != 1.16.1, != 1.16.0.rc1, != 1.16.0)
|
135
|
-
railties (8.0.1)
|
136
|
-
actionpack (= 8.0.1)
|
137
|
-
activesupport (= 8.0.1)
|
138
|
-
irb (~> 1.13)
|
139
|
-
rackup (>= 1.0.0)
|
140
|
-
rake (>= 12.2)
|
141
|
-
thor (~> 1.0, >= 1.2.2)
|
142
|
-
zeitwerk (~> 2.6)
|
143
|
-
rainbow (3.1.1)
|
144
|
-
rake (13.2.1)
|
145
|
-
rbi (0.3.0)
|
146
|
-
prism (~> 1.0)
|
147
|
-
rbs (>= 3.4.4)
|
148
|
-
sorbet-runtime (>= 0.5.9204)
|
149
|
-
rbs (3.8.1)
|
150
|
-
logger
|
151
|
-
rdoc (6.12.0)
|
152
|
-
psych (>= 4.0.0)
|
153
|
-
redis (5.4.0)
|
154
|
-
redis-client (>= 0.22.0)
|
155
|
-
redis-client (0.24.0)
|
156
|
-
connection_pool
|
157
|
-
redis-namespace (1.11.0)
|
158
|
-
redis (>= 4)
|
159
|
-
regexp_parser (2.10.0)
|
160
|
-
reline (0.6.0)
|
161
|
-
io-console (~> 0.5)
|
162
|
-
resque (2.7.0)
|
163
|
-
mono_logger (~> 1)
|
164
|
-
multi_json (~> 1.0)
|
165
|
-
redis-namespace (~> 1.6)
|
166
|
-
sinatra (>= 0.9.2)
|
167
|
-
rubocop (1.73.2)
|
168
|
-
json (~> 2.3)
|
169
|
-
language_server-protocol (~> 3.17.0.2)
|
170
|
-
lint_roller (~> 1.1.0)
|
171
|
-
parallel (~> 1.10)
|
172
|
-
parser (>= 3.3.0.2)
|
173
|
-
rainbow (>= 2.2.2, < 4.0)
|
174
|
-
regexp_parser (>= 2.9.3, < 3.0)
|
175
|
-
rubocop-ast (>= 1.38.0, < 2.0)
|
176
|
-
ruby-progressbar (~> 1.7)
|
177
|
-
unicode-display_width (>= 2.4.0, < 4.0)
|
178
|
-
rubocop-ast (1.38.1)
|
179
|
-
parser (>= 3.3.1.0)
|
180
|
-
rubocop-shopify (2.16.0)
|
181
|
-
rubocop (~> 1.62)
|
182
|
-
ruby-progressbar (1.13.0)
|
183
|
-
ruby2_keywords (0.0.5)
|
184
|
-
securerandom (0.4.1)
|
185
|
-
sidekiq (8.0.1)
|
186
|
-
connection_pool (>= 2.5.0)
|
187
|
-
json (>= 2.9.0)
|
188
|
-
logger (>= 1.6.2)
|
189
|
-
rack (>= 3.1.0)
|
190
|
-
redis-client (>= 0.23.2)
|
191
|
-
sinatra (4.1.1)
|
192
|
-
logger (>= 1.6.0)
|
193
|
-
mustermann (~> 3.0)
|
194
|
-
rack (>= 3.0.0, < 4)
|
195
|
-
rack-protection (= 4.1.1)
|
196
|
-
rack-session (>= 2.0.0, < 3)
|
197
|
-
tilt (~> 2.0)
|
198
|
-
sorbet (0.5.11915)
|
199
|
-
sorbet-static (= 0.5.11915)
|
200
|
-
sorbet-runtime (0.5.11915)
|
201
|
-
sorbet-static (0.5.11915-universal-darwin)
|
202
|
-
sorbet-static (0.5.11915-x86_64-linux)
|
203
|
-
sorbet-static-and-runtime (0.5.11915)
|
204
|
-
sorbet (= 0.5.11915)
|
205
|
-
sorbet-runtime (= 0.5.11915)
|
206
|
-
spoom (1.6.0)
|
207
|
-
erubi (>= 1.10.0)
|
208
|
-
prism (>= 0.28.0)
|
209
|
-
rbi (>= 0.2.3)
|
210
|
-
sorbet-static-and-runtime (>= 0.5.10187)
|
211
|
-
thor (>= 0.19.2)
|
212
|
-
stringio (3.1.5)
|
213
|
-
tapioca (0.16.11)
|
214
|
-
benchmark
|
215
|
-
bundler (>= 2.2.25)
|
216
|
-
netrc (>= 0.11.0)
|
217
|
-
parallel (>= 1.21.0)
|
218
|
-
rbi (~> 0.2)
|
219
|
-
sorbet-static-and-runtime (>= 0.5.11087)
|
220
|
-
spoom (>= 1.2.0)
|
221
|
-
thor (>= 1.2.0)
|
222
|
-
yard-sorbet
|
223
|
-
thor (1.3.2)
|
224
|
-
tilt (2.6.0)
|
225
|
-
timeout (0.4.3)
|
226
|
-
tzinfo (2.0.6)
|
227
|
-
concurrent-ruby (~> 1.0)
|
228
|
-
unicode-display_width (3.1.4)
|
229
|
-
unicode-emoji (~> 4.0, >= 4.0.4)
|
230
|
-
unicode-emoji (4.0.4)
|
231
|
-
uri (1.0.3)
|
232
|
-
useragent (0.16.11)
|
233
|
-
yard (0.9.37)
|
234
|
-
yard-sorbet (0.9.0)
|
235
|
-
sorbet-runtime
|
236
|
-
yard
|
237
|
-
zeitwerk (2.7.2)
|
238
|
-
|
239
|
-
PLATFORMS
|
240
|
-
arm64-darwin
|
241
|
-
x86_64-darwin
|
242
|
-
x86_64-linux
|
243
|
-
|
244
|
-
DEPENDENCIES
|
245
|
-
activerecord
|
246
|
-
csv
|
247
|
-
delayed_job
|
248
|
-
globalid
|
249
|
-
i18n
|
250
|
-
job-iteration!
|
251
|
-
logger
|
252
|
-
mocha
|
253
|
-
mysql2!
|
254
|
-
pry
|
255
|
-
railties
|
256
|
-
rake
|
257
|
-
redis
|
258
|
-
resque
|
259
|
-
rubocop-shopify
|
260
|
-
sidekiq
|
261
|
-
sorbet-runtime
|
262
|
-
tapioca
|
263
|
-
yard
|
264
|
-
|
265
|
-
BUNDLED WITH
|
266
|
-
2.6.1
|
data/Rakefile
DELETED
data/bin/setup
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
#!/bin/bash
|
2
|
-
|
3
|
-
if ! [ -x "$(command -v mysql)" ];
|
4
|
-
then
|
5
|
-
echo "Error: mysql is not installed." >&2
|
6
|
-
echo "You need to install mysql"
|
7
|
-
exit 1
|
8
|
-
else
|
9
|
-
echo "Installing dependencies"
|
10
|
-
bundle install --quiet
|
11
|
-
|
12
|
-
mysql.server start > /dev/null 2>&1
|
13
|
-
mysql -uroot job_iteration_test -e exit > /dev/null 2>&1
|
14
|
-
|
15
|
-
if [ $? -eq 0 ];
|
16
|
-
then
|
17
|
-
echo "Setup completed!"
|
18
|
-
else
|
19
|
-
echo "Creating job_iteration_test database"
|
20
|
-
mysql -uroot -e "CREATE DATABASE job_iteration_test" > /dev/null 2>&1
|
21
|
-
echo "Setup completed!"
|
22
|
-
fi
|
23
|
-
fi
|
data/bin/test
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
def main
|
5
|
-
begin
|
6
|
-
command = create_command
|
7
|
-
rescue ArgumentError => e
|
8
|
-
abort(e.message)
|
9
|
-
end
|
10
|
-
puts "Running #{command.join(" ")}"
|
11
|
-
system(*command)
|
12
|
-
end
|
13
|
-
|
14
|
-
def create_command
|
15
|
-
case ARGV.length
|
16
|
-
when 0
|
17
|
-
["bundle", "exec", "rake", "test"]
|
18
|
-
when 1
|
19
|
-
filename = ARGV[0]
|
20
|
-
["bundle", "exec", "rake", "test", "TEST=#{filename}"]
|
21
|
-
when 2
|
22
|
-
filename = ARGV[0]
|
23
|
-
test_name = ARGV[1]
|
24
|
-
test_name_with_underscores = test_name.tr(" ", "_")
|
25
|
-
test_name_pattern = "/#{Regexp.escape(test_name_with_underscores)}/"
|
26
|
-
["bundle", "exec", "rake", "test", "TEST=#{filename}", "TESTOPTS=\"--name=#{test_name_pattern} -v\""]
|
27
|
-
else
|
28
|
-
raise ArgumentError, "Too many arguments. Did you forget to put the test name in quotes?"
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
main
|
data/dev.yml
DELETED
@@ -1,54 +0,0 @@
|
|
1
|
-
# This file is for Shopify employees development environment.
|
2
|
-
# If you are an external contributor you don't have to bother with it.
|
3
|
-
name: job-iteration
|
4
|
-
|
5
|
-
up:
|
6
|
-
- packages:
|
7
|
-
- mysql_client
|
8
|
-
- ruby
|
9
|
-
- bundler
|
10
|
-
- mysql
|
11
|
-
- redis
|
12
|
-
- custom:
|
13
|
-
name: Create Job Iteration database
|
14
|
-
meet: mysql -uroot -h $MYSQL_HOST -P $MYSQL_PORT -e "CREATE DATABASE job_iteration_test"
|
15
|
-
met?: mysql -uroot -h $MYSQL_HOST -P $MYSQL_PORT job_iteration_test -e "SELECT 1" &> /dev/null
|
16
|
-
|
17
|
-
commands:
|
18
|
-
test:
|
19
|
-
run: bin/test "$@"
|
20
|
-
syntax:
|
21
|
-
optional: filename testnamepattern
|
22
|
-
aliases: [t]
|
23
|
-
desc: run tests
|
24
|
-
long_desc: |
|
25
|
-
{{bold:Default}}
|
26
|
-
=======
|
27
|
-
Run the entire test suite.
|
28
|
-
|
29
|
-
Examples:
|
30
|
-
{{command:dev test}}
|
31
|
-
{{command:dev t}}
|
32
|
-
|
33
|
-
{{bold:Run all tests in a file}}
|
34
|
-
========================
|
35
|
-
Include the file path.
|
36
|
-
|
37
|
-
Example:
|
38
|
-
{{command:dev test test/unit/iteration_test.rb}}
|
39
|
-
|
40
|
-
{{bold:Run a single test in a given file}}
|
41
|
-
========================
|
42
|
-
Include the file path and the name of the test you'd like to run.
|
43
|
-
|
44
|
-
Example:
|
45
|
-
{{command:dev test test/unit/iteration_test.rb test_that_it_has_a_version_number}}
|
46
|
-
|
47
|
-
{{bold:Run all tests in a given file whose name contains a string}}
|
48
|
-
========================
|
49
|
-
Include the file path and the string that the test names should contain.
|
50
|
-
|
51
|
-
Example:
|
52
|
-
{{command:dev test test/unit/iteration_test.rb version_number}}
|
53
|
-
style:
|
54
|
-
run: bundle exec rubocop -a
|
data/gemfiles/rails_gems.gemfile
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
rails_version = ENV.fetch("RAILS_VERSION")
|
4
|
-
@rails_gems_requirements = case rails_version
|
5
|
-
when "edge" then { github: "rails/rails", branch: "main" }
|
6
|
-
when /\A\d+\.\d+\z/ then "~> #{rails_version}.0"
|
7
|
-
else raise "Unsupported RAILS_VERSION: #{rails_version}"
|
8
|
-
end
|
9
|
-
|
10
|
-
eval_gemfile "../Gemfile"
|
11
|
-
|
12
|
-
# https://github.com/rails/rails/pull/44083
|
13
|
-
if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3.1") &&
|
14
|
-
rails_version != "edge" && Gem::Version.new(rails_version) < Gem::Version.new("7")
|
15
|
-
gem "net-imap", require: false
|
16
|
-
gem "net-pop", require: false
|
17
|
-
gem "net-smtp", require: false
|
18
|
-
end
|
@@ -1,128 +0,0 @@
|
|
1
|
-
`job-iteration` overrides the `perform` method of `ActiveJob::Base` to allow for iteration. The `perform` method preserves all the standard calling conventions of the original, but the way the subsequent methods work might differ from what one expects from an ActiveJob subclass.
|
2
|
-
|
3
|
-
The call sequence is usually 3 methods:
|
4
|
-
|
5
|
-
`perform -> build_enumerator -> each_iteration|each_batch`
|
6
|
-
|
7
|
-
In that sense `job-iteration` works like a framework (it calls your code) rather than like a library (that you call). When using jobs with parameters, the following rules of thumb are good to keep in mind.
|
8
|
-
|
9
|
-
### Jobs without arguments
|
10
|
-
|
11
|
-
Jobs without arguments do not pass anything into either `build_enumerator` or `each_iteration` except for the `cursor` which `job-iteration` persists by itself:
|
12
|
-
|
13
|
-
```ruby
|
14
|
-
class ArglessJob < ActiveJob::Base
|
15
|
-
include JobIteration::Iteration
|
16
|
-
|
17
|
-
def build_enumerator(cursor:)
|
18
|
-
# ...
|
19
|
-
end
|
20
|
-
|
21
|
-
def each_iteration(single_object_yielded_from_enumerator)
|
22
|
-
# ...
|
23
|
-
end
|
24
|
-
end
|
25
|
-
```
|
26
|
-
|
27
|
-
To enqueue the job:
|
28
|
-
|
29
|
-
```ruby
|
30
|
-
ArglessJob.perform_later
|
31
|
-
```
|
32
|
-
|
33
|
-
### Jobs with positional arguments
|
34
|
-
|
35
|
-
Jobs with positional arguments will have those arguments available to both `build_enumerator` and `each_iteration`:
|
36
|
-
|
37
|
-
```ruby
|
38
|
-
class ArgumentativeJob < ActiveJob::Base
|
39
|
-
include JobIteration::Iteration
|
40
|
-
|
41
|
-
def build_enumerator(arg1, arg2, arg3, cursor:)
|
42
|
-
# ...
|
43
|
-
end
|
44
|
-
|
45
|
-
def each_iteration(single_object_yielded_from_enumerator, arg1, arg2, arg3)
|
46
|
-
# ...
|
47
|
-
end
|
48
|
-
end
|
49
|
-
```
|
50
|
-
|
51
|
-
To enqueue the job:
|
52
|
-
|
53
|
-
```ruby
|
54
|
-
ArgumentativeJob.perform_later(_arg1 = "One", _arg2 = "Two", _arg3 = "Three")
|
55
|
-
```
|
56
|
-
|
57
|
-
### Jobs with keyword arguments
|
58
|
-
|
59
|
-
Jobs with keyword arguments will have the keyword arguments available to both `build_enumerator` and `each_iteration`, but these arguments come packaged into a Hash in both cases. You will need to `fetch` or `[]` your parameter from the `Hash` you get passed in:
|
60
|
-
|
61
|
-
```ruby
|
62
|
-
class ParameterizedJob < ActiveJob::Base
|
63
|
-
include JobIteration::Iteration
|
64
|
-
|
65
|
-
def build_enumerator(kwargs, cursor:)
|
66
|
-
name = kwargs.fetch(:name)
|
67
|
-
email = kwargs.fetch(:email)
|
68
|
-
# ...
|
69
|
-
end
|
70
|
-
|
71
|
-
def each_iteration(object_yielded_from_enumerator, kwargs)
|
72
|
-
name = kwargs.fetch(:name)
|
73
|
-
email = kwargs.fetch(:email)
|
74
|
-
# ...
|
75
|
-
end
|
76
|
-
end
|
77
|
-
```
|
78
|
-
|
79
|
-
To enqueue the job:
|
80
|
-
|
81
|
-
```ruby
|
82
|
-
ParameterizedJob.perform_later(name: "Jane", email: "jane@host.example")
|
83
|
-
```
|
84
|
-
|
85
|
-
Note that you cannot use `ruby2_keywords` at present, and the keyword arguments syntax is not supported in `each_iteration` / `build_enumerator`.
|
86
|
-
|
87
|
-
### Jobs with both positional and keyword arguments
|
88
|
-
|
89
|
-
Jobs with keyword arguments will have the keyword arguments available to both `build_enumerator` and `each_iteration`, but these arguments come packaged into a Hash in both cases. You will need to `fetch` or `[]` your parameter from the `Hash` you get passed in. Positional arguments get passed first and "unsplatted" (not combined into an array), the `Hash` containing keyword arguments comes after:
|
90
|
-
|
91
|
-
```ruby
|
92
|
-
class HighlyConfigurableGreetingJob < ActiveJob::Base
|
93
|
-
include JobIteration::Iteration
|
94
|
-
|
95
|
-
def build_enumerator(subject_line, kwargs, cursor:)
|
96
|
-
name = kwargs.fetch(:sender_name)
|
97
|
-
email = kwargs.fetch(:sender_email)
|
98
|
-
# ...
|
99
|
-
end
|
100
|
-
|
101
|
-
def each_iteration(object_yielded_from_enumerator, subject_line, kwargs)
|
102
|
-
name = kwargs.fetch(:sender_name)
|
103
|
-
email = kwargs.fetch(:sender_email)
|
104
|
-
# ...
|
105
|
-
end
|
106
|
-
end
|
107
|
-
```
|
108
|
-
|
109
|
-
To enqueue the job:
|
110
|
-
|
111
|
-
```ruby
|
112
|
-
HighlyConfigurableGreetingJob.perform_later(_subject_line = "Greetings everybody!", sender_name: "Jane", sender_email: "jane@host.example")
|
113
|
-
```
|
114
|
-
|
115
|
-
Note that you cannot use `ruby2_keywords` at present, and the keyword arguments syntax is not supported in `each_iteration` / `build_enumerator`.
|
116
|
-
|
117
|
-
### Returning (yielding) from enumerators
|
118
|
-
|
119
|
-
When defining a custom enumerator (see the [custom enumerator guide](custom-enumerator.md)) you need to yield two positional arguments from it: the object that will be the value for the current iteration (like a single ActiveModel instance, a single number...) and the value you want to be persisted as the `cursor` value should `job-iteration` decide to interrupt you after this iteration. Calling the enumerator with that cursor should return the next object after the one returned in this iteration. That new `cursor` value does not get passed to `each_iteration`:
|
120
|
-
|
121
|
-
```ruby
|
122
|
-
Enumerator.new do |yielder|
|
123
|
-
# In this case `cursor` is an Integer
|
124
|
-
cursor.upto(99999) do |offset|
|
125
|
-
yielder.yield(fetch_record_at(offset), offset)
|
126
|
-
end
|
127
|
-
end
|
128
|
-
```
|
data/guides/best-practices.md
DELETED
@@ -1,108 +0,0 @@
|
|
1
|
-
# Best practices
|
2
|
-
|
3
|
-
## Batch iteration
|
4
|
-
|
5
|
-
Regardless of the Active Record enumerator used in the task, the `job-iteration` gem loads records in batches of 100 (by default).
|
6
|
-
The following two tasks produce equivalent database queries,
|
7
|
-
however `RecordsJob` task allows for more frequent interruptions by doing just one thing in the `each_iteration` method.
|
8
|
-
|
9
|
-
```ruby
|
10
|
-
# bad
|
11
|
-
class BatchesJob < ApplicationJob
|
12
|
-
include JobIteration::Iteration
|
13
|
-
|
14
|
-
def build_enumerator(product_id, cursor:)
|
15
|
-
enumerator_builder.active_record_on_batches(
|
16
|
-
Comment.where(product_id: product_id),
|
17
|
-
cursor: cursor,
|
18
|
-
batch_size: 5,
|
19
|
-
)
|
20
|
-
end
|
21
|
-
|
22
|
-
def each_iteration(batch_of_comments, product_id)
|
23
|
-
batch_of_comments.each(&:destroy)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
# good
|
28
|
-
class RecordsJob < ApplicationJob
|
29
|
-
include JobIteration::Iteration
|
30
|
-
|
31
|
-
def build_enumerator(product_id, cursor:)
|
32
|
-
enumerator_builder.active_record_on_records(
|
33
|
-
Comment.where(product_id: product_id),
|
34
|
-
cursor: cursor,
|
35
|
-
batch_size: 5,
|
36
|
-
)
|
37
|
-
end
|
38
|
-
|
39
|
-
def each_iteration(comment, product_id)
|
40
|
-
comment.destroy
|
41
|
-
end
|
42
|
-
end
|
43
|
-
```
|
44
|
-
|
45
|
-
## Instrumentation
|
46
|
-
|
47
|
-
Iteration leverages [`ActiveSupport::Notifications`](https://guides.rubyonrails.org/active_support_instrumentation.html)
|
48
|
-
to notify you what it's doing. You can subscribe to the following events (listed in order of job lifecycle):
|
49
|
-
|
50
|
-
- `build_enumerator.iteration`
|
51
|
-
- `throttled.iteration` (when using ThrottleEnumerator)
|
52
|
-
- `nil_enumerator.iteration`
|
53
|
-
- `resumed.iteration`
|
54
|
-
- `each_iteration.iteration`
|
55
|
-
- `not_found.iteration`
|
56
|
-
- `interrupted.iteration`
|
57
|
-
- `completed.iteration`
|
58
|
-
|
59
|
-
All events have tags including the job class name and cursor position, some add the number of times interrupted and/or
|
60
|
-
total time the job spent running across interruptions.
|
61
|
-
|
62
|
-
```ruby
|
63
|
-
# config/initializers/instrumentation.rb
|
64
|
-
ActiveSupport::Notifications.monotonic_subscribe("each_iteration.iteration") do |_, started, finished, _, tags|
|
65
|
-
elapsed = finished - started
|
66
|
-
StatsD.distribution(
|
67
|
-
"iteration.each_iteration",
|
68
|
-
elapsed,
|
69
|
-
tags: { job_class: tags[:job_class]&.underscore }
|
70
|
-
)
|
71
|
-
|
72
|
-
if elapsed >= BackgroundQueue.max_iteration_runtime
|
73
|
-
Rails.logger.warn "[Iteration] job_class=#{tags[:job_class]} " \
|
74
|
-
"each_iteration runtime exceeded limit of #{BackgroundQueue.max_iteration_runtime}s"
|
75
|
-
end
|
76
|
-
end
|
77
|
-
```
|
78
|
-
|
79
|
-
## Max iteration time
|
80
|
-
|
81
|
-
As you may notice in the snippet above, at Shopify we enforce that `each_iteration` does not take longer than `BackgroundQueue.max_iteration_runtime`, which is set to `25` seconds.
|
82
|
-
|
83
|
-
We discourage longer iterations because jobs with a long `each_iteration` make interruptibility somewhat useless, as the infrastructure will have to wait longer for the job to interrupt.
|
84
|
-
|
85
|
-
## Max job runtime
|
86
|
-
|
87
|
-
If a job is supposed to have millions of iterations and you expect it to run for hours and days, it's still a good idea to sometimes interrupt the job even if there are no interruption signals coming from deploys or the infrastructure. At Shopify, we interrupt at least every 5 minutes to preserve **worker capacity**.
|
88
|
-
|
89
|
-
```ruby
|
90
|
-
JobIteration.max_job_runtime = 5.minutes # nil by default
|
91
|
-
```
|
92
|
-
|
93
|
-
Use this accessor to tweak how often you'd like the job to interrupt itself.
|
94
|
-
|
95
|
-
### Per job max job runtime
|
96
|
-
|
97
|
-
For more granular control, `job_iteration_max_job_runtime` can be set **per-job class**. This allows both incremental adoption, as well as using a conservative global setting, and an aggressive setting on a per-job basis.
|
98
|
-
|
99
|
-
```ruby
|
100
|
-
class MyJob < ApplicationJob
|
101
|
-
include JobIteration::Iteration
|
102
|
-
|
103
|
-
self.job_iteration_max_job_runtime = 3.minutes
|
104
|
-
|
105
|
-
# ...
|
106
|
-
```
|
107
|
-
|
108
|
-
This setting will be inherited by any child classes, although it can be further overridden. Note that no class can **increase** the `max_job_runtime` it has inherited; it can only be **decreased**. No job can increase its `max_job_runtime` beyond the global limit.
|
data/guides/custom-enumerator.md
DELETED
@@ -1,140 +0,0 @@
|
|
1
|
-
# Custom Enumerator
|
2
|
-
|
3
|
-
`Iteration` leverages the [Enumerator](https://ruby-doc.org/3.2.1/Enumerator.html) pattern from the Ruby standard library,
|
4
|
-
which allows us to use almost any resource as a collection to iterate.
|
5
|
-
|
6
|
-
Before writing an enumerator, it is important to understand [how Iteration works](iteration-how-it-works.md) and how
|
7
|
-
your enumerator will be used by it. An enumerator must `yield` two things in the following order as positional
|
8
|
-
arguments:
|
9
|
-
- An object to be processed in a job `each_iteration` method
|
10
|
-
- A cursor position, which `Iteration` will persist if `each_iteration` returns successfully and the job is forced to shut
|
11
|
-
down. It can be any data type your job backend can serialize and deserialize correctly.
|
12
|
-
|
13
|
-
A job that includes `Iteration` is first started with `nil` as the cursor. When resuming an interrupted job, `Iteration`
|
14
|
-
will deserialize the persisted cursor and pass it to the job's `build_enumerator` method, which your enumerator uses to
|
15
|
-
find objects that come _after_ the last successfully processed object. The [array enumerator](https://github.com/Shopify/job-iteration/blob/v1.3.6/lib/job-iteration/enumerator_builder.rb#L50-L67)
|
16
|
-
is a simple example which uses the array index as the cursor position.
|
17
|
-
|
18
|
-
In addition to the remainder of this guide, we recommend you read the implementation of the other enumerators that come with the library (`CsvEnumerator`, `ActiveRecordEnumerator`) to gain a better understanding of building enumerators.
|
19
|
-
|
20
|
-
## Enumerator with cursor
|
21
|
-
|
22
|
-
For a more complex example, consider this `Enumerator` that wraps a third party API (Stripe) for paginated iteration and
|
23
|
-
stores a string as the cursor position:
|
24
|
-
|
25
|
-
```ruby
|
26
|
-
class StripeListEnumerator
|
27
|
-
# @see https://stripe.com/docs/api/pagination
|
28
|
-
# @param resource [Stripe::APIResource] The type of Stripe object to request
|
29
|
-
# @param params [Hash] Query parameters for the request
|
30
|
-
# @param options [Hash] Request options, such as API key or version
|
31
|
-
# @param cursor [nil, String] The Stripe ID of the last item iterated over
|
32
|
-
def initialize(resource, params: {}, options: {}, cursor:)
|
33
|
-
pagination_params = {}
|
34
|
-
pagination_params[:starting_after] = cursor unless cursor.nil?
|
35
|
-
|
36
|
-
# The following line makes a request, consider adding your rate limiter here.
|
37
|
-
@list = resource.public_send(:list, params.merge(pagination_params), options)
|
38
|
-
end
|
39
|
-
|
40
|
-
def to_enumerator
|
41
|
-
to_enum(:each).lazy
|
42
|
-
end
|
43
|
-
|
44
|
-
private
|
45
|
-
|
46
|
-
# We yield our enumerator with the object id as the index so it is persisted
|
47
|
-
# as the cursor on the job. This allows us to properly set the
|
48
|
-
# `starting_after` parameter for the API request when resuming.
|
49
|
-
def each
|
50
|
-
loop do
|
51
|
-
@list.each do |item, _index|
|
52
|
-
# The first argument is what gets passed to `each_iteration`.
|
53
|
-
# The second argument (item.id) is going to be persisted as the cursor,
|
54
|
-
# it doesn't get passed to `each_iteration`.
|
55
|
-
yield item, item.id
|
56
|
-
end
|
57
|
-
|
58
|
-
# The following line makes a request, consider adding your rate limiter here.
|
59
|
-
@list = @list.next_page
|
60
|
-
|
61
|
-
break if @list.empty?
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
```
|
66
|
-
|
67
|
-
### Usage
|
68
|
-
|
69
|
-
Here we leverage the Stripe cursor pagination where the cursor is an ID of a specific item in the collection. The job
|
70
|
-
which uses such an `Enumerator` would then look like so:
|
71
|
-
|
72
|
-
```ruby
|
73
|
-
class LoadRefundsForChargeJob < ActiveJob::Base
|
74
|
-
include JobIteration::Iteration
|
75
|
-
|
76
|
-
# If you added your own rate limiting above, handle it here. For example:
|
77
|
-
# retry_on(MyRateLimiter::LimitExceededError, wait: 30.seconds, attempts: :unlimited)
|
78
|
-
# Use an exponential back-off strategy when Stripe's API returns errors.
|
79
|
-
|
80
|
-
def build_enumerator(charge_id, cursor:)
|
81
|
-
enumerator_builder.wrap(
|
82
|
-
StripeListEnumerator.new(
|
83
|
-
Stripe::Refund,
|
84
|
-
params: { charge: charge_id}, # "charge_id" will be a prefixed Stripe ID such as "chrg_123"
|
85
|
-
options: { api_key: "sk_test_123", stripe_version: "2018-01-18" },
|
86
|
-
cursor: cursor
|
87
|
-
).to_enumerator
|
88
|
-
)
|
89
|
-
end
|
90
|
-
|
91
|
-
# Note that in this case `each_iteration` will only receive one positional argument per iteration.
|
92
|
-
# If what your enumerator yields is a composite object you will need to unpack it yourself
|
93
|
-
# inside the `each_iteration`.
|
94
|
-
def each_iteration(stripe_refund, charge_id)
|
95
|
-
# ...
|
96
|
-
end
|
97
|
-
end
|
98
|
-
```
|
99
|
-
|
100
|
-
and you initiate the job with
|
101
|
-
|
102
|
-
```ruby
|
103
|
-
LoadRefundsForChargeJob.perform_later(charge_id = "chrg_345")
|
104
|
-
```
|
105
|
-
|
106
|
-
## Cursorless enumerator
|
107
|
-
|
108
|
-
Sometimes you can ignore the cursor. Consider the following custom `Enumerator` that takes items from a Redis list, which
|
109
|
-
is essentially a queue. Even if this job doesn't need to persist a cursor in order to resume, it can still use
|
110
|
-
`Iteration`'s signal handling to finish `each_iteration` and gracefully terminate.
|
111
|
-
|
112
|
-
```ruby
|
113
|
-
class RedisPopListJob < ActiveJob::Base
|
114
|
-
include JobIteration::Iteration
|
115
|
-
|
116
|
-
# @see https://redis.io/commands/lpop/
|
117
|
-
def build_enumerator(*)
|
118
|
-
@redis = Redis.new
|
119
|
-
enumerator_builder.wrap(
|
120
|
-
Enumerator.new do |yielder|
|
121
|
-
yielder.yield @redis.lpop(key), nil
|
122
|
-
end
|
123
|
-
)
|
124
|
-
end
|
125
|
-
|
126
|
-
def each_iteration(item_from_redis)
|
127
|
-
# ...
|
128
|
-
end
|
129
|
-
end
|
130
|
-
```
|
131
|
-
|
132
|
-
## Caveats
|
133
|
-
|
134
|
-
### Post-`yield` code
|
135
|
-
|
136
|
-
Code that is written after the `yield` in a custom enumerator is not guaranteed to execute. In the case that a job is
|
137
|
-
forced to exit (i.e. `job_should_exit?` is true), then the job is re-enqueued during the yield and the rest of the code in
|
138
|
-
the enumerator does not run. You can follow that logic
|
139
|
-
[here](https://github.com/Shopify/job-iteration/blob/v1.3.6/lib/job-iteration/iteration.rb#L161-L165) and
|
140
|
-
[here](https://github.com/Shopify/job-iteration/blob/v1.3.6/lib/job-iteration/iteration.rb#L131-L143)
|
@@ -1,51 +0,0 @@
|
|
1
|
-
# Iteration: how it works
|
2
|
-
|
3
|
-
The main idea behind Iteration is to provide an API to describe jobs in an interruptible manner, in contrast with implementing one massive `#perform` method that is impossible to interrupt safely.
|
4
|
-
|
5
|
-
Exposing the enumerator and the action to apply allows us to keep a cursor and interrupt between iterations. Let's see what this looks like with an ActiveRecord relation (and Enumerator).
|
6
|
-
|
7
|
-
1. `build_enumerator` is called, which constructs `ActiveRecordEnumerator` from an ActiveRecord relation (`Product.all`)
|
8
|
-
2. The first batch of records is loaded:
|
9
|
-
|
10
|
-
```sql
|
11
|
-
SELECT `products`.* FROM `products` ORDER BY products.id LIMIT 100
|
12
|
-
```
|
13
|
-
|
14
|
-
3. The job iterates over two records of the relation and then receives `SIGTERM` (graceful termination signal) caused by a deploy.
|
15
|
-
4. The signal handler sets a flag that makes `job_should_exit?` return `true`.
|
16
|
-
5. After the last iteration is completed, we will check `job_should_exit?` which now returns `true`.
|
17
|
-
6. The job stops iterating and pushes itself back to the queue, with the latest `cursor_position` value.
|
18
|
-
7. Next time when the job is taken from the queue, we'll load records starting from the last primary key that was processed:
|
19
|
-
|
20
|
-
```sql
|
21
|
-
SELECT `products`.* FROM `products` WHERE (products.id > 2) ORDER BY products.id LIMIT 100
|
22
|
-
```
|
23
|
-
|
24
|
-
## Exceptions inside `each_iteration`
|
25
|
-
|
26
|
-
Unrescued exceptions inside the `each_iteration` block are handled the same way as exceptions occurring in `perform` for a regular Active Job subclass, meaning you need to configure it to retry using [`retry_on`](https://api.rubyonrails.org/classes/ActiveJob/Exceptions/ClassMethods.html#method-i-retry_on) or manually call [`retry_job`](https://api.rubyonrails.org/classes/ActiveJob/Exceptions.html#method-i-retry_job). The job will re-enqueue itself with the last successful cursor, the iteration that failed will be retried with the same parameters and the cursor will only move if that iteration succeeds. This behaviour may be enough for intermittent errors, such as network connection failures, but if your execution is deterministic and you have an error, subsequent iterations will never run.
|
27
|
-
|
28
|
-
In other words, if you are trying to process 100 records but the job consistently fails on the 61st, only the first 60 will be processed and the job will try to process the 61st record until retries are exhausted.
|
29
|
-
|
30
|
-
If no retries are configured or retries are exhausted, Active Job 'bubbles up' the exception to the job backend. Retries by the backend (e.g. Sidekiq) are not supported, meaning that jobs retried by the job backend instead of Active Job will restart from the beginning.
|
31
|
-
|
32
|
-
## Stopping a job
|
33
|
-
|
34
|
-
Because jobs typically retry when exceptions are thrown, there is a special mechanism to fully stop a job that still has iterations remaining. To do this, you can `throw(:abort)`. This is then caught by job-iteration and signals that the job should complete now, regardless of its iteration state.
|
35
|
-
|
36
|
-
## Signals
|
37
|
-
|
38
|
-
It's critical to know [UNIX signals](https://www.tutorialspoint.com/unix/unix-signals-traps.htm) in order to understand how interruption works. There are two main signals that Sidekiq and Resque use: `SIGTERM` and `SIGKILL`. `SIGTERM` is the graceful termination signal which means that the process should exit _soon_, not immediately. For Iteration, it means that we have time to wait for the last iteration to finish and to push the job back to the queue with the last cursor position.
|
39
|
-
`SIGTERM` is what allows Iteration to work. In contrast, `SIGKILL` means immediate exit. It doesn't let the worker terminate gracefully, instead it will drop the job and exit as soon as possible.
|
40
|
-
|
41
|
-
Most of the deploy strategies (Kubernetes, Heroku, Capistrano) send `SIGTERM` before shutting down a node, then wait for a timeout (usually from 30 seconds to a minute) to send `SIGKILL` if the process has not terminated yet.
|
42
|
-
|
43
|
-
Further reading: [Sidekiq signals](https://github.com/mperham/sidekiq/wiki/Signals).
|
44
|
-
|
45
|
-
## Enumerators
|
46
|
-
|
47
|
-
In the early versions of Iteration, `build_enumerator` used to return ActiveRecord relations directly, and we would infer the Enumerator based on the type of object. We used to support ActiveRecord relations, arrays and CSVs. This made it hard to add support for other types of enumerations, and it was easy for developers to make mistakes and return an array of ActiveRecord objects, and for us to then treat that as an array instead of as an ActiveRecord relation.
|
48
|
-
|
49
|
-
The current version of Iteration supports _any_ Enumerator. We expose helpers to build common enumerators conveniently (`enumerator_builder.active_record_on_records`), but it's up to a developer to implement [a custom Enumerator](custom-enumerator.md).
|
50
|
-
|
51
|
-
Further reading: [ruby-doc](https://ruby-doc.org/3.2.1/Enumerator.html), [a great post about Enumerators](http://blog.arkency.com/2014/01/ruby-to-enum-for-enumerator/).
|
data/guides/throttling.md
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
Iteration comes with a special wrapper enumerator that allows you to throttle iterations based on an external signal (e.g. database health).
|
2
|
-
|
3
|
-
Consider this example:
|
4
|
-
|
5
|
-
```ruby
|
6
|
-
class InactiveAccountDeleteJob < ActiveJob::Base
|
7
|
-
include JobIteration::Iteration
|
8
|
-
|
9
|
-
def build_enumerator(_params, cursor:)
|
10
|
-
enumerator_builder.active_record_on_batches(
|
11
|
-
Account.inactive,
|
12
|
-
cursor: cursor
|
13
|
-
)
|
14
|
-
end
|
15
|
-
|
16
|
-
def each_iteration(batch, _params)
|
17
|
-
Account.where(id: batch.map(&:id)).delete_all
|
18
|
-
end
|
19
|
-
end
|
20
|
-
```
|
21
|
-
|
22
|
-
For an app that keeps track of customer accounts, it's typical to purge old data that's no longer relevant for storage.
|
23
|
-
|
24
|
-
At the same time, if you've got a lot of DB writes to perform, this can cause extra load on the database and slow down other parts of your service.
|
25
|
-
|
26
|
-
You can change `build_enumerator` to wrap enumeration on DB rows into a throttle enumerator, which takes the signal as a proc and enqueues the job for later if the proc returns `true`.
|
27
|
-
|
28
|
-
```ruby
|
29
|
-
def build_enumerator(_params, cursor:)
|
30
|
-
enumerator_builder.build_throttle_enumerator(
|
31
|
-
enumerator_builder.active_record_on_batches(
|
32
|
-
Account.inactive,
|
33
|
-
cursor: cursor
|
34
|
-
),
|
35
|
-
throttle_on: -> { DatabaseStatus.unhealthy? },
|
36
|
-
backoff: 30.seconds
|
37
|
-
)
|
38
|
-
end
|
39
|
-
```
|
40
|
-
|
41
|
-
If you want to apply throttling on all jobs, you can subclass your own EnumeratorBuilder and override the default
|
42
|
-
enumerator builder. The builder always wraps the returned enumerators from `build_enumerator`:
|
43
|
-
|
44
|
-
```ruby
|
45
|
-
class MyOwnBuilder < JobIteration::EnumeratorBuilder
|
46
|
-
class Wrapper < Enumerator
|
47
|
-
class << self
|
48
|
-
def wrap(_builder, enum)
|
49
|
-
ThrottleEnumerator.new(
|
50
|
-
enum,
|
51
|
-
nil,
|
52
|
-
throttle_on: -> { DatabaseStatus.unhealthy? },
|
53
|
-
backoff: 30.seconds
|
54
|
-
)
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
JobIteration.enumerator_builder = MyOwnBuilder
|
61
|
-
```
|
62
|
-
|
63
|
-
Note that it's up to you to implement `DatabaseStatus.unhealthy?` that works for your database choice. At Shopify, a helper like `DatabaseStatus` checks the following MySQL metrics:
|
64
|
-
|
65
|
-
* Replication lag across all regions
|
66
|
-
* DB threads
|
67
|
-
* DB is available for writes (otherwise indicates a failover happening)
|
68
|
-
* [Semian](https://github.com/shopify/semian) open circuits
|