job-iteration 1.1.12 → 1.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -2
- data/README.md +22 -1
- data/lib/job-iteration/active_record_batch_enumerator.rb +114 -0
- data/lib/job-iteration/enumerator_builder.rb +18 -1
- data/lib/job-iteration/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c1707731e6bf03617bdbaa60bc2ddeb9652f34534a8fe0d69b26e233c790d469
|
4
|
+
data.tar.gz: a981f1230a7784e4e6673c0d2e4c0642f4a1d3c48baeb3039ca6e41daced9107
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: db396697d25d4b0450b69d2a58d68e1e247a074f02c9a5887746184789f2aca2569e345ef0d88fb5e142fcaa1c0586169ef91b7bc0ffe2ef35796f544e0cafa1
|
7
|
+
data.tar.gz: 757b0c074830cbe97d49fd6e84c59ade45c4b5ecdc57b52cf17ea5539e9f2bbffb8ea929804da531c217bf43bbc4f67f726e9d0f1a9a391e32e35837b7edf624
|
data/CHANGELOG.md
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
### Master (unreleased)
|
2
2
|
|
3
|
-
|
3
|
+
## v1.1.13 (May 20, 2021)
|
4
4
|
|
5
|
-
####
|
5
|
+
#### New feature
|
6
|
+
- [91](https://github.com/Shopify/job-iteration/pull/91) - Add enumerator yielding batches as Active Record Relations
|
6
7
|
|
7
8
|
## v1.1.12 (April 19, 2021)
|
8
9
|
|
data/README.md
CHANGED
@@ -77,7 +77,28 @@ class BatchesJob < ApplicationJob
|
|
77
77
|
|
78
78
|
def each_iteration(batch_of_comments, product_id)
|
79
79
|
# batch_of_comments will contain batches of 100 records
|
80
|
-
|
80
|
+
batch_of_comments.each do |comment|
|
81
|
+
DeleteCommentJob.perform_later(comment)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
```
|
86
|
+
|
87
|
+
```ruby
|
88
|
+
class BatchesAsRelationJob < ApplicationJob
|
89
|
+
include JobIteration::Iteration
|
90
|
+
|
91
|
+
def build_enumerator(product_id, cursor:)
|
92
|
+
enumerator_builder.active_record_on_batch_relations(
|
93
|
+
Product.find(product_id).comments,
|
94
|
+
cursor: cursor,
|
95
|
+
batch_size: 100,
|
96
|
+
)
|
97
|
+
end
|
98
|
+
|
99
|
+
def each_iteration(batch_of_comments, product_id)
|
100
|
+
# batch_of_comments will be a Comment::ActiveRecord_Relation
|
101
|
+
batch_of_comments.update_all(deleted: true)
|
81
102
|
end
|
82
103
|
end
|
83
104
|
```
|
data/lib/job-iteration/active_record_batch_enumerator.rb
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JobIteration
|
4
|
+
# Builds Batch Enumerator based on ActiveRecord Relation.
|
5
|
+
# @see EnumeratorBuilder
|
6
|
+
class ActiveRecordBatchEnumerator
|
7
|
+
include Enumerable
|
8
|
+
|
9
|
+
SQL_DATETIME_WITH_NSEC = "%Y-%m-%d %H:%M:%S.%N"
|
10
|
+
|
11
|
+
def initialize(relation, columns: nil, batch_size: 100, cursor: nil)
|
12
|
+
@batch_size = batch_size
|
13
|
+
@primary_key = "#{relation.table_name}.#{relation.primary_key}"
|
14
|
+
@columns = Array(columns&.map(&:to_s) || @primary_key)
|
15
|
+
@primary_key_index = @columns.index(@primary_key) || @columns.index(relation.primary_key)
|
16
|
+
@pluck_columns = if @primary_key_index
|
17
|
+
@columns
|
18
|
+
else
|
19
|
+
@columns.dup << @primary_key
|
20
|
+
end
|
21
|
+
@cursor = Array.wrap(cursor)
|
22
|
+
raise ArgumentError, "Must specify at least one column" if @columns.empty?
|
23
|
+
if relation.joins_values.present? && !@columns.all? { |column| column.to_s.include?(".") }
|
24
|
+
raise ArgumentError, "You need to specify fully-qualified columns if you join a table"
|
25
|
+
end
|
26
|
+
|
27
|
+
if relation.arel.orders.present? || relation.arel.taken.present?
|
28
|
+
raise ConditionNotSupportedError
|
29
|
+
end
|
30
|
+
|
31
|
+
@base_relation = relation.reorder(@columns.join(","))
|
32
|
+
end
|
33
|
+
|
34
|
+
def each
|
35
|
+
return to_enum { size } unless block_given?
|
36
|
+
while (relation = next_batch)
|
37
|
+
break if @cursor.nil?
|
38
|
+
yield relation, cursor_value
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def size
|
43
|
+
@base_relation.count
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
|
48
|
+
def next_batch
|
49
|
+
relation = @base_relation.limit(@batch_size)
|
50
|
+
if conditions.any?
|
51
|
+
relation = relation.where(*conditions)
|
52
|
+
end
|
53
|
+
|
54
|
+
cursor_values, ids = relation.uncached do
|
55
|
+
pluck_columns(relation)
|
56
|
+
end
|
57
|
+
|
58
|
+
cursor = cursor_values.last
|
59
|
+
return unless cursor.present?
|
60
|
+
# The primary key was plucked, but original cursor did not include it, so we should remove it
|
61
|
+
cursor.pop unless @primary_key_index
|
62
|
+
@cursor = Array.wrap(cursor)
|
63
|
+
|
64
|
+
# Yields relations by selecting the primary keys of records in the batch.
|
65
|
+
# Post.where(published: nil) results in an enumerator of relations like: Post.where(published: nil).where(id: batch_of_ids)
|
66
|
+
@base_relation.where(@primary_key => ids)
|
67
|
+
end
|
68
|
+
|
69
|
+
def pluck_columns(relation)
|
70
|
+
if @pluck_columns.size == 1 # only the primary key
|
71
|
+
column_values = relation.pluck(*@pluck_columns)
|
72
|
+
return [column_values, column_values]
|
73
|
+
end
|
74
|
+
|
75
|
+
column_values = relation.pluck(*@pluck_columns)
|
76
|
+
primary_key_index = @primary_key_index || -1
|
77
|
+
primary_key_values = column_values.map { |values| values[primary_key_index] }
|
78
|
+
|
79
|
+
serialize_column_values!(column_values)
|
80
|
+
[column_values, primary_key_values]
|
81
|
+
end
|
82
|
+
|
83
|
+
def cursor_value
|
84
|
+
return @cursor.first if @cursor.size == 1
|
85
|
+
@cursor
|
86
|
+
end
|
87
|
+
|
88
|
+
def conditions
|
89
|
+
column_index = @cursor.size - 1
|
90
|
+
column = @columns[column_index]
|
91
|
+
where_clause = if @columns.size == @cursor.size
|
92
|
+
"#{column} > ?"
|
93
|
+
else
|
94
|
+
"#{column} >= ?"
|
95
|
+
end
|
96
|
+
while column_index > 0
|
97
|
+
column_index -= 1
|
98
|
+
column = @columns[column_index]
|
99
|
+
where_clause = "#{column} > ? OR (#{column} = ? AND (#{where_clause}))"
|
100
|
+
end
|
101
|
+
ret = @cursor.reduce([where_clause]) { |params, value| params << value << value }
|
102
|
+
ret.pop
|
103
|
+
ret
|
104
|
+
end
|
105
|
+
|
106
|
+
def serialize_column_values!(column_values)
|
107
|
+
column_values.map! { |values| values.map! { |value| column_value(value) } }
|
108
|
+
end
|
109
|
+
|
110
|
+
def column_value(value)
|
111
|
+
value.is_a?(Time) ? value.strftime(SQL_DATETIME_WITH_NSEC) : value
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
data/lib/job-iteration/enumerator_builder.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require_relative "./active_record_batch_enumerator"
|
2
3
|
require_relative "./active_record_enumerator"
|
3
4
|
require_relative "./csv_enumerator"
|
4
5
|
require_relative "./throttle_enumerator"
|
@@ -86,6 +87,11 @@ module JobIteration
|
|
86
87
|
# WHERE (created_at > '$LAST_CREATED_AT_CURSOR'
|
87
88
|
# OR (created_at = '$LAST_CREATED_AT_CURSOR' AND (id > '$LAST_ID_CURSOR')))
|
88
89
|
# ORDER BY created_at, id LIMIT 100
|
90
|
+
#
|
91
|
+
# As a result of this query pattern, if the values in these columns change for the records in scope during
|
92
|
+
# iteration, they may be skipped or yielded multiple times depending on the nature of the update and the
|
93
|
+
# cursor's value. If the value gets updated to a greater value than the cursor's value, it will get yielded
|
94
|
+
# again. Similarly, if the value gets updated to a lesser value than the cursor's value, it will get skipped.
|
89
95
|
def build_active_record_enumerator_on_records(scope, cursor:, **args)
|
90
96
|
enum = build_active_record_enumerator(
|
91
97
|
scope,
|
@@ -95,7 +101,7 @@ module JobIteration
|
|
95
101
|
wrap(self, enum)
|
96
102
|
end
|
97
103
|
|
98
|
-
# Builds Enumerator from Active Record Relation and enumerates on batches.
|
104
|
+
# Builds Enumerator from Active Record Relation and enumerates on batches of records.
|
99
105
|
# Each Enumerator tick moves the cursor +batch_size+ rows forward.
|
100
106
|
#
|
101
107
|
# +batch_size:+ sets how many records will be fetched in one batch. Defaults to 100.
|
@@ -110,6 +116,16 @@ module JobIteration
|
|
110
116
|
wrap(self, enum)
|
111
117
|
end
|
112
118
|
|
119
|
+
# Builds Enumerator from Active Record Relation and enumerates on batches, yielding Active Record Relations.
|
120
|
+
# See documentation for #build_active_record_enumerator_on_batches.
|
121
|
+
def build_active_record_enumerator_on_batch_relations(scope, cursor:, **args)
|
122
|
+
JobIteration::ActiveRecordBatchEnumerator.new(
|
123
|
+
scope,
|
124
|
+
cursor: cursor,
|
125
|
+
**args
|
126
|
+
).each
|
127
|
+
end
|
128
|
+
|
113
129
|
def build_throttle_enumerator(enum, throttle_on:, backoff:)
|
114
130
|
JobIteration::ThrottleEnumerator.new(
|
115
131
|
enum,
|
@@ -124,6 +140,7 @@ module JobIteration
|
|
124
140
|
alias_method :array, :build_array_enumerator
|
125
141
|
alias_method :active_record_on_records, :build_active_record_enumerator_on_records
|
126
142
|
alias_method :active_record_on_batches, :build_active_record_enumerator_on_batches
|
143
|
+
alias_method :active_record_on_batch_relations, :build_active_record_enumerator_on_batch_relations
|
127
144
|
alias_method :throttle, :build_throttle_enumerator
|
128
145
|
|
129
146
|
private
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: job-iteration
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.12
|
4
|
+
version: 1.1.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-04-19 00:00:00.000000000 Z
|
11
|
+
date: 2021-05-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -66,6 +66,7 @@ files:
|
|
66
66
|
- guides/throttling.md
|
67
67
|
- job-iteration.gemspec
|
68
68
|
- lib/job-iteration.rb
|
69
|
+
- lib/job-iteration/active_record_batch_enumerator.rb
|
69
70
|
- lib/job-iteration/active_record_cursor.rb
|
70
71
|
- lib/job-iteration/active_record_enumerator.rb
|
71
72
|
- lib/job-iteration/csv_enumerator.rb
|
@@ -98,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
98
99
|
- !ruby/object:Gem::Version
|
99
100
|
version: '0'
|
100
101
|
requirements: []
|
101
|
-
rubygems_version: 3.
|
102
|
+
rubygems_version: 3.2.17
|
102
103
|
signing_key:
|
103
104
|
specification_version: 4
|
104
105
|
summary: Makes your background jobs interruptible and resumable.
|