job-iteration 1.1.12 → 1.1.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -2
- data/README.md +22 -1
- data/lib/job-iteration/active_record_batch_enumerator.rb +114 -0
- data/lib/job-iteration/enumerator_builder.rb +18 -1
- data/lib/job-iteration/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c1707731e6bf03617bdbaa60bc2ddeb9652f34534a8fe0d69b26e233c790d469
|
4
|
+
data.tar.gz: a981f1230a7784e4e6673c0d2e4c0642f4a1d3c48baeb3039ca6e41daced9107
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: db396697d25d4b0450b69d2a58d68e1e247a074f02c9a5887746184789f2aca2569e345ef0d88fb5e142fcaa1c0586169ef91b7bc0ffe2ef35796f544e0cafa1
|
7
|
+
data.tar.gz: 757b0c074830cbe97d49fd6e84c59ade45c4b5ecdc57b52cf17ea5539e9f2bbffb8ea929804da531c217bf43bbc4f67f726e9d0f1a9a391e32e35837b7edf624
|
data/CHANGELOG.md
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
### Master (unreleased)
|
2
2
|
|
3
|
-
|
3
|
+
## v1.1.13 (May 20, 2021)
|
4
4
|
|
5
|
-
####
|
5
|
+
#### New feature
|
6
|
+
- [91](https://github.com/Shopify/job-iteration/pull/91) - Add enumerator yielding batches as Active Record Relations
|
6
7
|
|
7
8
|
## v1.1.12 (April 19, 2021)
|
8
9
|
|
data/README.md
CHANGED
@@ -77,7 +77,28 @@ class BatchesJob < ApplicationJob
|
|
77
77
|
|
78
78
|
def each_iteration(batch_of_comments, product_id)
|
79
79
|
# batch_of_comments will contain batches of 100 records
|
80
|
-
|
80
|
+
batch_of_comments.each do |comment|
|
81
|
+
DeleteCommentJob.perform_later(comment)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
```
|
86
|
+
|
87
|
+
```ruby
|
88
|
+
class BatchesAsRelationJob < ApplicationJob
|
89
|
+
include JobIteration::Iteration
|
90
|
+
|
91
|
+
def build_enumerator(product_id, cursor:)
|
92
|
+
enumerator_builder.active_record_on_batch_relations(
|
93
|
+
Product.find(product_id).comments,
|
94
|
+
cursor: cursor,
|
95
|
+
batch_size: 100,
|
96
|
+
)
|
97
|
+
end
|
98
|
+
|
99
|
+
def each_iteration(batch_of_comments, product_id)
|
100
|
+
# batch_of_comments will be a Comment::ActiveRecord_Relation
|
101
|
+
batch_of_comments.update_all(deleted: true)
|
81
102
|
end
|
82
103
|
end
|
83
104
|
```
|
data/lib/job-iteration/active_record_batch_enumerator.rb
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JobIteration
|
4
|
+
# Builds Batch Enumerator based on ActiveRecord Relation.
|
5
|
+
# @see EnumeratorBuilder
|
6
|
+
class ActiveRecordBatchEnumerator
|
7
|
+
include Enumerable
|
8
|
+
|
9
|
+
SQL_DATETIME_WITH_NSEC = "%Y-%m-%d %H:%M:%S.%N"
|
10
|
+
|
11
|
+
def initialize(relation, columns: nil, batch_size: 100, cursor: nil)
|
12
|
+
@batch_size = batch_size
|
13
|
+
@primary_key = "#{relation.table_name}.#{relation.primary_key}"
|
14
|
+
@columns = Array(columns&.map(&:to_s) || @primary_key)
|
15
|
+
@primary_key_index = @columns.index(@primary_key) || @columns.index(relation.primary_key)
|
16
|
+
@pluck_columns = if @primary_key_index
|
17
|
+
@columns
|
18
|
+
else
|
19
|
+
@columns.dup << @primary_key
|
20
|
+
end
|
21
|
+
@cursor = Array.wrap(cursor)
|
22
|
+
raise ArgumentError, "Must specify at least one column" if @columns.empty?
|
23
|
+
if relation.joins_values.present? && !@columns.all? { |column| column.to_s.include?(".") }
|
24
|
+
raise ArgumentError, "You need to specify fully-qualified columns if you join a table"
|
25
|
+
end
|
26
|
+
|
27
|
+
if relation.arel.orders.present? || relation.arel.taken.present?
|
28
|
+
raise ConditionNotSupportedError
|
29
|
+
end
|
30
|
+
|
31
|
+
@base_relation = relation.reorder(@columns.join(","))
|
32
|
+
end
|
33
|
+
|
34
|
+
def each
|
35
|
+
return to_enum { size } unless block_given?
|
36
|
+
while (relation = next_batch)
|
37
|
+
break if @cursor.nil?
|
38
|
+
yield relation, cursor_value
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def size
|
43
|
+
@base_relation.count
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
|
48
|
+
def next_batch
|
49
|
+
relation = @base_relation.limit(@batch_size)
|
50
|
+
if conditions.any?
|
51
|
+
relation = relation.where(*conditions)
|
52
|
+
end
|
53
|
+
|
54
|
+
cursor_values, ids = relation.uncached do
|
55
|
+
pluck_columns(relation)
|
56
|
+
end
|
57
|
+
|
58
|
+
cursor = cursor_values.last
|
59
|
+
return unless cursor.present?
|
60
|
+
# The primary key was plucked, but original cursor did not include it, so we should remove it
|
61
|
+
cursor.pop unless @primary_key_index
|
62
|
+
@cursor = Array.wrap(cursor)
|
63
|
+
|
64
|
+
# Yields relations by selecting the primary keys of records in the batch.
|
65
|
+
# Post.where(published: nil) results in an enumerator of relations like: Post.where(ids: batch_of_ids)
|
66
|
+
@base_relation.where(@primary_key => ids)
|
67
|
+
end
|
68
|
+
|
69
|
+
def pluck_columns(relation)
|
70
|
+
if @pluck_columns.size == 1 # only the primary key
|
71
|
+
column_values = relation.pluck(*@pluck_columns)
|
72
|
+
return [column_values, column_values]
|
73
|
+
end
|
74
|
+
|
75
|
+
column_values = relation.pluck(*@pluck_columns)
|
76
|
+
primary_key_index = @primary_key_index || -1
|
77
|
+
primary_key_values = column_values.map { |values| values[primary_key_index] }
|
78
|
+
|
79
|
+
serialize_column_values!(column_values)
|
80
|
+
[column_values, primary_key_values]
|
81
|
+
end
|
82
|
+
|
83
|
+
def cursor_value
|
84
|
+
return @cursor.first if @cursor.size == 1
|
85
|
+
@cursor
|
86
|
+
end
|
87
|
+
|
88
|
+
def conditions
|
89
|
+
column_index = @cursor.size - 1
|
90
|
+
column = @columns[column_index]
|
91
|
+
where_clause = if @columns.size == @cursor.size
|
92
|
+
"#{column} > ?"
|
93
|
+
else
|
94
|
+
"#{column} >= ?"
|
95
|
+
end
|
96
|
+
while column_index > 0
|
97
|
+
column_index -= 1
|
98
|
+
column = @columns[column_index]
|
99
|
+
where_clause = "#{column} > ? OR (#{column} = ? AND (#{where_clause}))"
|
100
|
+
end
|
101
|
+
ret = @cursor.reduce([where_clause]) { |params, value| params << value << value }
|
102
|
+
ret.pop
|
103
|
+
ret
|
104
|
+
end
|
105
|
+
|
106
|
+
def serialize_column_values!(column_values)
|
107
|
+
column_values.map! { |values| values.map! { |value| column_value(value) } }
|
108
|
+
end
|
109
|
+
|
110
|
+
def column_value(value)
|
111
|
+
value.is_a?(Time) ? value.strftime(SQL_DATETIME_WITH_NSEC) : value
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
data/lib/job-iteration/enumerator_builder.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require_relative "./active_record_batch_enumerator"
|
2
3
|
require_relative "./active_record_enumerator"
|
3
4
|
require_relative "./csv_enumerator"
|
4
5
|
require_relative "./throttle_enumerator"
|
@@ -86,6 +87,11 @@ module JobIteration
|
|
86
87
|
# WHERE (created_at > '$LAST_CREATED_AT_CURSOR'
|
87
88
|
# OR (created_at = '$LAST_CREATED_AT_CURSOR' AND (id > '$LAST_ID_CURSOR')))
|
88
89
|
# ORDER BY created_at, id LIMIT 100
|
90
|
+
#
|
91
|
+
# As a result of this query pattern, if the values in these columns change for the records in scope during
|
92
|
+
# iteration, they may be skipped or yielded multiple times depending on the nature of the update and the
|
93
|
+
# cursor's value. If the value gets updated to a greater value than the cursor's value, it will get yielded
|
94
|
+
# again. Similarly, if the value gets updated to a lesser value than the cursor's value, it will get skipped.
|
89
95
|
def build_active_record_enumerator_on_records(scope, cursor:, **args)
|
90
96
|
enum = build_active_record_enumerator(
|
91
97
|
scope,
|
@@ -95,7 +101,7 @@ module JobIteration
|
|
95
101
|
wrap(self, enum)
|
96
102
|
end
|
97
103
|
|
98
|
-
# Builds Enumerator from Active Record Relation and enumerates on batches.
|
104
|
+
# Builds Enumerator from Active Record Relation and enumerates on batches of records.
|
99
105
|
# Each Enumerator tick moves the cursor +batch_size+ rows forward.
|
100
106
|
#
|
101
107
|
# +batch_size:+ sets how many records will be fetched in one batch. Defaults to 100.
|
@@ -110,6 +116,16 @@ module JobIteration
|
|
110
116
|
wrap(self, enum)
|
111
117
|
end
|
112
118
|
|
119
|
+
# Builds Enumerator from Active Record Relation and enumerates on batches, yielding Active Record Relations.
|
120
|
+
# See documentation for #build_active_record_enumerator_on_batches.
|
121
|
+
def build_active_record_enumerator_on_batch_relations(scope, cursor:, **args)
|
122
|
+
JobIteration::ActiveRecordBatchEnumerator.new(
|
123
|
+
scope,
|
124
|
+
cursor: cursor,
|
125
|
+
**args
|
126
|
+
).each
|
127
|
+
end
|
128
|
+
|
113
129
|
def build_throttle_enumerator(enum, throttle_on:, backoff:)
|
114
130
|
JobIteration::ThrottleEnumerator.new(
|
115
131
|
enum,
|
@@ -124,6 +140,7 @@ module JobIteration
|
|
124
140
|
alias_method :array, :build_array_enumerator
|
125
141
|
alias_method :active_record_on_records, :build_active_record_enumerator_on_records
|
126
142
|
alias_method :active_record_on_batches, :build_active_record_enumerator_on_batches
|
143
|
+
alias_method :active_record_on_batch_relations, :build_active_record_enumerator_on_batch_relations
|
127
144
|
alias_method :throttle, :build_throttle_enumerator
|
128
145
|
|
129
146
|
private
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: job-iteration
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.12
|
4
|
+
version: 1.1.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-04-19 00:00:00.000000000 Z
|
11
|
+
date: 2021-05-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -66,6 +66,7 @@ files:
|
|
66
66
|
- guides/throttling.md
|
67
67
|
- job-iteration.gemspec
|
68
68
|
- lib/job-iteration.rb
|
69
|
+
- lib/job-iteration/active_record_batch_enumerator.rb
|
69
70
|
- lib/job-iteration/active_record_cursor.rb
|
70
71
|
- lib/job-iteration/active_record_enumerator.rb
|
71
72
|
- lib/job-iteration/csv_enumerator.rb
|
@@ -98,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
98
99
|
- !ruby/object:Gem::Version
|
99
100
|
version: '0'
|
100
101
|
requirements: []
|
101
|
-
rubygems_version: 3.
|
102
|
+
rubygems_version: 3.2.17
|
102
103
|
signing_key:
|
103
104
|
specification_version: 4
|
104
105
|
summary: Makes your background jobs interruptible and resumable.
|