job-iteration 1.11.0 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -1
- data/lib/job-iteration/active_record_batch_enumerator/column_manager.rb +138 -0
- data/lib/job-iteration/active_record_batch_enumerator.rb +45 -35
- data/lib/job-iteration/active_record_enumerator.rb +2 -0
- data/lib/job-iteration/enumerator_builder.rb +1 -1
- data/lib/job-iteration/iteration.rb +1 -1
- data/lib/job-iteration/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4a851271cdcba188da01fad33b65fa92cdbbe27d6515c44ee950d27d761e1519
|
|
4
|
+
data.tar.gz: e15477e57d43194a00067d026dc445c9edbc9225ba4bef2b0de60f91618d9874
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a7abcf968843fcfc5c260adae6998f905398308acdd3b42fd8a4208556e3207c1cd0650724b2f48f07a7d23f84cc1740ede0fc6066ee8c211109de556b83357d
|
|
7
|
+
data.tar.gz: d8ebd653153f30e87d91951be33b38a7c8644a58865e7bff1c76603cd5287301fe596116c9eac412cd93a5b571e1062e6ef766b38b8debd6d93c20b44f9af4b8
|
data/CHANGELOG.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
## Main (unreleased)
|
|
2
2
|
|
|
3
3
|
### Breaking Changes
|
|
4
4
|
|
|
@@ -16,6 +16,16 @@ nil
|
|
|
16
16
|
|
|
17
17
|
nil
|
|
18
18
|
|
|
19
|
+
## v1.12.0 (Jan 16, 2026)
|
|
20
|
+
|
|
21
|
+
### Features
|
|
22
|
+
|
|
23
|
+
- [650](https://github.com/Shopify/job-iteration/pull/650) Add support for batch enumeration over models with composite primary keys.
|
|
24
|
+
|
|
25
|
+
### Bug fixes
|
|
26
|
+
|
|
27
|
+
- [652](https://github.com/Shopify/job-iteration/pull/652) Fix ISO8601 serialization for `Date` columns in ActiveRecord enumerators.
|
|
28
|
+
|
|
19
29
|
## v1.11.0 (Jul 14, 2025)
|
|
20
30
|
|
|
21
31
|
### Security fixes
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JobIteration
|
|
4
|
+
class ActiveRecordBatchEnumerator
|
|
5
|
+
# Utility class for the batch enumerator that manages the columns that need
|
|
6
|
+
# to be plucked. It ensures primary key columns are plucked so that records
|
|
7
|
+
# in the batch can be queried for efficiently.
|
|
8
|
+
#
|
|
9
|
+
# @see ActiveRecordBatchEnumerator
|
|
10
|
+
class ColumnManager
|
|
11
|
+
# @param relation [ActiveRecord::Relation] - relation to manage columns for
|
|
12
|
+
# @param columns [Array<String,Symbol>, nil] - set of columns to select
|
|
13
|
+
def initialize(relation:, columns:)
|
|
14
|
+
@table_name = relation.table_name
|
|
15
|
+
@primary_key = Array(relation.primary_key)
|
|
16
|
+
@qualified_pkey_columns = @primary_key.map { |col| qualify_column(col) }
|
|
17
|
+
@columns = columns&.map(&:to_s) || @qualified_pkey_columns
|
|
18
|
+
|
|
19
|
+
validate_columns!(relation)
|
|
20
|
+
initialize_pluck_columns_and_pkey_positions
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# @return [Array<String>]
|
|
24
|
+
# The list of columns to be plucked. If no columns were specified, this
|
|
25
|
+
# list contains the fully qualified primary key column(s).
|
|
26
|
+
attr_reader :columns
|
|
27
|
+
|
|
28
|
+
# @return [Array<String>]
|
|
29
|
+
# The list of primary key columns for the relation. These columns are
|
|
30
|
+
# not qualified with the table name.
|
|
31
|
+
attr_reader :primary_key
|
|
32
|
+
|
|
33
|
+
# @return [Array<String>]
|
|
34
|
+
# The full set of columns to be plucked from the relation. This is a
|
|
35
|
+
# superset of `columns` and is guaranteed to contain all of the primary
|
|
36
|
+
# key columns on the relation.
|
|
37
|
+
attr_reader :pluck_columns
|
|
38
|
+
|
|
39
|
+
# @param column_values [Array<Array>]
|
|
40
|
+
# List of rows where each row contains values as determined by
|
|
41
|
+
# `pluck_columns`.
|
|
42
|
+
#
|
|
43
|
+
# @return [Array<Array>]
|
|
44
|
+
# List where each item contains the primary key column values for the
|
|
45
|
+
# corresponding row. Values are guaranteed to be in the same order as
|
|
46
|
+
# the columns are listed in `primary_key`.
|
|
47
|
+
def pkey_values(column_values)
|
|
48
|
+
column_values.map do |values|
|
|
49
|
+
@qualified_pkey_columns.map do |pkey_column|
|
|
50
|
+
pkey_column_idx = @primary_key_index_map[pkey_column]
|
|
51
|
+
values[pkey_column_idx]
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# @param cursor [Array]
|
|
57
|
+
# A list of values for a single row, as determined by `pluck_columns`.
|
|
58
|
+
#
|
|
59
|
+
# @return [Array]
|
|
60
|
+
# The same values that were passed in, minus any primary key column
|
|
61
|
+
# values that do not appear in `columns`.
|
|
62
|
+
def remove_missing_pkey_values(cursor)
|
|
63
|
+
cursor.pop(@missing_pkey_count)
|
|
64
|
+
cursor
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
private
|
|
68
|
+
|
|
69
|
+
def qualify_column(column)
|
|
70
|
+
"#{@table_name}.#{column}"
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def validate_columns!(relation)
|
|
74
|
+
raise ArgumentError, "Must specify at least one column" if @columns.empty?
|
|
75
|
+
|
|
76
|
+
if relation.joins_values.present? && !@columns.all? { |column| column.to_s.include?(".") }
|
|
77
|
+
raise ArgumentError, "You need to specify fully-qualified columns if you join a table"
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
# This method is responsible for initializing several instance variables:
|
|
82
|
+
#
|
|
83
|
+
# * `@pluck_columns` [Array<String>] -
|
|
84
|
+
# The set of columns to pluck.
|
|
85
|
+
# * `@missing_pkey_count` [Integer] -
|
|
86
|
+
# The number of primary keys that were missing from `@columns`.
|
|
87
|
+
# * `@primary_key_index_map` [Hash<String:Integer>] -
|
|
88
|
+
# Hash mapping all primary key columns to their position in
|
|
89
|
+
# `@pluck_columns`.
|
|
90
|
+
def initialize_pluck_columns_and_pkey_positions
|
|
91
|
+
@pluck_columns = @columns.dup
|
|
92
|
+
initial_pkey_index_map = find_initial_primary_key_indices(@pluck_columns)
|
|
93
|
+
|
|
94
|
+
missing_pkey_columns = initial_pkey_index_map.select { |_, idx| idx.nil? }.keys
|
|
95
|
+
missing_pkey_index_map = add_missing_pkey_columns!(missing_pkey_columns, @pluck_columns)
|
|
96
|
+
@missing_pkey_count = missing_pkey_index_map.size
|
|
97
|
+
|
|
98
|
+
# Compute the location of each primary key column in `@pluck_columns`.
|
|
99
|
+
@primary_key_index_map = initial_pkey_index_map.merge(missing_pkey_index_map)
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
# Figure out which primary key columns are already included in `columns`
|
|
103
|
+
# and track their position in the array.
|
|
104
|
+
#
|
|
105
|
+
# @param columns [Array<String>] - list of columns
|
|
106
|
+
#
|
|
107
|
+
# @return [Hash<String:Integer,nil>]
|
|
108
|
+
# A hash containing all of the fully qualified primary key columns as
|
|
109
|
+
# its keys. Values are the position of each column in the `columns`
|
|
110
|
+
# array. A `nil` value indicates the column is not present in `columns`.
|
|
111
|
+
def find_initial_primary_key_indices(columns)
|
|
112
|
+
@primary_key.each_with_object({}) do |pkey_column, indices|
|
|
113
|
+
fully_qualified_pkey_column = qualify_column(pkey_column)
|
|
114
|
+
idx = columns.index(pkey_column) || columns.index(fully_qualified_pkey_column)
|
|
115
|
+
|
|
116
|
+
indices[fully_qualified_pkey_column] = idx
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
# Takes a set of primary key columns and adds them to `columns`.
|
|
121
|
+
#
|
|
122
|
+
# @note - mutates `columns`
|
|
123
|
+
#
|
|
124
|
+
# @param missing_columns [Array<String>] - set of missing pkey columns
|
|
125
|
+
# @param columns [Array<String>] - set of columns to pluck
|
|
126
|
+
#
|
|
127
|
+
# @return [Hash<String:Integer>]
|
|
128
|
+
# A hash containing all of the values from `missing_columns` as its
|
|
129
|
+
# keys. Values are the position of those columns in `columns`.
|
|
130
|
+
def add_missing_pkey_columns!(missing_columns, columns)
|
|
131
|
+
missing_columns.each_with_object({}) do |pkey_column, indices|
|
|
132
|
+
indices[pkey_column] = columns.size
|
|
133
|
+
columns << pkey_column
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "active_record_batch_enumerator/column_manager"
|
|
4
|
+
|
|
3
5
|
module JobIteration
|
|
4
6
|
# Builds Batch Enumerator based on ActiveRecord Relation.
|
|
5
7
|
# @see EnumeratorBuilder
|
|
@@ -11,26 +13,15 @@ module JobIteration
|
|
|
11
13
|
def initialize(relation, columns: nil, batch_size: 100, timezone: nil, cursor: nil)
|
|
12
14
|
@batch_size = batch_size
|
|
13
15
|
@timezone = timezone
|
|
14
|
-
@primary_key = "#{relation.table_name}.#{relation.primary_key}"
|
|
15
|
-
@columns = Array(columns&.map(&:to_s) || @primary_key)
|
|
16
|
-
@primary_key_index = @columns.index(@primary_key) || @columns.index(relation.primary_key)
|
|
17
|
-
@pluck_columns = if @primary_key_index
|
|
18
|
-
@columns
|
|
19
|
-
else
|
|
20
|
-
@columns.dup << @primary_key
|
|
21
|
-
end
|
|
16
|
+
@column_mgr = ColumnManager.new(relation: relation, columns: columns)
|
|
22
17
|
@cursor = Array.wrap(cursor)
|
|
23
18
|
@initial_cursor = @cursor
|
|
24
|
-
raise ArgumentError, "Must specify at least one column" if @columns.empty?
|
|
25
|
-
if relation.joins_values.present? && !@columns.all? { |column| column.to_s.include?(".") }
|
|
26
|
-
raise ArgumentError, "You need to specify fully-qualified columns if you join a table"
|
|
27
|
-
end
|
|
28
19
|
|
|
29
20
|
if relation.arel.orders.present? || relation.arel.taken.present?
|
|
30
21
|
raise JobIteration::ActiveRecordCursor::ConditionNotSupportedError
|
|
31
22
|
end
|
|
32
23
|
|
|
33
|
-
@base_relation = relation.reorder(@columns.join(","))
|
|
24
|
+
@base_relation = relation.reorder(@column_mgr.columns.join(","))
|
|
34
25
|
end
|
|
35
26
|
|
|
36
27
|
def each
|
|
@@ -53,7 +44,7 @@ module JobIteration
|
|
|
53
44
|
relation = relation.where(*conditions)
|
|
54
45
|
end
|
|
55
46
|
|
|
56
|
-
cursor_values, ids = relation.uncached do
|
|
47
|
+
cursor_values, pkey_ids = relation.uncached do
|
|
57
48
|
pluck_columns(relation)
|
|
58
49
|
end
|
|
59
50
|
|
|
@@ -62,25 +53,39 @@ module JobIteration
|
|
|
62
53
|
@cursor = @initial_cursor
|
|
63
54
|
return
|
|
64
55
|
end
|
|
65
|
-
# The primary key was plucked, but original cursor did not include it, so we should remove it
|
|
66
|
-
cursor.pop unless @primary_key_index
|
|
67
|
-
@cursor = Array.wrap(cursor)
|
|
68
56
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
@base_relation.where(@primary_key => ids)
|
|
57
|
+
@cursor = @column_mgr.remove_missing_pkey_values(cursor)
|
|
58
|
+
|
|
59
|
+
filter_relation_with_primary_key(pkey_ids)
|
|
73
60
|
end
|
|
74
61
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
62
|
+
# Yields relations by selecting the primary keys of records in the batch.
|
|
63
|
+
# Post.where(published: nil) results in an enumerator of relations like:
|
|
64
|
+
# Post.where(published: nil, ids: batch_of_ids)
|
|
65
|
+
def filter_relation_with_primary_key(primary_key_values)
|
|
66
|
+
pkey = @column_mgr.primary_key
|
|
67
|
+
pkey_values = primary_key_values
|
|
68
|
+
|
|
69
|
+
# If the primary key is only composed of a single column, simplify the
|
|
70
|
+
# query. This keeps us compatible with Rails prior to 7.1 where composite
|
|
71
|
+
# primary keys were introduced along with the syntax that allows you to
|
|
72
|
+
# query for multi-column values.
|
|
73
|
+
if pkey.size <= 1
|
|
74
|
+
pkey = pkey.first
|
|
75
|
+
pkey_values = pkey_values.map(&:first)
|
|
79
76
|
end
|
|
80
77
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
78
|
+
@base_relation.where(pkey => pkey_values)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def pluck_columns(relation)
|
|
82
|
+
column_values = relation.pluck(*@column_mgr.pluck_columns)
|
|
83
|
+
|
|
84
|
+
# Pluck behaves differently when only one column is given. By using zip,
|
|
85
|
+
# we make the output consistent (at the cost of more object allocation).
|
|
86
|
+
column_values = column_values.zip if @column_mgr.pluck_columns.size == 1
|
|
87
|
+
|
|
88
|
+
primary_key_values = @column_mgr.pkey_values(column_values)
|
|
84
89
|
|
|
85
90
|
serialize_column_values!(column_values)
|
|
86
91
|
[column_values, primary_key_values]
|
|
@@ -94,15 +99,15 @@ module JobIteration
|
|
|
94
99
|
|
|
95
100
|
def conditions
|
|
96
101
|
column_index = @cursor.size - 1
|
|
97
|
-
column = @columns[column_index]
|
|
98
|
-
where_clause = if @columns.size == @cursor.size
|
|
102
|
+
column = @column_mgr.columns[column_index]
|
|
103
|
+
where_clause = if @column_mgr.columns.size == @cursor.size
|
|
99
104
|
"#{column} > ?"
|
|
100
105
|
else
|
|
101
106
|
"#{column} >= ?"
|
|
102
107
|
end
|
|
103
108
|
while column_index > 0
|
|
104
109
|
column_index -= 1
|
|
105
|
-
column = @columns[column_index]
|
|
110
|
+
column = @column_mgr.columns[column_index]
|
|
106
111
|
where_clause = "#{column} > ? OR (#{column} = ? AND (#{where_clause}))"
|
|
107
112
|
end
|
|
108
113
|
ret = @cursor.reduce([where_clause]) { |params, value| params << value << value }
|
|
@@ -115,10 +120,15 @@ module JobIteration
|
|
|
115
120
|
end
|
|
116
121
|
|
|
117
122
|
def column_value(value)
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
123
|
+
case value
|
|
124
|
+
when Time
|
|
125
|
+
value = value.in_time_zone(@timezone) unless @timezone.nil?
|
|
126
|
+
value.strftime(SQL_DATETIME_WITH_NSEC)
|
|
127
|
+
when Date
|
|
128
|
+
value.iso8601
|
|
129
|
+
else
|
|
130
|
+
value
|
|
131
|
+
end
|
|
122
132
|
end
|
|
123
133
|
end
|
|
124
134
|
end
|
|
@@ -16,7 +16,7 @@ module JobIteration
|
|
|
16
16
|
# `enumerator_builder` is _always_ the type that is returned from
|
|
17
17
|
# `build_enumerator`. This prevents people from implementing custom
|
|
18
18
|
# Enumerators without wrapping them in
|
|
19
|
-
# `enumerator_builder.wrap(custom_enum)`. Think of these wrappers
|
|
19
|
+
# `enumerator_builder.wrap(enumerator_builder, custom_enum)`. Think of these wrappers
|
|
20
20
|
# the way you should a middleware.
|
|
21
21
|
class Wrapper < Enumerator
|
|
22
22
|
class << self
|
|
@@ -222,7 +222,7 @@ module JobIteration
|
|
|
222
222
|
if enum.is_a?(Enumerator)
|
|
223
223
|
unless enum.is_a?(JobIteration.enumerator_builder::Wrapper)
|
|
224
224
|
JobIteration::Deprecation.warn("Returning an unwrapped enumerator from build_enumerator is deprecated. " \
|
|
225
|
-
"Wrap the enumerator using enumerator_builder.wrap(my_enumerator) instead.")
|
|
225
|
+
"Wrap the enumerator using enumerator_builder.wrap(enumerator_builder, my_enumerator) instead.")
|
|
226
226
|
end
|
|
227
227
|
|
|
228
228
|
return
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: job-iteration
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.11.0
|
|
4
|
+
version: 1.12.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Shopify
|
|
@@ -36,6 +36,7 @@ files:
|
|
|
36
36
|
- job-iteration.gemspec
|
|
37
37
|
- lib/job-iteration.rb
|
|
38
38
|
- lib/job-iteration/active_record_batch_enumerator.rb
|
|
39
|
+
- lib/job-iteration/active_record_batch_enumerator/column_manager.rb
|
|
39
40
|
- lib/job-iteration/active_record_cursor.rb
|
|
40
41
|
- lib/job-iteration/active_record_enumerator.rb
|
|
41
42
|
- lib/job-iteration/csv_enumerator.rb
|
|
@@ -76,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
76
77
|
- !ruby/object:Gem::Version
|
|
77
78
|
version: '0'
|
|
78
79
|
requirements: []
|
|
79
|
-
rubygems_version:
|
|
80
|
+
rubygems_version: 4.0.4
|
|
80
81
|
specification_version: 4
|
|
81
82
|
summary: Makes your background jobs interruptible and resumable.
|
|
82
83
|
test_files: []
|