job-iteration 1.11.0 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 61c40af6f75909b71f400462b9d64f994530f0fb68367ddbe6479f1b5fe8d831
4
- data.tar.gz: a293c68062cd9714d522a2c3c5e81292bcd5a9f8172cbf3f537f0676e0de5731
3
+ metadata.gz: 4a851271cdcba188da01fad33b65fa92cdbbe27d6515c44ee950d27d761e1519
4
+ data.tar.gz: e15477e57d43194a00067d026dc445c9edbc9225ba4bef2b0de60f91618d9874
5
5
  SHA512:
6
- metadata.gz: a773c72a419db3fe0677ba323ec0a2bdca6c02e34f322e7519ff080752c66e8ce7c973ca3b9031e84e2b70e27eb61dcf5875dfd38ef0b4421983991202256c71
7
- data.tar.gz: 2abedd098be50d634f68eade3e681042aef00a87c8a3b0fb7cc88a91143cafade08bc18435f7479399c2ccbb660563ff781c1b4e9248b8bff7cf22190504383e
6
+ metadata.gz: a7abcf968843fcfc5c260adae6998f905398308acdd3b42fd8a4208556e3207c1cd0650724b2f48f07a7d23f84cc1740ede0fc6066ee8c211109de556b83357d
7
+ data.tar.gz: d8ebd653153f30e87d91951be33b38a7c8644a58865e7bff1c76603cd5287301fe596116c9eac412cd93a5b571e1062e6ef766b38b8debd6d93c20b44f9af4b8
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### Main (unreleased)
1
+ ## Main (unreleased)
2
2
 
3
3
  ### Breaking Changes
4
4
 
@@ -16,6 +16,16 @@ nil
16
16
 
17
17
  nil
18
18
 
19
+ ## v1.12.0 (Jan 16, 2026)
20
+
21
+ ### Features
22
+
23
+ - [650](https://github.com/Shopify/job-iteration/pull/650) Add support for batch enumeration over models with composite primary keys.
24
+
25
+ ### Bug fixes
26
+
27
+ - [652](https://github.com/Shopify/job-iteration/pull/652) Fix ISO8601 serialization for `Date` columns in ActiveRecord enumerators.
28
+
19
29
  ## v1.11.0 (Jul 14, 2025)
20
30
 
21
31
  ### Security fixes
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ module JobIteration
4
+ class ActiveRecordBatchEnumerator
5
+ # Utility class for the batch enumerator that manages the columns that need
6
+ # to be plucked. It ensures primary key columns are plucked so that records
7
+ # in the batch can be queried for efficiently.
8
+ #
9
+ # @see ActiveRecordBatchEnumerator
10
+ class ColumnManager
11
+ # @param relation [ActiveRecord::Relation] - relation to manage columns for
12
+ # @param columns [Array<String,Symbol>, nil] - set of columns to select
13
+ def initialize(relation:, columns:)
14
+ @table_name = relation.table_name
15
+ @primary_key = Array(relation.primary_key)
16
+ @qualified_pkey_columns = @primary_key.map { |col| qualify_column(col) }
17
+ @columns = columns&.map(&:to_s) || @qualified_pkey_columns
18
+
19
+ validate_columns!(relation)
20
+ initialize_pluck_columns_and_pkey_positions
21
+ end
22
+
23
+ # @return [Array<String>]
24
+ # The list of columns to be plucked. If no columns were specified, this
25
+ # list contains the fully qualified primary key column(s).
26
+ attr_reader :columns
27
+
28
+ # @return [Array<String>]
29
+ # The list of primary key columns for the relation. These columns are
30
+ # not qualified with the table name.
31
+ attr_reader :primary_key
32
+
33
+ # @return [Array<String>]
34
+ # The full set of columns to be plucked from the relation. This is a
35
+ # superset of `columns` and is guaranteed to contain all of the primary
36
+ # key columns on the relation.
37
+ attr_reader :pluck_columns
38
+
39
+ # @param column_values [Array<Array>]
40
+ # List of rows where each row contains values as determined by
41
+ # `pluck_columns`.
42
+ #
43
+ # @return [Array<Array>]
44
+ # List where each item contains the primary key column values for the
45
+ # corresponding row. Values are guaranteed to be in the same order as
46
+ # the columns are listed in `primary_key`.
47
+ def pkey_values(column_values)
48
+ column_values.map do |values|
49
+ @qualified_pkey_columns.map do |pkey_column|
50
+ pkey_column_idx = @primary_key_index_map[pkey_column]
51
+ values[pkey_column_idx]
52
+ end
53
+ end
54
+ end
55
+
56
+ # @param cursor [Array]
57
+ # A list of values for a single row, as determined by `pluck_columns`.
58
+ #
59
+ # @return [Array]
60
+ # The same values that were passed in, minus any primary key column
61
+ # values that do not appear in `columns`.
62
+ def remove_missing_pkey_values(cursor)
63
+ cursor.pop(@missing_pkey_count)
64
+ cursor
65
+ end
66
+
67
+ private
68
+
69
+ def qualify_column(column)
70
+ "#{@table_name}.#{column}"
71
+ end
72
+
73
+ def validate_columns!(relation)
74
+ raise ArgumentError, "Must specify at least one column" if @columns.empty?
75
+
76
+ if relation.joins_values.present? && !@columns.all? { |column| column.to_s.include?(".") }
77
+ raise ArgumentError, "You need to specify fully-qualified columns if you join a table"
78
+ end
79
+ end
80
+
81
+ # This method is responsible for initializing several instance variables:
82
+ #
83
+ # * `@pluck_columns` [Array<String>] -
84
+ # The set of columns to pluck.
85
+ # * `@missing_pkey_count` [Integer] -
86
+ # The number of primary keys that were missing from `@columns`.
87
+ # * `@primary_key_index_map` [Hash{String => Integer}] -
88
+ # Hash mapping all primary key columns to their position in
89
+ # `@pluck_columns`.
90
+ def initialize_pluck_columns_and_pkey_positions
91
+ @pluck_columns = @columns.dup
92
+ initial_pkey_index_map = find_initial_primary_key_indices(@pluck_columns)
93
+
94
+ missing_pkey_columns = initial_pkey_index_map.select { |_, idx| idx.nil? }.keys
95
+ missing_pkey_index_map = add_missing_pkey_columns!(missing_pkey_columns, @pluck_columns)
96
+ @missing_pkey_count = missing_pkey_index_map.size
97
+
98
+ # Compute the location of each primary key column in `@pluck_columns`.
99
+ @primary_key_index_map = initial_pkey_index_map.merge(missing_pkey_index_map)
100
+ end
101
+
102
+ # Figure out which primary key columns are already included in `columns`
103
+ # and track their position in the array.
104
+ #
105
+ # @param columns [Array<String>] - list of columns
106
+ #
107
+ # @return [Hash{String => Integer, nil}]
108
+ # A hash containing all of the fully qualified primary key columns as
109
+ # its keys. Values are the position of each column in the `columns`
110
+ # array. A `nil` value indicates the column is not present in `columns`.
111
+ def find_initial_primary_key_indices(columns)
112
+ @primary_key.each_with_object({}) do |pkey_column, indices|
113
+ fully_qualified_pkey_column = qualify_column(pkey_column)
114
+ idx = columns.index(pkey_column) || columns.index(fully_qualified_pkey_column)
115
+
116
+ indices[fully_qualified_pkey_column] = idx
117
+ end
118
+ end
119
+
120
+ # Takes a set of primary key columns and adds them to `columns`.
121
+ #
122
+ # @note - mutates `columns`
123
+ #
124
+ # @param missing_columns [Array<String>] - set of missing pkey columns
125
+ # @param columns [Array<String>] - set of columns to pluck
126
+ #
127
+ # @return [Hash{String => Integer}]
128
+ # A hash containing all of the values from `missing_columns` as its
129
+ # keys. Values are the position of those columns in `columns`.
130
+ def add_missing_pkey_columns!(missing_columns, columns)
131
+ missing_columns.each_with_object({}) do |pkey_column, indices|
132
+ indices[pkey_column] = columns.size
133
+ columns << pkey_column
134
+ end
135
+ end
136
+ end
137
+ end
138
+ end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "active_record_batch_enumerator/column_manager"
4
+
3
5
  module JobIteration
4
6
  # Builds Batch Enumerator based on ActiveRecord Relation.
5
7
  # @see EnumeratorBuilder
@@ -11,26 +13,15 @@ module JobIteration
11
13
  def initialize(relation, columns: nil, batch_size: 100, timezone: nil, cursor: nil)
12
14
  @batch_size = batch_size
13
15
  @timezone = timezone
14
- @primary_key = "#{relation.table_name}.#{relation.primary_key}"
15
- @columns = Array(columns&.map(&:to_s) || @primary_key)
16
- @primary_key_index = @columns.index(@primary_key) || @columns.index(relation.primary_key)
17
- @pluck_columns = if @primary_key_index
18
- @columns
19
- else
20
- @columns.dup << @primary_key
21
- end
16
+ @column_mgr = ColumnManager.new(relation: relation, columns: columns)
22
17
  @cursor = Array.wrap(cursor)
23
18
  @initial_cursor = @cursor
24
- raise ArgumentError, "Must specify at least one column" if @columns.empty?
25
- if relation.joins_values.present? && !@columns.all? { |column| column.to_s.include?(".") }
26
- raise ArgumentError, "You need to specify fully-qualified columns if you join a table"
27
- end
28
19
 
29
20
  if relation.arel.orders.present? || relation.arel.taken.present?
30
21
  raise JobIteration::ActiveRecordCursor::ConditionNotSupportedError
31
22
  end
32
23
 
33
- @base_relation = relation.reorder(@columns.join(","))
24
+ @base_relation = relation.reorder(@column_mgr.columns.join(","))
34
25
  end
35
26
 
36
27
  def each
@@ -53,7 +44,7 @@ module JobIteration
53
44
  relation = relation.where(*conditions)
54
45
  end
55
46
 
56
- cursor_values, ids = relation.uncached do
47
+ cursor_values, pkey_ids = relation.uncached do
57
48
  pluck_columns(relation)
58
49
  end
59
50
 
@@ -62,25 +53,39 @@ module JobIteration
62
53
  @cursor = @initial_cursor
63
54
  return
64
55
  end
65
- # The primary key was plucked, but original cursor did not include it, so we should remove it
66
- cursor.pop unless @primary_key_index
67
- @cursor = Array.wrap(cursor)
68
56
 
69
- # Yields relations by selecting the primary keys of records in the batch.
70
- # Post.where(published: nil) results in an enumerator of relations like:
71
- # Post.where(published: nil, ids: batch_of_ids)
72
- @base_relation.where(@primary_key => ids)
57
+ @cursor = @column_mgr.remove_missing_pkey_values(cursor)
58
+
59
+ filter_relation_with_primary_key(pkey_ids)
73
60
  end
74
61
 
75
- def pluck_columns(relation)
76
- if @pluck_columns.size == 1 # only the primary key
77
- column_values = relation.pluck(*@pluck_columns)
78
- return [column_values, column_values]
62
+ # Yields relations by selecting the primary keys of records in the batch.
63
+ # Post.where(published: nil) results in an enumerator of relations like:
64
+ # Post.where(published: nil, id: batch_of_ids)
65
+ def filter_relation_with_primary_key(primary_key_values)
66
+ pkey = @column_mgr.primary_key
67
+ pkey_values = primary_key_values
68
+
69
+ # If the primary key is only composed of a single column, simplify the
70
+ # query. This keeps us compatible with Rails prior to 7.1 where composite
71
+ # primary keys were introduced along with the syntax that allows you to
72
+ # query for multi-column values.
73
+ if pkey.size <= 1
74
+ pkey = pkey.first
75
+ pkey_values = pkey_values.map(&:first)
79
76
  end
80
77
 
81
- column_values = relation.pluck(*@pluck_columns)
82
- primary_key_index = @primary_key_index || -1
83
- primary_key_values = column_values.map { |values| values[primary_key_index] }
78
+ @base_relation.where(pkey => pkey_values)
79
+ end
80
+
81
+ def pluck_columns(relation)
82
+ column_values = relation.pluck(*@column_mgr.pluck_columns)
83
+
84
+ # Pluck behaves differently when only one column is given. By using zip,
85
+ # we make the output consistent (at the cost of more object allocation).
86
+ column_values = column_values.zip if @column_mgr.pluck_columns.size == 1
87
+
88
+ primary_key_values = @column_mgr.pkey_values(column_values)
84
89
 
85
90
  serialize_column_values!(column_values)
86
91
  [column_values, primary_key_values]
@@ -94,15 +99,15 @@ module JobIteration
94
99
 
95
100
  def conditions
96
101
  column_index = @cursor.size - 1
97
- column = @columns[column_index]
98
- where_clause = if @columns.size == @cursor.size
102
+ column = @column_mgr.columns[column_index]
103
+ where_clause = if @column_mgr.columns.size == @cursor.size
99
104
  "#{column} > ?"
100
105
  else
101
106
  "#{column} >= ?"
102
107
  end
103
108
  while column_index > 0
104
109
  column_index -= 1
105
- column = @columns[column_index]
110
+ column = @column_mgr.columns[column_index]
106
111
  where_clause = "#{column} > ? OR (#{column} = ? AND (#{where_clause}))"
107
112
  end
108
113
  ret = @cursor.reduce([where_clause]) { |params, value| params << value << value }
@@ -115,10 +120,15 @@ module JobIteration
115
120
  end
116
121
 
117
122
  def column_value(value)
118
- return value unless value.is_a?(Time)
119
-
120
- value = value.in_time_zone(@timezone) unless @timezone.nil?
121
- value.strftime(SQL_DATETIME_WITH_NSEC)
123
+ case value
124
+ when Time
125
+ value = value.in_time_zone(@timezone) unless @timezone.nil?
126
+ value.strftime(SQL_DATETIME_WITH_NSEC)
127
+ when Date
128
+ value.iso8601
129
+ else
130
+ value
131
+ end
122
132
  end
123
133
  end
124
134
  end
@@ -70,6 +70,8 @@ module JobIteration
70
70
  when :datetime
71
71
  value = value.in_time_zone(@timezone) unless @timezone.nil?
72
72
  value.strftime(SQL_DATETIME_WITH_NSEC)
73
+ when :date
74
+ value.iso8601
73
75
  else
74
76
  value
75
77
  end
@@ -16,7 +16,7 @@ module JobIteration
16
16
  # `enumerator_builder` is _always_ the type that is returned from
17
17
  # `build_enumerator`. This prevents people from implementing custom
18
18
  # Enumerators without wrapping them in
19
- # `enumerator_builder.wrap(custom_enum)`. Think of these wrappers
19
+ # `enumerator_builder.wrap(enumerator_builder, custom_enum)`. Think of these wrappers
20
20
  # the way you would a middleware.
21
21
  class Wrapper < Enumerator
22
22
  class << self
@@ -222,7 +222,7 @@ module JobIteration
222
222
  if enum.is_a?(Enumerator)
223
223
  unless enum.is_a?(JobIteration.enumerator_builder::Wrapper)
224
224
  JobIteration::Deprecation.warn("Returning an unwrapped enumerator from build_enumerator is deprecated. " \
225
- "Wrap the enumerator using enumerator_builder.wrap(my_enumerator) instead.")
225
+ "Wrap the enumerator using enumerator_builder.wrap(enumerator_builder, my_enumerator) instead.")
226
226
  end
227
227
 
228
228
  return
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JobIteration
4
- VERSION = "1.11.0"
4
+ VERSION = "1.12.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: job-iteration
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.0
4
+ version: 1.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shopify
@@ -36,6 +36,7 @@ files:
36
36
  - job-iteration.gemspec
37
37
  - lib/job-iteration.rb
38
38
  - lib/job-iteration/active_record_batch_enumerator.rb
39
+ - lib/job-iteration/active_record_batch_enumerator/column_manager.rb
39
40
  - lib/job-iteration/active_record_cursor.rb
40
41
  - lib/job-iteration/active_record_enumerator.rb
41
42
  - lib/job-iteration/csv_enumerator.rb
@@ -76,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
76
77
  - !ruby/object:Gem::Version
77
78
  version: '0'
78
79
  requirements: []
79
- rubygems_version: 3.6.9
80
+ rubygems_version: 4.0.4
80
81
  specification_version: 4
81
82
  summary: Makes your background jobs interruptible and resumable.
82
83
  test_files: []