RubyGems - job-iteration - Versions diffs - 1.11.0 → 1.12.0 - Mend

job-iteration 1.11.0 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

checksums.yaml +4 -4
data/CHANGELOG.md +11 -1
data/lib/job-iteration/active_record_batch_enumerator/column_manager.rb +138 -0
data/lib/job-iteration/active_record_batch_enumerator.rb +45 -35
data/lib/job-iteration/active_record_enumerator.rb +2 -0
data/lib/job-iteration/enumerator_builder.rb +1 -1
data/lib/job-iteration/iteration.rb +1 -1
data/lib/job-iteration/version.rb +1 -1
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 61c40af6f75909b71f400462b9d64f994530f0fb68367ddbe6479f1b5fe8d831
-  data.tar.gz: a293c68062cd9714d522a2c3c5e81292bcd5a9f8172cbf3f537f0676e0de5731
+  metadata.gz: 4a851271cdcba188da01fad33b65fa92cdbbe27d6515c44ee950d27d761e1519
+  data.tar.gz: e15477e57d43194a00067d026dc445c9edbc9225ba4bef2b0de60f91618d9874
 SHA512:
-  metadata.gz: a773c72a419db3fe0677ba323ec0a2bdca6c02e34f322e7519ff080752c66e8ce7c973ca3b9031e84e2b70e27eb61dcf5875dfd38ef0b4421983991202256c71
-  data.tar.gz: 2abedd098be50d634f68eade3e681042aef00a87c8a3b0fb7cc88a91143cafade08bc18435f7479399c2ccbb660563ff781c1b4e9248b8bff7cf22190504383e
+  metadata.gz: a7abcf968843fcfc5c260adae6998f905398308acdd3b42fd8a4208556e3207c1cd0650724b2f48f07a7d23f84cc1740ede0fc6066ee8c211109de556b83357d
+  data.tar.gz: d8ebd653153f30e87d91951be33b38a7c8644a58865e7bff1c76603cd5287301fe596116c9eac412cd93a5b571e1062e6ef766b38b8debd6d93c20b44f9af4b8

data/CHANGELOG.md CHANGED Viewed

@@ -1,4 +1,4 @@
-### Main (unreleased)
+## Main (unreleased)
 ### Breaking Changes
@@ -16,6 +16,16 @@ nil
 nil
+## v1.12.0 (Jan 16, 2026)
+### Features
+- [650](https://github.com/Shopify/job-iteration/pull/650) Add support for batch enumeration over models with composite primary keys.
+### Bug fixes
+- [652](https://github.com/Shopify/job-iteration/pull/652) Fix ISO8601 serialization for `Date` columns in ActiveRecord enumerators.
 ## v1.11.0 (Jul 14, 2025)
 ### Security fixes

data/lib/job-iteration/active_record_batch_enumerator/column_manager.rb ADDED Viewed

@@ -0,0 +1,138 @@
+# frozen_string_literal: true
+module JobIteration
+  class ActiveRecordBatchEnumerator
+    # Utility class for the batch enumerator that manages the columns that need
+    # to be plucked. It ensures primary key columns are plucked so that records
+    # in the batch can be queried for efficiently.
+    #
+    # @see ActiveRecordBatchEnumerator
+    class ColumnManager
+      # @param relation [ActiveRecord::Relation] - relation to manage columns for
+      # @param columns [Array<String,Symbol>, nil] - set of columns to select
+      def initialize(relation:, columns:)
+        @table_name = relation.table_name
+        @primary_key = Array(relation.primary_key)
+        @qualified_pkey_columns = @primary_key.map { |col| qualify_column(col) }
+        @columns = columns&.map(&:to_s) || @qualified_pkey_columns
+        validate_columns!(relation)
+        initialize_pluck_columns_and_pkey_positions
+      end
+      # @return [Array<String>]
+      #   The list of columns to be plucked. If no columns were specified, this
+      #   list contains the fully qualified primary key column(s).
+      attr_reader :columns
+      # @return [Array<String>]
+      #   The list of primary key columns for the relation. These columns are
+      #   not qualified with the table name.
+      attr_reader :primary_key
+      # @return [Array<String>]
+      #   The full set of columns to be plucked from the relation. This is a
+      #   superset of `columns` and is guaranteed to contain all of the primary
+      #   key columns on the relation.
+      attr_reader :pluck_columns
+      # @param column_values [Array<Array>]
+      #   List of rows where each row contains values as determined by
+      #   `pluck_columns`.
+      #
+      # @return [Array<Array>]
+      #   List where each item contains the primary key column values for the
+      #   corresponding row. Values are guaranteed to be in the same order as
+      #   the columns are listed in `primary_key`.
+      def pkey_values(column_values)
+        column_values.map do |values|
+          @qualified_pkey_columns.map do |pkey_column|
+            pkey_column_idx = @primary_key_index_map[pkey_column]
+            values[pkey_column_idx]
+          end
+        end
+      end
+      # @param cursor [Array]
+      #   A list of values for a single row, as determined by `pluck_columns`.
+      #
+      # @return [Array]
+      #   The same values that were passed in, minus any primary key column
+      #   values that do not appear in `columns`.
+      def remove_missing_pkey_values(cursor)
+        cursor.pop(@missing_pkey_count)
+        cursor
+      end
+      private
+      def qualify_column(column)
+        "#{@table_name}.#{column}"
+      end
+      def validate_columns!(relation)
+        raise ArgumentError, "Must specify at least one column" if @columns.empty?
+        if relation.joins_values.present? && !@columns.all? { |column| column.to_s.include?(".") }
+          raise ArgumentError, "You need to specify fully-qualified columns if you join a table"
+        end
+      end
+      # This method is responsible for initializing several instance variables:
+      #
+      # * `@pluck_columns` [Array<String>] -
+      #       The set of columns to pluck.
+      # * `@missing_pkey_count` [Integer] -
+      #       The number of primary keys that were missing from `@columns`.
+      # * `@primary_key_index_map` [Hash<String:Integer>] -
+      #       Hash mapping all primary key columns to their position in
+      #       `@pluck_columns`.
+      def initialize_pluck_columns_and_pkey_positions
+        @pluck_columns = @columns.dup
+        initial_pkey_index_map = find_initial_primary_key_indices(@pluck_columns)
+        missing_pkey_columns = initial_pkey_index_map.select { |_, idx| idx.nil? }.keys
+        missing_pkey_index_map = add_missing_pkey_columns!(missing_pkey_columns, @pluck_columns)
+        @missing_pkey_count = missing_pkey_index_map.size
+        # Compute the location of each primary key column in `@pluck_columns`.
+        @primary_key_index_map = initial_pkey_index_map.merge(missing_pkey_index_map)
+      end
+      # Figure out which primary key columns are already included in `columns`
+      # and track their position in the array.
+      #
+      # @param columns [Array<String>] - list of columns
+      #
+      # @return [Hash<String:Integer,nil>]
+      #   A hash containing all of the fully qualified primary key columns as
+      #   its keys. Values are the position of each column in the `columns`
+      #   array. A `nil` value indicates the column is not present in `columns`.
+      def find_initial_primary_key_indices(columns)
+        @primary_key.each_with_object({}) do |pkey_column, indices|
+          fully_qualified_pkey_column = qualify_column(pkey_column)
+          idx = columns.index(pkey_column) || columns.index(fully_qualified_pkey_column)
+          indices[fully_qualified_pkey_column] = idx
+        end
+      end
+      # Takes a set of primary key columns and adds them to `columns`.
+      #
+      # @note - mutates `columns`
+      #
+      # @param missing_columns [Array<String>] - set of missing pkey columns
+      # @param columns [Array<String>] - set of columns to pluck
+      #
+      # @return [Hash<String:Integer>]
+      #   A hash containing all of the values from `missing_columns` as its
+      #   keys. Values are the position of those columns in `columns`.
+      def add_missing_pkey_columns!(missing_columns, columns)
+        missing_columns.each_with_object({}) do |pkey_column, indices|
+          indices[pkey_column] = columns.size
+          columns << pkey_column
+        end
+      end
+    end
+  end
+end

data/lib/job-iteration/active_record_batch_enumerator.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
+require_relative "active_record_batch_enumerator/column_manager"
 module JobIteration
   # Builds Batch Enumerator based on ActiveRecord Relation.
   # @see EnumeratorBuilder
@@ -11,26 +13,15 @@ module JobIteration
     def initialize(relation, columns: nil, batch_size: 100, timezone: nil, cursor: nil)
       @batch_size = batch_size
       @timezone = timezone
-      @primary_key = "#{relation.table_name}.#{relation.primary_key}"
-      @columns = Array(columns&.map(&:to_s) || @primary_key)
-      @primary_key_index = @columns.index(@primary_key) || @columns.index(relation.primary_key)
-      @pluck_columns = if @primary_key_index
-        @columns
-      else
-        @columns.dup << @primary_key
-      end
+      @column_mgr = ColumnManager.new(relation: relation, columns: columns)
       @cursor = Array.wrap(cursor)
       @initial_cursor = @cursor
-      raise ArgumentError, "Must specify at least one column" if @columns.empty?
-      if relation.joins_values.present? && !@columns.all? { |column| column.to_s.include?(".") }
-        raise ArgumentError, "You need to specify fully-qualified columns if you join a table"
-      end
       if relation.arel.orders.present? || relation.arel.taken.present?
         raise JobIteration::ActiveRecordCursor::ConditionNotSupportedError
       end
-      @base_relation = relation.reorder(@columns.join(","))
+      @base_relation = relation.reorder(@column_mgr.columns.join(","))
     end
     def each
@@ -53,7 +44,7 @@ module JobIteration
         relation = relation.where(*conditions)
       end
-      cursor_values, ids = relation.uncached do
+      cursor_values, pkey_ids = relation.uncached do
         pluck_columns(relation)
       end
@@ -62,25 +53,39 @@ module JobIteration
         @cursor = @initial_cursor
         return
       end
-      # The primary key was plucked, but original cursor did not include it, so we should remove it
-      cursor.pop unless @primary_key_index
-      @cursor = Array.wrap(cursor)
-      # Yields relations by selecting the primary keys of records in the batch.
-      # Post.where(published: nil) results in an enumerator of relations like:
-      # Post.where(published: nil, ids: batch_of_ids)
-      @base_relation.where(@primary_key => ids)
+      @cursor = @column_mgr.remove_missing_pkey_values(cursor)
+      filter_relation_with_primary_key(pkey_ids)
     end
-    def pluck_columns(relation)
-      if @pluck_columns.size == 1 # only the primary key
-        column_values = relation.pluck(*@pluck_columns)
-        return [column_values, column_values]
+    # Yields relations by selecting the primary keys of records in the batch.
+    # Post.where(published: nil) results in an enumerator of relations like:
+    # Post.where(published: nil, ids: batch_of_ids)
+    def filter_relation_with_primary_key(primary_key_values)
+      pkey = @column_mgr.primary_key
+      pkey_values = primary_key_values
+      # If the primary key is only composed of a single column, simplify the
+      # query. This keeps us compatible with Rails prior to 7.1 where composite
+      # primary keys were introduced along with the syntax that allows you to
+      # query for multi-column values.
+      if pkey.size <= 1
+        pkey = pkey.first
+        pkey_values = pkey_values.map(&:first)
       end
-      column_values = relation.pluck(*@pluck_columns)
-      primary_key_index = @primary_key_index || -1
-      primary_key_values = column_values.map { |values| values[primary_key_index] }
+      @base_relation.where(pkey => pkey_values)
+    end
+    def pluck_columns(relation)
+      column_values = relation.pluck(*@column_mgr.pluck_columns)
+      # Pluck behaves differently when only one column is given. By using zip,
+      # we make the output consistent (at the cost of more object allocation).
+      column_values = column_values.zip if @column_mgr.pluck_columns.size == 1
+      primary_key_values = @column_mgr.pkey_values(column_values)
       serialize_column_values!(column_values)
       [column_values, primary_key_values]
@@ -94,15 +99,15 @@ module JobIteration
     def conditions
       column_index = @cursor.size - 1
-      column = @columns[column_index]
-      where_clause = if @columns.size == @cursor.size
+      column = @column_mgr.columns[column_index]
+      where_clause = if @column_mgr.columns.size == @cursor.size
         "#{column} > ?"
       else
         "#{column} >= ?"
       end
       while column_index > 0
         column_index -= 1
-        column = @columns[column_index]
+        column = @column_mgr.columns[column_index]
         where_clause = "#{column} > ? OR (#{column} = ? AND (#{where_clause}))"
       end
       ret = @cursor.reduce([where_clause]) { |params, value| params << value << value }
@@ -115,10 +120,15 @@ module JobIteration
     end
     def column_value(value)
-      return value unless value.is_a?(Time)
-      value = value.in_time_zone(@timezone) unless @timezone.nil?
-      value.strftime(SQL_DATETIME_WITH_NSEC)
+      case value
+      when Time
+        value = value.in_time_zone(@timezone) unless @timezone.nil?
+        value.strftime(SQL_DATETIME_WITH_NSEC)
+      when Date
+        value.iso8601
+      else
+        value
+      end
     end
   end
 end

data/lib/job-iteration/active_record_enumerator.rb CHANGED Viewed

@@ -70,6 +70,8 @@ module JobIteration
       when :datetime
         value = value.in_time_zone(@timezone) unless @timezone.nil?
         value.strftime(SQL_DATETIME_WITH_NSEC)
+      when :date
+        value.iso8601
       else
         value
       end

data/lib/job-iteration/enumerator_builder.rb CHANGED Viewed

@@ -16,7 +16,7 @@ module JobIteration
     # `enumerator_builder` is _always_ the type that is returned from
     # `build_enumerator`. This prevents people from implementing custom
     # Enumerators without wrapping them in
-    # `enumerator_builder.wrap(custom_enum)`. Think of these wrappers
+    # `enumerator_builder.wrap(enumerator_builder, custom_enum)`. Think of these wrappers
     # the way you should a middleware.
     class Wrapper < Enumerator
       class << self

data/lib/job-iteration/iteration.rb CHANGED Viewed

@@ -222,7 +222,7 @@ module JobIteration
       if enum.is_a?(Enumerator)
         unless enum.is_a?(JobIteration.enumerator_builder::Wrapper)
           JobIteration::Deprecation.warn("Returning an unwrapped enumerator from build_enumerator is deprecated. " \
-            "Wrap the enumerator using enumerator_builder.wrap(my_enumerator) instead.")
+            "Wrap the enumerator using enumerator_builder.wrap(enumerator_builder, my_enumerator) instead.")
         end
         return

data/lib/job-iteration/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module JobIteration
-  VERSION = "1.11.0"
+  VERSION = "1.12.0"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: job-iteration
 version: !ruby/object:Gem::Version
-  version: 1.11.0
+  version: 1.12.0
 platform: ruby
 authors:
 - Shopify
@@ -36,6 +36,7 @@ files:
 - job-iteration.gemspec
 - lib/job-iteration.rb
 - lib/job-iteration/active_record_batch_enumerator.rb
+- lib/job-iteration/active_record_batch_enumerator/column_manager.rb
 - lib/job-iteration/active_record_cursor.rb
 - lib/job-iteration/active_record_enumerator.rb
 - lib/job-iteration/csv_enumerator.rb
@@ -76,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.6.9
+rubygems_version: 4.0.4
 specification_version: 4
 summary: Makes your background jobs interruptible and resumable.
 test_files: []