job-iteration 1.1.12 → 1.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2109f33c06a85e1b336876ea4c0aafe2dc0ea7f17fbdda0d4bba06177f940113
4
- data.tar.gz: 3ce3b8017f1dfaac8bb3c83f8b846c4068ed09a0968f937bb1bd3e31d9d1341a
3
+ metadata.gz: c1707731e6bf03617bdbaa60bc2ddeb9652f34534a8fe0d69b26e233c790d469
4
+ data.tar.gz: a981f1230a7784e4e6673c0d2e4c0642f4a1d3c48baeb3039ca6e41daced9107
5
5
  SHA512:
6
- metadata.gz: d22611524765adb75a6d6cd457d2c1e0e7169621b3c9a2fd4f8fbcc4d68f7a79cf89764852a61f11bdd586ac60c1bb9d6c76f34426c5a1eb25299bd8b6ec77b7
7
- data.tar.gz: 952569f76f1d9b739d4219bfb3df0878ef1ec1c3784bba985f2246a956a581c51723e113ca10ac9eec3f4c2ae61c6acaabd4d461aa321c1af989d1921293ef34
6
+ metadata.gz: db396697d25d4b0450b69d2a58d68e1e247a074f02c9a5887746184789f2aca2569e345ef0d88fb5e142fcaa1c0586169ef91b7bc0ffe2ef35796f544e0cafa1
7
+ data.tar.gz: 757b0c074830cbe97d49fd6e84c59ade45c4b5ecdc57b52cf17ea5539e9f2bbffb8ea929804da531c217bf43bbc4f67f726e9d0f1a9a391e32e35837b7edf624
data/CHANGELOG.md CHANGED
@@ -1,8 +1,9 @@
1
1
  ### Master (unreleased)
2
2
 
3
- #### New feature
3
+ ## v1.1.13 (May 20, 2021)
4
4
 
5
- #### Bug fix
5
+ #### New feature
6
+ - [91](https://github.com/Shopify/job-iteration/pull/91) - Add enumerator yielding batches as Active Record Relations
6
7
 
7
8
  ## v1.1.12 (April 19, 2021)
8
9
 
data/README.md CHANGED
@@ -77,7 +77,28 @@ class BatchesJob < ApplicationJob
77
77
 
78
78
  def each_iteration(batch_of_comments, product_id)
79
79
  # batch_of_comments will contain batches of 100 records
80
- Comment.where(id: batch_of_comments.map(&:id)).update_all(deleted: true)
80
+ batch_of_comments.each do |comment|
81
+ DeleteCommentJob.perform_later(comment)
82
+ end
83
+ end
84
+ end
85
+ ```
86
+
87
+ ```ruby
88
+ class BatchesAsRelationJob < ApplicationJob
89
+ include JobIteration::Iteration
90
+
91
+ def build_enumerator(product_id, cursor:)
92
+ enumerator_builder.active_record_on_batch_relations(
93
+ Product.find(product_id).comments,
94
+ cursor: cursor,
95
+ batch_size: 100,
96
+ )
97
+ end
98
+
99
+ def each_iteration(batch_of_comments, product_id)
100
+ # batch_of_comments will be a Comment::ActiveRecord_Relation
101
+ batch_of_comments.update_all(deleted: true)
81
102
  end
82
103
  end
83
104
  ```
@@ -0,0 +1,114 @@
1
+ # frozen_string_literal: true
2
+
3
+ module JobIteration
4
+ # Builds Batch Enumerator based on ActiveRecord Relation.
5
+ # @see EnumeratorBuilder
6
+ class ActiveRecordBatchEnumerator
7
+ include Enumerable
8
+
9
+ SQL_DATETIME_WITH_NSEC = "%Y-%m-%d %H:%M:%S.%N"
10
+
11
+ def initialize(relation, columns: nil, batch_size: 100, cursor: nil)
12
+ @batch_size = batch_size
13
+ @primary_key = "#{relation.table_name}.#{relation.primary_key}"
14
+ @columns = Array(columns&.map(&:to_s) || @primary_key)
15
+ @primary_key_index = @columns.index(@primary_key) || @columns.index(relation.primary_key)
16
+ @pluck_columns = if @primary_key_index
17
+ @columns
18
+ else
19
+ @columns.dup << @primary_key
20
+ end
21
+ @cursor = Array.wrap(cursor)
22
+ raise ArgumentError, "Must specify at least one column" if @columns.empty?
23
+ if relation.joins_values.present? && !@columns.all? { |column| column.to_s.include?(".") }
24
+ raise ArgumentError, "You need to specify fully-qualified columns if you join a table"
25
+ end
26
+
27
+ if relation.arel.orders.present? || relation.arel.taken.present?
28
+ raise ConditionNotSupportedError
29
+ end
30
+
31
+ @base_relation = relation.reorder(@columns.join(","))
32
+ end
33
+
34
+ def each
35
+ return to_enum { size } unless block_given?
36
+ while (relation = next_batch)
37
+ break if @cursor.nil?
38
+ yield relation, cursor_value
39
+ end
40
+ end
41
+
42
+ def size
43
+ @base_relation.count
44
+ end
45
+
46
+ private
47
+
48
+ def next_batch
49
+ relation = @base_relation.limit(@batch_size)
50
+ if conditions.any?
51
+ relation = relation.where(*conditions)
52
+ end
53
+
54
+ cursor_values, ids = relation.uncached do
55
+ pluck_columns(relation)
56
+ end
57
+
58
+ cursor = cursor_values.last
59
+ return unless cursor.present?
60
+ # The primary key was plucked, but original cursor did not include it, so we should remove it
61
+ cursor.pop unless @primary_key_index
62
+ @cursor = Array.wrap(cursor)
63
+
64
+ # Yields relations by selecting the primary keys of records in the batch.
65
+ # Post.where(published: nil) results in an enumerator of relations like: Post.where(published: nil).where(id: batch_of_ids)
66
+ @base_relation.where(@primary_key => ids)
67
+ end
68
+
69
+ def pluck_columns(relation)
70
+ if @pluck_columns.size == 1 # only the primary key
71
+ column_values = relation.pluck(*@pluck_columns)
72
+ return [column_values, column_values]
73
+ end
74
+
75
+ column_values = relation.pluck(*@pluck_columns)
76
+ primary_key_index = @primary_key_index || -1
77
+ primary_key_values = column_values.map { |values| values[primary_key_index] }
78
+
79
+ serialize_column_values!(column_values)
80
+ [column_values, primary_key_values]
81
+ end
82
+
83
+ def cursor_value
84
+ return @cursor.first if @cursor.size == 1
85
+ @cursor
86
+ end
87
+
88
+ def conditions
89
+ column_index = @cursor.size - 1
90
+ column = @columns[column_index]
91
+ where_clause = if @columns.size == @cursor.size
92
+ "#{column} > ?"
93
+ else
94
+ "#{column} >= ?"
95
+ end
96
+ while column_index > 0
97
+ column_index -= 1
98
+ column = @columns[column_index]
99
+ where_clause = "#{column} > ? OR (#{column} = ? AND (#{where_clause}))"
100
+ end
101
+ ret = @cursor.reduce([where_clause]) { |params, value| params << value << value }
102
+ ret.pop
103
+ ret
104
+ end
105
+
106
+ def serialize_column_values!(column_values)
107
+ column_values.map! { |values| values.map! { |value| column_value(value) } }
108
+ end
109
+
110
+ def column_value(value)
111
+ value.is_a?(Time) ? value.strftime(SQL_DATETIME_WITH_NSEC) : value
112
+ end
113
+ end
114
+ end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ require_relative "./active_record_batch_enumerator"
2
3
  require_relative "./active_record_enumerator"
3
4
  require_relative "./csv_enumerator"
4
5
  require_relative "./throttle_enumerator"
@@ -86,6 +87,11 @@ module JobIteration
86
87
  # WHERE (created_at > '$LAST_CREATED_AT_CURSOR'
87
88
  # OR (created_at = '$LAST_CREATED_AT_CURSOR' AND (id > '$LAST_ID_CURSOR')))
88
89
  # ORDER BY created_at, id LIMIT 100
90
+ #
91
+ # As a result of this query pattern, if the values in these columns change for the records in scope during
92
+ # iteration, they may be skipped or yielded multiple times depending on the nature of the update and the
93
+ # cursor's value. If the value gets updated to a greater value than the cursor's value, it will get yielded
94
+ # again. Similarly, if the value gets updated to a lesser value than the cursor's value, it will get skipped.
89
95
  def build_active_record_enumerator_on_records(scope, cursor:, **args)
90
96
  enum = build_active_record_enumerator(
91
97
  scope,
@@ -95,7 +101,7 @@ module JobIteration
95
101
  wrap(self, enum)
96
102
  end
97
103
 
98
- # Builds Enumerator from Active Record Relation and enumerates on batches.
104
+ # Builds Enumerator from Active Record Relation and enumerates on batches of records.
99
105
  # Each Enumerator tick moves the cursor +batch_size+ rows forward.
100
106
  #
101
107
  # +batch_size:+ sets how many records will be fetched in one batch. Defaults to 100.
@@ -110,6 +116,16 @@ module JobIteration
110
116
  wrap(self, enum)
111
117
  end
112
118
 
119
+ # Builds Enumerator from Active Record Relation and enumerates on batches, yielding Active Record Relations.
120
+ # See documentation for #build_active_record_enumerator_on_batches.
121
+ def build_active_record_enumerator_on_batch_relations(scope, cursor:, **args)
122
+ JobIteration::ActiveRecordBatchEnumerator.new(
123
+ scope,
124
+ cursor: cursor,
125
+ **args
126
+ ).each
127
+ end
128
+
113
129
  def build_throttle_enumerator(enum, throttle_on:, backoff:)
114
130
  JobIteration::ThrottleEnumerator.new(
115
131
  enum,
@@ -124,6 +140,7 @@ module JobIteration
124
140
  alias_method :array, :build_array_enumerator
125
141
  alias_method :active_record_on_records, :build_active_record_enumerator_on_records
126
142
  alias_method :active_record_on_batches, :build_active_record_enumerator_on_batches
143
+ alias_method :active_record_on_batch_relations, :build_active_record_enumerator_on_batch_relations
127
144
  alias_method :throttle, :build_throttle_enumerator
128
145
 
129
146
  private
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JobIteration
4
- VERSION = "1.1.12"
4
+ VERSION = "1.1.13"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: job-iteration
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.12
4
+ version: 1.1.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shopify
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-04-20 00:00:00.000000000 Z
11
+ date: 2021-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -66,6 +66,7 @@ files:
66
66
  - guides/throttling.md
67
67
  - job-iteration.gemspec
68
68
  - lib/job-iteration.rb
69
+ - lib/job-iteration/active_record_batch_enumerator.rb
69
70
  - lib/job-iteration/active_record_cursor.rb
70
71
  - lib/job-iteration/active_record_enumerator.rb
71
72
  - lib/job-iteration/csv_enumerator.rb
@@ -98,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
98
99
  - !ruby/object:Gem::Version
99
100
  version: '0'
100
101
  requirements: []
101
- rubygems_version: 3.0.3
102
+ rubygems_version: 3.2.17
102
103
  signing_key:
103
104
  specification_version: 4
104
105
  summary: Makes your background jobs interruptible and resumable.