job-iteration 1.1.12 → 1.1.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2109f33c06a85e1b336876ea4c0aafe2dc0ea7f17fbdda0d4bba06177f940113
4
- data.tar.gz: 3ce3b8017f1dfaac8bb3c83f8b846c4068ed09a0968f937bb1bd3e31d9d1341a
3
+ metadata.gz: c1707731e6bf03617bdbaa60bc2ddeb9652f34534a8fe0d69b26e233c790d469
4
+ data.tar.gz: a981f1230a7784e4e6673c0d2e4c0642f4a1d3c48baeb3039ca6e41daced9107
5
5
  SHA512:
6
- metadata.gz: d22611524765adb75a6d6cd457d2c1e0e7169621b3c9a2fd4f8fbcc4d68f7a79cf89764852a61f11bdd586ac60c1bb9d6c76f34426c5a1eb25299bd8b6ec77b7
7
- data.tar.gz: 952569f76f1d9b739d4219bfb3df0878ef1ec1c3784bba985f2246a956a581c51723e113ca10ac9eec3f4c2ae61c6acaabd4d461aa321c1af989d1921293ef34
6
+ metadata.gz: db396697d25d4b0450b69d2a58d68e1e247a074f02c9a5887746184789f2aca2569e345ef0d88fb5e142fcaa1c0586169ef91b7bc0ffe2ef35796f544e0cafa1
7
+ data.tar.gz: 757b0c074830cbe97d49fd6e84c59ade45c4b5ecdc57b52cf17ea5539e9f2bbffb8ea929804da531c217bf43bbc4f67f726e9d0f1a9a391e32e35837b7edf624
data/CHANGELOG.md CHANGED
@@ -1,8 +1,9 @@
1
1
  ### Master (unreleased)
2
2
 
3
- #### New feature
3
+ ## v1.1.13 (May 20, 2021)
4
4
 
5
- #### Bug fix
5
+ #### New feature
6
+ - [91](https://github.com/Shopify/job-iteration/pull/91) - Add enumerator yielding batches as Active Record Relations
6
7
 
7
8
  ## v1.1.12 (April 19, 2021)
8
9
 
data/README.md CHANGED
@@ -77,7 +77,28 @@ class BatchesJob < ApplicationJob
77
77
 
78
78
  def each_iteration(batch_of_comments, product_id)
79
79
  # batch_of_comments will contain batches of 100 records
80
- Comment.where(id: batch_of_comments.map(&:id)).update_all(deleted: true)
80
+ batch_of_comments.each do |comment|
81
+ DeleteCommentJob.perform_later(comment)
82
+ end
83
+ end
84
+ end
85
+ ```
86
+
87
+ ```ruby
88
+ class BatchesAsRelationJob < ApplicationJob
89
+ include JobIteration::Iteration
90
+
91
+ def build_enumerator(product_id, cursor:)
92
+ enumerator_builder.active_record_on_batch_relations(
93
+ Product.find(product_id).comments,
94
+ cursor: cursor,
95
+ batch_size: 100,
96
+ )
97
+ end
98
+
99
+ def each_iteration(batch_of_comments, product_id)
100
+ # batch_of_comments will be a Comment::ActiveRecord_Relation
101
+ batch_of_comments.update_all(deleted: true)
81
102
  end
82
103
  end
83
104
  ```
@@ -0,0 +1,114 @@
1
+ # frozen_string_literal: true
2
+
3
+ module JobIteration
4
+ # Builds Batch Enumerator based on ActiveRecord Relation.
5
+ # @see EnumeratorBuilder
6
+ class ActiveRecordBatchEnumerator
7
+ include Enumerable
8
+
9
+ SQL_DATETIME_WITH_NSEC = "%Y-%m-%d %H:%M:%S.%N"
10
+
11
+ def initialize(relation, columns: nil, batch_size: 100, cursor: nil)
12
+ @batch_size = batch_size
13
+ @primary_key = "#{relation.table_name}.#{relation.primary_key}"
14
+ @columns = Array(columns&.map(&:to_s) || @primary_key)
15
+ @primary_key_index = @columns.index(@primary_key) || @columns.index(relation.primary_key)
16
+ @pluck_columns = if @primary_key_index
17
+ @columns
18
+ else
19
+ @columns.dup << @primary_key
20
+ end
21
+ @cursor = Array.wrap(cursor)
22
+ raise ArgumentError, "Must specify at least one column" if @columns.empty?
23
+ if relation.joins_values.present? && !@columns.all? { |column| column.to_s.include?(".") }
24
+ raise ArgumentError, "You need to specify fully-qualified columns if you join a table"
25
+ end
26
+
27
+ if relation.arel.orders.present? || relation.arel.taken.present?
28
+ raise ConditionNotSupportedError
29
+ end
30
+
31
+ @base_relation = relation.reorder(@columns.join(","))
32
+ end
33
+
34
+ def each
35
+ return to_enum { size } unless block_given?
36
+ while (relation = next_batch)
37
+ break if @cursor.nil?
38
+ yield relation, cursor_value
39
+ end
40
+ end
41
+
42
+ def size
43
+ @base_relation.count
44
+ end
45
+
46
+ private
47
+
48
+ def next_batch
49
+ relation = @base_relation.limit(@batch_size)
50
+ if conditions.any?
51
+ relation = relation.where(*conditions)
52
+ end
53
+
54
+ cursor_values, ids = relation.uncached do
55
+ pluck_columns(relation)
56
+ end
57
+
58
+ cursor = cursor_values.last
59
+ return unless cursor.present?
60
+ # The primary key was plucked, but original cursor did not include it, so we should remove it
61
+ cursor.pop unless @primary_key_index
62
+ @cursor = Array.wrap(cursor)
63
+
64
+ # Yields relations by selecting the primary keys of records in the batch.
65
+ # Post.where(published: nil) results in an enumerator of relations like: Post.where(id: batch_of_ids)
66
+ @base_relation.where(@primary_key => ids)
67
+ end
68
+
69
+ def pluck_columns(relation)
70
+ if @pluck_columns.size == 1 # only the primary key
71
+ column_values = relation.pluck(*@pluck_columns)
72
+ return [column_values, column_values]
73
+ end
74
+
75
+ column_values = relation.pluck(*@pluck_columns)
76
+ primary_key_index = @primary_key_index || -1
77
+ primary_key_values = column_values.map { |values| values[primary_key_index] }
78
+
79
+ serialize_column_values!(column_values)
80
+ [column_values, primary_key_values]
81
+ end
82
+
83
+ def cursor_value
84
+ return @cursor.first if @cursor.size == 1
85
+ @cursor
86
+ end
87
+
88
+ def conditions
89
+ column_index = @cursor.size - 1
90
+ column = @columns[column_index]
91
+ where_clause = if @columns.size == @cursor.size
92
+ "#{column} > ?"
93
+ else
94
+ "#{column} >= ?"
95
+ end
96
+ while column_index > 0
97
+ column_index -= 1
98
+ column = @columns[column_index]
99
+ where_clause = "#{column} > ? OR (#{column} = ? AND (#{where_clause}))"
100
+ end
101
+ ret = @cursor.reduce([where_clause]) { |params, value| params << value << value }
102
+ ret.pop
103
+ ret
104
+ end
105
+
106
+ def serialize_column_values!(column_values)
107
+ column_values.map! { |values| values.map! { |value| column_value(value) } }
108
+ end
109
+
110
+ def column_value(value)
111
+ value.is_a?(Time) ? value.strftime(SQL_DATETIME_WITH_NSEC) : value
112
+ end
113
+ end
114
+ end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ require_relative "./active_record_batch_enumerator"
2
3
  require_relative "./active_record_enumerator"
3
4
  require_relative "./csv_enumerator"
4
5
  require_relative "./throttle_enumerator"
@@ -86,6 +87,11 @@ module JobIteration
86
87
  # WHERE (created_at > '$LAST_CREATED_AT_CURSOR'
87
88
  # OR (created_at = '$LAST_CREATED_AT_CURSOR' AND (id > '$LAST_ID_CURSOR')))
88
89
  # ORDER BY created_at, id LIMIT 100
90
+ #
91
+ # As a result of this query pattern, if the values in these columns change for the records in scope during
92
+ # iteration, they may be skipped or yielded multiple times depending on the nature of the update and the
93
+ # cursor's value. If the value gets updated to a greater value than the cursor's value, it will get yielded
94
+ # again. Similarly, if the value gets updated to a lesser value than the cursor's value, it will get skipped.
89
95
  def build_active_record_enumerator_on_records(scope, cursor:, **args)
90
96
  enum = build_active_record_enumerator(
91
97
  scope,
@@ -95,7 +101,7 @@ module JobIteration
95
101
  wrap(self, enum)
96
102
  end
97
103
 
98
- # Builds Enumerator from Active Record Relation and enumerates on batches.
104
+ # Builds Enumerator from Active Record Relation and enumerates on batches of records.
99
105
  # Each Enumerator tick moves the cursor +batch_size+ rows forward.
100
106
  #
101
107
  # +batch_size:+ sets how many records will be fetched in one batch. Defaults to 100.
@@ -110,6 +116,16 @@ module JobIteration
110
116
  wrap(self, enum)
111
117
  end
112
118
 
119
+ # Builds Enumerator from Active Record Relation and enumerates on batches, yielding Active Record Relations.
120
+ # See documentation for #build_active_record_enumerator_on_batches.
121
+ def build_active_record_enumerator_on_batch_relations(scope, cursor:, **args)
122
+ JobIteration::ActiveRecordBatchEnumerator.new(
123
+ scope,
124
+ cursor: cursor,
125
+ **args
126
+ ).each
127
+ end
128
+
113
129
  def build_throttle_enumerator(enum, throttle_on:, backoff:)
114
130
  JobIteration::ThrottleEnumerator.new(
115
131
  enum,
@@ -124,6 +140,7 @@ module JobIteration
124
140
  alias_method :array, :build_array_enumerator
125
141
  alias_method :active_record_on_records, :build_active_record_enumerator_on_records
126
142
  alias_method :active_record_on_batches, :build_active_record_enumerator_on_batches
143
+ alias_method :active_record_on_batch_relations, :build_active_record_enumerator_on_batch_relations
127
144
  alias_method :throttle, :build_throttle_enumerator
128
145
 
129
146
  private
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JobIteration
4
- VERSION = "1.1.12"
4
+ VERSION = "1.1.13"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: job-iteration
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.12
4
+ version: 1.1.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shopify
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-04-20 00:00:00.000000000 Z
11
+ date: 2021-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -66,6 +66,7 @@ files:
66
66
  - guides/throttling.md
67
67
  - job-iteration.gemspec
68
68
  - lib/job-iteration.rb
69
+ - lib/job-iteration/active_record_batch_enumerator.rb
69
70
  - lib/job-iteration/active_record_cursor.rb
70
71
  - lib/job-iteration/active_record_enumerator.rb
71
72
  - lib/job-iteration/csv_enumerator.rb
@@ -98,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
98
99
  - !ruby/object:Gem::Version
99
100
  version: '0'
100
101
  requirements: []
101
- rubygems_version: 3.0.3
102
+ rubygems_version: 3.2.17
102
103
  signing_key:
103
104
  specification_version: 4
104
105
  summary: Makes your background jobs interruptible and resumable.