each_batch 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/Appraisals +17 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +235 -0
- data/Rakefile +6 -0
- data/each_batch.gemspec +36 -0
- data/lib/each_batch/active_record/base.rb +9 -0
- data/lib/each_batch/active_record/relation.rb +34 -0
- data/lib/each_batch/active_record.rb +1 -0
- data/lib/each_batch/batch_enumerator.rb +108 -0
- data/lib/each_batch/plucked_batch_enumerator.rb +68 -0
- data/lib/each_batch/version.rb +3 -0
- data/lib/each_batch.rb +11 -0
- metadata +191 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: bc3df7b193e7c5c46b83d7f3617a65b256b20f2cd1a048b8d00eb2e77758db39
|
4
|
+
data.tar.gz: f071dd5a74821d7b0b9d083a8878c5bd7c1b608c093b09eb9bea07448a893827
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d3cef5618641b0311ce6df5511fbe72d4088b9de6ece536f96d6da65d8631a451258ea093fcda54d8adf47db1bcefe5acce41542ec4b2fbd9552e84d81beeca4
|
7
|
+
data.tar.gz: c224898ddf3f46655b44340ded2165539bb518becdfedb22b475e68a338170c80474449c9e6a7205db83363d8715a1570bf3e0f3e73b365dd918f2867dc7b8e8
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Appraisals
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# One appraisal per supported ActiveRecord minor series. The hash maps the
# appraisal-name suffix to the pessimistic version constraint used for it.
{
  "52" => "5.2.0",
  "60" => "6.0.0",
  "61" => "6.1.0",
  "70" => "7.0.0"
}.each do |suffix, version|
  appraise "active_record_#{suffix}" do
    gem "activerecord", "~> #{version}", require: "active_record"
  end
end
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2023 Odysseas Doumas
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,235 @@
|
|
1
|
+
# EachBatch
|
2
|
+
|
3
|
+
Improved batch processing in Rails.
|
4
|
+
|
5
|
+
This gem provides a new method called `each_batch` to ActiveRecord relations, similar to the built-in `in_batches`.
|
6
|
+
|
7
|
+
There are three main issues this gem attempts to tackle:
|
8
|
+
|
9
|
+
- No custom ordering. Rails' default and only behaviour is to order the results by the primary key.
|
10
|
+
- No proper use of indexes, because of the inability to set custom ordering.
|
11
|
+
- No efficient pluck solution in batches.
|
12
|
+
|
13
|
+
## Example
|
14
|
+
|
15
|
+
Suppose we have a huge products table:
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
ActiveRecord::Schema.define(version: 1) do
|
19
|
+
create_table :products do |t|
|
20
|
+
t.datetime :enabled_at, index: true
|
21
|
+
t.integer :sales
|
22
|
+
|
23
|
+
t.timestamps
|
24
|
+
end
|
25
|
+
end
|
26
|
+
```
|
27
|
+
|
28
|
+
And suppose we want to process a subset of the data based on the enabled_at value: In Rails, one can do this:
|
29
|
+
|
30
|
+
```ruby
|
31
|
+
Product.
|
32
|
+
where(enabled_at: a_date_range).
|
33
|
+
find_each { |product| product.do_something }
|
34
|
+
```
|
35
|
+
|
36
|
+
This would generate SQL similar to this:
|
37
|
+
|
38
|
+
```SQL
|
39
|
+
SELECT `products`.*
|
40
|
+
FROM `products`
|
41
|
+
WHERE `products`.`enabled_at` BETWEEN '2023-05-28 00:00:00' AND '2023-06-04 23:59:59'
|
42
|
+
ORDER BY `products`.`id` ASC LIMIT 1000
|
43
|
+
```
|
44
|
+
|
45
|
+
And for subsequent batches something like this:
|
46
|
+
|
47
|
+
```SQL
|
48
|
+
SELECT `products`.*
|
49
|
+
FROM `products`
|
50
|
+
WHERE `products`.`enabled_at` BETWEEN '2023-05-28 00:00:00' AND '2023-06-04 23:59:59'
|
51
|
+
AND `products`.`id` > 123456
|
52
|
+
ORDER BY `products`.`id` ASC LIMIT 1000
|
53
|
+
```
|
54
|
+
|
55
|
+
The `ORDER BY` clause here is what can kill performance: because the results are ordered by `id` rather than by `enabled_at`, the index on `enabled_at` cannot be utilized properly.
|
56
|
+
|
57
|
+
With this gem, one can write this instead:
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
Product.
|
61
|
+
where(enabled_at: a_date_range).
|
62
|
+
each_batch(keys: [:enabled_at, :id]).
|
63
|
+
each_record { |product| product.do_something }
|
64
|
+
```
|
65
|
+
Which would generate something like the following:
|
66
|
+
|
67
|
+
```SQL
|
68
|
+
SELECT `products`.*
|
69
|
+
FROM `products`
|
70
|
+
WHERE `products`.`enabled_at` BETWEEN '2023-05-28 00:00:00' AND '2023-06-04 23:59:59'
|
71
|
+
ORDER BY `products`.`enabled_at` ASC, `products`.`id` ASC LIMIT 1000
|
72
|
+
```
|
73
|
+
|
74
|
+
This order matches the order of the index, so the index will be utilized properly.
|
75
|
+
|
76
|
+
For subsequent batches:
|
77
|
+
|
78
|
+
```SQL
|
79
|
+
SELECT `products`.*
|
80
|
+
FROM `products`
|
81
|
+
WHERE `products`.`enabled_at` BETWEEN '2023-05-28 00:00:00' AND '2023-06-04 23:59:59'
|
82
|
+
AND (`products`.`enabled_at`, `products`.`id`) > ('2023-05-29 00:00:00', 123456)
|
83
|
+
ORDER BY `products`.`enabled_at` ASC, `products`.`id` ASC LIMIT 1000
|
84
|
+
```
|
85
|
+
|
86
|
+
which again, utilizes the index properly.
|
87
|
+
|
88
|
+
Note: the generated query is not exactly like the above, see [where_row](https://github.com/odydoum/where_row) for more info.
|
89
|
+
|
90
|
+
## Installation
|
91
|
+
|
92
|
+
Add this line to your application's Gemfile:
|
93
|
+
|
94
|
+
```ruby
|
95
|
+
gem 'each_batch'
|
96
|
+
```
|
97
|
+
|
98
|
+
And then execute:
|
99
|
+
|
100
|
+
$ bundle
|
101
|
+
|
102
|
+
Or install it yourself as:
|
103
|
+
|
104
|
+
$ gem install each_batch
|
105
|
+
|
106
|
+
## Usage
|
107
|
+
|
108
|
+
### Iterating in batches
|
109
|
+
|
110
|
+
To simply iterate a relation in batches:
|
111
|
+
|
112
|
+
```ruby
|
113
|
+
Product.each_batch do |batch|
|
114
|
+
# do something useful
|
115
|
+
end
|
116
|
+
```
|
117
|
+
|
118
|
+
By default the batch size is 1000. To override, use the `of` option:
|
119
|
+
|
120
|
+
```ruby
|
121
|
+
Product.each_batch(of: 500) do |batch|
|
122
|
+
# do something useful
|
123
|
+
end
|
124
|
+
```
|
125
|
+
|
126
|
+
Naturally, any relation can be batched, like so:
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
Product.where(enabled_at: a_date_range).each_batch(of: 500) do |batch|
|
130
|
+
# do something useful
|
131
|
+
end
|
132
|
+
```
|
133
|
+
|
134
|
+
Assuming that `products` is a huge table with an index on `enabled_at`, it would make more sense to order the results by this date. And in order to have a deterministic order (many products could be enabled at the same time), ordering by `enabled_at` and `id` could prove optimal. To do that, use the `keys` option:
|
135
|
+
|
136
|
+
```ruby
|
137
|
+
Product.where(enabled_at: a_date_range).each_batch(of: 500, keys: [:enabled_at, :id]) do |batch|
|
138
|
+
# do something useful
|
139
|
+
end
|
140
|
+
```
|
141
|
+
|
142
|
+
To change the order, use the `order` option (ascending by default):
|
143
|
+
|
144
|
+
```ruby
|
145
|
+
Product.where(enabled_at: a_date_range).each_batch(of: 500, order: :desc, keys: [:enabled_at, :id]) do |batch|
|
146
|
+
# do something useful
|
147
|
+
end
|
148
|
+
```
|
149
|
+
|
150
|
+
To access each record instead of the whole relation (this preloads the relation), use `each_record`:
|
151
|
+
|
152
|
+
```ruby
|
153
|
+
Product.where(enabled_at: a_date_range).each_batch(of: 500, keys: [:enabled_at, :id]).each_record do |record|
|
154
|
+
# do something useful
|
155
|
+
end
|
156
|
+
```
|
157
|
+
|
158
|
+
### Plucking in batches
|
159
|
+
|
160
|
+
To simply iterate over pluck results in batches, use the `pluck` method:
|
161
|
+
|
162
|
+
```ruby
|
163
|
+
Product.where(enabled_at: a_date_range).each_batch(of: 500, keys: [:enabled_at, :id]).pluck(:id, :enabled_at) do |pluck_batch|
|
164
|
+
# do something useful
|
165
|
+
end
|
166
|
+
```
|
167
|
+
|
168
|
+
To iterate over each row instead, use the `each_row` method:
|
169
|
+
|
170
|
+
```ruby
|
171
|
+
Product.where(enabled_at: a_date_range).each_batch(of: 500, keys: [:enabled_at, :id]).pluck(:id, :enabled_at).each_row do |(id, enabled_at)|
|
172
|
+
# do something useful
|
173
|
+
end
|
174
|
+
```
|
175
|
+
|
176
|
+
### Enumerator usage
|
177
|
+
|
178
|
+
Since these methods return an enumerator, they can be chained with regular enumerator methods:
|
179
|
+
|
180
|
+
```ruby
|
181
|
+
Product.
|
182
|
+
where(enabled_at: a_date_range).
|
183
|
+
each_batch(of: 500, keys: [:enabled_at, :id]).
|
184
|
+
pluck(:id, :enabled_at, :sales).
|
185
|
+
each_row.
|
186
|
+
sum(&:first)
|
187
|
+
```
|
188
|
+
|
189
|
+
## Caveats
|
190
|
+
|
191
|
+
### Empty results
|
192
|
+
|
193
|
+
By default, `each_batch` does not preload any records; it just builds the necessary queries and yields the relation. This means that it cannot know in advance
|
194
|
+
whether there are any records for the specified conditions. In practice this means that it will **always** yield a relation, even if it's empty.
|
195
|
+
|
196
|
+
This also applies if the result set is a multiple of the batch size. There is no way to deduce that no more results are present, so it will return an empty relation.
|
197
|
+
|
198
|
+
`each_record`, `pluck` and `each_row` do not suffer from this since they preload the records/values necessary to deduce that.
|
199
|
+
|
200
|
+
### Missing keys for select or pluck
|
201
|
+
|
202
|
+
For the algorithm to work, we need the last values for each of the keys specified. This means that any custom select clause (or pluck column list) must include those columns:
|
203
|
+
|
204
|
+
```ruby
|
205
|
+
Product.each_batch(of: 500, keys: [:enabled_at, :id]) # ok
|
206
|
+
Product.select(:id, :sales).each_batch(of: 500, keys: [:enabled_at, :id]) # ArgumentError
|
207
|
+
Product.select(:id, :enabled_at, :sales).each_batch(of: 500, keys: [:enabled_at, :id]) # Ok
|
208
|
+
|
209
|
+
Product.each_batch(of: 500, keys: [:enabled_at, :id]).pluck # Ok, plucks everything
|
210
|
+
Product.each_batch(of: 500, keys: [:enabled_at, :id]).pluck(:id, :sales) # ArgumentError
|
211
|
+
Product.each_batch(of: 500, keys: [:enabled_at, :id]).pluck(:id, :enabled_at) # Ok
|
212
|
+
Product.each_batch(of: 500, keys: [:enabled_at, :id]).pluck(:enabled_at, :id) # Ok
|
213
|
+
```
|
214
|
+
### Can't omit primary key
|
215
|
+
|
216
|
+
To make this method safer, it is not possible to specify an ordering that doesn't have the primary key as the last order key. This is in order to always guarantee deterministic ordering. This could be relaxed, possibly with an extra option `unsafe`, to make it explicit.
|
217
|
+
|
218
|
+
### Race conditions
|
219
|
+
|
220
|
+
This is inherent to batch processing in general.
|
221
|
+
|
222
|
+
## Alternatives
|
223
|
+
|
224
|
+
[pluck_in_batches](https://github.com/fatkodima/pluck_in_batches)
|
225
|
+
[each_batched](https://github.com/dburry/each_batched)
|
226
|
+
|
227
|
+
I am probably missing a lot here..
|
228
|
+
|
229
|
+
## Contributing
|
230
|
+
|
231
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/odydoum/each_batch.
|
232
|
+
|
233
|
+
## License
|
234
|
+
|
235
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/each_batch.gemspec
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
|
2
|
+
# Make the gem's own lib/ directory loadable so the version constant can be
# read while the specification is being evaluated.
gem_root = File.expand_path(File.dirname(__FILE__))
lib_dir = File.join(gem_root, "lib")
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "each_batch/version"

Gem::Specification.new do |spec|
  spec.name          = "each_batch"
  spec.version       = EachBatch::VERSION
  spec.authors       = ["Odysseas Doumas"]
  spec.email         = ["odydoum@gmail.com"]

  spec.summary       = 'Improved batch processing in Rails'
  spec.description   = 'Improved batch processing in Rails'
  spec.homepage      = "https://github.com/odydoum/each_batch"
  spec.license       = "MIT"
  spec.required_ruby_version = ">= 2.5.0"

  # Package every file tracked by git (NUL-separated listing), leaving out
  # anything under the test, spec or features directories.
  spec.files = Dir.chdir(gem_root) do
    tracked_files = `git ls-files -z`.split("\x0")
    tracked_files.grep_v(%r{^(test|spec|features)/})
  end
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_runtime_dependency "activerecord", ">= 5.2", "< 7.1"
  spec.add_dependency "where_row", "~> 0.1.3"

  # Development-only dependencies.
  spec.add_development_dependency "bundler", "~> 1.17"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "appraisal"
  spec.add_development_dependency "sqlite3"
  spec.add_development_dependency "yard"
  spec.add_development_dependency "gem-release"
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "each_batch/batch_enumerator"
|
3
|
+
|
4
|
+
module EachBatch
  module ActiveRecord
    module Relation
      #
      # Iterate over the receiver in batches, optionally choosing the keys
      # that define the ordering and the per-batch offsets.
      #
      # @param of [Integer] the batch size (defaults to 1000)
      # @param load [Boolean] whether the records of each batch should be loaded (defaults to false)
      # @param order [Symbol, String] the processing order (defaults to :asc)
      # @param keys [Array<String, Symbol>] the keys used for the ordering
      #   (defaults to the primary key only)
      #
      # @return [EachBatch::BatchEnumerator] the batch enumerator when no block
      #   is given; otherwise whatever the enumerator's `each` returns
      #
      # @yieldparam [ActiveRecord::Relation] x the relation that corresponds to the batch
      def each_batch(of: 1000, load: false, order: :asc, keys: [primary_key], &block)
        enumerator = ::EachBatch::BatchEnumerator.new(self, of: of, load: load, order: order, keys: keys)

        block_given? ? enumerator.each(&block) : enumerator
      end
    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
# Namespace for this gem's ActiveRecord extensions (the Relation and Base
# mixins live under it). Declared in nested form so that loading this file
# does not depend on any other constant being defined first.
module EachBatch
  module ActiveRecord; end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "where_row"
|
3
|
+
require "each_batch/plucked_batch_enumerator"
|
4
|
+
|
5
|
+
module EachBatch
  #
  # Enumerates a relation in batches ordered by a list of keys. Each batch
  # after the first is restricted to the rows that come after (or before, for
  # descending order) the last key values of the previous batch.
  #
  class BatchEnumerator
    include Enumerable

    DEFAULT_BATCH_SIZE = 1000

    attr_reader :relation, :order, :keys

    #
    # @param relation the relation to batch; must respond to `primary_key`
    #   and `select_values`
    # @param [Integer] of The batch size
    # @param [Boolean] load Whether each batch should be loaded before it is yielded
    # @param [Symbol, String] order 'asc' or 'desc' (case-insensitive)
    # @param [Array<String, Symbol>, nil] keys The ordering keys; defaults to
    #   the primary key only
    #
    # @raise [ArgumentError] if the batch size is not a positive integer, the
    #   order is invalid, the primary key is not the last key, or a custom
    #   select clause does not include every key
    #
    def initialize(relation, of: DEFAULT_BATCH_SIZE, load: false, order: :asc, keys: nil)
      raise ArgumentError, 'Batch size must be a positive integer' if of != of.to_i || of <= 0

      order = order.to_s
      raise ArgumentError, 'Invalid order' unless order.casecmp('asc').zero? || order.casecmp('desc').zero?

      pk_name = relation.primary_key.to_s
      keys = keys&.map(&:to_s) || [pk_name]

      # TODO: This is for safety, since there is no easy way to determine whether the order
      # is deterministic or not. PK guarantees that.
      raise ArgumentError, 'Primary key must be the last key' if keys.last != pk_name

      # With a custom select clause, every key must be selected, otherwise the
      # offsets for the next batch cannot be read from the loaded records.
      selected = relation.select_values.map(&:to_s)
      if !selected.empty? && !(keys - selected).empty?
        raise ArgumentError, 'Not all keys are included in the custom select clause'
      end

      @relation = relation
      @of = of
      @load = load
      @order = order
      @keys = keys
    end

    # @return [Integer] The configured batch size
    def batch_size
      @of
    end

    #
    # Yield one relation per batch. Returns self when no block is given.
    #
    # @yieldparam [ActiveRecord::Relation] x The relation that corresponds to the batch
    #
    def each
      return self unless block_given?

      batch_relation = relation.reorder(keys.product([order]).to_h).limit(batch_size)
      batch_relation.skip_query_cache! # Retaining the results in the query cache would undermine the point of batching

      yielded_relation = batch_relation
      op = order.casecmp('desc').zero? ? :lt : :gt
      pk = relation.primary_key.to_sym

      loop do
        # consistent with rails load behavior: yield a relation scoped to the
        # primary keys of the batch, with the records already attached.
        if @load
          records = yielded_relation.records
          yielded_relation = relation.where(pk => records.map(&pk))
          yielded_relation.send(:load_records, records)
        end

        yield yielded_relation

        offsets =
          if @load || yielded_relation.loaded?
            # The records are in memory: a short batch means we are done, and
            # the offsets can be read from the last record.
            break if yielded_relation.length < batch_size

            yielded_relation.last.attributes_before_type_cast&.values_at(*keys)
          else
            # we need an additional query to fetch the last key set
            last_key_values = yielded_relation.offset(batch_size - 1).limit(1).pluck(*keys).first

            break if last_key_values.nil?

            # pluck returns a bare value when there is a single key
            Array.wrap(last_key_values)
          end

        yielded_relation = batch_relation.where_row(*keys).public_send(op, *offsets)
      end
    end

    #
    # Yield every record of every batch. Returns an enumerator when no block
    # is given.
    #
    def each_record(&block)
      return to_enum(:each_record) unless block_given?

      each { |yielded_relation| yielded_relation.to_a.each(&block) }
    end

    #
    # Pluck selected columns in batches. The batching is the one specified
    # on the { BatchEnumerator } instance.
    #
    # @param [Array<Symbol, String>] pluck_keys The keys of the columns to pluck.
    #
    # @return [EachBatch::PluckedBatchEnumerator] The batch enumerator
    #
    # @yieldparam [Array<Object>] x The array of the plucked values
    def pluck(*pluck_keys, &block)
      plucked_batch_enumerator = ::EachBatch::PluckedBatchEnumerator.new(
        relation,
        of: batch_size,
        order: order,
        keys: keys,
        pluck_keys: pluck_keys.map(&:to_s)
      )

      return plucked_batch_enumerator unless block_given?

      plucked_batch_enumerator.each(&block)
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "where_row"
|
3
|
+
|
4
|
+
module EachBatch
  #
  # Enumerates plucked column values of a relation in batches ordered by a
  # list of keys. The key values of the last row of a full batch are used as
  # the offsets for the next batch.
  #
  class PluckedBatchEnumerator
    include Enumerable

    attr_reader :relation, :order, :keys

    #
    # @param relation the relation to batch
    # @param [Integer] of The batch size
    # @param [String, Symbol] order 'asc' or 'desc' (case-insensitive)
    # @param [Array<String>] keys The ordering keys
    # @param [Array<String>] pluck_keys The columns to pluck; an empty list
    #   means every column of the relation's model
    #
    # @raise [ArgumentError] if the plucked columns do not include every key
    #
    def initialize(relation, of:, order:, keys:, pluck_keys:)
      # An empty pluck list means "pluck everything". Resolve it to the
      # explicit column list so the position of each key inside a plucked row
      # is known; otherwise the offsets cannot be extracted.
      pluck_keys = relation.klass.column_names if pluck_keys.empty?

      unless (keys - pluck_keys).empty?
        raise ArgumentError, 'Not all keys are included in the custom select clause for pluck'
      end

      @relation = relation
      @of = of
      @order = order
      @keys = keys
      @pluck_keys = pluck_keys
      # Position of every ordering key inside a plucked row.
      @key_indices = keys.map { |key| pluck_keys.index(key) }
    end

    # @return [Integer] The configured batch size
    def batch_size
      @of
    end

    #
    # Yield one array of plucked values per batch. Nothing is yielded for an
    # empty result. Returns self when no block is given.
    #
    # @yieldparam [Array<Object>] x The array of the plucked values
    #
    def each
      return self unless block_given?

      batch_relation = relation.reorder(keys.product([order]).to_h).limit(batch_size)
      batch_relation.skip_query_cache! # Retaining the results in the query cache would undermine the point of batching

      yielded_relation = batch_relation
      op = order.to_s.casecmp('desc').zero? ? :lt : :gt
      last_idx = batch_size - 1

      # With a single plucked column each result is a bare value, not a row.
      single_pluck_key = @pluck_keys.length == 1

      loop do
        results = yielded_relation.pluck(*@pluck_keys)

        break if results.empty?

        yield results

        # A batch shorter than the batch size has no element at last_idx,
        # which means there is nothing left to fetch.
        last_row = results[last_idx]

        break if last_row.nil?

        # grab the offsets from the plucked results
        offsets = single_pluck_key ? [last_row] : last_row.values_at(*@key_indices)

        yielded_relation = batch_relation.where_row(*keys).public_send(op, *offsets)
      end
    end

    #
    # Yield every plucked row of every batch. Returns an enumerator when no
    # block is given.
    #
    def each_row(&block)
      return to_enum(:each_row) unless block_given?

      each { |row_batch| row_batch.each(&block) }
    end
  end
end
|
data/lib/each_batch.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "each_batch/version"
|
3
|
+
require "each_batch/active_record/relation"
|
4
|
+
require "each_batch/active_record/base"
|
5
|
+
|
6
|
+
require "active_record"
|
7
|
+
|
8
|
+
module EachBatch; end
|
9
|
+
|
10
|
+
# Make `each_batch` available on every relation by prepending the mixin.
ActiveRecord::Relation.prepend ::EachBatch::ActiveRecord::Relation
|
11
|
+
# Add the class-level mixin to all models. Its contents are defined in
# each_batch/active_record/base.rb; presumably it exposes `each_batch` on
# model classes — TODO confirm against that file.
ActiveRecord::Base.extend ::EachBatch::ActiveRecord::Base
|
metadata
ADDED
@@ -0,0 +1,191 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: each_batch
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Odysseas Doumas
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-06-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activerecord
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '5.2'
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '7.1'
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '5.2'
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '7.1'
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: where_row
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 0.1.3
|
40
|
+
type: :runtime
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 0.1.3
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: bundler
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - "~>"
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '1.17'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - "~>"
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '1.17'
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: rake
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - "~>"
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '10.0'
|
68
|
+
type: :development
|
69
|
+
prerelease: false
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - "~>"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '10.0'
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: rspec
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - "~>"
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '3.0'
|
82
|
+
type: :development
|
83
|
+
prerelease: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - "~>"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '3.0'
|
89
|
+
- !ruby/object:Gem::Dependency
|
90
|
+
name: appraisal
|
91
|
+
requirement: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
type: :development
|
97
|
+
prerelease: false
|
98
|
+
version_requirements: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
103
|
+
- !ruby/object:Gem::Dependency
|
104
|
+
name: sqlite3
|
105
|
+
requirement: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
type: :development
|
111
|
+
prerelease: false
|
112
|
+
version_requirements: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - ">="
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '0'
|
117
|
+
- !ruby/object:Gem::Dependency
|
118
|
+
name: yard
|
119
|
+
requirement: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0'
|
124
|
+
type: :development
|
125
|
+
prerelease: false
|
126
|
+
version_requirements: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ">="
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
131
|
+
- !ruby/object:Gem::Dependency
|
132
|
+
name: gem-release
|
133
|
+
requirement: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - ">="
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: '0'
|
138
|
+
type: :development
|
139
|
+
prerelease: false
|
140
|
+
version_requirements: !ruby/object:Gem::Requirement
|
141
|
+
requirements:
|
142
|
+
- - ">="
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: '0'
|
145
|
+
description: Improved batch processing in Rails
|
146
|
+
email:
|
147
|
+
- odydoum@gmail.com
|
148
|
+
executables: []
|
149
|
+
extensions: []
|
150
|
+
extra_rdoc_files: []
|
151
|
+
files:
|
152
|
+
- ".gitignore"
|
153
|
+
- ".rspec"
|
154
|
+
- ".travis.yml"
|
155
|
+
- Appraisals
|
156
|
+
- Gemfile
|
157
|
+
- LICENSE.txt
|
158
|
+
- README.md
|
159
|
+
- Rakefile
|
160
|
+
- each_batch.gemspec
|
161
|
+
- lib/each_batch.rb
|
162
|
+
- lib/each_batch/active_record.rb
|
163
|
+
- lib/each_batch/active_record/base.rb
|
164
|
+
- lib/each_batch/active_record/relation.rb
|
165
|
+
- lib/each_batch/batch_enumerator.rb
|
166
|
+
- lib/each_batch/plucked_batch_enumerator.rb
|
167
|
+
- lib/each_batch/version.rb
|
168
|
+
homepage: https://github.com/odydoum/each_batch
|
169
|
+
licenses:
|
170
|
+
- MIT
|
171
|
+
metadata: {}
|
172
|
+
post_install_message:
|
173
|
+
rdoc_options: []
|
174
|
+
require_paths:
|
175
|
+
- lib
|
176
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - ">="
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: 2.5.0
|
181
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
182
|
+
requirements:
|
183
|
+
- - ">="
|
184
|
+
- !ruby/object:Gem::Version
|
185
|
+
version: '0'
|
186
|
+
requirements: []
|
187
|
+
rubygems_version: 3.1.6
|
188
|
+
signing_key:
|
189
|
+
specification_version: 4
|
190
|
+
summary: Improved batch processing in Rails
|
191
|
+
test_files: []
|