occams-record 1.3.0 → 1.4.0.pre.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/occams-record/batches/cursor_helpers.rb +59 -0
- data/lib/occams-record/batches/offset_limit/raw_query.rb +53 -0
- data/lib/occams-record/batches/offset_limit/scoped.rb +72 -0
- data/lib/occams-record/cursor.rb +237 -0
- data/lib/occams-record/query.rb +71 -3
- data/lib/occams-record/raw_query.rb +72 -41
- data/lib/occams-record/version.rb +2 -2
- data/lib/occams-record.rb +7 -1
- metadata +10 -7
- data/lib/occams-record/batches.rb +0 -113
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 662bd65e909a1d957ebdbbd2d3bec3fe96fb45aac2117a786c9a3ad02b03259b
|
4
|
+
data.tar.gz: abeb69ff706dbb11f3fae3672ae60c407080f418fce7330779a7b550915aa886
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8981d9c820b64ebeca28dcbe3ab7ac44e35d8ec7e14df201e890ccf6cf1cf1888d6f78ba6cac001f1a9c458f604189b1aafaf7ff644288499faf5d5afeb896c3
|
7
|
+
data.tar.gz: cdd61c50313a964db4ebdb271dbb875fc4ea888dd0b57df78be78d043e655e04e821e0cef8d9b60034e90866289a0d764f72a7728a18f46c778e511101b72f4a
|
data/README.md
CHANGED
@@ -300,6 +300,9 @@ bundle exec rake test
|
|
300
300
|
|
301
301
|
# test against Postgres
|
302
302
|
TEST_DATABASE_URL=postgres://postgres@localhost:5432/occams_record bundle exec rake test
|
303
|
+
|
304
|
+
# test against MySQL
|
305
|
+
TEST_DATABASE_URL=mysql2://root:@127.0.0.1:3306/occams_record bundle exec rake test
|
303
306
|
```
|
304
307
|
|
305
308
|
**Test against all supported ActiveRecord versions**
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module OccamsRecord
|
2
|
+
module Batches
|
3
|
+
module CursorHelpers
|
4
|
+
#
|
5
|
+
# Loads records in batches of N and yields each record to a block (if given). If no block is given,
|
6
|
+
# returns an Enumerator.
|
7
|
+
#
|
8
|
+
# NOTE Unlike find_each, batches are loaded using a cursor, which offers better performance.
|
9
|
+
# Postgres only. See the docs for OccamsRecord::Cursor for more details.
|
10
|
+
#
|
11
|
+
# @param batch_size [Integer] fetch this many rows at once
|
12
|
+
# @param use_transaction [Boolean] Ensure it runs inside of a database transaction
|
13
|
+
# @yield [OccamsRecord::Results::Row]
|
14
|
+
# @return [Enumerator] will yield each record
|
15
|
+
#
|
16
|
+
def find_each_with_cursor(batch_size: 1000, use_transaction: true)
|
17
|
+
enum = Enumerator.new { |y|
|
18
|
+
cursor.open(use_transaction: use_transaction) { |c|
|
19
|
+
c.each(batch_size: batch_size) { |record|
|
20
|
+
y.yield record
|
21
|
+
}
|
22
|
+
}
|
23
|
+
}
|
24
|
+
if block_given?
|
25
|
+
enum.each { |record| yield record }
|
26
|
+
else
|
27
|
+
enum
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
#
|
32
|
+
# Loads records in batches of N and yields each batch to a block (if given). If no block is given,
|
33
|
+
# returns an Enumerator.
|
34
|
+
#
|
35
|
+
# NOTE Unlike find_in_batches, batches are loaded using a cursor, which offers better performance.
|
36
|
+
# Postgres only. See the docs for OccamsRecord::Cursor for more details.
|
37
|
+
#
|
38
|
+
# @param batch_size [Integer] fetch this many rows at once
|
39
|
+
# @param use_transaction [Boolean] Ensure it runs inside of a database transaction
|
40
|
+
# @yield [Array<OccamsRecord::Results::Row>]
|
41
|
+
# @return [Enumerator] will yield each batch
|
42
|
+
#
|
43
|
+
def find_in_batches_with_cursor(batch_size: 1000, use_transaction: true)
|
44
|
+
enum = Enumerator.new { |y|
|
45
|
+
cursor.open(use_transaction: use_transaction) { |c|
|
46
|
+
c.each_batch(batch_size: batch_size) { |batch|
|
47
|
+
y.yield batch
|
48
|
+
}
|
49
|
+
}
|
50
|
+
}
|
51
|
+
if block_given?
|
52
|
+
enum.each { |batch| yield batch }
|
53
|
+
else
|
54
|
+
enum
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module OccamsRecord
|
2
|
+
module Batches
|
3
|
+
module OffsetLimit
|
4
|
+
class RawQuery
|
5
|
+
def initialize(conn, sql, binds, use: nil, query_logger: nil, eager_loaders: nil)
|
6
|
+
@conn, @sql, @binds = conn, sql, binds
|
7
|
+
@use, @query_logger, @eager_loaders = use, query_logger, eager_loaders
|
8
|
+
|
9
|
+
unless @sql =~ /LIMIT\s+%\{batch_limit\}/i and @sql =~ /OFFSET\s+%\{batch_offset\}/i
|
10
|
+
raise ArgumentError, "When using find_each/find_in_batches you must specify 'LIMIT %{batch_limit} OFFSET %{batch_offset}'. SQL statement: #{@sql}"
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
#
|
15
|
+
# Returns an Enumerator that yields batches of records, of size "of".
|
16
|
+
# The SQL string must include 'LIMIT %{batch_limit} OFFSET %{batch_offset}'.
|
17
|
+
# The bind values will be provided by OccamsRecord.
|
18
|
+
#
|
19
|
+
# @param batch_size [Integer] batch size
|
20
|
+
# @param use_transaction [Boolean] Ensure it runs inside of a database transaction
|
21
|
+
# @return [Enumerator] yields batches
|
22
|
+
#
|
23
|
+
def enum(batch_size:, use_transaction: true)
|
24
|
+
Enumerator.new do |y|
|
25
|
+
if use_transaction and @conn.open_transactions == 0
|
26
|
+
@conn.transaction {
|
27
|
+
run_batches y, batch_size
|
28
|
+
}
|
29
|
+
else
|
30
|
+
run_batches y, batch_size
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
#
# Runs the SQL repeatedly, advancing the OFFSET bind by the number of rows
# returned each time, and yields every non-empty batch to the yielder.
# Stops after the first batch that comes back smaller than the batch size.
#
# @param y [Enumerator::Yielder] receives each batch of results
# @param of [Integer] batch size (bound to %{batch_limit})
#
def run_batches(y, of)
  offset = 0
  loop do
    page_binds = @binds.merge(batch_limit: of, batch_offset: offset)
    # Pass @conn explicitly so every batch runs on the same connection that
    # #enum opened the transaction on, rather than one RawQuery resolves itself.
    results = ::OccamsRecord::RawQuery.new(
      @sql, page_binds,
      use: @use, query_logger: @query_logger, eager_loaders: @eager_loaders, connection: @conn
    ).run

    y.yield results if results.any?
    # A short (or empty) batch means there are no more rows to fetch
    break if results.size < of
    offset += results.size
  end
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
module OccamsRecord
|
2
|
+
#
|
3
|
+
# Methods for building batch finding methods. It expects "model" and "scope" methods to be present.
|
4
|
+
#
|
5
|
+
module Batches
|
6
|
+
module OffsetLimit
|
7
|
+
class Scoped
|
8
|
+
def initialize(model, scope, use: nil, query_logger: nil, eager_loaders: nil)
|
9
|
+
@model, @scope = model, scope
|
10
|
+
@use, @query_logger, @eager_loaders = use, query_logger, eager_loaders
|
11
|
+
end
|
12
|
+
|
13
|
+
#
|
14
|
+
# Returns an Enumerator that yields batches of records, of size "of".
|
15
|
+
# NOTE ActiveRecord 5+ provides the 'in_batches' method to do something
|
16
|
+
# similar, although 4.2 does not. Also it does not respect ORDER BY,
|
17
|
+
# whereas this does.
|
18
|
+
#
|
19
|
+
# @param batch_size [Integer] batch size
|
20
|
+
# @param use_transaction [Boolean] Ensure it runs inside of a database transaction
|
21
|
+
# @param append_order_by [String] Append this column to ORDER BY to ensure consistent results. Defaults to the primary key. Pass false to disable.
|
22
|
+
# @return [Enumerator] yields batches
|
23
|
+
#
|
24
|
+
def enum(batch_size:, use_transaction: true, append_order_by: nil)
|
25
|
+
append_order =
|
26
|
+
case append_order_by
|
27
|
+
when false then nil
|
28
|
+
when nil then @model.primary_key
|
29
|
+
else append_order_by
|
30
|
+
end
|
31
|
+
|
32
|
+
Enumerator.new do |y|
|
33
|
+
if use_transaction and @model.connection.open_transactions == 0
|
34
|
+
@model.connection.transaction {
|
35
|
+
run_batches y, batch_size, append_order
|
36
|
+
}
|
37
|
+
else
|
38
|
+
run_batches y, batch_size, append_order
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
private
|
44
|
+
|
45
|
+
def run_batches(y, of, append_order_by = nil)
|
46
|
+
limit = @scope.limit_value
|
47
|
+
batch_size = limit && limit < of ? limit : of
|
48
|
+
|
49
|
+
offset = @scope.offset_value || 0
|
50
|
+
out_of_records, count = false, 0
|
51
|
+
order_by =
|
52
|
+
if append_order_by
|
53
|
+
append_order_by.to_s == @model.primary_key.to_s ? append_order_by.to_sym : append_order_by
|
54
|
+
end
|
55
|
+
|
56
|
+
until out_of_records
|
57
|
+
l = limit && batch_size > limit - count ? limit - count : batch_size
|
58
|
+
q = @scope
|
59
|
+
q = q.order(order_by) if order_by
|
60
|
+
q = q.offset(offset).limit(l)
|
61
|
+
results = ::OccamsRecord::Query.new(q, use: @use, query_logger: @query_logger, eager_loaders: @eager_loaders).run
|
62
|
+
|
63
|
+
y.yield results if results.any?
|
64
|
+
count += results.size
|
65
|
+
offset += results.size
|
66
|
+
out_of_records = results.size < batch_size || (limit && count >= limit)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -0,0 +1,237 @@
|
|
1
|
+
require 'securerandom'
|
2
|
+
|
3
|
+
module OccamsRecord
|
4
|
+
#
|
5
|
+
# An interface to database cursors. Supported databases:
|
6
|
+
# * PostgreSQL
|
7
|
+
#
|
8
|
+
class Cursor
|
9
|
+
# @private
|
10
|
+
SCROLL = {
|
11
|
+
true => "SCROLL",
|
12
|
+
false => "NO SCROLL",
|
13
|
+
nil => "",
|
14
|
+
}.freeze
|
15
|
+
|
16
|
+
# @private
|
17
|
+
HOLD = {
|
18
|
+
true => "WITH HOLD",
|
19
|
+
false => "WITHOUT HOLD",
|
20
|
+
nil => "",
|
21
|
+
}.freeze
|
22
|
+
|
23
|
+
# @private
|
24
|
+
DIRECTIONS = {
|
25
|
+
next: "NEXT",
|
26
|
+
prior: "PRIOR",
|
27
|
+
first: "FIRST",
|
28
|
+
last: "LAST",
|
29
|
+
absolute: "ABSOLUTE",
|
30
|
+
relative: "RELATIVE",
|
31
|
+
forward: "FORWARD",
|
32
|
+
backward: "BACKWARD",
|
33
|
+
}.freeze
|
34
|
+
|
35
|
+
# @return [ActiveRecord::Connection]
|
36
|
+
attr_reader :conn
|
37
|
+
|
38
|
+
# Name of the cursor
|
39
|
+
# @return [String]
|
40
|
+
attr_reader :name
|
41
|
+
|
42
|
+
# Name of the cursor (safely SQL-escaped)
|
43
|
+
# @return [String]
|
44
|
+
attr_reader :quoted_name
|
45
|
+
|
46
|
+
#
|
47
|
+
# Initializes a new Cursor. NOTE all operations must be performed within a block passed to #open.
|
48
|
+
#
|
49
|
+
# While you CAN manually initialize a cursor, it's more common to get one via OccamsRecord::Query#cursor
|
50
|
+
# or OccamsRecord::RawQuery#cursor.
|
51
|
+
#
|
52
|
+
# @param conn [ActiveRecord::Connection]
|
53
|
+
# @param sql [String] The query to run
|
54
|
+
# @param name [String] Specify a name for the cursor (defaults to a random name)
|
55
|
+
# @param scroll [Boolean] true = SCROLL, false = NO SCROLL, nil = default behavior of DB
|
56
|
+
# @param hold [Boolean] true = WITH HOLD, false = WITHOUT HOLD, nil = default behavior of DB
|
57
|
+
# @param use [Array<Module>] optional Module to include in the result class (single or array)
|
58
|
+
# @param query_logger [Array] (optional) an array into which all queries will be inserted for logging/debug purposes
|
59
|
+
# @param eager_loaders [OccamsRecord::EagerLoaders::Context]
|
60
|
+
#
|
61
|
+
def initialize(conn, sql, name: nil, scroll: nil, hold: nil, use: nil, query_logger: nil, eager_loaders: nil)
|
62
|
+
@conn, @sql = conn, sql
|
63
|
+
@scroll = SCROLL.fetch(scroll)
|
64
|
+
@hold = HOLD.fetch(hold)
|
65
|
+
@use, @query_logger, @eager_loaders = use, query_logger, eager_loaders
|
66
|
+
@name = name || "occams_cursor_#{SecureRandom.hex 4}"
|
67
|
+
@quoted_name = conn.quote_table_name(@name)
|
68
|
+
end
|
69
|
+
|
70
|
+
#
|
71
|
+
# Declares and opens the cursor, runs the given block (yielding self), and closes the cursor.
|
72
|
+
#
|
73
|
+
# cursor.open do |c|
|
74
|
+
# c.fetch :forward, 100
|
75
|
+
# end
|
76
|
+
#
|
77
|
+
# @param use_transaction [Boolean] When true, ensures it's wrapped in a transaction
|
78
|
+
# @yield [self]
|
79
|
+
# @return the value returned by the block
|
80
|
+
#
|
81
|
+
def open(use_transaction: true)
|
82
|
+
raise ArgumentError, "A block is required" unless block_given?
|
83
|
+
if use_transaction and conn.open_transactions == 0
|
84
|
+
conn.transaction {
|
85
|
+
perform { yield self }
|
86
|
+
}
|
87
|
+
else
|
88
|
+
perform { yield self }
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
#
|
93
|
+
# Loads records in batches of N and yields each record to a block (if given). If no block is given,
|
94
|
+
# returns an Enumerator.
|
95
|
+
#
|
96
|
+
# cursor.open do |c|
|
97
|
+
# c.each do |record|
|
98
|
+
# ...
|
99
|
+
# end
|
100
|
+
# end
|
101
|
+
#
|
102
|
+
# @param batch_size [Integer] fetch this many rows at once
|
103
|
+
#
|
104
|
+
def each(batch_size: 1000)
|
105
|
+
enum = Enumerator.new { |y|
|
106
|
+
each_batch(batch_size: batch_size).each { |batch|
|
107
|
+
batch.each { |record| y.yield record }
|
108
|
+
}
|
109
|
+
}
|
110
|
+
if block_given?
|
111
|
+
enum.each { |record| yield record }
|
112
|
+
else
|
113
|
+
enum
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
#
|
118
|
+
# Loads records in batches of N and yields each batch to a block (if given). If no block is given,
|
119
|
+
# returns an Enumerator.
|
120
|
+
#
|
121
|
+
# cursor.open do |c|
|
122
|
+
# c.each_batch do |batch|
|
123
|
+
# ...
|
124
|
+
# end
|
125
|
+
# end
|
126
|
+
#
|
127
|
+
# @param batch_size [Integer] fetch this many rows at once
|
128
|
+
#
|
129
|
+
def each_batch(batch_size: 1000)
|
130
|
+
enum = Enumerator.new { |y|
|
131
|
+
out_of_records = false
|
132
|
+
until out_of_records
|
133
|
+
results = fetch :forward, batch_size
|
134
|
+
y.yield results if results.any?
|
135
|
+
out_of_records = results.size < batch_size
|
136
|
+
end
|
137
|
+
}
|
138
|
+
if block_given?
|
139
|
+
enum.each { |batch| yield batch }
|
140
|
+
else
|
141
|
+
enum
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
#
|
146
|
+
# Fetch records in the given direction.
|
147
|
+
#
|
148
|
+
# cursor.open do |c|
|
149
|
+
# c.fetch :forward, 100
|
150
|
+
# ...
|
151
|
+
# end
|
152
|
+
#
|
153
|
+
# @param direction [Symbol] :next, :prior, :first, :last, :absolute, :relative, :forward or :backward
|
154
|
+
# @param num [Integer] number of rows to fetch (optional for some directions)
|
155
|
+
# @return [Array<OccamsRecord::Results::Row>]
|
156
|
+
#
|
157
|
+
def fetch(direction, num = nil)
|
158
|
+
query "FETCH %{dir} %{num} FROM %{name}".freeze % {
|
159
|
+
dir: DIRECTIONS.fetch(direction),
|
160
|
+
num: num&.to_i,
|
161
|
+
name: @quoted_name,
|
162
|
+
}
|
163
|
+
end
|
164
|
+
|
165
|
+
#
|
166
|
+
# Move the cursor the given direction.
|
167
|
+
#
|
168
|
+
# cursor.open do |c|
|
169
|
+
# ...
|
170
|
+
# c.move :backward, 100
|
171
|
+
# ...
|
172
|
+
# end
|
173
|
+
#
|
174
|
+
# @param direction [Symbol] :next, :prior, :first, :last, :absolute, :relative, :forward or :backward
|
175
|
+
# @param num [Integer] number of rows to move (optional for some directions)
|
176
|
+
#
|
177
|
+
def move(direction, num = nil)
|
178
|
+
query "MOVE %{dir} %{num} FROM %{name}".freeze % {
|
179
|
+
dir: DIRECTIONS.fetch(direction),
|
180
|
+
num: num&.to_i,
|
181
|
+
name: @quoted_name,
|
182
|
+
}
|
183
|
+
end
|
184
|
+
|
185
|
+
#
|
186
|
+
# Run an arbitrary query on the cursor. Use 'binds' to escape inputs.
|
187
|
+
#
|
188
|
+
# cursor.open do |c|
|
189
|
+
# c.query("FETCH FORWARD %{num} FROM #{c.quoted_name}", {num: 100})
|
190
|
+
# ...
|
191
|
+
# end
|
192
|
+
#
|
193
|
+
def query(sql, binds = {})
|
194
|
+
::OccamsRecord::RawQuery.new(sql, binds, use: @use, query_logger: @query_logger, eager_loaders: @eager_loaders, connection: conn).run
|
195
|
+
end
|
196
|
+
|
197
|
+
#
|
198
|
+
# Run an arbitrary command on the cursor. Use 'binds' to escape inputs.
|
199
|
+
#
|
200
|
+
# cursor.open do |c|
|
201
|
+
# c.execute("MOVE FORWARD %{num} FROM #{c.quoted_name}", {num: 100})
|
202
|
+
# ...
|
203
|
+
# end
|
204
|
+
#
|
205
|
+
def execute(sql, binds = {})
|
206
|
+
conn.execute(sql % binds.reduce({}) { |acc, (key, val)|
|
207
|
+
acc[key] = conn.quote(val)
|
208
|
+
acc
|
209
|
+
})
|
210
|
+
end
|
211
|
+
|
212
|
+
private
|
213
|
+
|
214
|
+
def perform
|
215
|
+
ex = nil
|
216
|
+
conn.execute "DECLARE %{name} %{scroll} CURSOR %{hold} FOR %{query}".freeze % {
|
217
|
+
name: @quoted_name,
|
218
|
+
scroll: @scroll,
|
219
|
+
hold: @hold,
|
220
|
+
query: @sql,
|
221
|
+
}
|
222
|
+
yield
|
223
|
+
rescue => e
|
224
|
+
ex = e
|
225
|
+
raise ex
|
226
|
+
ensure
|
227
|
+
begin
|
228
|
+
conn.execute "CLOSE %{name}".freeze % {name: @quoted_name}
|
229
|
+
rescue => e
|
230
|
+
# Don't let an error from CLOSE (like a dead transaction) hide what led to the error with CLOSE (like bad SQL that raised an error and aborted the transaction)
|
231
|
+
raise ex || e
|
232
|
+
else
|
233
|
+
raise ex if ex
|
234
|
+
end
|
235
|
+
end
|
236
|
+
end
|
237
|
+
end
|
data/lib/occams-record/query.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'occams-record/batches'
|
2
|
-
|
3
1
|
module OccamsRecord
|
4
2
|
#
|
5
3
|
# Starts building a OccamsRecord::Query. Pass it a scope from any of ActiveRecord's query builder
|
@@ -36,7 +34,7 @@ module OccamsRecord
|
|
36
34
|
# @return [ActiveRecord::Relation] scope for building the main SQL query
|
37
35
|
attr_reader :scope
|
38
36
|
|
39
|
-
include Batches
|
37
|
+
include OccamsRecord::Batches::CursorHelpers
|
40
38
|
include EagerLoaders::Builder
|
41
39
|
include Enumerable
|
42
40
|
include Measureable
|
@@ -154,5 +152,75 @@ module OccamsRecord
|
|
154
152
|
to_a.each
|
155
153
|
end
|
156
154
|
end
|
155
|
+
|
156
|
+
#
|
157
|
+
# Load records in batches of N and yield each record to a block if given. If no block is given,
|
158
|
+
# returns an Enumerator.
|
159
|
+
#
|
160
|
+
# NOTE Unlike ActiveRecord's find_each, ORDER BY is respected. The primary key will be appended
|
161
|
+
# to the ORDER BY clause to help ensure consistent batches. Additionally, it will be run inside
|
162
|
+
# of a transaction.
|
163
|
+
#
|
164
|
+
# @param batch_size [Integer]
|
165
|
+
# @param use_transaction [Boolean] Ensure it runs inside of a database transaction
|
166
|
+
# @param append_order_by [String] Append this column to ORDER BY to ensure consistent results. Defaults to the primary key. Pass false to disable.
|
167
|
+
# @yield [OccamsRecord::Results::Row]
|
168
|
+
# @return [Enumerator] will yield each record
|
169
|
+
#
|
170
|
+
def find_each(batch_size: 1000, use_transaction: true, append_order_by: nil)
  enum = Enumerator.new { |y|
    # Forward the caller's batch_size. It was previously pinned to 1000 here,
    # so the argument was silently ignored.
    find_in_batches(batch_size: batch_size, use_transaction: use_transaction, append_order_by: append_order_by).each { |batch|
      # Flatten each batch so callers see one record at a time
      batch.each { |record| y.yield record }
    }
  }
  if block_given?
    enum.each { |record| yield record }
  else
    enum
  end
end
|
182
|
+
|
183
|
+
#
|
184
|
+
# Load records in batches of N and yield each batch to a block if given.
|
185
|
+
# If no block is given, returns an Enumerator.
|
186
|
+
#
|
187
|
+
# NOTE Unlike ActiveRecord's find_each, ORDER BY is respected. The primary key will be appended
|
188
|
+
# to the ORDER BY clause to help ensure consistent batches. Additionally, it will be run inside
|
189
|
+
# of a transaction.
|
190
|
+
#
|
191
|
+
# @param batch_size [Integer]
|
192
|
+
# @param use_transaction [Boolean] Ensure it runs inside of a database transaction
|
193
|
+
# @param append_order_by [String] Append this column to ORDER BY to ensure consistent results. Defaults to the primary key. Pass false to disable.
|
194
|
+
# @yield [OccamsRecord::Results::Row]
|
195
|
+
# @return [Enumerator] will yield each batch
|
196
|
+
#
|
197
|
+
def find_in_batches(batch_size: 1000, use_transaction: true, append_order_by: nil)
|
198
|
+
enum = Batches::OffsetLimit::Scoped
|
199
|
+
.new(model, scope, use: @use, query_logger: @query_logger, eager_loaders: @eager_loaders)
|
200
|
+
.enum(batch_size: batch_size, use_transaction: use_transaction, append_order_by: append_order_by)
|
201
|
+
if block_given?
|
202
|
+
enum.each { |batch| yield batch }
|
203
|
+
else
|
204
|
+
enum
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
#
|
209
|
+
# Returns a cursor you can open and perform operations on. A lower-level alternative to
|
210
|
+
# find_each_with_cursor and find_in_batches_with_cursor.
|
211
|
+
#
|
212
|
+
# NOTE Postgres only. See the docs for OccamsRecord::Cursor for more details.
|
213
|
+
#
|
214
|
+
# @param name [String] Specify a name for the cursor (defaults to a random name)
|
215
|
+
# @param scroll [Boolean] true = SCROLL, false = NO SCROLL, nil = default behavior of DB
|
216
|
+
# @param hold [Boolean] true = WITH HOLD, false = WITHOUT HOLD, nil = default behavior of DB
|
217
|
+
# @return [OccamsRecord::Cursor]
|
218
|
+
#
|
219
|
+
def cursor(name: nil, scroll: nil, hold: nil)
|
220
|
+
Cursor.new(model.connection, scope.to_sql,
|
221
|
+
name: name, scroll: scroll, hold: hold,
|
222
|
+
use: @use, query_logger: @query_logger, eager_loaders: @eager_loaders,
|
223
|
+
)
|
224
|
+
end
|
157
225
|
end
|
158
226
|
end
|
@@ -61,7 +61,7 @@ module OccamsRecord
|
|
61
61
|
# @return [Hash]
|
62
62
|
attr_reader :binds
|
63
63
|
|
64
|
-
include Batches
|
64
|
+
include OccamsRecord::Batches::CursorHelpers
|
65
65
|
include EagerLoaders::Builder
|
66
66
|
include Enumerable
|
67
67
|
include Measureable
|
@@ -75,13 +75,15 @@ module OccamsRecord
|
|
75
75
|
# @param eager_loaders [OccamsRecord::EagerLoaders::Context]
|
76
76
|
# @param query_logger [Array] (optional) an array into which all queries will be inserted for logging/debug purposes
|
77
77
|
# @param measurements [Array]
|
78
|
+
# @param connection
|
78
79
|
#
|
79
|
-
def initialize(sql, binds, use: nil, eager_loaders: nil, query_logger: nil, measurements: nil)
|
80
|
+
def initialize(sql, binds, use: nil, eager_loaders: nil, query_logger: nil, measurements: nil, connection: nil)
|
80
81
|
@sql = sql
|
81
82
|
@binds = binds
|
82
83
|
@use = use
|
83
84
|
@eager_loaders = eager_loaders || EagerLoaders::Context.new
|
84
85
|
@query_logger, @measurements = query_logger, measurements
|
86
|
+
@conn = connection
|
85
87
|
end
|
86
88
|
|
87
89
|
#
|
@@ -139,6 +141,74 @@ module OccamsRecord
|
|
139
141
|
end
|
140
142
|
end
|
141
143
|
|
144
|
+
#
|
145
|
+
# Load records in batches of N and yield each record to a block if given. If no block is given,
|
146
|
+
# returns an Enumerator.
|
147
|
+
#
|
148
|
+
# NOTE Unlike ActiveRecord's find_each, ORDER BY is respected. The SQL must include
|
149
|
+
# 'LIMIT %{batch_limit} OFFSET %{batch_offset}'; nothing is appended to ORDER BY for you. Additionally, it will be run inside
|
150
|
+
# of a transaction.
|
151
|
+
#
|
152
|
+
# @param batch_size [Integer]
|
153
|
+
# @param use_transaction [Boolean] Ensure it runs inside of a database transaction
|
154
|
+
# @yield [OccamsRecord::Results::Row]
|
155
|
+
# @return [Enumerator] will yield each record
|
156
|
+
#
|
157
|
+
def find_each(batch_size: 1000, use_transaction: true)
|
158
|
+
enum = Enumerator.new { |y|
|
159
|
+
find_in_batches(batch_size: batch_size, use_transaction: use_transaction).each { |batch|
|
160
|
+
batch.each { |record| y.yield record }
|
161
|
+
}
|
162
|
+
}
|
163
|
+
if block_given?
|
164
|
+
enum.each { |record| yield record }
|
165
|
+
else
|
166
|
+
enum
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
#
|
171
|
+
# Load records in batches of N and yield each batch to a block if given.
|
172
|
+
# If no block is given, returns an Enumerator.
|
173
|
+
#
|
174
|
+
# NOTE Unlike ActiveRecord's find_in_batches, ORDER BY is respected. The SQL must include
|
175
|
+
# 'LIMIT %{batch_limit} OFFSET %{batch_offset}'; nothing is appended to ORDER BY for you. Additionally, it will be run inside
|
176
|
+
# of a transaction.
|
177
|
+
#
|
178
|
+
# @param batch_size [Integer]
|
179
|
+
# @param use_transaction [Boolean] Ensure it runs inside of a database transaction
|
180
|
+
# @yield [OccamsRecord::Results::Row]
|
181
|
+
# @return [Enumerator] will yield each batch
|
182
|
+
#
|
183
|
+
def find_in_batches(batch_size: 1000, use_transaction: true)
|
184
|
+
enum = Batches::OffsetLimit::RawQuery
|
185
|
+
.new(conn, @sql, @binds, use: @use, query_logger: @query_logger, eager_loaders: @eager_loaders)
|
186
|
+
.enum(batch_size: batch_size, use_transaction: use_transaction)
|
187
|
+
if block_given?
|
188
|
+
enum.each { |batch| yield batch }
|
189
|
+
else
|
190
|
+
enum
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
#
|
195
|
+
# Returns a cursor you can open and perform operations on. A lower-level alternative to
|
196
|
+
# find_each_with_cursor and find_in_batches_with_cursor.
|
197
|
+
#
|
198
|
+
# NOTE Postgres only. See the docs for OccamsRecord::Cursor for more details.
|
199
|
+
#
|
200
|
+
# @param name [String] Specify a name for the cursor (defaults to a random name)
|
201
|
+
# @param scroll [Boolean] true = SCROLL, false = NO SCROLL, nil = default behavior of DB
|
202
|
+
# @param hold [Boolean] true = WITH HOLD, false = WITHOUT HOLD, nil = default behavior of DB
|
203
|
+
# @return [OccamsRecord::Cursor]
|
204
|
+
#
|
205
|
+
def cursor(name: nil, scroll: nil, hold: nil)
|
206
|
+
Cursor.new(conn, @sql,
|
207
|
+
name: name, scroll: scroll, hold: hold,
|
208
|
+
use: @use, query_logger: @query_logger, eager_loaders: @eager_loaders,
|
209
|
+
)
|
210
|
+
end
|
211
|
+
|
142
212
|
private
|
143
213
|
|
144
214
|
# Returns the SQL as a String with all variables escaped
|
@@ -158,45 +228,6 @@ module OccamsRecord
|
|
158
228
|
@sql.match(/\s+FROM\s+"?(\w+)"?/i)&.captures&.first
|
159
229
|
end
|
160
230
|
|
161
|
-
#
|
162
|
-
# Returns an Enumerator that yields batches of records, of size "of".
|
163
|
-
# The SQL string must include 'LIMIT %{batch_limit} OFFSET %{batch_offset}'.
|
164
|
-
# The bind values will be provided by OccamsRecord.
|
165
|
-
#
|
166
|
-
# @param of [Integer] batch size
|
167
|
-
# @param use_transaction [Boolean] Ensure it runs inside of a database transaction
|
168
|
-
# @return [Enumerator] yields batches
|
169
|
-
#
|
170
|
-
def batches(of:, use_transaction: true, append_order_by: nil)
|
171
|
-
unless @sql =~ /LIMIT\s+%\{batch_limit\}/i and @sql =~ /OFFSET\s+%\{batch_offset\}/i
|
172
|
-
raise ArgumentError, "When using find_each/find_in_batches you must specify 'LIMIT %{batch_limit} OFFSET %{batch_offset}'. SQL statement: #{@sql}"
|
173
|
-
end
|
174
|
-
|
175
|
-
Enumerator.new do |y|
|
176
|
-
if use_transaction and conn.open_transactions == 0
|
177
|
-
conn.transaction {
|
178
|
-
run_batches y, of
|
179
|
-
}
|
180
|
-
else
|
181
|
-
run_batches y, of
|
182
|
-
end
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
|
-
def run_batches(y, of)
|
187
|
-
offset = 0
|
188
|
-
loop do
|
189
|
-
results = RawQuery.new(@sql, @binds.merge({
|
190
|
-
batch_limit: of,
|
191
|
-
batch_offset: offset,
|
192
|
-
}), use: @use, query_logger: @query_logger, eager_loaders: @eager_loaders).run
|
193
|
-
|
194
|
-
y.yield results if results.any?
|
195
|
-
break if results.size < of
|
196
|
-
offset += results.size
|
197
|
-
end
|
198
|
-
end
|
199
|
-
|
200
231
|
def conn
|
201
232
|
@conn ||= @eager_loaders.model&.connection || ActiveRecord::Base.connection
|
202
233
|
end
|
data/lib/occams-record.rb
CHANGED
@@ -5,9 +5,15 @@ require 'occams-record/measureable'
|
|
5
5
|
require 'occams-record/eager_loaders/eager_loaders'
|
6
6
|
require 'occams-record/results/results'
|
7
7
|
require 'occams-record/results/row'
|
8
|
+
require 'occams-record/cursor'
|
9
|
+
require 'occams-record/errors'
|
10
|
+
|
11
|
+
require 'occams-record/batches/offset_limit/scoped'
|
12
|
+
require 'occams-record/batches/offset_limit/raw_query'
|
13
|
+
require 'occams-record/batches/cursor_helpers'
|
14
|
+
|
8
15
|
require 'occams-record/query'
|
9
16
|
require 'occams-record/raw_query'
|
10
|
-
require 'occams-record/errors'
|
11
17
|
|
12
18
|
module OccamsRecord
|
13
19
|
autoload :Ugly, 'occams-record/ugly'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: occams-record
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0
|
4
|
+
version: 1.4.0.pre.beta1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jordan Hollinger
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-05-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -44,8 +44,8 @@ dependencies:
|
|
44
44
|
- - ">="
|
45
45
|
- !ruby/object:Gem::Version
|
46
46
|
version: '0'
|
47
|
-
description: A faster, lower-memory querying API for ActiveRecord
|
48
|
-
as unadorned, read-only objects.
|
47
|
+
description: A faster, lower-memory, fuller-featured querying API for ActiveRecord
|
48
|
+
that returns results as unadorned, read-only objects.
|
49
49
|
email: jordan.hollinger@gmail.com
|
50
50
|
executables: []
|
51
51
|
extensions: []
|
@@ -53,7 +53,10 @@ extra_rdoc_files: []
|
|
53
53
|
files:
|
54
54
|
- README.md
|
55
55
|
- lib/occams-record.rb
|
56
|
-
- lib/occams-record/batches.rb
|
56
|
+
- lib/occams-record/batches/cursor_helpers.rb
|
57
|
+
- lib/occams-record/batches/offset_limit/raw_query.rb
|
58
|
+
- lib/occams-record/batches/offset_limit/scoped.rb
|
59
|
+
- lib/occams-record/cursor.rb
|
57
60
|
- lib/occams-record/eager_loaders/ad_hoc_base.rb
|
58
61
|
- lib/occams-record/eager_loaders/ad_hoc_many.rb
|
59
62
|
- lib/occams-record/eager_loaders/ad_hoc_one.rb
|
@@ -91,9 +94,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
91
94
|
version: 2.3.0
|
92
95
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
96
|
requirements:
|
94
|
-
- - "
|
97
|
+
- - ">"
|
95
98
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
99
|
+
version: 1.3.1
|
97
100
|
requirements: []
|
98
101
|
rubygems_version: 3.1.6
|
99
102
|
signing_key:
|
@@ -1,113 +0,0 @@
|
|
1
|
-
module OccamsRecord
|
2
|
-
#
|
3
|
-
# Methods for building batch finding methods. It expects "model" and "scope" methods to be present.
|
4
|
-
#
|
5
|
-
module Batches
|
6
|
-
#
|
7
|
-
# Load records in batches of N and yield each record to a block if given. If no block is given,
|
8
|
-
# returns an Enumerator.
|
9
|
-
#
|
10
|
-
# NOTE Unlike ActiveRecord's find_each, ORDER BY is respected. The primary key will be appended
|
11
|
-
# to the ORDER BY clause to help ensure consistent batches. Additionally, it will be run inside
|
12
|
-
# of a transaction.
|
13
|
-
#
|
14
|
-
# @param batch_size [Integer]
|
15
|
-
# @param use_transaction [Boolean] Ensure it runs inside of a database transaction
|
16
|
-
# @param append_order_by [String] Append this column to ORDER BY to ensure consistent results. Defaults to the primary key. Pass false to disable.
|
17
|
-
# @yield [OccamsRecord::Results::Row]
|
18
|
-
# @return [Enumerator] will yield each record
|
19
|
-
#
|
20
|
-
def find_each(batch_size: 1000, use_transaction: true, append_order_by: nil)
|
21
|
-
enum = Enumerator.new { |y|
|
22
|
-
batches(of: batch_size, use_transaction: use_transaction, append_order_by: append_order_by).each { |batch|
|
23
|
-
batch.each { |record| y.yield record }
|
24
|
-
}
|
25
|
-
}
|
26
|
-
if block_given?
|
27
|
-
enum.each { |record| yield record }
|
28
|
-
else
|
29
|
-
enum
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
#
|
34
|
-
# Load records in batches of N and yield each batch to a block if given.
|
35
|
-
# If no block is given, returns an Enumerator.
|
36
|
-
#
|
37
|
-
# NOTE Unlike ActiveRecord's find_each, ORDER BY is respected. The primary key will be appended
|
38
|
-
# to the ORDER BY clause to help ensure consistent batches. Additionally, it will be run inside
|
39
|
-
# of a transaction.
|
40
|
-
#
|
41
|
-
# @param batch_size [Integer]
|
42
|
-
# @param use_transaction [Boolean] Ensure it runs inside of a database transaction
|
43
|
-
# @param append_order_by [String] Append this column to ORDER BY to ensure consistent results. Defaults to the primary key. Pass false to disable.
|
44
|
-
# @yield [OccamsRecord::Results::Row]
|
45
|
-
# @return [Enumerator] will yield each batch
|
46
|
-
#
|
47
|
-
def find_in_batches(batch_size: 1000, use_transaction: true, append_order_by: nil)
|
48
|
-
enum = batches(of: batch_size, use_transaction: use_transaction, append_order_by: append_order_by)
|
49
|
-
if block_given?
|
50
|
-
enum.each { |batch| yield batch }
|
51
|
-
else
|
52
|
-
enum
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
private
|
57
|
-
|
58
|
-
#
|
59
|
-
# Returns an Enumerator that yields batches of records, of size "of".
|
60
|
-
# NOTE ActiveRecord 5+ provides the 'in_batches' method to do something
|
61
|
-
# similiar, although 4.2 does not. Also it does not respect ORDER BY,
|
62
|
-
# whereas this does.
|
63
|
-
#
|
64
|
-
# @param of [Integer] batch size
|
65
|
-
# @param use_transaction [Boolean] Ensure it runs inside of a database transaction
|
66
|
-
# @param append_order_by [String] Append this column to ORDER BY to ensure consistent results. Defaults to the primary key. Pass false to disable.
|
67
|
-
# @return [Enumerator] yields batches
|
68
|
-
#
|
69
|
-
def batches(of:, use_transaction: true, append_order_by: nil)
|
70
|
-
append_order =
|
71
|
-
case append_order_by
|
72
|
-
when false then nil
|
73
|
-
when nil then model.primary_key
|
74
|
-
else append_order_by
|
75
|
-
end
|
76
|
-
|
77
|
-
Enumerator.new do |y|
|
78
|
-
if use_transaction and model.connection.open_transactions == 0
|
79
|
-
model.connection.transaction {
|
80
|
-
run_batches y, of, append_order
|
81
|
-
}
|
82
|
-
else
|
83
|
-
run_batches y, of, append_order
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
def run_batches(y, of, append_order_by = nil)
|
89
|
-
limit = scope.limit_value
|
90
|
-
batch_size = limit && limit < of ? limit : of
|
91
|
-
|
92
|
-
offset = scope.offset_value || 0
|
93
|
-
out_of_records, count = false, 0
|
94
|
-
order_by =
|
95
|
-
if append_order_by
|
96
|
-
append_order_by.to_s == model.primary_key.to_s ? append_order_by.to_sym : append_order_by
|
97
|
-
end
|
98
|
-
|
99
|
-
until out_of_records
|
100
|
-
l = limit && batch_size > limit - count ? limit - count : batch_size
|
101
|
-
q = scope
|
102
|
-
q = q.order(order_by) if order_by
|
103
|
-
q = q.offset(offset).limit(l)
|
104
|
-
results = Query.new(q, use: @use, query_logger: @query_logger, eager_loaders: @eager_loaders).run
|
105
|
-
|
106
|
-
y.yield results if results.any?
|
107
|
-
count += results.size
|
108
|
-
offset += results.size
|
109
|
-
out_of_records = results.size < batch_size || (limit && count >= limit)
|
110
|
-
end
|
111
|
-
end
|
112
|
-
end
|
113
|
-
end
|