pipeloader 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/DATALOADERS.md +379 -0
- data/LICENSE +21 -0
- data/README.md +243 -161
- data/lib/pipeloader/batch/batch_loader.rb +63 -0
- data/lib/pipeloader/batch/batch_proxy.rb +204 -0
- data/lib/pipeloader/batch/context.rb +43 -0
- data/lib/pipeloader/batch/fetcher.rb +30 -0
- data/lib/pipeloader/batch/fetcher_state.rb +27 -0
- data/lib/pipeloader/batch/load_grouping.rb +28 -0
- data/lib/pipeloader/batch/load_interceptor.rb +44 -0
- data/lib/pipeloader/batch/model.rb +170 -0
- data/lib/pipeloader/batch/relationship.rb +68 -0
- data/lib/pipeloader/batch.rb +44 -0
- data/lib/pipeloader/field_exact.rb +235 -14
- data/lib/pipeloader/pipeliner.rb +107 -24
- data/lib/pipeloader/version.rb +1 -1
- data/lib/pipeloader.rb +32 -1
- metadata +47 -4
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
module Pipeloader
|
|
2
|
-
#
|
|
3
|
-
#
|
|
2
|
+
# Both opt-in, off by default; set before your schema's types are defined.
|
|
3
|
+
# `field_exact` turns on column projection (+ fusion) everywhere (or per type via
|
|
4
|
+
# `pipeloader_field_exact!`). `auto_fuse` turns on whole-row association fusion
|
|
5
|
+
# with no projection — plain `object.author` / `object.comments` collapse to one
|
|
6
|
+
# `WHERE key = ANY($1)` per level, byte-identical to the un-fused result.
|
|
4
7
|
class << self
|
|
5
|
-
attr_accessor :field_exact
|
|
8
|
+
attr_accessor :field_exact, :auto_fuse
|
|
6
9
|
end
|
|
7
10
|
self.field_exact = false
|
|
11
|
+
self.auto_fuse = false
|
|
8
12
|
|
|
9
13
|
# Per-type opt-in, mixed into every GraphQL::Schema::Object.
|
|
10
14
|
module TypeOptIn
|
|
@@ -26,8 +30,12 @@ module Pipeloader
|
|
|
26
30
|
|
|
27
31
|
# Builds the field, attaching at most one Pipeloader extension.
#
# @param selects [String, Symbol, Array, nil] extra column names, stored as strings
# @param owner [Object, nil] owning type; consulted for `pipeloader_field_exact?`
# @param extensions [Array] extensions supplied by the caller (never mutated)
def initialize(*args, selects: nil, owner: nil, extensions: [], **kwargs, &block)
  @pipeloader_selects = selects ? Array(selects).map(&:to_s) : selects

  # Field-exact (global or per-type) takes precedence over auto-fuse; with
  # neither switch on, the extension list is passed through untouched.
  pipeloader_extension =
    if Pipeloader.field_exact || (owner.respond_to?(:pipeloader_field_exact?) && owner.pipeloader_field_exact?)
      ProjectionExtension
    elsif Pipeloader.auto_fuse
      FusionExtension
    end
  extensions += [pipeloader_extension] if pipeloader_extension

  super(*args, owner: owner, extensions: extensions, **kwargs, &block)
end
|
|
@@ -43,18 +51,45 @@ module Pipeloader
|
|
|
43
51
|
lookahead = arguments[:lookahead]
|
|
44
52
|
inner = arguments.key?(:lookahead) ? arguments.reject { |k, _| k == :lookahead } : arguments
|
|
45
53
|
|
|
46
|
-
#
|
|
47
|
-
#
|
|
48
|
-
# custom resolver, or the selection is
|
|
54
|
+
# belongs_to and has_one are singular associations AR loads whole-row via
|
|
55
|
+
# `object.assoc`; resolve them with a projected (and, when safe, fused) query
|
|
56
|
+
# instead. Skipped if the type defines a custom resolver, or the selection is
|
|
57
|
+
# opaque (then fall through to default).
|
|
49
58
|
record = object.respond_to?(:object) ? object.object : object
|
|
50
59
|
if lookahead && record.is_a?(ActiveRecord::Base) &&
|
|
51
60
|
!field.owner.instance_methods(false).include?(field.resolver_method) &&
|
|
52
|
-
(assoc = record.class.reflect_on_association(field.method_str.to_sym))
|
|
53
|
-
|
|
54
|
-
|
|
61
|
+
(assoc = record.class.reflect_on_association(field.method_str.to_sym))
|
|
62
|
+
if assoc.belongs_to?
|
|
63
|
+
fk = record.public_send(assoc.foreign_key)
|
|
64
|
+
return nil if fk.nil?
|
|
55
65
|
|
|
56
|
-
|
|
57
|
-
|
|
66
|
+
cols = Pipeloader.project_columns(assoc.klass, lookahead)
|
|
67
|
+
if cols
|
|
68
|
+
# Mechanical batch: gather the level's foreign keys and resolve them with
|
|
69
|
+
# one `WHERE pk = ANY($1)` instead of a query per parent, when demux is
|
|
70
|
+
# provably unambiguous (see fusable_belongs_to?). The fused query is itself
|
|
71
|
+
# pipelined, so round-trips stay = tree depth.
|
|
72
|
+
if Pipeloader.fusable_belongs_to?(assoc)
|
|
73
|
+
return Pipeloader.fuse(context.dataloader, assoc.klass, :by_pk, assoc.klass.primary_key, cols.sort, fk)
|
|
74
|
+
end
|
|
75
|
+
return assoc.klass.where(assoc.klass.primary_key => fk).select(*cols).first
|
|
76
|
+
end
|
|
77
|
+
elsif assoc.macro == :has_one && assoc.scope.nil? && assoc.through_reflection.nil?
|
|
78
|
+
parent_key = record.public_send(assoc.active_record_primary_key)
|
|
79
|
+
return nil if parent_key.nil?
|
|
80
|
+
|
|
81
|
+
cols = Pipeloader.project_columns(assoc.klass, lookahead)
|
|
82
|
+
if cols
|
|
83
|
+
cols = (cols + [assoc.foreign_key]).uniq
|
|
84
|
+
# has_one is the has_many query with a single-row demux. Fusing it is only
|
|
85
|
+
# unambiguous when a unique index on the FK enforces 1:1 — otherwise the
|
|
86
|
+
# ANY-scan's "first" could differ from the per-parent LIMIT 1.
|
|
87
|
+
if Pipeloader.fusable_has_one?(assoc)
|
|
88
|
+
return Pipeloader.fuse(context.dataloader, assoc.klass, :by_fk_one, assoc.foreign_key, cols.sort, parent_key)
|
|
89
|
+
end
|
|
90
|
+
return assoc.klass.where(assoc.foreign_key => parent_key).select(*cols).first
|
|
91
|
+
end
|
|
92
|
+
end
|
|
58
93
|
end
|
|
59
94
|
|
|
60
95
|
value = yield(object, inner)
|
|
@@ -63,14 +98,200 @@ module Pipeloader
|
|
|
63
98
|
cols = Pipeloader.project_columns(value.klass, lookahead)
|
|
64
99
|
return value unless cols # opaque field selected -> fetch whole rows
|
|
65
100
|
|
|
66
|
-
|
|
67
|
-
if
|
|
68
|
-
|
|
101
|
+
proxy = value.respond_to?(:proxy_association) ? value.proxy_association : nil
|
|
102
|
+
if proxy && Pipeloader.fusable_has_many?(proxy.reflection, value)
|
|
103
|
+
# Mechanical batch: gather the level's parent keys and load all children
|
|
104
|
+
# with one `WHERE fk = ANY($1)`, grouped back by foreign key. Safe only for a
|
|
105
|
+
# plain has_many (no scope/limit), so each child row belongs to one parent.
|
|
106
|
+
refl = proxy.reflection
|
|
107
|
+
cols = (cols + [refl.foreign_key]).uniq
|
|
108
|
+
parent_key = proxy.owner.public_send(refl.active_record_primary_key)
|
|
109
|
+
return Pipeloader.fuse(context.dataloader, refl.klass, :by_fk_many, refl.foreign_key, cols.sort, parent_key)
|
|
69
110
|
end
|
|
111
|
+
|
|
112
|
+
# Keep a has_many's foreign key so AR can still group / wire the inverse.
|
|
113
|
+
cols += Array(proxy.reflection.foreign_key) if proxy
|
|
70
114
|
value.select(*cols.uniq)
|
|
71
115
|
end
|
|
72
116
|
end
|
|
73
117
|
|
|
118
|
+
# The auto-fuse sibling of ProjectionExtension: fuses a plain association field
|
|
119
|
+
# into one `WHERE key = ANY($1)` per level (reusing the same sources and safety
|
|
120
|
+
# gates) but selects the WHOLE row and never narrows. Anything that isn't a
|
|
121
|
+
# fusable association — scalars, custom resolvers, non-AR objects, polymorphic /
|
|
122
|
+
# scoped / non-unique associations, SQLite — just yields and loads normally
|
|
123
|
+
# through the transparent pipelined path. Result is byte-identical, just batched.
|
|
124
|
+
class FusionExtension < GraphQL::Schema::FieldExtension
  # Resolves the field through a fused whole-row lookup when it is a fusable
  # association; otherwise yields to the default resolution.
  #
  # @return [Object, nil] the fused load result, nil for a nil key on a singular
  #   association, or whatever the wrapped resolver returned
  def resolve(object:, arguments:, context:, **)
    record = object.respond_to?(:object) ? object.object : object
    reflection = pipeloader_plain_association(record)

    if reflection
      if reflection.belongs_to? && Pipeloader.fusable_belongs_to?(reflection)
        target_key = record.public_send(reflection.foreign_key)
        return nil if target_key.nil?

        target = reflection.klass
        return Pipeloader.fuse(context.dataloader, target, :by_pk, target.primary_key,
                               target.column_names.sort, target_key)
      elsif reflection.macro == :has_one && Pipeloader.fusable_has_one?(reflection)
        owner_key = record.public_send(reflection.active_record_primary_key)
        return nil if owner_key.nil?

        child = reflection.klass
        return Pipeloader.fuse(context.dataloader, child, :by_fk_one, reflection.foreign_key,
                               child.column_names.sort, owner_key)
      end
    end

    resolved = yield(object, arguments)
    return resolved unless resolved.is_a?(ActiveRecord::Relation)

    # Only a relation that still carries its association proxy can be a has_many.
    association = resolved.respond_to?(:proxy_association) ? resolved.proxy_association : nil
    return resolved unless association && Pipeloader.fusable_has_many?(association.reflection, resolved)

    many = association.reflection
    owner_key = association.owner.public_send(many.active_record_primary_key)
    Pipeloader.fuse(context.dataloader, many.klass, :by_fk_many, many.foreign_key,
                    many.klass.column_names.sort, owner_key)
  end

  private

  # The association reflection behind this field, or a falsy value when the
  # object is not an ActiveRecord record, the owning type defines its own
  # resolver method, or the field name is not an association.
  def pipeloader_plain_association(record)
    record.is_a?(ActiveRecord::Base) &&
      !field.owner.instance_methods(false).include?(field.resolver_method) &&
      record.class.reflect_on_association(field.method_str.to_sym)
  end
end
|
|
156
|
+
|
|
157
|
+
ARRAY_ENCODER = PG::TextEncoder::Array.new
|
|
158
|
+
|
|
159
|
+
# Build `<key> = ANY($1)` with a single array bind, so a fused query is one stable
|
|
160
|
+
# prepared statement regardless of batch size (an IN-list is a distinct statement
|
|
161
|
+
# per length and re-plans with a custom plan each execution; ANY(array) plans once
|
|
162
|
+
# as a generic array scan). PostgreSQL-specific, which is fine: fusion is the
|
|
163
|
+
# gathering side of pipelining, and only PostgreSQL pipelines (see fusable_* —
|
|
164
|
+
# they gate it). The fused query flows through the AR patch, so it's pipelined.
|
|
165
|
+
# @param model [Class] ActiveRecord model to query
# @param key [String, Symbol] column compared against the array
# @param values [Array] keys gathered for this batch
# @param columns [Array<String>] columns to select
# @return [ActiveRecord::Relation] `SELECT columns ... WHERE table.key = ANY(<bind>)`
def self.any_relation(model, key, values, columns)
  # A string condition (`where("... ANY(?)", value)`) has its `?` replaced by the
  # quoted value, so the array literal would become part of the SQL text and
  # every distinct key set would be a different statement. A real Arel bind
  # keeps the SQL text constant and carries the encoded array as the parameter.
  encoded = ARRAY_ENCODER.encode(values)
  bind = Arel::Nodes::BindParam.new(
    # Type::Value passes the already-encoded string through unchanged.
    ActiveRecord::Relation::QueryAttribute.new(key.to_s, encoded, ActiveModel::Type::Value.new)
  )
  any = Arel::Nodes::NamedFunction.new("ANY", [bind])
  model.where(model.arel_table[key].eq(any)).select(*columns)
end
|
|
169
|
+
|
|
170
|
+
# The [sql, params] an `any_relation` would send through the AR patch — pulled out
|
|
171
|
+
# so FusionSource can enqueue it on Pipeloader::Source directly (via `request`,
|
|
172
|
+
# without forcing) instead of letting `.to_a` force one query at a time.
|
|
173
|
+
# @param relation [ActiveRecord::Relation]
# @return [Array(String, Array)] the SQL text and its bind values, each bind
#   reduced to its database value when it responds to `value_for_database`
def self.sql_and_params(relation)
  adapter = relation.klass.connection
  # Called via #send, as in the rest of this file; any extra values the adapter
  # returns after the binds are ignored.
  statement, bound = adapter.send(:to_sql_and_binds, relation.arel)
  params = bound.map do |bind|
    next bind unless bind.respond_to?(:value_for_database)

    bind.value_for_database
  end
  [statement, params]
end
|
|
178
|
+
|
|
179
|
+
# Issue one fused association lookup. `kind` is :by_pk (belongs_to, demux by primary
|
|
180
|
+
# key, single), :by_fk_one (has_one, demux by FK, first) or :by_fk_many (has_many,
|
|
181
|
+
# demux by FK, array). All fused lookups on a connection share ONE FusionSource.
|
|
182
|
+
# @param dataloader [#with] the query's dataloader
# @param model [Class] model whose rows are loaded
# @param kind [Symbol] :by_pk, :by_fk_one or :by_fk_many
# @param key [String] column matched against the gathered values
# @param columns [Array<String>] columns to select
# @param value [Object] this parent's key value
# @return [Object] whatever the source's `load` returns for the descriptor
def self.fuse(dataloader, model, kind, key, columns, value)
  # The source is keyed by the raw connection, so every fused lookup on that
  # connection lands in the same FusionSource instance.
  source = dataloader.with(FusionSource, model.connection.raw_connection)
  descriptor = [kind, model, key, columns, value].freeze
  source.load(descriptor)
end
|
|
186
|
+
|
|
187
|
+
# One source for EVERY safe association lookup parked at a fiber tick — across all
|
|
188
|
+
# models and macros. graphql-ruby runs sibling sources sequentially in one fiber, so
|
|
189
|
+
# a separate source per association would force its own `WHERE key = ANY($1)` query
|
|
190
|
+
# before the next ran, adding a round trip per association on a level. Funnelling them
|
|
191
|
+
# through a single source lets `fetch` enqueue every shape's query on Pipeloader::Source
|
|
192
|
+
# WITHOUT forcing (`request`), then force them together — so a whole level's fused
|
|
193
|
+
# lookups collapse into one pipeline burst and round trips stay = tree depth.
|
|
194
|
+
class FusionSource < GraphQL::Dataloader::Source
  # @param pg [Object] the raw connection this source issues its queries on
  #   (`Pipeloader.fuse` passes `model.connection.raw_connection`)
  def initialize(pg)
    @pg = pg
  end

  # Resolves a batch of fused-lookup descriptors.
  #
  # @param descriptors [Array<Array>] `[kind, model, key, columns, value]` tuples
  # @return [Array] one demuxed value per descriptor, in order: a record or nil
  #   for :by_pk and :by_fk_one, an array of records for :by_fk_many
  def fetch(descriptors)
    src = @dataloader.with(Pipeloader::Source, @pg)

    # One `WHERE key = ANY(...)` per distinct query shape, enqueued but not forced.
    pending = descriptors.group_by { |d| d[0, 4] }.map do |(kind, model, key, columns), ds|
      values = ds.map { |d| d[4] }.uniq
      rel = Pipeloader.any_relation(model, key, values, columns)
      # Order FK lookups by child PK so an unordered association comes back
      # deterministically (group_by is stable, so each parent keeps that order).
      # Only possible with a single-column primary key: a child table with no
      # primary key (or a composite one) would make `arel_table[primary_key]`
      # render an invalid ORDER BY, so those are left in database order.
      if kind != :by_pk && model.primary_key.is_a?(String)
        rel = rel.order(model.arel_table[model.primary_key].asc)
      end
      [kind, model, key, columns, src.request(Pipeloader.sql_and_params(rel))]
    end

    # Load every enqueued request and index its rows: by primary key for :by_pk,
    # grouped by the lookup key otherwise.
    demux = pending.to_h do |kind, model, key, columns, request|
      rows = request.load.map { |attrs| model.instantiate(attrs) }
      bucket =
        if kind == :by_pk
          rows.index_by { |r| r.public_send(model.primary_key) }
        else
          rows.group_by { |r| r.public_send(key) }
        end
      [[kind, model, key, columns], bucket]
    end

    descriptors.map do |kind, model, key, columns, value|
      bucket = demux[[kind, model, key, columns]]
      case kind
      when :by_pk     then bucket[value]          # nil for a dangling/absent target
      when :by_fk_one then bucket[value]&.first   # has_one: first/nil
      else                 bucket[value] || []    # has_many: array
      end
    end
  end
end
|
|
233
|
+
|
|
234
|
+
# Only fuse when each returned row maps back to exactly one parent with zero
|
|
235
|
+
# ambiguity: a non-polymorphic, unscoped belongs_to keyed by a single-column
|
|
236
|
+
# primary key (unique), on PostgreSQL (the only adapter that pipelines/fuses).
|
|
237
|
+
# Anything else keeps the per-parent projected query.
|
|
238
|
+
# @param assoc [Object] association reflection (duck-typed)
# @return [Boolean] true only when a fused `WHERE pk = ANY(...)` lookup is
#   unambiguous; false on any failed gate or any error while checking
def self.fusable_belongs_to?(assoc)
  assoc.belongs_to? && !assoc.polymorphic? && assoc.scope.nil? &&
    # Composite foreign keys cannot be read with a single public_send.
    assoc.foreign_key.is_a?(String) &&
    assoc.klass.primary_key.is_a?(String) &&
    # The fused query matches the foreign key against the target's primary key.
    # A `belongs_to ..., primary_key: :other_column` association points at a
    # different column, so fusing it would match the wrong rows.
    assoc.association_primary_key.to_s == assoc.klass.primary_key &&
    assoc.klass.connection.adapter_name == "PostgreSQL"
rescue StandardError
  false
end
|
|
245
|
+
|
|
246
|
+
# has_one fuses like has_many but with a single-row demux, so it's exact only when
|
|
247
|
+
# the FK is genuinely 1:1 — proven by a unique index on the FK. Without it, "first"
|
|
248
|
+
# is ambiguous and we keep the per-parent query.
|
|
249
|
+
# @param assoc [Object] association reflection (duck-typed)
# @return [Boolean] true only when a fused FK lookup with a single-row demux is
#   exact; false on any failed gate or any error while checking
def self.fusable_has_one?(assoc)
  assoc.macro == :has_one && assoc.through_reflection.nil? && assoc.scope.nil? &&
    # A polymorphic `has_one ..., as:` also filters on the *_type column. The
    # fused query matches on the foreign key alone, so it could hand back a
    # row that belongs to a different owner type.
    assoc.type.nil? &&
    assoc.foreign_key.is_a?(String) && assoc.active_record_primary_key.is_a?(String) &&
    assoc.klass.connection.adapter_name == "PostgreSQL" &&
    unique_fk_index?(assoc)
rescue StandardError
  false
end
|
|
257
|
+
|
|
258
|
+
# A unique index on exactly the FK column proves at most one child per parent.
|
|
259
|
+
# Memoized per reflection.
|
|
260
|
+
# @param assoc [Object] has_one reflection; reads `foreign_key` and `klass`
# @return [Boolean] whether the child table has a full (non-partial) unique
#   index on exactly the foreign-key column
def self.unique_fk_index?(assoc)
  @unique_fk_indexes ||= {}
  child = assoc.klass
  fk = assoc.foreign_key
  # Key the memo on what the answer depends on, not on `object_id`: object ids
  # can be reused once a reflection is garbage-collected (e.g. after a code
  # reload), which could return another association's answer.
  memo_key = [child.name, child.table_name, fk]
  return @unique_fk_indexes[memo_key] if @unique_fk_indexes.key?(memo_key)

  @unique_fk_indexes[memo_key] =
    child.connection.indexes(child.table_name).any? do |ix|
      # A partial index (`where:`) is unique only within its predicate, so it
      # does not prove one child per parent across the whole table.
      ix.unique && ix.where.nil? && Array(ix.columns) == [fk]
    end
end
|
|
268
|
+
|
|
269
|
+
# Fuse only a plain has_many: no scope, no `through`, and no per-parent
|
|
270
|
+
# limit/offset (those need a lateral join, not a flat IN — order is preserved by
|
|
271
|
+
# group_by, so it's fine). Single-column keys only. Then group_by(fk) is exact.
|
|
272
|
+
# @param reflection [Object] association reflection (duck-typed)
# @param relation [Object] the relation the resolver returned
# @return [Boolean] true only when every gate passes; false on the first gate
#   that fails or on any error raised while checking
def self.fusable_has_many?(reflection, relation)
  return false unless reflection.macro == :has_many
  return false unless reflection.through_reflection.nil?
  return false unless reflection.scope.nil?
  # Single-column keys on both sides.
  return false unless reflection.foreign_key.is_a?(String)
  return false unless reflection.active_record_primary_key.is_a?(String)
  return false unless reflection.klass.connection.adapter_name == "PostgreSQL"

  bare_association?(reflection, relation)
rescue StandardError
  false
end
|
|
283
|
+
|
|
284
|
+
# The relation must be the *bare* association — nothing chained onto it. The fused
|
|
285
|
+
# query is rebuilt from the reflection, so any chained order / where / limit would
|
|
286
|
+
# be silently dropped; comparing the relation's SQL to the untouched association
|
|
287
|
+
# scope catches all of them at once (order, where, limit, joins, group, ...).
|
|
288
|
+
# Ordered or filtered collections fall back to the per-parent query, which keeps them.
|
|
289
|
+
# @param reflection [Object] association reflection; reads `foreign_key`, `klass`
# @param relation [Object] relation to test; reads `where_values_hash`, `to_sql`
# @return [Boolean] true when the relation's SQL equals that of a plain
#   `klass.where(foreign_key => owner_key)`; false when no owner key is found
def self.bare_association?(reflection, relation)
  fk = reflection.foreign_key
  owner_key = relation.where_values_hash[fk]
  # No equality condition on the foreign key: not an association scope we know.
  return false if owner_key.nil?

  relation.to_sql == reflection.klass.where(fk => owner_key).to_sql
end
|
|
294
|
+
|
|
74
295
|
# Returns the exact column list for a model + selection, or nil meaning
|
|
75
296
|
# "can't prove it's safe — fetch whole rows."
|
|
76
297
|
def self.project_columns(model, lookahead)
|
data/lib/pipeloader/pipeliner.rb
CHANGED
|
@@ -7,43 +7,126 @@ module Pipeloader
|
|
|
7
7
|
|
|
8
8
|
# queries: array of [sql, params]. Returns array of [columns, rows] (raw
|
|
9
9
|
# strings), in the same order, having sent them all in a single round trip.
|
|
10
|
+
#
|
|
11
|
+
# Prepared statements are cached for the lifetime of the REQUEST (the cache and
|
|
12
|
+
# name space are set up by Pipeloader::Trace per multiplex), so a shape is
|
|
13
|
+
# planned once per request and reused across every burst — not re-planned each
|
|
14
|
+
# burst. They're thrown out when the next request begins: each request's first
|
|
15
|
+
# burst DEALLOCATEs the previous request's statements, piggybacked into the same
|
|
16
|
+
# pipeline so cleanup costs no extra round trip. Nothing outlives a request, so
|
|
17
|
+
# no plan goes stale across a reconnect or a migration.
|
|
18
|
+
#
|
|
19
|
+
# If any query errors, the pipeline is drained to its sync point, the connection
|
|
20
|
+
# is restored to a usable state, and the first error is raised — never swallowed.
|
|
10
21
|
# @param pg [PG::Connection] raw connection to pipeline on
# @param queries [Array<Array(String, Array)>] `[sql, params]` pairs
# @return [Array<Array(Array, Array)>] `[columns, rows]` per query, in order
# @raise [PG::Error] the first query error of the burst, after the pipeline
#   has been drained and the connection left usable
def pipeline_batch(pg, queries)
  cache = prepared_cache(pg)
  seq = pg.instance_variable_get(:@pipeloader_seq) || 0
  garbage = take_garbage(pg) # previous request's statements, to DEALLOCATE now

  # Name + pipelined Parse for each shape not yet prepared this request. The
  # name suffix is a per-connection serial rather than `cache.size`, because
  # entries can now be evicted on failure and a size-based name could then
  # repeat one that is still prepared on the server.
  to_prepare = []
  queries.each do |sql, _params|
    next if cache.key?(sql)

    serial = (pg.instance_variable_get(:@pipeloader_serial) || 0) + 1
    pg.instance_variable_set(:@pipeloader_serial, serial)
    name = "pipeloader_#{seq}_#{serial}"
    cache[sql] = name
    to_prepare << [name, sql]
  end

  error = nil
  results = []
  confirmed = false
  begin
    pg.enter_pipeline_mode

    # Block(s) 1 — clean up earlier statements. Each DEALLOCATE gets its own
    # sync so one missing name (e.g. after a reconnect) aborts only itself,
    # neither the remaining cleanup nor this burst's real queries.
    garbage.each do |name|
      pg.send_query_params("DEALLOCATE #{name}", [])
      pg.pipeline_sync
    end

    # Block 2 — prepare new shapes, run every query.
    to_prepare.each { |name, sql| pg.send_prepare(name, sql) }
    queries.each { |sql, params| pg.send_query_prepared(cache[sql], params) }
    pg.pipeline_sync

    garbage.size.times { drain_block(pg, nil, false) } # cleanup results — ignore failures
    error = drain_block(pg, results, true)              # this burst's query results
    confirmed = error.nil?
  ensure
    unless confirmed
      # The burst failed, so this burst's Parses may or may not have run. Drop
      # their names from the request cache so later bursts re-prepare instead
      # of executing a statement that may not exist, and queue the names for
      # DEALLOCATE in case they were created.
      to_prepare.each { |_name, sql| cache.delete(sql) }
      leftovers = pg.instance_variable_get(:@pipeloader_garbage) ||
                  pg.instance_variable_set(:@pipeloader_garbage, [])
      leftovers.concat(to_prepare.map(&:first))
    end
    finish_pipeline(pg)
  end

  raise error if error

  results
end
|
|
32
61
|
|
|
33
|
-
|
|
62
|
+
# Reads one pipeline sync block. Collects PGRES_TUPLES_OK into `results` (when
|
|
63
|
+
# given) and, when capturing, records the first query error via result.check.
|
|
64
|
+
# @param pg [PG::Connection] connection in pipeline mode
# @param results [Array, nil] receives `[fields, values]` per tuple result; nil discards them
# @param capture_error [Boolean] whether to record the first failing result
# @return [PG::Error, nil] the first captured error, if any
def drain_block(pg, results, capture_error)
  first_error = nil

  while (result = pg.get_result)
    status = result.result_status
    break if status == PG::PGRES_PIPELINE_SYNC

    if status == PG::PGRES_TUPLES_OK && results
      # All-strings type map is set on this result only, not on the connection,
      # so values come back as raw strings.
      result.type_map = PG::TypeMapAllStrings.new
      results << [result.fields, result.values]
    elsif status != PG::PGRES_COMMAND_OK && capture_error
      begin
        result.check
      rescue PG::Error => e
        first_error ||= e
      end
    end

    pg.get_result # consume this result's nil delimiter
  end

  first_error
end
|
|
89
|
+
|
|
90
|
+
# The per-request name cache, lazily created (Trace normally seeds it).
|
|
91
|
+
# @param pg [Object] connection carrying the cache in `@pipeloader_prepared`
# @return [Hash] the existing cache, or a new empty one stored on `pg`
def prepared_cache(pg)
  existing = pg.instance_variable_get(:@pipeloader_prepared)
  existing || pg.instance_variable_set(:@pipeloader_prepared, {})
end
|
|
99
|
+
|
|
100
|
+
# Names left by previous requests, to DEALLOCATE on the next burst (cleared).
|
|
101
|
+
# @param pg [Object] connection carrying pending names in `@pipeloader_garbage`
# @return [Array<String>] the pending names (the stored list is replaced by an
#   empty one), or a new empty array when nothing is pending
def take_garbage(pg)
  pending = pg.instance_variable_get(:@pipeloader_garbage)
  return [] if !pending || pending.empty?

  pg.instance_variable_set(:@pipeloader_garbage, [])
  pending
end
|
|
108
|
+
|
|
109
|
+
# Leave the connection usable no matter how the batch ended. If the pipeline
|
|
110
|
+
# can't be drained cleanly (e.g. the connection dropped mid-burst), reset it so
|
|
111
|
+
# the pool gets a healthy connection.
|
|
112
|
+
# @param pg [PG::Connection]
# @return [Object, nil] nil when pipeline mode was already off
def finish_pipeline(pg)
  unless pg.pipeline_status == PG::PQ_PIPELINE_OFF
    # Read up to the next nil, then leave pipeline mode; if either step raises
    # PG::Error the connection is reset instead.
    nil until pg.get_result.nil?
    pg.exit_pipeline_mode
  end
rescue PG::Error
  reset_connection(pg)
end
|
|
120
|
+
|
|
121
|
+
# Resets the connection, ignoring a PG::Error from the reset itself, and always
# discards the Pipeloader bookkeeping stored on it.
#
# @param pg [PG::Connection]
def reset_connection(pg)
  begin
    pg.reset
  rescue PG::Error
    nil
  end
ensure
  # The reset session has no prepared statements: empty the request's name
  # cache so later bursts re-prepare, and forget the pending DEALLOCATEs.
  stale_names = pg.instance_variable_get(:@pipeloader_prepared)
  stale_names.clear unless stale_names.nil?
  pg.instance_variable_set(:@pipeloader_garbage, [])
end
|
|
48
131
|
end
|
|
49
132
|
end
|
data/lib/pipeloader/version.rb
CHANGED
data/lib/pipeloader.rb
CHANGED
|
@@ -7,6 +7,7 @@ require_relative "pipeloader/pipeliner"
|
|
|
7
7
|
require_relative "pipeloader/source"
|
|
8
8
|
require_relative "pipeloader/ar_patch"
|
|
9
9
|
require_relative "pipeloader/field_exact"
|
|
10
|
+
require_relative "pipeloader/batch"
|
|
10
11
|
|
|
11
12
|
# Pipeloader makes a graphql-ruby query resolve its ActiveRecord SELECTs through
|
|
12
13
|
# a libpq pipeline: one round trip per tree level, transparently. Resolvers stay
|
|
@@ -53,6 +54,30 @@ module Pipeloader
|
|
|
53
54
|
end
|
|
54
55
|
end
|
|
55
56
|
|
|
57
|
+
# Prepared statements are scoped to one request. Each multiplex gets a fresh,
|
|
58
|
+
# uniquely-prefixed name space; pipeline_batch fills it in and reuses it across
|
|
59
|
+
# bursts, so a shape is planned once per request rather than once per burst.
|
|
60
|
+
# Opens a new prepared-statement scope on the connection.
#
# @param pg [Object] raw connection; state is kept in instance variables on it
# @return [Hash] the new, empty name cache
def self.begin_request!(pg)
  # A cache may already be live here: `prepared_cache` creates one lazily when
  # a burst runs outside a request, and a request may not have been ended.
  # Retire it first so its statements are queued for DEALLOCATE rather than
  # dropped from tracking while still prepared on the server.
  end_request!(pg)

  pg.instance_variable_set(:@pipeloader_seq, (pg.instance_variable_get(:@pipeloader_seq) || 0) + 1)
  pg.instance_variable_set(:@pipeloader_prepared, {})
end
|
|
64
|
+
|
|
65
|
+
# Hand the request's statements to the next one to DEALLOCATE (piggybacked onto
|
|
66
|
+
# its first burst — no extra round trip here). A plan therefore never outlives
|
|
67
|
+
# the request that made it.
|
|
68
|
+
# Closes the request's prepared-statement scope: removes the name cache from
# the connection and queues its statement names on `@pipeloader_garbage`.
#
# @param pg [Object] raw connection carrying the Pipeloader instance variables
# @return [Array<String>, nil] the garbage list, or nil when nothing was cached
def self.end_request!(pg)
  cache =
    if pg.instance_variable_defined?(:@pipeloader_prepared)
      pg.remove_instance_variable(:@pipeloader_prepared)
    end
  return if cache.nil? || cache.empty?

  garbage = pg.instance_variable_get(:@pipeloader_garbage) ||
            pg.instance_variable_set(:@pipeloader_garbage, [])
  garbage.concat(cache.values)
end
|
|
80
|
+
|
|
56
81
|
# Stash the active dataloader on the connection for the whole response phase,
|
|
57
82
|
# and clear it at the end. This is done at *multiplex* scope, not per-query,
|
|
58
83
|
# because under Dataloader resolution is deferred to the multiplex's fiber run
|
|
@@ -61,12 +86,18 @@ module Pipeloader
|
|
|
61
86
|
# Wraps a multiplex run: resets stats and, when the connection supports
# pipelining, stashes the dataloader on it and opens a prepared-statement scope.
# Both are undone in `ensure`, however the multiplex finishes.
#
# @param multiplex [Object] provides `dataloader`
def execute_multiplex(multiplex:)
  Pipeloader.reset_stats!
  conn = ActiveRecord::Base.connection
  pg = nil

  # When pipelining is not supported the stash stays unset, so nothing below
  # is pipelined for this multiplex.
  pipelined = Pipeloader.pipelining_supported?(conn)
  conn.pipeloader_dataloader = multiplex.dataloader if pipelined
  pg = conn.raw_connection if pipelined
  Pipeloader.begin_request!(pg) if pipelined

  super
ensure
  conn.pipeloader_dataloader = nil if conn
  Pipeloader.end_request!(pg) if pg
end
|
|
71
102
|
end
|
|
72
103
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pipeloader
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.1
|
|
4
|
+
version: 0.0.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Joshua Hull
|
|
@@ -23,6 +23,20 @@ dependencies:
|
|
|
23
23
|
- - ">="
|
|
24
24
|
- !ruby/object:Gem::Version
|
|
25
25
|
version: '7.1'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: activesupport
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '7.1'
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ">="
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '7.1'
|
|
26
40
|
- !ruby/object:Gem::Dependency
|
|
27
41
|
name: graphql
|
|
28
42
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -79,19 +93,47 @@ dependencies:
|
|
|
79
93
|
- - ">="
|
|
80
94
|
- !ruby/object:Gem::Version
|
|
81
95
|
version: '13'
|
|
82
|
-
|
|
96
|
+
- !ruby/object:Gem::Dependency
|
|
97
|
+
name: simplecov
|
|
98
|
+
requirement: !ruby/object:Gem::Requirement
|
|
99
|
+
requirements:
|
|
100
|
+
- - "~>"
|
|
101
|
+
- !ruby/object:Gem::Version
|
|
102
|
+
version: '0.22'
|
|
103
|
+
type: :development
|
|
104
|
+
prerelease: false
|
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
106
|
+
requirements:
|
|
107
|
+
- - "~>"
|
|
108
|
+
- !ruby/object:Gem::Version
|
|
109
|
+
version: '0.22'
|
|
110
|
+
description: 'During GraphQL response building, Pipeloader routes ActiveRecord SELECTs
|
|
83
111
|
through a libpq pipeline so a query tree resolves in roughly one round trip per
|
|
84
112
|
level — with plain resolvers and plain models, no Futures, no dataloader.load, no
|
|
85
|
-
resolver changes.
|
|
113
|
+
resolver changes. Also ships Pipeloader::Batch: declarative batch-loaded associations
|
|
114
|
+
and aggregates that eliminate N+1 in plain ActiveRecord traversal via AR''s own
|
|
115
|
+
Preloader.'
|
|
86
116
|
email:
|
|
87
117
|
- josh@fireflop.com
|
|
88
118
|
executables: []
|
|
89
119
|
extensions: []
|
|
90
120
|
extra_rdoc_files: []
|
|
91
121
|
files:
|
|
122
|
+
- DATALOADERS.md
|
|
123
|
+
- LICENSE
|
|
92
124
|
- README.md
|
|
93
125
|
- lib/pipeloader.rb
|
|
94
126
|
- lib/pipeloader/ar_patch.rb
|
|
127
|
+
- lib/pipeloader/batch.rb
|
|
128
|
+
- lib/pipeloader/batch/batch_loader.rb
|
|
129
|
+
- lib/pipeloader/batch/batch_proxy.rb
|
|
130
|
+
- lib/pipeloader/batch/context.rb
|
|
131
|
+
- lib/pipeloader/batch/fetcher.rb
|
|
132
|
+
- lib/pipeloader/batch/fetcher_state.rb
|
|
133
|
+
- lib/pipeloader/batch/load_grouping.rb
|
|
134
|
+
- lib/pipeloader/batch/load_interceptor.rb
|
|
135
|
+
- lib/pipeloader/batch/model.rb
|
|
136
|
+
- lib/pipeloader/batch/relationship.rb
|
|
95
137
|
- lib/pipeloader/field_exact.rb
|
|
96
138
|
- lib/pipeloader/pipeliner.rb
|
|
97
139
|
- lib/pipeloader/source.rb
|
|
@@ -116,5 +158,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
116
158
|
requirements: []
|
|
117
159
|
rubygems_version: 3.6.7
|
|
118
160
|
specification_version: 4
|
|
119
|
-
summary: Transparent libpq pipelining for graphql-ruby on ActiveRecord
|
|
161
|
+
summary: Transparent libpq pipelining for graphql-ruby on ActiveRecord, plus batch
|
|
162
|
+
loaders for plain AR
|
|
120
163
|
test_files: []
|