redis-memo 0.0.0.alpha → 0.0.0.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,499 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Inspect a SQL's AST to memoize SELECT statements
5
+ #
6
+ # As Rails applies additional logic on top of the rows returned from the
7
+ # database:
8
+ #
9
+ # - `belongs_to ..., inverse_of: ...`: By using `inverse_of`, Rails could
10
+ # prevent instantiating the different objects from the DB when the objects are
11
+ # really the same.
12
+ #
13
+ # - Associations may have scopes that add more filtering to the existing query
14
+ #
15
+ # - +ActiveRecord::Relation+ defers the data fetching until the end
16
+ #
17
+ # - +ActiveRecord::Relation+ could preload associations to avoid N+1 queries
18
+ #
19
+ # Memoizing each SQL query by inspecting its AST is the best approach we have
20
+ # to reliably perform query caching with ActiveRecord.
21
+ #
22
+ # Here's how this works at a high level:
23
+ #
24
+ # First, we extract dependencies from SQL queries. Consider the following query
25
+ #
26
+ # SELECT * FROM my_records WHERE value = 'a'
27
+ #
28
+ # The rows returned from the database would not change unless records with the
29
+ # value 'a' have been updated. Therefore, if we are to cache this query, we
30
+ # need to set dependencies on this query and discard the cache if the
31
+ # dependencies have changed.
32
+ #
33
+ # Here's the dependency (aka a +Memoizable+) for the above query:
34
+ #
35
+ # Memoizable.new(model: MyRecord, value: 'a')
36
+ #
37
+ # We bump the column dependencies automatically when updating a record that has
38
+ # the `memoize_table_column` declaration on the model class.
39
+ #
40
+ # class MyRecord < ApplicationRecord
41
+ # extend RedisMemo::MemoizeRecords
42
+ # memoize_table_column :value
43
+ # end
44
+ #
45
+ # After saving any MyRecord, we will bump the dependencies versions filled with
46
+ # the record's current and past values:
47
+ #
48
+ # my_record.update(value: 'new_value') # from 'old_value'
49
+ #
50
+ # Then we will bump the versions for at least two memoizables:
51
+ #
52
+ # Memoizable.new(model: MyRecord, value: 'new_value')
53
+ # Memoizable.new(model: MyRecord, value: 'old_value')
54
+ #
55
+ # When the another_value column is also memoized, we have another
56
+ # memoizable to bump the version for, regardless of whether the another_value
57
+ # field of my_record has been changed:
58
+ #
59
+ # Memoizable.new(model: MyRecord, another_value: 'current_value')
60
+ #
61
+ # We need to do this because other columns could be cached in
62
+ #
63
+ # SELECT * FROM ... WHERE another_value = ?
64
+ #
65
+ # queries. Those query result sets become stale after the update.
66
+ #
67
+ # By setting dependencies on the query, we will use the dependencies versions
68
+ # as a part of the query cache key. After we bump the dependencies versions,
69
+ # the following request will produce a different new query cache key, so the
70
+ # request will end up with a cache_miss:
71
+ # - Compute the fresh query result and it will actually send the query to the database
72
+ # - Fill the new query cache key with the fresh query result
73
+ #
74
+ # After saving my_record and bumping the dependencies versions, all currently
75
+ # cached SQL queries that have `value = 'new_value'` or `value = 'old_value'`
76
+ # in their WHERE clause (or any WHERE condition that uses the current
77
+ # memoized column values of my_record) can no longer be accessed by any new
78
+ # requests. Those entries will be automatically deleted through cache expiry or
79
+ # cache eviction.
80
+ #
81
+ # We can only memoize SQL queries that can be automatically invalidated through
82
+ # this mechanism:
83
+ #
84
+ # - The query contains only =, IN conditions
85
+ # - And those conditions are on table columns that have been memoized via
86
+ # +memoize_table_column+
87
+ #
88
+ # See +extract_bind_params+ for the precise detection logic.
89
+ #
90
+ class RedisMemo::MemoizeRecords::CachedSelect
91
+ # TODO: merge this into RedisMemo::MemoizeQuery
92
# Installs SELECT-query memoization on the class of +connection+.
#
# The connection class is extended with RedisMemo::MemoizeMethod and its
# +exec_query+ is memoized. ConnectionAdapter is prepended to the connection
# class and StatementCache to ActiveRecord::StatementCache. Does nothing when
# the connection class has already been extended with
# RedisMemo::MemoizeMethod.
#
# Side effects: reopens ActiveRecord::Result (adds +==+) and
# ActiveRecord::StatementCache::BindMap (adds +map_substitutes+).
#
# @param connection [Object] a database connection; only its class is used
# @return [void]
def self.install(connection)
  klass = connection.class
  return if klass.singleton_class < RedisMemo::MemoizeMethod

  klass.class_eval do
    extend RedisMemo::MemoizeMethod

    memoize_method :exec_query do |_, sql, _name, binds, **_kwargs|
      # One dependency per set of column conditions extracted from the
      # current query.
      RedisMemo::MemoizeRecords::CachedSelect
        .current_query_bind_params
        .params
        .each do |model, attrs_set|
          attrs_set.each do |attrs|
            depends_on model, **attrs
          end
        end

      # Plus a dependency identifying the exact statement: the SQL text and
      # the database values of its binds. +binds+ is treated as empty when
      # it was not supplied.
      depends_on RedisMemo::Memoizable.new(
        __redis_memo_memoize_records_memoize_query_sql__: sql,
        __redis_memo_memoize_records_memoize_query_binds__: (binds || []).map(&:value_for_database),
      )
    end
  end

  klass.prepend(ConnectionAdapter)
  ActiveRecord::StatementCache.prepend(StatementCache)

  # Cached result objects could be sampled to compare against fresh result
  # objects. Overwrite the == operator to make the comparison meaningful.
  ActiveRecord::Result.class_eval do
    def ==(other)
      # Anything that does not expose columns and rows (e.g. nil) is simply
      # not equal; comparing must not raise.
      return false unless other.respond_to?(:columns) && other.respond_to?(:rows)

      columns == other.columns && rows == other.rows
    end
  end

  ActiveRecord::StatementCache::BindMap.class_eval do
    # Maps each bound attribute's value found at the offsets in @indexes to
    # the entry of +values+ at the same position.
    def map_substitutes(values)
      @indexes.each_with_index.each_with_object({}) do |(offset, i), ret|
        ret[@bound_attributes[offset].value] = values[i]
      end
    end
  end
end
139
+
140
# Prepended to the connection class so that the Arel query being executed is
# known by the time +exec_query+ runs.
module ConnectionAdapter
  # Calls the original +cacheable_query+ and stores the last argument (the
  # arel object) on the returned query, so StatementCache#execute can read it.
  def cacheable_query(*args)
    query, binds = super(*args)

    # Persist the arel object to StatementCache#execute
    query.instance_variable_set(:@__redis_memo_memoize_records_memoize_query_arel, args.last)

    [query, binds]
  end

  # Runs the query through the memoized path when memoization is enabled and
  # bind params can be extracted for the given SQL; otherwise runs the
  # original +exec_query+ inside +RedisMemo.without_memo+.
  #
  # Positional arguments are read as (sql, name, binds).
  def exec_query(*args)
    sql, name, binds = args

    # An Arel AST in Thread local is set prior to supported query methods
    if !RedisMemo.without_memo? &&
        RedisMemo::MemoizeRecords::CachedSelect.extract_bind_params(sql)
      # [Redis $model Load] $sql $binds
      RedisMemo::DefaultOptions.logger&.info(
        "[Redis] \u001b[36;1m#{name} \u001b[34;1m#{sql}\u001b[0m #{
          (binds || []).map { |bind| [bind.name, bind.value_for_database] }
        }"
      )

      RedisMemo::Tracer.trace(
        'redis_memo.memoize_query',
        sql
          .gsub(/(\$\d+)/, '?')             # $1 -> ?
          .gsub(/((, *)*\?)+/, '?'),        # (?, ?, ? ...) -> (?)
      ) do
        super(*args)
      end
    else
      RedisMemo.without_memo { super(*args) }
    end
  end
  # Keep keyword arguments as keywords when they are forwarded through
  # `super(*args)` on Ruby 3+, where a bare *args would otherwise turn them
  # into an extra positional hash. Older Rubies lack ruby2_keywords and do
  # not need it.
  ruby2_keywords(:exec_query) if respond_to?(:ruby2_keywords, true)

  # Records the Arel::SelectManager (when one is given as the first argument)
  # as the current query for the duration of the call, and always clears the
  # current query afterwards.
  def select_all(*args)
    if args[0].is_a?(Arel::SelectManager)
      RedisMemo::MemoizeRecords::CachedSelect.current_query = args[0]
    end

    super(*args)
  ensure
    RedisMemo::MemoizeRecords::CachedSelect.reset_current_query
  end
  ruby2_keywords(:select_all) if respond_to?(:ruby2_keywords, true)
end
184
+
185
# Prepended to ActiveRecord::StatementCache. Before running a cached
# statement it publishes the arel object stored on the query builder and the
# substitute-to-value mapping for the call, then clears them when done.
module StatementCache
  def execute(*args)
    cached_select = RedisMemo::MemoizeRecords::CachedSelect

    # The arel object was attached to the query builder by
    # ConnectionAdapter#cacheable_query.
    cached_select.current_query =
      query_builder.instance_variable_get(:@__redis_memo_memoize_records_memoize_query_arel)

    # The first argument carries the values for the statement's substitutes.
    cached_select.current_substitutes = bind_map.map_substitutes(args.first)

    super(*args)
  ensure
    RedisMemo::MemoizeRecords::CachedSelect.reset_current_query
  end
end
197
+
198
# Decides whether the current query (as recorded in the thread locals) can be
# memoized for the given SQL string.
#
# Returns true and stores the extracted bind params in the thread local
# THREAD_KEY_AREL_BIND_PARAMS when:
#   - the current arel's AST is a SELECT statement whose SQL equals +sql+,
#   - bind params can be extracted from every part of the AST, and
#   - the extracted bind params report themselves as memoizable.
# Returns false otherwise.
#
# @param sql [String] the SQL about to be executed
# @return [Boolean]
def self.extract_bind_params(sql)
  locals = Thread.current

  statement = locals[THREAD_KEY_AREL]&.ast
  unless statement.is_a?(Arel::Nodes::SelectStatement) && statement.to_sql == sql
    return false
  end

  # The AST walk looks substitutes up in this hash, so make sure it exists.
  locals[THREAD_KEY_SUBSTITUTES] ||= {}

  # Iterate through the Arel AST in a Depth First Search
  collected = extract_bind_params_recurse(statement)
  return false unless collected

  collected.uniq!

  if collected.memoizable?
    locals[THREAD_KEY_AREL_BIND_PARAMS] = collected
    true
  else
    false
  end
end
214
+
215
# Bind params stored by +extract_bind_params+ for the query currently being
# executed on this thread (nil when none have been stored).
def self.current_query_bind_params
  Thread.current[THREAD_KEY_AREL_BIND_PARAMS]
end

# Records +arel+ as the query currently being executed on this thread.
def self.current_query=(arel)
  Thread.current[THREAD_KEY_AREL] = arel
end

# Records the substitute-to-value mapping for the current query.
def self.current_substitutes=(substitutes)
  Thread.current[THREAD_KEY_SUBSTITUTES] = substitutes
end

# Clears every thread local used while executing a query.
def self.reset_current_query
  [
    THREAD_KEY_AREL,
    THREAD_KEY_SUBSTITUTES,
    THREAD_KEY_AREL_BIND_PARAMS,
  ].each { |key| Thread.current[key] = nil }

  nil
end
232
+
233
+ private
234
+
235
# A pre-order Depth First Search
#
# Note: Arel::Nodes#each returns a list in post-order, and it does not step
# into Union nodes. So we're implementing our own DFS
#
# Walks +node+ and collects, per binding relation, the column conditions the
# query depends on.
#
# @param node [Object] an Arel AST node
# @return [BindParams, nil] the collected conditions, or nil as soon as any
#   part of the tree is not supported (the query must then not be memoized)
def self.extract_bind_params_recurse(node)
  bind_params = BindParams.new

  case node
  when Arel::Nodes::Equality, Arel::Nodes::In
    attr_node = node.left
    return unless attr_node.is_a?(Arel::Attributes::Attribute)

    table_node =
      case attr_node.relation
      when Arel::Table
        attr_node.relation
      when Arel::Nodes::TableAlias
        attr_node.relation.left
      else
        # Not yet supported
        return
      end

    # An alias may wrap something other than a plain table; such a node has
    # no type caster to inspect, so treat it as not supported instead of
    # raising.
    return unless table_node.is_a?(Arel::Table)

    type_caster = table_node.send(:type_caster)
    binding_relation =
      case type_caster
      when ActiveRecord::TypeCaster::Map
        type_caster.send(:types)
      when ActiveRecord::TypeCaster::Connection
        type_caster.instance_variable_get(:@klass)
      else
        return
      end

    rights = node.right.is_a?(Array) ? node.right : [node.right]
    substitutes = Thread.current[THREAD_KEY_SUBSTITUTES]

    rights.each do |right|
      case right
      when Arel::Nodes::BindParam
        # No need to type cast as they're only used to create +memoizables+
        # (used as strings)
        value = right.value.value_before_type_cast

        # Statement-cache placeholders are resolved to the actual value of
        # the current call.
        if value.is_a?(ActiveRecord::StatementCache::Substitute)
          value = substitutes[value]
        end

        bind_params.params[binding_relation] << {
          right.value.name.to_sym => value,
        }
      when Arel::Nodes::Casted
        bind_params.params[binding_relation] << {
          right.attribute.name.to_sym => right.val,
        }
      else
        # Anything else on the right-hand side must itself be supported.
        bind_params = bind_params.union(extract_bind_params_recurse(right))
        return unless bind_params
      end
    end

    bind_params
  when Arel::Nodes::SelectStatement
    # No ORDER BY
    return unless node.orders.empty?

    node.cores.each do |core|
      # JOINs are not supported: only the left side of the source is
      # inspected below, so rows of joined tables would carry no
      # dependencies and the cached result could become stale.
      return unless core.source.right.empty?

      # Should have a WHERE if directly selecting from a table
      source_node = core.source.left
      case source_node
      when Arel::Table
        return if core.wheres.empty?
      when Arel::Nodes::TableAlias
        bind_params = bind_params.union(
          extract_bind_params_recurse(source_node.left)
        )

        return unless bind_params
      else
        return
      end

      # Binds wheres before havings
      core.wheres.each do |where|
        bind_params = bind_params.union(
          extract_bind_params_recurse(where)
        )

        return unless bind_params
      end

      core.havings.each do |having|
        bind_params = bind_params.union(
          extract_bind_params_recurse(having)
        )

        return unless bind_params
      end
    end

    bind_params
  when Arel::Nodes::Grouping
    # Inline SQL
    return if node.expr.is_a?(Arel::Nodes::SqlLiteral)

    extract_bind_params_recurse(node.expr)
  when Arel::Nodes::And
    # Conditions joined by AND are combined into a product of conditions.
    node.children.each do |child|
      bind_params = bind_params.product(
        extract_bind_params_recurse(child)
      )

      return unless bind_params
    end

    bind_params
  when Arel::Nodes::Join, Arel::Nodes::Union, Arel::Nodes::Or
    [node.left, node.right].each do |child|
      bind_params = bind_params.union(
        extract_bind_params_recurse(child)
      )

      return unless bind_params
    end

    bind_params
  else
    # Not yet supported
    return
  end
end
370
+
371
# The column conditions a query depends on, grouped by model.
class BindParams
  #
  # Bind params is a hash of arrays: each key is a model class, each value is
  # a list of hashes for memoized column conditions. Example:
  #
  #   {
  #     Site => [
  #       {name: 'a', city: 'b'},
  #       {name: 'a', city: 'c'},
  #       {name: 'b', city: 'b'},
  #       {name: 'b', city: 'c'},
  #     ],
  #   }
  #
  # Reading a model that is not present yet stores and returns an empty list.
  def params
    @params ||= Hash.new do |models, model|
      models[model] = []
    end
  end

  # Merges the conditions of this object into +other+ and returns +other+
  # (which is mutated). For a model present on both sides the two lists are
  # concatenated.
  #
  # @param other [BindParams, nil]
  # @return [BindParams, nil] +other+, or nil when +other+ is nil
  def union(other)
    return unless other

    # The tree is almost always right-heavy. Merge into the right node for better
    # performance.
    other.params.merge!(params) do |_, other_attrs_set, attrs_set|
      if other_attrs_set.empty?
        attrs_set
      elsif attrs_set.empty?
        other_attrs_set
      else
        attrs_set + other_attrs_set
      end
    end

    other
  end

  # Combines the conditions of this object with those of +other+ as a
  # cartesian product and returns +other+ (which is mutated).
  #
  # Example:
  #
  #   and(
  #     [{a: 1}, {a: 2}],
  #     [{b: 1}, {b: 2}],
  #   )
  #
  #   =>
  #
  #   [
  #     {a: 1, b: 1},
  #     {a: 1, b: 2},
  #     {a: 2, b: 1},
  #     {a: 2, b: 2},
  #   ]
  #
  # @param other [BindParams, nil]
  # @return [BindParams, nil] +other+, or nil when +other+ is nil
  def product(other)
    return unless other

    # The tree is almost always right-heavy. Merge into the right node for better
    # performance.
    params.each do |model, attrs_set|
      next if attrs_set.empty?

      other_attrs_set = other.params[model]

      # The other model does not have any conditions so far: carry the
      # attributes over to the other node
      if other_attrs_set.empty?
        other.params[model] = attrs_set
        next
      end

      # Distribute the current attrs into the other; conflicting
      # combinations come back as nil and are dropped.
      combined = attrs_set.flat_map do |attrs|
        other_attrs_set.map { |other_attrs| merge_attrs(other_attrs, attrs) }
      end

      other.params[model] = combined.compact
    end

    other
  end

  # Removes duplicated conditions from every model's list, in place.
  def uniq!
    params.each_value(&:uniq!)
  end

  # True when there is at least one model, every model has at least one
  # condition, and the sorted column names of every condition are among the
  # model's memoized columns.
  #
  # @return [Boolean]
  def memoizable?
    return false if params.empty?

    params.all? do |model, attrs_set|
      next false if attrs_set.empty?

      # Looked up once per model rather than once per condition.
      memoized_columns = RedisMemo::MemoizeRecords.memoized_columns(model)
      attrs_set.all? { |attrs| memoized_columns.include?(attrs.keys.sort) }
    end
  end

  private

  # Returns a copy of +base+ extended with +extra+, or nil when both assign
  # different values to the same column.
  #
  # Conflict example:
  #
  #   (a = 1 or b = 1) and (a = 2 or b = 2)
  #
  #   Keep:    a = 1 and b = 2, a = 2 and b = 1
  #   Discard: a = 1 and a = 2, b = 1 and b = 2
  def merge_attrs(base, extra)
    merged = base.dup

    extra.each do |name, val|
      return nil if merged.key?(name) && merged[name] != val

      merged[name] = val
    end

    merged
  end
end
494
+
495
+ # Thread locals to exchange information between RedisMemo and ActiveRecord
496
+ THREAD_KEY_AREL = :__redis_memo_memoize_records_cached_select_arel__
497
+ THREAD_KEY_SUBSTITUTES = :__redis_memo_memoize_records_cached_select_substitues__
498
+ THREAD_KEY_AREL_BIND_PARAMS = :__redis_memo_memoize_records_cached_select_arel_bind_params__
499
+ end