redis-memo 0.1.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- #
3
+ ##
4
4
  # Inspect a SQL's AST to memoize SELECT statements
5
5
  #
6
6
  # As Rails applies additional logic on top of the rows returned from the
@@ -94,6 +94,12 @@ class RedisMemo::MemoizeQuery::CachedSelect
94
94
 
95
95
  @@enabled_models = {}
96
96
 
97
+ # Thread locals to exchange information between RedisMemo and ActiveRecord
98
+ RedisMemo::ThreadLocalVar.define :arel
99
+ RedisMemo::ThreadLocalVar.define :substitues
100
+ RedisMemo::ThreadLocalVar.define :arel_bind_params
101
+
102
+ # @return [Hash] models enabled for caching
97
103
  def self.enabled_models
98
104
  @@enabled_models
99
105
  end
@@ -107,10 +113,7 @@ class RedisMemo::MemoizeQuery::CachedSelect
107
113
 
108
114
  memoize_method(
109
115
  :exec_query,
110
- method_id: proc do |_, sql, *args|
111
- sql.gsub(/(\$\d+)/, '?') # $1 -> ?
112
- .gsub(/((, *)*\?)+/, '?') # (?, ?, ? ...) -> (?)
113
- end,
116
+ method_id: proc { |_, sql, *| RedisMemo::Util.tagify_parameterized_sql(sql) },
114
117
  ) do |_, sql, _, binds, **|
115
118
  depends_on RedisMemo::MemoizeQuery::CachedSelect.current_query_bind_params
116
119
 
@@ -123,7 +126,7 @@ class RedisMemo::MemoizeQuery::CachedSelect
123
126
  # In activerecord >= 6, a bind could be an actual database value
124
127
  bind
125
128
  end
126
- end
129
+ end,
127
130
  )
128
131
  end
129
132
  end
@@ -152,61 +155,74 @@ class RedisMemo::MemoizeQuery::CachedSelect
152
155
  end
153
156
  end
154
157
 
158
+ # Extract bind params from the query by inspecting the SQL's AST recursively
159
+ # The bind params will be passed into the local thread variables. See
160
+ # +construct_bind_params_recurse+ for how to construct binding params
161
+ # recursively.
162
+ #
163
+ # @param sql [String] SQL query
164
+ # @return [Boolean] indicating whether a query should be cached
155
165
  def self.extract_bind_params(sql)
156
- ast = Thread.current[THREAD_KEY_AREL]&.ast
157
- return false unless ast.is_a?(Arel::Nodes::SelectStatement)
158
- return false unless ast.to_sql == sql
159
-
160
- Thread.current[THREAD_KEY_SUBSTITUTES] ||= {}
161
- # Iterate through the Arel AST in a Depth First Search
162
- bind_params = extract_bind_params_recurse(ast)
163
- return false unless bind_params
164
-
165
- bind_params.uniq!
166
- return false unless bind_params.memoizable?
167
-
168
- Thread.current[THREAD_KEY_AREL_BIND_PARAMS] = bind_params
169
- true
166
+ RedisMemo::Tracer.trace(
167
+ 'redis_memo.memoize_query.extract_bind_params',
168
+ RedisMemo::Util.tagify_parameterized_sql(sql),
169
+ ) do
170
+ ast = RedisMemo::ThreadLocalVar.arel&.ast
171
+ return false unless ast.is_a?(Arel::Nodes::SelectStatement)
172
+ return false unless ast.to_sql == sql
173
+
174
+ RedisMemo::ThreadLocalVar.substitues ||= {}
175
+ # Iterate through the Arel AST in a Depth First Search
176
+ bind_params = construct_bind_params_recurse(ast)
177
+ return false unless bind_params&.should_cache?
178
+
179
+ bind_params.extract!
180
+ RedisMemo::ThreadLocalVar.arel_bind_params = bind_params
181
+ true
182
+ end
170
183
  end
171
184
 
172
185
  def self.current_query_bind_params
173
- Thread.current[THREAD_KEY_AREL_BIND_PARAMS]
186
+ RedisMemo::ThreadLocalVar.arel_bind_params
174
187
  end
175
188
 
176
189
  def self.current_query=(arel)
177
- Thread.current[THREAD_KEY_AREL] = arel
190
+ RedisMemo::ThreadLocalVar.arel = arel
178
191
  end
179
192
 
180
193
  def self.current_substitutes=(substitutes)
181
- Thread.current[THREAD_KEY_SUBSTITUTES] = substitutes
194
+ RedisMemo::ThreadLocalVar.substitues = substitutes
182
195
  end
183
196
 
184
197
  def self.reset_current_query
185
- Thread.current[THREAD_KEY_AREL] = nil
186
- Thread.current[THREAD_KEY_SUBSTITUTES] = nil
187
- Thread.current[THREAD_KEY_AREL_BIND_PARAMS] = nil
198
+ RedisMemo::ThreadLocalVar.arel = nil
199
+ RedisMemo::ThreadLocalVar.substitues = nil
200
+ RedisMemo::ThreadLocalVar.arel_bind_params = nil
188
201
  end
189
202
 
190
203
  def self.with_new_query_context
191
- prev_arel = Thread.current[THREAD_KEY_AREL]
192
- prev_substitutes = Thread.current[THREAD_KEY_SUBSTITUTES]
193
- prev_bind_params = Thread.current[THREAD_KEY_AREL_BIND_PARAMS]
204
+ prev_arel = RedisMemo::ThreadLocalVar.arel
205
+ prev_substitutes = RedisMemo::ThreadLocalVar.substitues
206
+ prev_bind_params = RedisMemo::ThreadLocalVar.arel_bind_params
194
207
  RedisMemo::MemoizeQuery::CachedSelect.reset_current_query
195
208
 
196
209
  yield
197
210
  ensure
198
- Thread.current[THREAD_KEY_AREL] = prev_arel
199
- Thread.current[THREAD_KEY_SUBSTITUTES] = prev_substitutes
200
- Thread.current[THREAD_KEY_AREL_BIND_PARAMS] = prev_bind_params
211
+ RedisMemo::ThreadLocalVar.arel = prev_arel
212
+ RedisMemo::ThreadLocalVar.substitues = prev_substitutes
213
+ RedisMemo::ThreadLocalVar.arel_bind_params = prev_bind_params
201
214
  end
202
215
 
203
- private
204
-
205
216
  # A pre-order Depth First Search
206
217
  #
207
218
  # Note: Arel::Nodes#each returns a list in post-order, and it does not step
208
219
  # into Union nodes. So we're implementing our own DFS
209
- def self.extract_bind_params_recurse(node)
220
+ #
221
+ # @param node [Arel::Nodes::Node]
222
+ #
223
+ # @return [RedisMemo::MemoizeQuery::CachedSelect::BindParams]
224
+ def self.construct_bind_params_recurse(node)
225
+ # rubocop: disable Lint/NonLocalExitFromIterator
210
226
  bind_params = BindParams.new
211
227
 
212
228
  case node
@@ -229,7 +245,7 @@ class RedisMemo::MemoizeQuery::CachedSelect
229
245
  return unless binding_relation
230
246
 
231
247
  rights = node.right.is_a?(Array) ? node.right : [node.right]
232
- substitutes = Thread.current[THREAD_KEY_SUBSTITUTES]
248
+ substitutes = RedisMemo::ThreadLocalVar.substitues
233
249
 
234
250
  rights.each do |right|
235
251
  case right
@@ -256,20 +272,13 @@ class RedisMemo::MemoizeQuery::CachedSelect
256
272
  end,
257
273
  }
258
274
  else
259
- bind_params = bind_params.union(extract_bind_params_recurse(right))
260
- if bind_params
261
- next
262
- else
263
- return
264
- end
275
+ bind_params = bind_params.union(construct_bind_params_recurse(right))
276
+ return if !bind_params
265
277
  end
266
278
  end
267
279
 
268
280
  bind_params
269
281
  when Arel::Nodes::SelectStatement
270
- # No OREDER BY
271
- return unless node.orders.empty?
272
-
273
282
  node.cores.each do |core|
274
283
  # We don't support JOINs
275
284
  return unless core.source.right.empty?
@@ -284,7 +293,7 @@ class RedisMemo::MemoizeQuery::CachedSelect
284
293
  return if core.wheres.empty? || binding_relation.nil?
285
294
  when Arel::Nodes::TableAlias
286
295
  bind_params = bind_params.union(
287
- extract_bind_params_recurse(source_node.left)
296
+ construct_bind_params_recurse(source_node.left),
288
297
  )
289
298
 
290
299
  return unless bind_params
@@ -295,7 +304,7 @@ class RedisMemo::MemoizeQuery::CachedSelect
295
304
  # Binds wheres before havings
296
305
  core.wheres.each do |where|
297
306
  bind_params = bind_params.union(
298
- extract_bind_params_recurse(where)
307
+ construct_bind_params_recurse(where),
299
308
  )
300
309
 
301
310
  return unless bind_params
@@ -303,26 +312,23 @@ class RedisMemo::MemoizeQuery::CachedSelect
303
312
 
304
313
  core.havings.each do |having|
305
314
  bind_params = bind_params.union(
306
- extract_bind_params_recurse(having)
315
+ construct_bind_params_recurse(having),
307
316
  )
308
317
 
309
318
  return unless bind_params
310
319
  end
311
-
312
- # Reject any unbound select queries
313
- return if binding_relation && bind_params.params[binding_relation].empty?
314
320
  end
315
321
 
316
322
  bind_params
317
323
  when Arel::Nodes::Grouping
318
324
  # Inline SQL
319
- return if node.expr.is_a?(Arel::Nodes::SqlLiteral)
320
-
321
- extract_bind_params_recurse(node.expr)
325
+ construct_bind_params_recurse(node.expr)
326
+ when Arel::Nodes::LessThan, Arel::Nodes::LessThanOrEqual, Arel::Nodes::GreaterThan, Arel::Nodes::GreaterThanOrEqual, Arel::Nodes::NotEqual
327
+ bind_params
322
328
  when Arel::Nodes::And
323
329
  node.children.each do |child|
324
330
  bind_params = bind_params.product(
325
- extract_bind_params_recurse(child)
331
+ construct_bind_params_recurse(child),
326
332
  )
327
333
 
328
334
  return unless bind_params
@@ -332,7 +338,7 @@ class RedisMemo::MemoizeQuery::CachedSelect
332
338
  when Arel::Nodes::Union, Arel::Nodes::Or
333
339
  [node.left, node.right].each do |child|
334
340
  bind_params = bind_params.union(
335
- extract_bind_params_recurse(child)
341
+ construct_bind_params_recurse(child),
336
342
  )
337
343
 
338
344
  return unless bind_params
@@ -341,14 +347,24 @@ class RedisMemo::MemoizeQuery::CachedSelect
341
347
  bind_params
342
348
  else
343
349
  # Not yet supported
344
- return
350
+ nil
345
351
  end
352
+ # rubocop: enable Lint/NonLocalExitFromIterator
346
353
  end
347
354
 
355
+ # Retrieve the model info from the table node
356
+ # table node is an Arel::Table object, e.g. <Arel::Table @name="sites" ...>
357
+ # and we can retrieve the model info by inspecting the table name
358
+ # See +RedisMemo::MemoizeQuery::memoize_table_column+ for how to construct enabled_models
359
+ #
360
+ # @param table_node [Arel::Table]
348
361
  def self.extract_binding_relation(table_node)
349
362
  enabled_models[table_node.try(:name)]
350
363
  end
351
364
 
365
+ #
366
+ # Identify whether the node has a filter condition
367
+ #
352
368
  class NodeHasFilterCondition
353
369
  def self.===(node)
354
370
  case node
@@ -365,9 +381,4 @@ class RedisMemo::MemoizeQuery::CachedSelect
365
381
  end
366
382
  end
367
383
  end
368
-
369
- # Thread locals to exchange information between RedisMemo and ActiveRecord
370
- THREAD_KEY_AREL = :__redis_memo_memoize_query_cached_select_arel__
371
- THREAD_KEY_SUBSTITUTES = :__redis_memo_memoize_query_cached_select_substitues__
372
- THREAD_KEY_AREL_BIND_PARAMS = :__redis_memo_memoize_query_cached_select_arel_bind_params__
373
384
  end
@@ -2,47 +2,122 @@
2
2
 
3
3
  class RedisMemo::MemoizeQuery::CachedSelect
4
4
  class BindParams
5
- def params
6
- #
7
- # Bind params is hash of sets: each key is a model class, each value is a
8
- # set of hashes for memoized column conditions. Example:
9
- #
10
- # {
11
- # Site => [
12
- # {name: 'a', city: 'b'},
13
- # {name: 'a', city: 'c'},
14
- # {name: 'b', city: 'b'},
15
- # {name: 'b', city: 'c'},
16
- # ],
17
- # }
18
- #
19
- @params ||= Hash.new do |models, model|
20
- models[model] = []
21
- end
5
+ def initialize(left = nil, right = nil, operator = nil)
6
+ @left = left
7
+ @right = right
8
+ @operator = operator
22
9
  end
23
10
 
24
11
  def union(other)
25
12
  return unless other
26
13
 
27
- # The tree is almost always right-heavy. Merge into the right node for better
28
- # performance.
29
- other.params.merge!(params) do |_, other_attrs_set, attrs_set|
30
- if other_attrs_set.empty?
31
- attrs_set
32
- elsif attrs_set.empty?
33
- other_attrs_set
34
- else
35
- attrs_set + other_attrs_set
14
+ self.class.new(self, other, __method__)
15
+ end
16
+
17
+ def product(other)
18
+ return unless other
19
+
20
+ self.class.new(self, other, __method__)
21
+ end
22
+
23
+ def should_cache?
24
+ plan!
25
+
26
+ if plan.model_attrs.empty? || plan.dependency_size_estimation.to_i > RedisMemo::DefaultOptions.max_query_dependency_size
27
+ return false
28
+ end
29
+
30
+ plan.model_attrs.each do |model, attrs_set|
31
+ return false if attrs_set.empty?
32
+
33
+ attrs_set.each do |attrs|
34
+ return false unless RedisMemo::MemoizeQuery
35
+ .memoized_columns(model)
36
+ .include?(attrs.keys.sort)
36
37
  end
37
38
  end
38
39
 
39
- other
40
+ true
40
41
  end
41
42
 
42
- def product(other)
43
+ #
44
+ # Extracted bind params is a hash of sets: each key is a model class, each
45
+ # value is a set of hashes for memoized column conditions. Example:
46
+ #
47
+ # {
48
+ # Site => [
49
+ # {name: 'a', city: 'b'},
50
+ # {name: 'a', city: 'c'},
51
+ # {name: 'b', city: 'b'},
52
+ # {name: 'b', city: 'c'},
53
+ # ],
54
+ # }
55
+ #
56
+ def extract!
57
+ return if operator.nil?
58
+
59
+ left.extract!
60
+ right.extract!
61
+ __send__(:"#{operator}!")
62
+ end
63
+
64
+ def params
65
+ @params ||= Hash.new do |models, model|
66
+ models[model] = Set.new
67
+ end
68
+ end
69
+
70
+ protected
71
+
72
+ # BindParams is built recursively when iterating through the Arel AST
73
+ # nodes. BindParams represents a binary tree. Query parameters are added to
74
+ # the leaf nodes of the tree, and the leaf nodes are connected by
75
+ # operators, such as `union` (or conditions) or `product` (and conditions).
76
+ attr_accessor :left
77
+ attr_accessor :right
78
+ attr_accessor :operator
79
+ attr_accessor :plan
80
+
81
+ def plan!
82
+ self.plan = Plan.new(self)
83
+ return if operator.nil?
84
+
85
+ left.plan!
86
+ right.plan!
87
+ __send__(:"plan_#{operator}")
88
+ end
89
+
90
+ def plan_union
91
+ plan.dependency_size_estimation = left.plan.dependency_size_estimation + right.plan.dependency_size_estimation
92
+ plan.model_attrs = union_attrs_set(left.plan.model_attrs, right.plan.model_attrs)
93
+ end
94
+
95
+ def plan_product
96
+ plan.dependency_size_estimation = left.plan.dependency_size_estimation * right.plan.dependency_size_estimation
97
+ plan.model_attrs = product_attrs_set(left.plan.model_attrs, right.plan.model_attrs)
98
+ end
99
+
100
+ def union!
101
+ @params = union_attrs_set(left.params, right.params)
102
+ end
103
+
104
+ def product!
105
+ @params = product_attrs_set(left.params, right.params)
106
+ end
107
+
108
+ def union_attrs_set(left, right)
109
+ left.merge(right) do |_, attrs_set, other_attrs_set|
110
+ next attrs_set if other_attrs_set.empty?
111
+ next other_attrs_set if attrs_set.empty?
112
+
113
+ attrs_set + other_attrs_set
114
+ end
115
+ end
116
+
117
+ def product_attrs_set(left, right)
43
118
  # Example:
44
119
  #
45
- # and(
120
+ # product(
46
121
  # [{a: 1}, {a: 2}],
47
122
  # [{b: 1}, {b: 2}],
48
123
  # )
@@ -55,29 +130,16 @@ class RedisMemo::MemoizeQuery::CachedSelect
55
130
  # {a: 2, b: 1},
56
131
  # {a: 2, b: 2},
57
132
  # ]
58
- return unless other
59
-
60
- # The tree is almost always right-heavy. Merge into the right node for better
61
- # performance.
62
- params.each do |model, attrs_set|
63
- next if attrs_set.empty?
64
-
65
- # The other model does not have any conditions so far: carry the
66
- # attributes over to the other node
67
- if other.params[model].empty?
68
- other.params[model] = attrs_set
69
- next
70
- end
71
-
72
- # Distribute the current attrs into the other
73
- other_attrs_set_size = other.params[model].size
74
- other_attrs_set = other.params[model]
75
- merged_attrs_set = Array.new(other_attrs_set_size * attrs_set.size)
133
+ left.merge(right) do |_, attrs_set, other_attrs_set|
134
+ next attrs_set if other_attrs_set.empty?
135
+ next other_attrs_set if attrs_set.empty?
76
136
 
77
- attrs_set.each_with_index do |attrs, i|
78
- other_attrs_set.each_with_index do |other_attrs, j|
79
- k = i * other_attrs_set_size + j
80
- merged_attrs = merged_attrs_set[k] = other_attrs.dup
137
+ # distribute the current attrs into the other
138
+ merged_attrs_set = Set.new
139
+ attrs_set.each do |attrs|
140
+ other_attrs_set.each do |other_attrs|
141
+ merged_attrs = other_attrs.dup
142
+ should_add_attrs = true
81
143
  attrs.each do |name, val|
82
144
  # Conflict detected. For example:
83
145
  #
@@ -86,42 +148,112 @@ class RedisMemo::MemoizeQuery::CachedSelect
86
148
  # Keep: a = 1 and b = 2, a = 2 and b = 1
87
149
  # Discard: a = 1 and a = 2, b = 1 and b = 2
88
150
  if merged_attrs.include?(name) && merged_attrs[name] != val
89
- merged_attrs_set[k] = nil
151
+ should_add_attrs = false
90
152
  break
91
153
  end
92
154
 
93
155
  merged_attrs[name] = val
94
156
  end
157
+ merged_attrs_set << merged_attrs if should_add_attrs
95
158
  end
96
159
  end
97
160
 
98
- merged_attrs_set.compact!
99
- other.params[model] = merged_attrs_set
161
+ merged_attrs_set
100
162
  end
101
-
102
- other
103
163
  end
104
164
 
105
- def uniq!
106
- params.each do |_, attrs_set|
107
- attrs_set.uniq!
108
- end
109
- end
165
+ # Prior to actually extracting the bind parameters, we first quickly
166
+ # estimate if it makes sense to do so. If a query contains too many
167
+ # dependencies, or contains dependencies that have not been memoized, then
168
+ # the query itself cannot be cached correctly/efficiently, so there’s no
169
+ # point to actually extract.
170
+ #
171
+ # The planning phase is similar to the extraction phase. Though in the
172
+ # planning phase, we can ignore all the actual attribute values and only
173
+ # look at the attribute names. This way, we can precompute the dependency
174
+ # size without populating their actual values.
175
+ #
176
+ # For example, in the planning phase,
177
+ #
178
+ # {a:nil} x {b: nil} => {a: nil, b: nil}
179
+ # {a:nil, b:nil} x {a: nil, b: nil} => {a: nil, b: nil}
180
+ #
181
+ # and in the extraction phase, that's where the # of dependency can
182
+ # actually grow significantly:
183
+ #
184
+ # {a: [1,2,3]} x {b: [1,2,3]} => [{a: 1, b: 1}, ....]
185
+ # {a:[1,2], b:[1,2]} x {a: [1,2,3], b: [1,2,3]} => [{a: 1, b: 1}, ...]
186
+ #
187
+ class Plan
188
+ class DependencySizeEstimation
189
+ def initialize(hash = nil)
190
+ @hash = hash
191
+ end
110
192
 
111
- def memoizable?
112
- return false if params.empty?
193
+ def +(other)
194
+ merged_hash = hash.dup
195
+ other.hash.each do |k, v|
196
+ merged_hash[k] += v
197
+ end
198
+ self.class.new(merged_hash)
199
+ end
113
200
 
114
- params.each do |model, attrs_set|
115
- return false if attrs_set.empty?
201
+ def *(other)
202
+ merged_hash = hash.dup
203
+ other.hash.each do |k, v|
204
+ if merged_hash.include?(k)
205
+ merged_hash[k] *= v
206
+ else
207
+ merged_hash[k] = v
208
+ end
209
+ end
210
+ self.class.new(merged_hash)
211
+ end
116
212
 
117
- attrs_set.each do |attrs|
118
- return false unless RedisMemo::MemoizeQuery
119
- .memoized_columns(model)
120
- .include?(attrs.keys.sort)
213
+ def [](key)
214
+ hash[key]
215
+ end
216
+
217
+ def []=(key, val)
218
+ hash[key] = val
219
+ end
220
+
221
+ def to_i
222
+ ret = 0
223
+ hash.each do |_, v|
224
+ ret += v
225
+ end
226
+ ret
227
+ end
228
+
229
+ protected
230
+
231
+ def hash
232
+ @hash ||= Hash.new(0)
121
233
  end
122
234
  end
123
235
 
124
- true
236
+ attr_accessor :dependency_size_estimation
237
+ attr_accessor :model_attrs
238
+
239
+ def initialize(bind_params)
240
+ @dependency_size_estimation = DependencySizeEstimation.new
241
+ @model_attrs = Hash.new do |models, model|
242
+ models[model] = Set.new
243
+ end
244
+
245
+ # An aggregated bind_params node can only obtain params by combining
246
+ # its children nodes
247
+ return if !bind_params.__send__(:operator).nil?
248
+
249
+ bind_params.params.each do |model, attrs_set|
250
+ @dependency_size_estimation[model] += attrs_set.size
251
+ attrs_set.each do |attrs|
252
+ # [k, nil]: Ignore the attr value and keep the name only
253
+ @model_attrs[model] << attrs.keys.map { |k| [k, nil] }.to_h
254
+ end
255
+ end
256
+ end
125
257
  end
126
258
  end
127
259
  end