elasticgraph-graphql 0.18.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +3 -0
  4. data/elasticgraph-graphql.gemspec +23 -0
  5. data/lib/elastic_graph/graphql/aggregation/composite_grouping_adapter.rb +79 -0
  6. data/lib/elastic_graph/graphql/aggregation/computation.rb +39 -0
  7. data/lib/elastic_graph/graphql/aggregation/date_histogram_grouping.rb +83 -0
  8. data/lib/elastic_graph/graphql/aggregation/field_path_encoder.rb +47 -0
  9. data/lib/elastic_graph/graphql/aggregation/field_term_grouping.rb +26 -0
  10. data/lib/elastic_graph/graphql/aggregation/key.rb +87 -0
  11. data/lib/elastic_graph/graphql/aggregation/nested_sub_aggregation.rb +37 -0
  12. data/lib/elastic_graph/graphql/aggregation/non_composite_grouping_adapter.rb +129 -0
  13. data/lib/elastic_graph/graphql/aggregation/path_segment.rb +31 -0
  14. data/lib/elastic_graph/graphql/aggregation/query.rb +172 -0
  15. data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +345 -0
  16. data/lib/elastic_graph/graphql/aggregation/query_optimizer.rb +187 -0
  17. data/lib/elastic_graph/graphql/aggregation/resolvers/aggregated_values.rb +41 -0
  18. data/lib/elastic_graph/graphql/aggregation/resolvers/count_detail.rb +44 -0
  19. data/lib/elastic_graph/graphql/aggregation/resolvers/grouped_by.rb +30 -0
  20. data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +64 -0
  21. data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +83 -0
  22. data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +82 -0
  23. data/lib/elastic_graph/graphql/aggregation/script_term_grouping.rb +32 -0
  24. data/lib/elastic_graph/graphql/aggregation/term_grouping.rb +118 -0
  25. data/lib/elastic_graph/graphql/client.rb +43 -0
  26. data/lib/elastic_graph/graphql/config.rb +81 -0
  27. data/lib/elastic_graph/graphql/datastore_query/document_paginator.rb +100 -0
  28. data/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +142 -0
  29. data/lib/elastic_graph/graphql/datastore_query/paginator.rb +199 -0
  30. data/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +239 -0
  31. data/lib/elastic_graph/graphql/datastore_query.rb +372 -0
  32. data/lib/elastic_graph/graphql/datastore_response/document.rb +78 -0
  33. data/lib/elastic_graph/graphql/datastore_response/search_response.rb +79 -0
  34. data/lib/elastic_graph/graphql/datastore_search_router.rb +151 -0
  35. data/lib/elastic_graph/graphql/decoded_cursor.rb +120 -0
  36. data/lib/elastic_graph/graphql/filtering/boolean_query.rb +45 -0
  37. data/lib/elastic_graph/graphql/filtering/field_path.rb +81 -0
  38. data/lib/elastic_graph/graphql/filtering/filter_args_translator.rb +58 -0
  39. data/lib/elastic_graph/graphql/filtering/filter_interpreter.rb +526 -0
  40. data/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +148 -0
  41. data/lib/elastic_graph/graphql/filtering/range_query.rb +56 -0
  42. data/lib/elastic_graph/graphql/http_endpoint.rb +229 -0
  43. data/lib/elastic_graph/graphql/monkey_patches/schema_field.rb +56 -0
  44. data/lib/elastic_graph/graphql/monkey_patches/schema_object.rb +48 -0
  45. data/lib/elastic_graph/graphql/query_adapter/filters.rb +161 -0
  46. data/lib/elastic_graph/graphql/query_adapter/pagination.rb +27 -0
  47. data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +124 -0
  48. data/lib/elastic_graph/graphql/query_adapter/sort.rb +32 -0
  49. data/lib/elastic_graph/graphql/query_details_tracker.rb +60 -0
  50. data/lib/elastic_graph/graphql/query_executor.rb +200 -0
  51. data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +49 -0
  52. data/lib/elastic_graph/graphql/resolvers/graphql_adapter.rb +114 -0
  53. data/lib/elastic_graph/graphql/resolvers/list_records.rb +29 -0
  54. data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +74 -0
  55. data/lib/elastic_graph/graphql/resolvers/query_adapter.rb +85 -0
  56. data/lib/elastic_graph/graphql/resolvers/query_source.rb +46 -0
  57. data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +71 -0
  58. data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +65 -0
  59. data/lib/elastic_graph/graphql/resolvers/relay_connection/page_info.rb +82 -0
  60. data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +40 -0
  61. data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +42 -0
  62. data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +56 -0
  63. data/lib/elastic_graph/graphql/scalar_coercion_adapters/cursor.rb +35 -0
  64. data/lib/elastic_graph/graphql/scalar_coercion_adapters/date.rb +64 -0
  65. data/lib/elastic_graph/graphql/scalar_coercion_adapters/date_time.rb +60 -0
  66. data/lib/elastic_graph/graphql/scalar_coercion_adapters/local_time.rb +30 -0
  67. data/lib/elastic_graph/graphql/scalar_coercion_adapters/longs.rb +47 -0
  68. data/lib/elastic_graph/graphql/scalar_coercion_adapters/no_op.rb +24 -0
  69. data/lib/elastic_graph/graphql/scalar_coercion_adapters/time_zone.rb +44 -0
  70. data/lib/elastic_graph/graphql/scalar_coercion_adapters/untyped.rb +32 -0
  71. data/lib/elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones.rb +634 -0
  72. data/lib/elastic_graph/graphql/schema/arguments.rb +78 -0
  73. data/lib/elastic_graph/graphql/schema/enum_value.rb +30 -0
  74. data/lib/elastic_graph/graphql/schema/field.rb +147 -0
  75. data/lib/elastic_graph/graphql/schema/relation_join.rb +103 -0
  76. data/lib/elastic_graph/graphql/schema/type.rb +263 -0
  77. data/lib/elastic_graph/graphql/schema.rb +164 -0
  78. data/lib/elastic_graph/graphql.rb +253 -0
  79. data/script/dump_time_zones +81 -0
  80. data/script/dump_time_zones.java +17 -0
  81. metadata +503 -0
@@ -0,0 +1,526 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/constants"
10
+ require "elastic_graph/graphql/filtering/boolean_query"
11
+ require "elastic_graph/graphql/filtering/field_path"
12
+ require "elastic_graph/graphql/filtering/range_query"
13
+ require "elastic_graph/graphql/schema/enum_value"
14
+ require "elastic_graph/support/graphql_formatter"
15
+ require "elastic_graph/support/memoizable_data"
16
+ require "elastic_graph/support/time_util"
17
+ require "graphql"
18
+
19
+ module ElasticGraph
20
+ class GraphQL
21
+ module Filtering
22
+ # Contains all query logic related to filtering. Not tested directly; tests drive the `Query` interface instead.
23
+ # For more info on how this works, see:
24
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-bool-query.html
25
+ # https://www.elastic.co/blog/lost-in-translation-boolean-operations-and-filters-in-the-bool-query
26
+ FilterInterpreter = Support::MemoizableData.define(:runtime_metadata, :schema_names, :logger) do
27
+ # @implements FilterInterpreter
28
+
29
# Builds an interpreter from `runtime_metadata` and `logger`.
#
# The `schema_names` member is not accepted from the caller; it is always read from
# `runtime_metadata.schema_element_names`.
def initialize(runtime_metadata:, logger:)
  schema_names = runtime_metadata.schema_element_names
  super(runtime_metadata: runtime_metadata, schema_names: schema_names, logger: logger)
end
36
+
37
# Translates the given collection of filter hashes into a single datastore query.
#
# Every filter hash is applied to the same bool node, starting from `from_field_path`.
# When no query clauses result, `nil` is returned, which makes it easy for a caller to
# `compact` out `query: {}` in a larger search request body.
#
# https://www.elastic.co/guide/en/elasticsearch/reference/8.11/query-dsl.html
def build_query(filter_hashes, from_field_path: FieldPath.empty)
  build_bool_hash do |root_node|
    filter_hashes.each { |hash| process_filter_hash(root_node, hash, from_field_path) }
  end
end
50
+
51
# Compact string form of the interpreter.
#
# The full inspect/to_s output of `runtime_metadata` and `logger` can be quite large and noisy.
# We generally don't care about those details; we just want to see at a glance whether two
# `FilterInterpreter` instances are equal and, if not, which part differs. Printing the `hash`
# of each of the two initialize args gives us exactly that.
def to_s
  metadata_hash = runtime_metadata.hash
  logger_hash = logger.hash

  "#<data #{FilterInterpreter.name} runtime_metadata=(hash: #{metadata_hash}) logger=(hash: #{logger_hash})>"
end
59
+ alias_method :inspect, :to_s
60
+
61
+ private
62
+
63
# Applies every entry of `filter_hash` to `bool_node`.
#
# Each key is either a filtering operator or a field name; `identify_expression_type` decides
# which, and the entry is handed to the matching `process_*` method. Entries that cannot be
# identified are logged and otherwise ignored.
def process_filter_hash(bool_node, filter_hash, field_path)
  filter_hash.each do |field_or_op, expression|
    expression_type = identify_expression_type(field_or_op, expression)

    case expression_type
    when :empty then nil # An "empty" filter predicate; there is nothing to add.
    when :not then process_not_expression(bool_node, expression, field_path)
    when :list_any_filter then process_list_any_filter_expression(bool_node, expression, field_path)
    when :any_of then process_any_of_expression(bool_node, expression, field_path)
    when :all_of then process_all_of_expression(bool_node, expression, field_path)
    when :operator then process_operator_expression(bool_node, field_or_op, expression, field_path)
    when :list_count then process_list_count_expression(bool_node, expression, field_path)
    when :sub_field then process_sub_field_expression(bool_node, expression, field_path + field_or_op)
    else
      logger.warn("Ignoring unknown filtering operator (#{field_or_op}: #{expression.inspect}) on field `#{field_path.from_root.join(".")}`")
    end
  end
end
87
+
88
# Classifies a single `field_or_op => expression` filter entry.
#
# Returns one of `:empty`, `:not`, `:list_any_filter`, `:all_of`, `:any_of`, `:operator`,
# `:list_count`, `:sub_field` or `:unknown`. The checks run in a fixed order and the first
# one that applies wins; in particular, a `nil` expression is always `:empty`, regardless
# of the key it is stored under.
def identify_expression_type(field_or_op, expression)
  if expression.nil?
    :empty
  elsif field_or_op == schema_names.not
    :not
  elsif field_or_op == schema_names.any_satisfy
    :list_any_filter
  elsif field_or_op == schema_names.all_of
    :all_of
  elsif field_or_op == schema_names.any_of
    :any_of
  elsif filter_operators.key?(field_or_op)
    :operator
  elsif field_or_op == LIST_COUNTS_FIELD
    :list_count
  elsif expression.is_a?(::Hash)
    :sub_field
  else
    :unknown
  end
end
99
+
100
# Indicates whether the given `expression` filters on any subfield, as opposed to only
# applying operators at the current field path.
#
# `not` and `any_satisfy` wrap a single filter hash, so we recurse into it. `any_of` and
# `all_of` are the only two cases where the sub expression is an array of filter hashes;
# the overall expression filters on subfields as long as at least one element does (that
# holds for `all_of` just as much as for `any_of`). Everything else (`:empty`, `:operator`,
# `:unknown`, `:list_count`) does not filter on subfields.
def filters_on_sub_fields?(expression)
  expression.any? do |field_or_op, sub_expression|
    expression_type = identify_expression_type(field_or_op, sub_expression)

    if expression_type == :sub_field
      true
    elsif expression_type == :not || expression_type == :list_any_filter
      filters_on_sub_fields?(sub_expression)
    elsif expression_type == :any_of || expression_type == :all_of
      sub_expression.any? { |candidate| filters_on_sub_fields?(candidate) }
    else
      false
    end
  end
end
119
+
120
# Negates `expression` and adds the result to `bool_node`.
#
# The expression is first built into its own bool hash. If that produces no clauses at all,
# nothing is added. Otherwise:
#
# - Clauses that are already negated (under `must_not`) are converted back into positive
#   clauses instead of being double-negated (i.e., !!A == A).
# - All remaining clauses are wrapped in a single bool hash and added under `must_not`.
def process_not_expression(bool_node, expression, field_path)
  sub_filter = build_bool_hash do |inner_node|
    process_filter_hash(inner_node, expression, field_path)
  end

  return unless sub_filter

  negated_bool = sub_filter.fetch(:bool)

  # `fetch` with a fallback (rather than `[]`) avoids triggering the bool node's default
  # proc, which would otherwise add an empty `must_not` entry as a side effect.
  negated_bool.fetch(:must_not, []).each do |negated_clause|
    inner_bool = negated_clause.fetch(:bool)

    if inner_bool.key?(:should)
      # A clause with `should` also carries the integer `minimum_should_match`, which cannot be
      # concatenated onto a clause list (`Array#concat` raises a TypeError). Its `should` clauses
      # must also stay grouped so that they are not mixed with other `should` clauses of
      # `bool_node`. Keep the clause intact and require it as a whole.
      bool_node[:filter] << negated_clause
    else
      # Pull the clauses up to the current bool_node to remove the negation.
      inner_bool.each { |occurrence, clauses| bool_node[occurrence].concat(clauses) }
    end
  end

  # Don't drop any other filters! Those still need to be negated.
  other_filters = negated_bool.except(:must_not)
  bool_node[:must_not] << {bool: other_filters} unless other_filters.empty?
end
140
+
141
# Handles an `any_satisfy` filter. There are two cases, each of which is handled differently:
#
# - List-of-scalars
# - List-of-nested-objects
#
# We tell them apart by checking whether `filter` filters on any subfields. If it does, the
# filter is being applied to a `nested` list field. We can count on this because we do not
# generate `any_satisfy` filters on `object` list fields (instead, they get generated on
# their leaf fields).
def process_list_any_filter_expression(bool_node, filter, field_path)
  handler =
    if filters_on_sub_fields?(filter)
      :process_any_satisfy_filter_expression_on_nested_object_list
    else
      :process_any_satisfy_filter_expression_on_scalar_list
    end

  send(handler, bool_node, filter, field_path)
end
157
+
158
# Handles `any_satisfy` on a list of nested objects.
#
# The filter is built into its own bool hash relative to `field_path.nested`, and then wrapped
# in a `nested` query whose `path` is the dot-joined `field_path`. Nothing is added when the
# filter produces no clauses.
def process_any_satisfy_filter_expression_on_nested_object_list(bool_node, filter, field_path)
  nested_query = build_bool_hash { |inner_node| process_filter_hash(inner_node, filter, field_path.nested) }
  return unless nested_query

  bool_node[:filter] << {nested: {path: field_path.from_root.join("."), query: nested_query}}
end
167
+
168
# Handles `any_satisfy` on a list-of-leaf-values field. Here `any_satisfy` doesn't _do_
# anything by itself: it just expresses the fact that documents with any list element value
# matching the predicates will match the overall filter.
#
# Raises `::GraphQL::ExecutionError` when the filter would require more than one clause to match.
def process_any_satisfy_filter_expression_on_scalar_list(bool_node, filter, field_path)
  processed = build_bool_hash { |node| process_filter_hash(node, filter, field_path) }
  return unless processed

  processed_bool_query = processed.fetch(:bool)

  # The semantics we want for `any_satisfy` are that it matches when a value exists in the list
  # that satisfies all of the provided subfilter. The datastore gives us exactly that when the bool
  # query only requires one clause to match. With multiple required clauses there is a subtle
  # issue: a document matches so long as each required clause matches *some* value, without
  # requiring that they all match the *same* value. The list field on a document could contain N
  # values, each matching a different one of the required clauses, and the document would be a hit.
  #
  # Rather than behaving in a surprising way, we disallow a filter that has multiple required
  # clauses, and report an error instead.
  if required_matching_clause_count(processed_bool_query) <= 1
    return bool_node.update(processed_bool_query) { |_, existing_clauses, added_clauses| existing_clauses + added_clauses }
  end

  formatted_filter = Support::GraphQLFormatter.serialize(
    {schema_names.any_satisfy => filter},
    wrap_hash_with_braces: false
  )

  raise ::GraphQL::ExecutionError, "`#{formatted_filter}` is not supported because it produces " \
    "multiple filtering clauses under `#{schema_names.any_satisfy}`, which doesn't work as expected. " \
    "Remove one or more of your `#{schema_names.any_satisfy}` predicates and try again."
end
200
+
201
# Handles `any_of`, which represents an OR of its sub-expressions.
#
# Each sub-expression is built into its own bool hash; sub-expressions that produce no
# clauses are dropped. The remaining ones are combined as `should` clauses.
def process_any_of_expression(bool_node, expressions, field_path)
  shoulds = expressions.filter_map do |expression|
    build_bool_hash { |inner_bool_node| process_filter_hash(inner_bool_node, expression, field_path) }
  end

  if shoulds.empty?
    # With nothing to OR together, the filtering semantics we want is to match no documents.
    # The datastore does not behave that way when given an empty array under `should`, so we
    # give it filter criteria that evaluate to false for every document instead.
    BooleanQuery::ALWAYS_FALSE_FILTER.merge_into(bool_node)
  else
    BooleanQuery.should(*shoulds).merge_into(bool_node)
  end
end
215
+
216
# Handles `all_of`, which represents an AND of its sub-expressions.
#
# AND is already how `process_filter_hash` combines filters on a bool node, so each
# sub-expression is simply processed against the same `bool_node`.
def process_all_of_expression(bool_node, expressions, field_path)
  expressions.each { |sub_expression| process_filter_hash(bool_node, sub_expression, field_path) }
end
223
+
224
# Applies a single filtering operator at the current field path.
#
# `operator` is the name of a filtering operator and `expression` is the value it should be
# applied to. The lambda registered for the operator in `filter_operators` is called with the
# dot-joined field path and that value. Whatever it returns is merged into `bool_node`; a `nil`
# result (as the `time_of_day` operator produces when given no comparisons) adds nothing.
def process_operator_expression(bool_node, operator, expression, field_path)
  build_query_for = filter_operators.fetch(operator)
  datastore_field = field_path.from_root.join(".")

  build_query_for.call(datastore_field, expression)&.merge_into(bool_node)
end
231
+
232
# Processes `expression`, a hash of filters that applies to the field at `field_path` (the
# caller has already appended the field name to the path).
#
# Usually the hash is processed recursively against `bool_node` itself. However, if the hash
# has `any_of` in it, it has to be processed on a nested bool node instead.
#
# To understand why, first consider a filter that has no `any_of` but does use field nesting:
#
#   filter: {
#     weight: {lt: 2000},
#     cost: {
#       currency: {equal_to_any_of: ["USD"]},
#       amount: {gt: 1000}
#     }
#   }
#
# While `currency` and `amount` are expressed as sub-filters under `cost` in our GraphQL
# syntax, no nested bool node structure is needed for the datastore query. We get a flat
# filter structure like this:
#
#   {bool: {filter: [
#     {range: {"weight": {lt: 2000}}},
#     {terms: {"cost.currency": ["USD"]}},
#     {range: {"cost.amount": {gt: 1000}}}
#   ]}}
#
# The 3 filter conditions are ANDed together as a single list under `filter`.
# The nested field structure gets flattened using a dot-separated path.
#
# Now consider a filter that has multiple `any_of` sub-expressions:
#
#   filter: {
#     weight: {any_of: [
#       {gt: 9000},
#       {lt: 2000}
#     ]},
#     cost: {any_of: [
#       {currency: {equal_to_any_of: ["USD"]}},
#       {amount: {gt: 1000}}
#     ]}
#   }
#
# Without a nested structure, we would wind up with a single list of sub-expressions
# that are OR'd together:
#
#   {bool: {filter: [{bool: {should: [
#     {range: {"weight": {gt: 9000}}},
#     {range: {"weight": {lt: 2000}}},
#     {terms: {"cost.currency": ["USD"]}},
#     {range: {"cost.amount": {gt: 1000}}}
#   ]}}]}}
#
# ...but that's clearly wrong. By creating a nested bool node based on the presence of `any_of`,
# we instead produce a structure like this:
#
#   {bool: {filter: [
#     {bool: {should: [
#       {range: {"weight": {gt: 9000}}},
#       {range: {"weight": {lt: 2000}}}
#     ]}},
#     {bool: {should: [
#       {terms: {"cost.currency": ["USD"]}},
#       {range: {"cost.amount": {gt: 1000}}}
#     ]}}
#   ]}}
#
# ...which will actually work correctly.
def process_sub_field_expression(bool_node, expression, field_path)
  unless expression.key?(schema_names.any_of)
    return process_filter_hash(bool_node, expression, field_path)
  end

  sub_filter = build_bool_hash { |inner_node| process_filter_hash(inner_node, expression, field_path) }
  bool_node[:filter] << sub_filter if sub_filter
end
310
+
311
# Handles a filter on the count of a list field.
#
# Normally nothing special is needed for list count expressions. That's the case, for
# example, for an expression like:
#
#   filter: {tags: {count: {gt: 2}}}
#
# However, if the count expression could match a count of 0 (that is, if it doesn't
# exclude a count of zero), such as this:
#
#   filter: {tags: {count: {lt: 1}}}
#
# ...then some special handling is needed. A count of 0 is equivalent to the list field not
# existing. While we index an explicit count of 0, the count field will be missing from
# documents indexed before the list field was defined on the ElasticGraph schema. To properly
# match those documents, the expression is converted into an OR (using `any_of`) that also
# matches documents that lack the field entirely.
def process_list_count_expression(bool_node, expression, field_path)
  count_expression =
    if excludes_zero?(expression)
      expression
    else
      {schema_names.any_of => [expression, {schema_names.equal_to_any_of => [nil]}]}
    end

  process_sub_field_expression(bool_node, count_expression, field_path.counts_path)
end
335
+
336
# Yields a fresh bool node (a hash whose missing keys default to a new empty array) to the
# given block, and wraps the populated node as `{bool: node}`.
#
# Returns `nil` when the block leaves the node empty, which is how "empty" filter
# predicates get ignored.
def build_bool_hash(&block)
  bool_node = Hash.new { |hash, occurrence| hash[occurrence] = [] }
  yield bool_node

  return if bool_node.empty?

  # According to https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-bool-query.html#bool-min-should-match,
  # `minimum_should_match` defaults to 1 only if the bool query includes at least one `should`
  # clause and no `must` or `filter` clauses; otherwise it defaults to 0. We want `should` clauses
  # to work alongside musts and filters, so we set it explicitly to 1 whenever there are `should` clauses.
  bool_node[:minimum_should_match] = 1 if bool_node.key?(:should)

  {bool: bool_node}
end
349
+
350
# Determines if the given filter expression excludes the value `0`.
#
# `expression` maps operators to operands. It excludes zero as soon as any one of its
# operators, taken on its own, rules out `0`.
#
# A `nil` operand expresses no constraint (just like a `nil` filter predicate is treated as
# "empty" elsewhere), so it never excludes zero. Without this check, an expression such as
# `{gt: nil}` would raise a `NoMethodError` from the comparison below.
def excludes_zero?(expression)
  expression.any? do |operator, operand|
    next false if operand.nil?

    case operator
    when schema_names.equal_to_any_of then !operand.include?(0)
    when schema_names.lt then operand <= 0
    when schema_names.lte then operand < 0
    when schema_names.gt then operand >= 0
    when schema_names.gte then operand > 0
    else
      # :nocov: -- all operators are covered above. But simplecov complains about an implicit `else` branch being uncovered, so here we've defined it to wrap it with `:nocov:`.
      false
      # :nocov:
    end
  end
end
366
+
367
# Operator-name => lambda table used to translate filtering operators into datastore queries.
# Built from `runtime_metadata` on first use and reused afterwards.
def filter_operators
  @filter_operators || (@filter_operators = build_filter_operators(runtime_metadata))
end
370
+
371
# Builds the frozen table of filtering operators.
#
# Each key is an operator name taken from the schema element names; each value is a lambda
# that accepts `(field_name, value)` and returns a query object for that field, or `nil`
# when there is nothing to filter on.
def build_filter_operators(runtime_metadata)
  names = runtime_metadata.schema_element_names
  time_of_day_script_id = runtime_metadata.static_script_ids_by_scoped_name.fetch("filter/by_time_of_day")

  equal_to_any_of = lambda do |field_name, value|
    values = to_datastore_value(value.compact.uniq) # : ::Array[untyped]

    equality_sub_expression =
      if field_name == "id"
        # Use specialized "ids" query when querying on ID field.
        # See: https://www.elastic.co/guide/en/elasticsearch/reference/7.15/query-dsl-ids-query.html
        #
        # Empty strings are rejected because the datastore otherwise responds with an error:
        # "failed to create query: Ids can't be empty"
        {ids: {values: values - [""]}}
      else
        {terms: {field_name => values}}
      end

    field_exists = {bool: {filter: [{exists: {"field" => field_name}}]}}

    if !value.empty? && value.all?(&:nil?)
      # Only `nil` was requested: match documents that lack the field.
      BooleanQuery.new(:must_not, [field_exists])
    elsif value.include?(nil)
      # `nil` mixed with real values: match either one of the values or a missing field.
      BooleanQuery.filter({bool: {
        minimum_should_match: 1,
        should: [
          {bool: {filter: [equality_sub_expression]}},
          {bool: {must_not: [field_exists]}}
        ]
      }})
    else
      BooleanQuery.filter(equality_sub_expression)
    end
  end

  matches_query = lambda do |field_name, value|
    allowed_edits_per_term = value.fetch(names.allowed_edits_per_term).runtime_metadata.datastore_abbreviation

    match_options = {
      query: value.fetch(names.query),
      # This is always a string field, even though the value is often an integer
      fuzziness: allowed_edits_per_term.to_s,
      operator: value[names.require_all_terms] ? "AND" : "OR"
    }

    BooleanQuery.must({match: {field_name => match_options}})
  end

  matches_phrase = lambda do |field_name, value|
    BooleanQuery.must({match_phrase_prefix: {field_name => {query: value.fetch(names.phrase)}}})
  end

  # This filter operator wraps a geo distance query:
  # https://www.elastic.co/guide/en/elasticsearch/reference/7.10/query-dsl-geo-distance-query.html
  near = lambda do |field_name, value|
    unit_abbreviation = value.fetch(names.unit).runtime_metadata.datastore_abbreviation

    BooleanQuery.filter({geo_distance: {
      "distance" => "#{value.fetch(names.max_distance)}#{unit_abbreviation}",
      field_name => {
        "lat" => value.fetch(names.latitude),
        "lon" => value.fetch(names.longitude)
      }
    }})
  end

  # To filter on time of day, we use the `filter/by_time_of_day` script. We do this with a
  # script because Elasticsearch/OpenSearch do not support it natively, and it's incredibly
  # hard to implement correctly with respect to time zones without one. We considered indexing
  # the `time_of_day` as a separate index field that we could directly filter on, but since the
  # time of day needs to be relative to a specific time zone, there's no way to make that work
  # with the reality of daylight savings time. For example, the `America/Los_Angeles` time zone
  # has a -07:00 UTC offset for part of the year and a -08:00 UTC offset for the rest of the
  # year. In a script we can use Java time zone APIs to handle this correctly.
  time_of_day = lambda do |field_name, value|
    params = {
      field: field_name,
      equal_to_any_of: list_of_nanos_of_day_from(value, names.equal_to_any_of),
      gt: nano_of_day_from(value, names.gt),
      gte: nano_of_day_from(value, names.gte),
      lt: nano_of_day_from(value, names.lt),
      lte: nano_of_day_from(value, names.lte),
      time_zone: value[names.time_zone]
    }.compact

    # With no comparison operators we return `nil` instead of a query, to avoid invoking the
    # script for no reason. Note that `field` and `time_zone` will always be in `params`, so
    # we can't just check for an empty hash here.
    if (params.keys - [:field, :time_zone]).any?
      BooleanQuery.filter({script: {script: {id: time_of_day_script_id, params: params}}})
    end
  end

  {
    names.equal_to_any_of => equal_to_any_of,
    names.gt => ->(field_name, value) { RangeQuery.new(field_name, :gt, value) },
    names.gte => ->(field_name, value) { RangeQuery.new(field_name, :gte, value) },
    names.lt => ->(field_name, value) { RangeQuery.new(field_name, :lt, value) },
    names.lte => ->(field_name, value) { RangeQuery.new(field_name, :lte, value) },
    names.matches => ->(field_name, value) { BooleanQuery.must({match: {field_name => value}}) },
    names.matches_query => matches_query,
    names.matches_phrase => matches_phrase,
    names.near => near,
    names.time_of_day => time_of_day
  }.freeze
end
486
+
487
# Converts a filter value into the form sent to the datastore.
#
# Arrays are converted element by element (recursively), a `Schema::EnumValue` becomes its
# name as a string, and any other value is returned unchanged.
def to_datastore_value(value)
  return value.map { |element| to_datastore_value(element) } if value.is_a?(::Array)
  return value.name.to_s if value.is_a?(Schema::EnumValue)

  value
end
497
+
498
# Looks up `field` in `value` and converts the local time found there to a nano-of-day via
# `Support::TimeUtil.nano_of_day_from_local_time`. Returns `nil` when no local time is present.
def nano_of_day_from(value, field)
  local_time = value[field]
  return unless local_time

  Support::TimeUtil.nano_of_day_from_local_time(local_time)
end
502
+
503
# Like `nano_of_day_from`, but for a field holding a list of local times: each element is
# converted via `Support::TimeUtil.nano_of_day_from_local_time`. Returns `nil` when the
# field is `nil` (or absent).
def list_of_nanos_of_day_from(value, field)
  local_times = value[field]
  return if local_times.nil?

  local_times.map { |local_time| Support::TimeUtil.nano_of_day_from_local_time(local_time) }
end
506
+
507
# Counts how many clauses in `bool_query` are required to match for a document to be a search hit.
def required_matching_clause_count(bool_query)
  bool_query.sum do |occurrence, clauses|
    case occurrence
    when :should
      # The number of required matching clauses imposed by `:should` depends on the `:minimum_should_match` value.
      # https://www.elastic.co/guide/en/elasticsearch/reference/8.9/query-dsl-bool-query.html#bool-min-should-match
      bool_query.fetch(:minimum_should_match)
    when :minimum_should_match
      # Doesn't have any clauses on its own; it just controls how many `:should` clauses are required.
      0
    else
      # For all other occurrences, each clause must match.
      clauses.size
    end
  end
end
523
+ end
524
+ end
525
+ end
526
+ end
@@ -0,0 +1,148 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ class GraphQL
11
+ module Filtering
12
+ # Responsible for extracting a set of values from query filters, based on a using a custom
13
+ # set type that is able to efficiently model the "all values" case.
14
+ class FilterValueSetExtractor
15
+ def initialize(schema_names, all_values_set, &build_set_for_filter)
16
+ @schema_names = schema_names
17
+ @all_values_set = all_values_set
18
+ @build_set_for_filter = build_set_for_filter
19
+ end
20
+
21
+ # Given a list of `filter_hashes` and a list of `target_field_paths`, returns a representation
22
+ # of a set that includes all values that could be matched by the given filters.
23
+ #
24
+ # Essentially, this method guarantees that the following pseudo code is always satisfied:
25
+ #
26
+ # ``` ruby
27
+ # filter_value_set = extract_filter_value_set(filter_hashes, target_field_paths)
28
+ # Datastore.all_documents_matching(filter_hashes).each do |document|
29
+ # target_field_paths.each do |field_path|
30
+ # expect(filter_value_set).to include(document.value_at(field_path))
31
+ # end
32
+ # end
33
+ # ```
34
+ def extract_filter_value_set(filter_hashes, target_field_paths)
35
+ # We union the filter values together in cases where we have multiple target field paths
36
+ # to make sure we cover all the values we need to. We generally do not have multiple
37
+ # `target_field_paths` except for specialized cases, such as when searching multiple
38
+ # indices in one query, where those indices are configured to use differing `routing_field_paths`.
39
+ # In such a situation we must use the set union of values. Remember: including additional
40
+ # routing values causes no adverse behavior (although it may introduce an inefficiency)
41
+ # but if we fail to route to a shard that contains a matching document, the search results
42
+ # will be incorrect.
43
+ map_reduce_sets(target_field_paths, :union, negate: false) do |target_field_path|
44
+ filter_value_set_for_target_field_path(target_field_path, filter_hashes)
45
+ end
46
+ end
47
+
48
+ private
49
+
50
+ # Determines a set of filter values for one of our `target_field_paths`,
51
+ # based on a list of `filter_hashes`.
52
+ def filter_value_set_for_target_field_path(target_field_path, filter_hashes)
53
+ # Pre-split the `target_field_path` to make it easy to compare as an array,
54
+ # since we build up the `traversed_field_path_parts` as an array as we recurse. We do this here
55
+ # outside the `map_reduce_sets` block below so we only do it once instead of N times.
56
+ target_field_path_parts = target_field_path.split(".")
57
+
58
+ # Here we intersect the filter value setbecause when we have multiple `filter_hashes`,
59
+ # the filters are ANDed together. Only documents that match ALL the filters will be
60
+ # returned. Therefore, we want the intersection of filter value sets.
61
+ map_reduce_sets(filter_hashes, :intersection, negate: false) do |filter_hash|
62
+ filter_value_set_for_filter_hash(filter_hash, target_field_path_parts, negate: false)
63
+ end
64
+ end
65
+
66
+ # Determines the set of filter values for one of our `target_field_paths` values and one
67
+ # `filter_hash` from a list of filter hashes. Note that this method is called recursively,
68
+ # with `traversed_field_path_parts` as an accumulator that accumulates that path to a nested
69
+ # field we are filtering on.
70
+ def filter_value_set_for_filter_hash(filter_hash, target_field_path_parts, traversed_field_path_parts = [], negate:)
71
+ # Here we intersect the filter value sets because when we have multiple entries in a filter hash,
72
+ # the filters are ANDed together. Only documents that match ALL the filters will be
73
+ # returned. Therefore, we want the intersection of filter value sets.
74
+ map_reduce_sets(filter_hash, :intersection, negate: negate) do |key, value|
75
+ filter_value_set_for_filter_hash_entry(key, value, target_field_path_parts, traversed_field_path_parts, negate: negate)
76
+ end
77
+ end
78
+
79
+ # Determines the set of filter values for one of our `target_field_paths` and one
80
+ # entry from one `filter_hash`. The key/value pair from a single entry is passed as the
81
+ # first two arguments. Depending on where we are at in recursing through the nested structure,
82
+ # the key could identify either a field we are filtering on or a filtering operator to apply
83
+ # to a particular field.
84
+ def filter_value_set_for_filter_hash_entry(field_or_op, filter_value, target_field_path_parts, traversed_field_path_parts, negate:)
85
+ if filter_value.nil?
86
+ # Any filter with a `nil` value is effectively ignored by our filtering logic, so we need
87
+ # to return our `@all_values_set` to indicate this filter matches all documents.
88
+ @all_values_set
89
+ elsif field_or_op == @schema_names.not
90
+ filter_value_set_for_filter_hash(filter_value, target_field_path_parts, traversed_field_path_parts, negate: !negate)
91
+ elsif filter_value.is_a?(::Hash)
92
+ # the only time `value` is a hash is when `field_or_op` is a field name.
93
+ # In that case, `value` is a hash of filters that apply to that field.
94
+ filter_value_set_for_filter_hash(filter_value, target_field_path_parts, traversed_field_path_parts + [field_or_op], negate: negate)
95
+ elsif field_or_op == @schema_names.any_of
96
+ filter_value_set_for_any_of(filter_value, target_field_path_parts, traversed_field_path_parts, negate: negate)
97
+ elsif target_field_path_parts == traversed_field_path_parts
98
+ set = filter_value_set_for_field_filter(field_or_op, filter_value)
99
+ negate ? set.negate : set
100
+ else
101
+ # Otherwise, we have no information in this clause to limit our filter value set.
102
+ @all_values_set
103
+ end
104
+ end
105
+
106
+ # Determines the set of filter values for an `any_of` clause, which is used for ORing multiple filters together.
107
+ def filter_value_set_for_any_of(filter_hashes, target_field_path_parts, traversed_field_path_parts, negate:)
108
+ # Here we union the filter value sets because `any_of` represents an OR. If we can determine specific
109
+ # filter values for all `any_of` clauses, we will OR them together. Alternately, if we cannot
110
+ # determine specific filter values for any clauses, we will union `@all_values_set`,
111
+ # which will result in a return value of `@all_values_set`. This is correct because if there
112
+ # is an `any_of` clause that does not match on the `target_field_path_parts` then the filter
113
+ # excludes no documents on the basis of the target filter.
114
+ map_reduce_sets(filter_hashes, :union, negate: negate) do |filter_hash|
115
+ filter_value_set_for_filter_hash(filter_hash, target_field_path_parts, traversed_field_path_parts, negate: negate)
116
+ end
117
+ end
118
+
119
+ # Determines the set of filter values for a single filter on a single field.
120
+ def filter_value_set_for_field_filter(filter_op, filter_value)
121
+ operator_name = @schema_names.canonical_name_for(filter_op)
122
+ @build_set_for_filter.call(operator_name, filter_value) || @all_values_set
123
+ end
124
+
125
+ # Maps over the provided `collection` by applying the given `map_transform`
126
+ # (which must transform a collection entry to an instance of our set representation), then reduces
127
+ # the resulting collection to a single set value. `reduction` will be either `:union` or `:intersection`.
128
+ #
129
+ # If the collection is empty, we return `@all_values_set` because it's the only "safe" value
130
+ # we can return. We don't have any information that would allow us to limit the set of filter
131
+ # values in any way.
132
+ def map_reduce_sets(collection, reduction, negate:, &map_transform)
133
+ return @all_values_set if collection.empty?
134
+
135
+ # In the case where `negate` is true (`not` is present somewhere in the filtering expression),
136
+ # we negate the reduction operator. Utilizing De Morgan’s Law (¬(A ∪ B) <-> (¬A) ∩ (¬B)),
137
+ # the negation of the union of two sets is the intersection of the negation of each set (the negation
138
+ # of each set is the difference between @all_values_set and the given set)--and vice versa.
139
+ reduction = REDUCTION_INVERSIONS.fetch(reduction) if negate
140
+
141
+ collection.map(&map_transform).reduce(reduction)
142
+ end
143
+
144
+ REDUCTION_INVERSIONS = {union: :intersection, intersection: :union}
145
+ end
146
+ end
147
+ end
148
+ end