elasticgraph-graphql 0.18.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +3 -0
- data/elasticgraph-graphql.gemspec +23 -0
- data/lib/elastic_graph/graphql/aggregation/composite_grouping_adapter.rb +79 -0
- data/lib/elastic_graph/graphql/aggregation/computation.rb +39 -0
- data/lib/elastic_graph/graphql/aggregation/date_histogram_grouping.rb +83 -0
- data/lib/elastic_graph/graphql/aggregation/field_path_encoder.rb +47 -0
- data/lib/elastic_graph/graphql/aggregation/field_term_grouping.rb +26 -0
- data/lib/elastic_graph/graphql/aggregation/key.rb +87 -0
- data/lib/elastic_graph/graphql/aggregation/nested_sub_aggregation.rb +37 -0
- data/lib/elastic_graph/graphql/aggregation/non_composite_grouping_adapter.rb +129 -0
- data/lib/elastic_graph/graphql/aggregation/path_segment.rb +31 -0
- data/lib/elastic_graph/graphql/aggregation/query.rb +172 -0
- data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +345 -0
- data/lib/elastic_graph/graphql/aggregation/query_optimizer.rb +187 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/aggregated_values.rb +41 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/count_detail.rb +44 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/grouped_by.rb +30 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +64 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +83 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +82 -0
- data/lib/elastic_graph/graphql/aggregation/script_term_grouping.rb +32 -0
- data/lib/elastic_graph/graphql/aggregation/term_grouping.rb +118 -0
- data/lib/elastic_graph/graphql/client.rb +43 -0
- data/lib/elastic_graph/graphql/config.rb +81 -0
- data/lib/elastic_graph/graphql/datastore_query/document_paginator.rb +100 -0
- data/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +142 -0
- data/lib/elastic_graph/graphql/datastore_query/paginator.rb +199 -0
- data/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +239 -0
- data/lib/elastic_graph/graphql/datastore_query.rb +372 -0
- data/lib/elastic_graph/graphql/datastore_response/document.rb +78 -0
- data/lib/elastic_graph/graphql/datastore_response/search_response.rb +79 -0
- data/lib/elastic_graph/graphql/datastore_search_router.rb +151 -0
- data/lib/elastic_graph/graphql/decoded_cursor.rb +120 -0
- data/lib/elastic_graph/graphql/filtering/boolean_query.rb +45 -0
- data/lib/elastic_graph/graphql/filtering/field_path.rb +81 -0
- data/lib/elastic_graph/graphql/filtering/filter_args_translator.rb +58 -0
- data/lib/elastic_graph/graphql/filtering/filter_interpreter.rb +526 -0
- data/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +148 -0
- data/lib/elastic_graph/graphql/filtering/range_query.rb +56 -0
- data/lib/elastic_graph/graphql/http_endpoint.rb +229 -0
- data/lib/elastic_graph/graphql/monkey_patches/schema_field.rb +56 -0
- data/lib/elastic_graph/graphql/monkey_patches/schema_object.rb +48 -0
- data/lib/elastic_graph/graphql/query_adapter/filters.rb +161 -0
- data/lib/elastic_graph/graphql/query_adapter/pagination.rb +27 -0
- data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +124 -0
- data/lib/elastic_graph/graphql/query_adapter/sort.rb +32 -0
- data/lib/elastic_graph/graphql/query_details_tracker.rb +60 -0
- data/lib/elastic_graph/graphql/query_executor.rb +200 -0
- data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +49 -0
- data/lib/elastic_graph/graphql/resolvers/graphql_adapter.rb +114 -0
- data/lib/elastic_graph/graphql/resolvers/list_records.rb +29 -0
- data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +74 -0
- data/lib/elastic_graph/graphql/resolvers/query_adapter.rb +85 -0
- data/lib/elastic_graph/graphql/resolvers/query_source.rb +46 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +71 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +65 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/page_info.rb +82 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +40 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +42 -0
- data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +56 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/cursor.rb +35 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date.rb +64 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date_time.rb +60 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/local_time.rb +30 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/longs.rb +47 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/no_op.rb +24 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/time_zone.rb +44 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/untyped.rb +32 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones.rb +634 -0
- data/lib/elastic_graph/graphql/schema/arguments.rb +78 -0
- data/lib/elastic_graph/graphql/schema/enum_value.rb +30 -0
- data/lib/elastic_graph/graphql/schema/field.rb +147 -0
- data/lib/elastic_graph/graphql/schema/relation_join.rb +103 -0
- data/lib/elastic_graph/graphql/schema/type.rb +263 -0
- data/lib/elastic_graph/graphql/schema.rb +164 -0
- data/lib/elastic_graph/graphql.rb +253 -0
- data/script/dump_time_zones +81 -0
- data/script/dump_time_zones.java +17 -0
- metadata +503 -0
@@ -0,0 +1,526 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/constants"
|
10
|
+
require "elastic_graph/graphql/filtering/boolean_query"
|
11
|
+
require "elastic_graph/graphql/filtering/field_path"
|
12
|
+
require "elastic_graph/graphql/filtering/range_query"
|
13
|
+
require "elastic_graph/graphql/schema/enum_value"
|
14
|
+
require "elastic_graph/support/graphql_formatter"
|
15
|
+
require "elastic_graph/support/memoizable_data"
|
16
|
+
require "elastic_graph/support/time_util"
|
17
|
+
require "graphql"
|
18
|
+
|
19
|
+
module ElasticGraph
|
20
|
+
class GraphQL
|
21
|
+
module Filtering
|
22
|
+
# Contains all query logic related to filtering. Not tested directly; tests drive the `Query` interface instead.
|
23
|
+
# For more info on how this works, see:
|
24
|
+
# https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-bool-query.html
|
25
|
+
# https://www.elastic.co/blog/lost-in-translation-boolean-operations-and-filters-in-the-bool-query
|
26
|
+
FilterInterpreter = Support::MemoizableData.define(:runtime_metadata, :schema_names, :logger) do
|
27
|
+
# @implements FilterInterpreter
|
28
|
+
|
29
|
+
def initialize(runtime_metadata:, logger:)
  # Callers only supply `runtime_metadata` and `logger`; the `schema_names` member is always
  # derived from the runtime metadata so the two can never disagree.
  element_names = runtime_metadata.schema_element_names

  super(runtime_metadata: runtime_metadata, schema_names: element_names, logger: logger)
end
|
36
|
+
|
37
|
+
# Builds a datastore query from the given collection of filter hashes.
|
38
|
+
#
|
39
|
+
# Returns `nil` if there are no query clauses, to make it easy for a caller to `compact` out
|
40
|
+
# `query: {}` in a larger search request body.
|
41
|
+
#
|
42
|
+
# https://www.elastic.co/guide/en/elasticsearch/reference/8.11/query-dsl.html
|
43
|
+
def build_query(filter_hashes, from_field_path: FieldPath.empty)
  # Every filter hash is applied to the same root bool node, so their clauses accumulate
  # side by side. `build_bool_hash` returns `nil` when no clause was added.
  build_bool_hash do |root_node|
    filter_hashes.each { |filter_hash| process_filter_hash(root_node, filter_hash, from_field_path) }
  end
end
|
50
|
+
|
51
|
+
def to_s
  # `runtime_metadata` and `logger` have large, noisy inspect/to_s output. Printing only their
  # hashes keeps the output short while still showing at a glance whether two `FilterInterpreter`
  # instances are equal--and, when they are not, which member differs.
  metadata_hash = runtime_metadata.hash
  logger_hash = logger.hash

  "#<data #{FilterInterpreter.name} runtime_metadata=(hash: #{metadata_hash}) logger=(hash: #{logger_hash})>"
end
|
59
|
+
alias_method :inspect, :to_s
|
60
|
+
|
61
|
+
private
|
62
|
+
|
63
|
+
# Applies each entry of `filter_hash` to `bool_node`, dispatching on the kind of
# expression reported by `identify_expression_type`.
def process_filter_hash(bool_node, filter_hash, field_path)
  filter_hash.each do |field_or_op, expression|
    expression_type = identify_expression_type(field_or_op, expression)

    case expression_type
    when :empty
      # An "empty" filter predicate contributes nothing, so there is nothing to do.
      nil
    when :not then process_not_expression(bool_node, expression, field_path)
    when :list_any_filter then process_list_any_filter_expression(bool_node, expression, field_path)
    when :any_of then process_any_of_expression(bool_node, expression, field_path)
    when :all_of then process_all_of_expression(bool_node, expression, field_path)
    when :operator then process_operator_expression(bool_node, field_or_op, expression, field_path)
    when :list_count then process_list_count_expression(bool_node, expression, field_path)
    when :sub_field
      # `field_or_op` names a sub-field here, so it is appended to the path before recursing.
      process_sub_field_expression(bool_node, expression, field_path + field_or_op)
    else
      logger.warn("Ignoring unknown filtering operator (#{field_or_op}: #{expression.inspect}) on field `#{field_path.from_root.join(".")}`")
    end
  end
end
|
87
|
+
|
88
|
+
# Classifies a single `field_or_op => expression` filter entry. The checks are ordered:
# a `nil` expression is always `:empty`, the named operators come next, and any remaining
# hash-valued entry is treated as a sub-field.
def identify_expression_type(field_or_op, expression)
  if expression.nil?
    :empty
  elsif field_or_op == schema_names.not
    :not
  elsif field_or_op == schema_names.any_satisfy
    :list_any_filter
  elsif field_or_op == schema_names.all_of
    :all_of
  elsif field_or_op == schema_names.any_of
    :any_of
  elsif filter_operators.key?(field_or_op)
    :operator
  elsif field_or_op == LIST_COUNTS_FIELD
    :list_count
  elsif expression.is_a?(::Hash)
    :sub_field
  else
    :unknown
  end
end
|
99
|
+
|
100
|
+
# Indicates if the given `expression` applies filtering to subfields or just applies
|
101
|
+
# operators at the current field path.
|
102
|
+
def filters_on_sub_fields?(expression)
  expression.any? do |field_or_op, sub_expression|
    expression_type = identify_expression_type(field_or_op, sub_expression)

    if expression_type == :sub_field
      true
    elsif %i[not list_any_filter].include?(expression_type)
      # These wrap a single filter hash, so the answer is whatever the wrapped hash says.
      filters_on_sub_fields?(sub_expression)
    elsif %i[any_of all_of].include?(expression_type)
      # Only these two wrap an *array* of filter hashes. One sub-field filter anywhere in the
      # array is enough (for `all_of` just as much as for `any_of`).
      sub_expression.any? { |expr| filters_on_sub_fields?(expr) }
    else
      # :empty, :operator, :unknown, :list_count
      false
    end
  end
end
|
119
|
+
|
120
|
+
# Negates `expression` (a filter hash) and adds the result to `bool_node`.
#
# An `expression` that produces no clauses is ignored entirely.
def process_not_expression(bool_node, expression, field_path)
  negated = build_bool_hash do |inner_node|
    process_filter_hash(inner_node, expression, field_path)
  end

  return unless negated

  inner_bool = negated[:bool]

  # Prevent any negated filters from being unnecessarily double-negated by
  # converting them to a positive filter (i.e., !!A == A).
  if inner_bool.key?(:must_not)
    inner_bool[:must_not].each do |negated_clause|
      clause_bool = negated_clause[:bool]

      if clause_bool.key?(:should)
        # A clause with `should` entries also carries an integer `minimum_should_match`, which
        # cannot be `concat`ed onto an array. Its `should` entries must also not be mixed with
        # any `should` entries already on `bool_node`, as that would merge two independent OR
        # groups into one. Re-attach the clause as a single required filter instead.
        bool_node[:filter] << negated_clause
      else
        # Pull clauses up to the current bool_node to remove the negation.
        clause_bool.each { |occurrence, clauses| bool_node[occurrence].concat(clauses) }
      end
    end
  end

  # Don't drop any other filters! They still need to be negated.
  other_filters = inner_bool.except(:must_not)
  bool_node[:must_not] << {bool: other_filters} unless other_filters.empty?
end
|
140
|
+
|
141
|
+
# There are two cases for `any_satisfy`, each of which is handled differently:
|
142
|
+
#
|
143
|
+
# - List-of-scalars
|
144
|
+
# - List-of-nested-objects
|
145
|
+
#
|
146
|
+
# We can detect which it is by checking `filter` to see if it filters on any subfields.
|
147
|
+
# If so, we know the filter is being applied to a `nested` list field. We can count on
|
148
|
+
# this because we do not generate `any_satisfy` filters on `object` list fields (instead,
|
149
|
+
# they get generated on their leaf fields).
|
150
|
+
def process_list_any_filter_expression(bool_node, filter, field_path)
  # A filter that reaches into sub-fields is handled as a list of nested objects.
  if filters_on_sub_fields?(filter)
    return process_any_satisfy_filter_expression_on_nested_object_list(bool_node, filter, field_path)
  end

  process_any_satisfy_filter_expression_on_scalar_list(bool_node, filter, field_path)
end
|
157
|
+
|
158
|
+
# Builds the sub-filter against `field_path.nested` and, when it has any clauses, wraps it
# in a `nested` query (keyed by the dot-joined path from the root) under `:filter`.
def process_any_satisfy_filter_expression_on_nested_object_list(bool_node, filter, field_path)
  nested_query = build_bool_hash { |inner_node| process_filter_hash(inner_node, filter, field_path.nested) }
  return unless nested_query

  bool_node[:filter] << {nested: {path: field_path.from_root.join("."), query: nested_query}}
end
|
167
|
+
|
168
|
+
# On a list-of-leaf-values field, `any_satisfy` doesn't _do_ anything: it just expresses
|
169
|
+
# the fact that documents with any list element values matching the predicates will match
|
170
|
+
# the overall filter.
|
171
|
+
def process_any_satisfy_filter_expression_on_scalar_list(bool_node, filter, field_path)
  processed = build_bool_hash { |node| process_filter_hash(node, filter, field_path) }
  return unless processed

  processed_bool_query = processed.fetch(:bool)

  # `any_satisfy` should match when a *single* list value satisfies the whole sub-filter. The
  # datastore gives us that when only one clause is required. With several required clauses it
  # is satisfied as long as each clause matches *some* value--not necessarily the same one--so
  # a list whose N values each match a different clause would be a hit. Rather than behave in
  # that surprising way, only the single-clause case is accepted; anything else is an error.
  if required_matching_clause_count(processed_bool_query) <= 1
    return bool_node.update(processed_bool_query) do |_, existing_clauses, any_satisfy_clauses|
      existing_clauses + any_satisfy_clauses
    end
  end

  formatted_filter = Support::GraphQLFormatter.serialize(
    {schema_names.any_satisfy => filter},
    wrap_hash_with_braces: false
  )

  raise ::GraphQL::ExecutionError, "`#{formatted_filter}` is not supported because it produces " \
    "multiple filtering clauses under `#{schema_names.any_satisfy}`, which doesn't work as expected. " \
    "Remove one or more of your `#{schema_names.any_satisfy}` predicates and try again."
end
|
200
|
+
|
201
|
+
# Handles `any_of` (an OR): each sub-expression becomes its own bool hash, and the
# non-empty ones are combined as `should` clauses on `bool_node`.
def process_any_of_expression(bool_node, expressions, field_path)
  candidate_queries = expressions.map do |expression|
    build_bool_hash { |inner_bool_node| process_filter_hash(inner_bool_node, expression, field_path) }
  end
  shoulds = candidate_queries.compact

  # With no `should` clauses at all we want to match no documents, but an empty `should`
  # array does not make the datastore behave that way. So in that case we hand it criteria
  # that evaluate to false for every document.
  bool_query =
    if shoulds.empty?
      BooleanQuery::ALWAYS_FALSE_FILTER
    else
      BooleanQuery.should(*shoulds)
    end

  bool_query.merge_into(bool_node)
end
|
215
|
+
|
216
|
+
def process_all_of_expression(bool_node, expressions, field_path)
  # `all_of` is an AND, and AND is already how `process_filter_hash` combines filters on a
  # bool node. Feeding every sub-expression into the same node is all that is needed.
  expressions.each { |sub_expression| process_filter_hash(bool_node, sub_expression, field_path) }
end
|
223
|
+
|
224
|
+
def process_operator_expression(bool_node, operator, expression, field_path)
  # `operator` names a filtering operator and `expression` is the value it applies to.
  # The applicator may return `nil` (nothing to filter on), in which case nothing is merged.
  op_applicator = filter_operators.fetch(operator)
  datastore_field = field_path.from_root.join(".")

  op_applicator.call(datastore_field, expression)&.merge_into(bool_node)
end
|
231
|
+
|
232
|
+
def process_sub_field_expression(bool_node, expression, field_path)
  # `expression` is a hash of filters to apply at `field_path`. Normally it can be processed
  # directly on `bool_node`, because nested GraphQL filters flatten into one ANDed list of
  # clauses that use dot-separated paths. For example:
  #
  #   filter: {
  #     weight: {lt: 2000},
  #     cost: {
  #       currency: {equal_to_any_of: ["USD"]}
  #       amount: {gt: 1000}
  #     }
  #   }
  #
  # ...needs no nested bool structure in the datastore query:
  #
  #   {bool: {filter: [
  #     {range: {"weight": {lt: 2000}}},
  #     {terms: {"cost.currency": ["USD"]}},
  #     {range: {"cost.amount": {gt: 1000}}}
  #   ]}}
  #
  # That stops working once `any_of` is involved. Consider multiple `any_of` sub-expressions:
  #
  #   filter: {
  #     weight: {any_of: [
  #       {gt: 9000},
  #       {lt: 2000}
  #     ]},
  #     cost: {any_of: [
  #       {currency: {equal_to_any_of: ["USD"]}},
  #       {amount: {gt: 1000}}
  #     ]}
  #   }
  #
  # Processed on the shared `bool_node`, all four conditions would land in a single `should`
  # list and be OR'd together, which is clearly wrong. Giving each `any_of`-bearing
  # expression its own nested bool node produces the correct structure instead:
  #
  #   {bool: {filter: [
  #     {bool: {should: [
  #       {range: {"weight": {gt: 9000}}},
  #       {range: {"weight": {lt: 2000}}}
  #     ]}},
  #     {bool: {should: [
  #       {terms: {"cost.currency": ["USD"]}},
  #       {range: {"cost.amount": {gt: 1000}}}
  #     ]}}
  #   ]}}
  unless expression.key?(schema_names.any_of)
    return process_filter_hash(bool_node, expression, field_path)
  end

  isolated_filter = build_bool_hash { |inner_node| process_filter_hash(inner_node, expression, field_path) }
  bool_node[:filter] << isolated_filter if isolated_filter
end
|
310
|
+
|
311
|
+
# Applies a list `count` filter (`expression` is a hash of operators) at the counts path
# derived from `field_path`.
def process_list_count_expression(bool_node, expression, field_path)
  # Predicates with a `nil` operand are "empty" and are ignored everywhere else in filter
  # processing. Drop them up front so that `excludes_zero?` never compares `nil` with a number,
  # and so that a count filter with no effective predicates is ignored rather than being
  # rewritten (below) into a filter that only matches documents lacking the count field.
  expression = expression.compact
  return if expression.empty?

  # Normally, we don't have to do anything special for list count expressions.
  # That's the case, for example, for an expression like:
  #
  #   filter: {tags: {count: {gt: 2}}}
  #
  # However, if the count expression could match a count of 0 (that is, if it doesn't
  # exclude a count of zero), such as this:
  #
  #   filter: {tags: {count: {lt: 1}}}
  #
  # ...then we need some special handling here. A count of 0 is equivalent to the list field not existing.
  # While we index an explicit count of 0, the count field will be missing from documents indexed before
  # the list field was defined on the ElasticGraph schema. To properly match those documents, we need to
  # convert this into an OR (using `any_of`) to also match documents that lack the field entirely.
  unless excludes_zero?(expression)
    expression = {schema_names.any_of => [
      expression,
      {schema_names.equal_to_any_of => [nil]}
    ]}
  end

  process_sub_field_expression(bool_node, expression, field_path.counts_path)
end
|
335
|
+
|
336
|
+
# Yields a fresh bool node (a hash whose missing keys default to a new empty array) for
# the block to populate, then wraps it as `{bool: node}`.
def build_bool_hash(&block)
  clauses_by_occurrence = Hash.new { |hash, occurrence| hash[occurrence] = [] }
  yield clauses_by_occurrence

  # "Empty" filter predicates are ignored by returning `nil` here.
  return nil if clauses_by_occurrence.empty?

  # Per https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-bool-query.html#bool-min-should-match,
  # `minimum_should_match` defaults to 1 only when a bool query has `should` clauses and no `must` or
  # `filter` clauses; otherwise it defaults to 0. We want `should` clauses to keep working alongside
  # musts and filters, so it is set to 1 explicitly whenever there are `should` clauses.
  if clauses_by_occurrence.key?(:should)
    clauses_by_occurrence[:minimum_should_match] = 1
  end

  {bool: clauses_by_occurrence}
end
|
349
|
+
|
350
|
+
# Determines if the given filter expression excludes the value `0`.
|
351
|
+
def excludes_zero?(expression)
  # One predicate that rules out `0` is enough for the whole expression to exclude it.
  expression.any? do |operator, operand|
    case operator
    when schema_names.equal_to_any_of
      !operand.include?(0)
    when schema_names.lt
      operand <= 0
    when schema_names.lte
      operand < 0
    when schema_names.gt
      operand >= 0
    when schema_names.gte
      operand > 0
    else
      # :nocov: -- all operators are covered above. But simplecov complains about an implicit `else` branch being uncovered, so here we've defined it to wrap it with `:nocov:`.
      false
      # :nocov:
    end
  end
end
|
366
|
+
|
367
|
+
# Memoized operator table; built from `runtime_metadata` on first use.
def filter_operators
  return @filter_operators if @filter_operators

  @filter_operators = build_filter_operators(runtime_metadata)
end
|
370
|
+
|
371
|
+
# Builds the frozen table mapping each filter operator name to a lambda. Every lambda takes
# `(field_name, value)` and returns a query object to merge into a bool node (the
# `time_of_day` lambda may return `nil` when there is nothing to filter on).
def build_filter_operators(runtime_metadata)
  schema_names = runtime_metadata.schema_element_names
  filter_by_time_of_day_script_id = runtime_metadata.static_script_ids_by_scoped_name.fetch("filter/by_time_of_day")

  equal_to_any_of = lambda do |field_name, value|
    values = to_datastore_value(value.compact.uniq) # : ::Array[untyped]

    equality_sub_expression =
      if field_name == "id"
        # Use the specialized "ids" query when querying on the ID field.
        # See: https://www.elastic.co/guide/en/elasticsearch/reference/7.15/query-dsl-ids-query.html
        #
        # Empty strings are rejected because the datastore otherwise responds with:
        # "failed to create query: Ids can't be empty"
        {ids: {values: values - [""]}}
      else
        {terms: {field_name => values}}
      end

    exists_sub_expression = {exists: {"field" => field_name}}

    if !value.empty? && value.all?(&:nil?)
      # Only `nil` was requested: match documents where the field does not exist.
      BooleanQuery.new(:must_not, [{bool: {filter: [exists_sub_expression]}}])
    elsif value.include?(nil)
      # `nil` mixed with real values: match either the values or a missing field.
      BooleanQuery.filter({bool: {
        minimum_should_match: 1,
        should: [
          {bool: {filter: [equality_sub_expression]}},
          {bool: {must_not: [{bool: {filter: [exists_sub_expression]}}]}}
        ]
      }})
    else
      BooleanQuery.filter(equality_sub_expression)
    end
  end

  # Produces the lambda for one of the four range comparisons.
  range_operator = ->(comparison) do
    ->(field_name, value) { RangeQuery.new(field_name, comparison, value) }
  end

  matches = ->(field_name, value) { BooleanQuery.must({match: {field_name => value}}) }

  matches_query = lambda do |field_name, value|
    allowed_edits_per_term = value.fetch(schema_names.allowed_edits_per_term).runtime_metadata.datastore_abbreviation

    match_options = {
      query: value.fetch(schema_names.query),
      # This is always a string field, even though the value is often an integer
      fuzziness: allowed_edits_per_term.to_s,
      operator: value[schema_names.require_all_terms] ? "AND" : "OR"
    }

    BooleanQuery.must({match: {field_name => match_options}})
  end

  matches_phrase = lambda do |field_name, value|
    BooleanQuery.must({match_phrase_prefix: {field_name => {query: value.fetch(schema_names.phrase)}}})
  end

  # This filter operator wraps a geo distance query:
  # https://www.elastic.co/guide/en/elasticsearch/reference/7.10/query-dsl-geo-distance-query.html
  near = lambda do |field_name, value|
    unit_abbreviation = value.fetch(schema_names.unit).runtime_metadata.datastore_abbreviation

    BooleanQuery.filter({geo_distance: {
      "distance" => "#{value.fetch(schema_names.max_distance)}#{unit_abbreviation}",
      field_name => {
        "lat" => value.fetch(schema_names.latitude),
        "lon" => value.fetch(schema_names.longitude)
      }
    }})
  end

  # Filtering on time of day is done with the `filter/by_time_of_day` script because
  # Elasticsearch/OpenSearch do not support it natively, and it is incredibly hard to implement
  # correctly with respect to time zones without a script. Indexing `time_of_day` as a separate,
  # directly filterable field was considered, but the time of day has to be relative to a specific
  # time zone, and that cannot work given daylight saving time: `America/Los_Angeles`, for example,
  # has a -07:00 UTC offset for part of the year and a -08:00 UTC offset for the rest. In a script
  # the Java time zone APIs handle this correctly.
  time_of_day = lambda do |field_name, value|
    params = {
      field: field_name,
      equal_to_any_of: list_of_nanos_of_day_from(value, schema_names.equal_to_any_of),
      gt: nano_of_day_from(value, schema_names.gt),
      gte: nano_of_day_from(value, schema_names.gte),
      lt: nano_of_day_from(value, schema_names.lt),
      lte: nano_of_day_from(value, schema_names.lte),
      time_zone: value[schema_names.time_zone]
    }.compact

    # With no comparison operators, yield `nil` rather than a query so the script is not
    # invoked for no reason. `field` and `time_zone` are always expected in `params`, which
    # is why a plain empty-hash check would not work here.
    comparison_keys = params.keys - [:field, :time_zone]

    unless comparison_keys.empty?
      BooleanQuery.filter({script: {script: {id: filter_by_time_of_day_script_id, params: params}}})
    end
  end

  {
    schema_names.equal_to_any_of => equal_to_any_of,
    schema_names.gt => range_operator.call(:gt),
    schema_names.gte => range_operator.call(:gte),
    schema_names.lt => range_operator.call(:lt),
    schema_names.lte => range_operator.call(:lte),
    schema_names.matches => matches,
    schema_names.matches_query => matches_query,
    schema_names.matches_phrase => matches_phrase,
    schema_names.near => near,
    schema_names.time_of_day => time_of_day
  }.freeze
end
|
486
|
+
|
487
|
+
# Converts a filter value into the form sent to the datastore: arrays are converted
# element by element, enum values become their name as a string, anything else is unchanged.
def to_datastore_value(value)
  return value.map { |element| to_datastore_value(element) } if ::Array === value
  return value.name.to_s if Schema::EnumValue === value

  value
end
|
497
|
+
|
498
|
+
# Looks up `field` in `value` and converts it with `TimeUtil.nano_of_day_from_local_time`.
# Returns `nil` when the field is absent (or otherwise falsy).
def nano_of_day_from(value, field)
  local_time = value[field]
  return unless local_time

  Support::TimeUtil.nano_of_day_from_local_time(local_time)
end
|
502
|
+
|
503
|
+
# List counterpart of `nano_of_day_from`: converts every entry of the list stored under
# `field`, or returns `nil` when no list is present.
def list_of_nanos_of_day_from(value, field)
  local_times = value[field]
  return nil if local_times.nil?

  local_times.map { |local_time| Support::TimeUtil.nano_of_day_from_local_time(local_time) }
end
|
506
|
+
|
507
|
+
# Counts how many clauses in `bool_query` are required to match for a document to be a search hit.
|
508
|
+
def required_matching_clause_count(bool_query)
  bool_query.sum do |occurrence, clauses|
    if occurrence == :should
      # How many clauses `:should` requires depends on the `:minimum_should_match` value.
      # https://www.elastic.co/guide/en/elasticsearch/reference/8.9/query-dsl-bool-query.html#bool-min-should-match
      bool_query.fetch(:minimum_should_match)
    elsif occurrence == :minimum_should_match
      # Has no clauses of its own; it only controls how many `:should` clauses are required.
      0
    else
      # For all other occurrences, each clause must match.
      clauses.size
    end
  end
end
|
523
|
+
end
|
524
|
+
end
|
525
|
+
end
|
526
|
+
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ElasticGraph
|
10
|
+
class GraphQL
|
11
|
+
module Filtering
|
12
|
+
# Responsible for extracting a set of values from query filters, using a custom
|
13
|
+
# set type that is able to efficiently model the "all values" case.
|
14
|
+
class FilterValueSetExtractor
|
15
|
+
# Stores the schema element names, the set representing "all values", and the block
# (kept as a proc) for later use.
def initialize(schema_names, all_values_set, &build_set_for_filter)
  @schema_names, @all_values_set, @build_set_for_filter = schema_names, all_values_set, build_set_for_filter
end
|
20
|
+
|
21
|
+
# Given a list of `filter_hashes` and a list of `target_field_paths`, returns a representation
|
22
|
+
# of a set that includes all values that could be matched by the given filters.
|
23
|
+
#
|
24
|
+
# Essentially, this method guarantees that the following pseudo code is always satisfied:
|
25
|
+
#
|
26
|
+
# ``` ruby
|
27
|
+
# filter_value_set = extract_filter_value_set(filter_hashes, target_field_paths)
|
28
|
+
# Datastore.all_documents_matching(filter_hashes).each do |document|
|
29
|
+
# target_field_paths.each do |field_path|
|
30
|
+
# expect(filter_value_set).to include(document.value_at(field_path))
|
31
|
+
# end
|
32
|
+
# end
|
33
|
+
# ```
|
34
|
+
def extract_filter_value_set(filter_hashes, target_field_paths)
|
35
|
+
# We union the filter values together in cases where we have multiple target field paths
|
36
|
+
# to make sure we cover all the values we need to. We generally do not have multiple
|
37
|
+
# `target_field_paths` except for specialized cases, such as when searching multiple
|
38
|
+
# indices in one query, where those indices are configured to use differing `routing_field_paths`.
|
39
|
+
# In such a situation we must use the set union of values. Remember: including additional
|
40
|
+
# routing values causes no adverse behavior (although it may introduce an inefficiency)
|
41
|
+
# but if we fail to route to a shard that contains a matching document, the search results
|
42
|
+
# will be incorrect.
|
43
|
+
map_reduce_sets(target_field_paths, :union, negate: false) do |target_field_path|
|
44
|
+
filter_value_set_for_target_field_path(target_field_path, filter_hashes)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
# Determines a set of filter values for one of our `target_field_paths`,
|
51
|
+
# based on a list of `filter_hashes`.
|
52
|
+
def filter_value_set_for_target_field_path(target_field_path, filter_hashes)
|
53
|
+
# Pre-split the `target_field_path` to make it easy to compare as an array,
|
54
|
+
# since we build up the `traversed_field_path_parts` as an array as we recurse. We do this here
|
55
|
+
# outside the `map_reduce_sets` block below so we only do it once instead of N times.
|
56
|
+
target_field_path_parts = target_field_path.split(".")
|
57
|
+
|
58
|
+
# Here we intersect the filter value setbecause when we have multiple `filter_hashes`,
|
59
|
+
# the filters are ANDed together. Only documents that match ALL the filters will be
|
60
|
+
# returned. Therefore, we want the intersection of filter value sets.
|
61
|
+
map_reduce_sets(filter_hashes, :intersection, negate: false) do |filter_hash|
|
62
|
+
filter_value_set_for_filter_hash(filter_hash, target_field_path_parts, negate: false)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# Determines the set of filter values for one of our `target_field_paths` values and one
|
67
|
+
# `filter_hash` from a list of filter hashes. Note that this method is called recursively,
|
68
|
+
# with `traversed_field_path_parts` as an accumulator that accumulates that path to a nested
|
69
|
+
# field we are filtering on.
|
70
|
+
def filter_value_set_for_filter_hash(filter_hash, target_field_path_parts, traversed_field_path_parts = [], negate:)
|
71
|
+
# Here we intersect the filter value sets because when we have multiple entries in a filter hash,
|
72
|
+
# the filters are ANDed together. Only documents that match ALL the filters will be
|
73
|
+
# returned. Therefore, we want the intersection of filter value sets.
|
74
|
+
map_reduce_sets(filter_hash, :intersection, negate: negate) do |key, value|
|
75
|
+
filter_value_set_for_filter_hash_entry(key, value, target_field_path_parts, traversed_field_path_parts, negate: negate)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
# Determines the set of filter values for one of our `target_field_paths` and one
|
80
|
+
# entry from one `filter_hash`. The key/value pair from a single entry is passed as the
|
81
|
+
# first two arguments. Depending on where we are at in recursing through the nested structure,
|
82
|
+
# the key could identify either a field we are filtering on or a filtering operator to apply
|
83
|
+
# to a particular field.
|
84
|
+
def filter_value_set_for_filter_hash_entry(field_or_op, filter_value, target_field_path_parts, traversed_field_path_parts, negate:)
|
85
|
+
if filter_value.nil?
|
86
|
+
# Any filter with a `nil` value is effectively ignored by our filtering logic, so we need
|
87
|
+
# to return our `@all_values_set` to indicate this filter matches all documents.
|
88
|
+
@all_values_set
|
89
|
+
elsif field_or_op == @schema_names.not
|
90
|
+
filter_value_set_for_filter_hash(filter_value, target_field_path_parts, traversed_field_path_parts, negate: !negate)
|
91
|
+
elsif filter_value.is_a?(::Hash)
|
92
|
+
# the only time `value` is a hash is when `field_or_op` is a field name.
|
93
|
+
# In that case, `value` is a hash of filters that apply to that field.
|
94
|
+
filter_value_set_for_filter_hash(filter_value, target_field_path_parts, traversed_field_path_parts + [field_or_op], negate: negate)
|
95
|
+
elsif field_or_op == @schema_names.any_of
|
96
|
+
filter_value_set_for_any_of(filter_value, target_field_path_parts, traversed_field_path_parts, negate: negate)
|
97
|
+
elsif target_field_path_parts == traversed_field_path_parts
|
98
|
+
set = filter_value_set_for_field_filter(field_or_op, filter_value)
|
99
|
+
negate ? set.negate : set
|
100
|
+
else
|
101
|
+
# Otherwise, we have no information in this clause to limit our filter value set.
|
102
|
+
@all_values_set
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
# Determines the set of filter values for an `any_of` clause, which is used for ORing multiple filters together.
|
107
|
+
def filter_value_set_for_any_of(filter_hashes, target_field_path_parts, traversed_field_path_parts, negate:)
|
108
|
+
# Here we union the filter value sets because `any_of` represents an OR. If we can determine specific
|
109
|
+
# filter values for all `any_of` clauses, we will OR them together. Alternately, if we cannot
|
110
|
+
# determine specific filter values for any clauses, we will union `@all_values_set`,
|
111
|
+
# which will result in a return value of `@all_values_set`. This is correct because if there
|
112
|
+
# is an `any_of` clause that does not match on the `target_field_path_parts` then the filter
|
113
|
+
# excludes no documents on the basis of the target filter.
|
114
|
+
map_reduce_sets(filter_hashes, :union, negate: negate) do |filter_hash|
|
115
|
+
filter_value_set_for_filter_hash(filter_hash, target_field_path_parts, traversed_field_path_parts, negate: negate)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
# Determines the set of filter values for a single filter on a single field.
|
120
|
+
def filter_value_set_for_field_filter(filter_op, filter_value)
|
121
|
+
operator_name = @schema_names.canonical_name_for(filter_op)
|
122
|
+
@build_set_for_filter.call(operator_name, filter_value) || @all_values_set
|
123
|
+
end
|
124
|
+
|
125
|
+
# Maps over the provided `collection` by applying the given `map_transform`
|
126
|
+
# (which must transform a collection entry to an instance of our set representation), then reduces
|
127
|
+
# the resulting collection to a single set value. `reduction` will be either `:union` or `:intersection`.
|
128
|
+
#
|
129
|
+
# If the collection is empty, we return `@all_values_set` because it's the only "safe" value
|
130
|
+
# we can return. We don't have any information that would allow us to limit the set of filter
|
131
|
+
# values in any way.
|
132
|
+
def map_reduce_sets(collection, reduction, negate:, &map_transform)
|
133
|
+
return @all_values_set if collection.empty?
|
134
|
+
|
135
|
+
# In the case where `negate` is true (`not` is present somewhere in the filtering expression),
|
136
|
+
# we negate the reduction operator. Utilizing De Morgan’s Law (¬(A ∪ B) <-> (¬A) ∩ (¬B)),
|
137
|
+
# the negation of the union of two sets is the intersection of the negation of each set (the negation
|
138
|
+
# of each set is the difference between @all_values_set and the given set)--and vice versa.
|
139
|
+
reduction = REDUCTION_INVERSIONS.fetch(reduction) if negate
|
140
|
+
|
141
|
+
collection.map(&map_transform).reduce(reduction)
|
142
|
+
end
|
143
|
+
|
144
|
+
REDUCTION_INVERSIONS = {union: :intersection, intersection: :union}
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|