elasticgraph-graphql 0.18.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +3 -0
- data/elasticgraph-graphql.gemspec +23 -0
- data/lib/elastic_graph/graphql/aggregation/composite_grouping_adapter.rb +79 -0
- data/lib/elastic_graph/graphql/aggregation/computation.rb +39 -0
- data/lib/elastic_graph/graphql/aggregation/date_histogram_grouping.rb +83 -0
- data/lib/elastic_graph/graphql/aggregation/field_path_encoder.rb +47 -0
- data/lib/elastic_graph/graphql/aggregation/field_term_grouping.rb +26 -0
- data/lib/elastic_graph/graphql/aggregation/key.rb +87 -0
- data/lib/elastic_graph/graphql/aggregation/nested_sub_aggregation.rb +37 -0
- data/lib/elastic_graph/graphql/aggregation/non_composite_grouping_adapter.rb +129 -0
- data/lib/elastic_graph/graphql/aggregation/path_segment.rb +31 -0
- data/lib/elastic_graph/graphql/aggregation/query.rb +172 -0
- data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +345 -0
- data/lib/elastic_graph/graphql/aggregation/query_optimizer.rb +187 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/aggregated_values.rb +41 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/count_detail.rb +44 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/grouped_by.rb +30 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +64 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +83 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +82 -0
- data/lib/elastic_graph/graphql/aggregation/script_term_grouping.rb +32 -0
- data/lib/elastic_graph/graphql/aggregation/term_grouping.rb +118 -0
- data/lib/elastic_graph/graphql/client.rb +43 -0
- data/lib/elastic_graph/graphql/config.rb +81 -0
- data/lib/elastic_graph/graphql/datastore_query/document_paginator.rb +100 -0
- data/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +142 -0
- data/lib/elastic_graph/graphql/datastore_query/paginator.rb +199 -0
- data/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +239 -0
- data/lib/elastic_graph/graphql/datastore_query.rb +372 -0
- data/lib/elastic_graph/graphql/datastore_response/document.rb +78 -0
- data/lib/elastic_graph/graphql/datastore_response/search_response.rb +79 -0
- data/lib/elastic_graph/graphql/datastore_search_router.rb +151 -0
- data/lib/elastic_graph/graphql/decoded_cursor.rb +120 -0
- data/lib/elastic_graph/graphql/filtering/boolean_query.rb +45 -0
- data/lib/elastic_graph/graphql/filtering/field_path.rb +81 -0
- data/lib/elastic_graph/graphql/filtering/filter_args_translator.rb +58 -0
- data/lib/elastic_graph/graphql/filtering/filter_interpreter.rb +526 -0
- data/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +148 -0
- data/lib/elastic_graph/graphql/filtering/range_query.rb +56 -0
- data/lib/elastic_graph/graphql/http_endpoint.rb +229 -0
- data/lib/elastic_graph/graphql/monkey_patches/schema_field.rb +56 -0
- data/lib/elastic_graph/graphql/monkey_patches/schema_object.rb +48 -0
- data/lib/elastic_graph/graphql/query_adapter/filters.rb +161 -0
- data/lib/elastic_graph/graphql/query_adapter/pagination.rb +27 -0
- data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +124 -0
- data/lib/elastic_graph/graphql/query_adapter/sort.rb +32 -0
- data/lib/elastic_graph/graphql/query_details_tracker.rb +60 -0
- data/lib/elastic_graph/graphql/query_executor.rb +200 -0
- data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +49 -0
- data/lib/elastic_graph/graphql/resolvers/graphql_adapter.rb +114 -0
- data/lib/elastic_graph/graphql/resolvers/list_records.rb +29 -0
- data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +74 -0
- data/lib/elastic_graph/graphql/resolvers/query_adapter.rb +85 -0
- data/lib/elastic_graph/graphql/resolvers/query_source.rb +46 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +71 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +65 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/page_info.rb +82 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +40 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +42 -0
- data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +56 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/cursor.rb +35 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date.rb +64 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date_time.rb +60 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/local_time.rb +30 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/longs.rb +47 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/no_op.rb +24 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/time_zone.rb +44 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/untyped.rb +32 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones.rb +634 -0
- data/lib/elastic_graph/graphql/schema/arguments.rb +78 -0
- data/lib/elastic_graph/graphql/schema/enum_value.rb +30 -0
- data/lib/elastic_graph/graphql/schema/field.rb +147 -0
- data/lib/elastic_graph/graphql/schema/relation_join.rb +103 -0
- data/lib/elastic_graph/graphql/schema/type.rb +263 -0
- data/lib/elastic_graph/graphql/schema.rb +164 -0
- data/lib/elastic_graph/graphql.rb +253 -0
- data/script/dump_time_zones +81 -0
- data/script/dump_time_zones.java +17 -0
- metadata +503 -0
|
@@ -0,0 +1,526 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/constants"
|
|
10
|
+
require "elastic_graph/graphql/filtering/boolean_query"
|
|
11
|
+
require "elastic_graph/graphql/filtering/field_path"
|
|
12
|
+
require "elastic_graph/graphql/filtering/range_query"
|
|
13
|
+
require "elastic_graph/graphql/schema/enum_value"
|
|
14
|
+
require "elastic_graph/support/graphql_formatter"
|
|
15
|
+
require "elastic_graph/support/memoizable_data"
|
|
16
|
+
require "elastic_graph/support/time_util"
|
|
17
|
+
require "graphql"
|
|
18
|
+
|
|
19
|
+
module ElasticGraph
|
|
20
|
+
class GraphQL
|
|
21
|
+
module Filtering
|
|
22
|
+
# Contains all query logic related to filtering. Not tested directly; tests drive the `Query` interface instead.
|
|
23
|
+
# For more info on how this works, see:
|
|
24
|
+
# https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-bool-query.html
|
|
25
|
+
# https://www.elastic.co/blog/lost-in-translation-boolean-operations-and-filters-in-the-bool-query
|
|
26
|
+
FilterInterpreter = Support::MemoizableData.define(:runtime_metadata, :schema_names, :logger) do
|
|
27
|
+
# @implements FilterInterpreter
|
|
28
|
+
|
|
29
|
+
# Builds a FilterInterpreter. Callers supply only the runtime metadata and a
# logger; `schema_names` is derived from the metadata's schema element names.
def initialize(runtime_metadata:, logger:)
  schema_element_names = runtime_metadata.schema_element_names

  super(
    runtime_metadata: runtime_metadata,
    schema_names: schema_element_names,
    logger: logger
  )
end
|
|
36
|
+
|
|
37
|
+
# Builds a datastore query from the given collection of filter hashes.
#
# Returns `nil` when no query clauses result, making it easy for a caller to
# `compact` out `query: {}` from a larger search request body.
#
# https://www.elastic.co/guide/en/elasticsearch/reference/8.11/query-dsl.html
def build_query(filter_hashes, from_field_path: FieldPath.empty)
  build_bool_hash do |bool_node|
    filter_hashes.each { |filter_hash| process_filter_hash(bool_node, filter_hash, from_field_path) }
  end
end
|
|
50
|
+
|
|
51
|
+
# Renders a compact representation of this interpreter.
#
# The full inspect/to_s output of `runtime_metadata` and `logger` is large and
# noisy; we only want to be able to tell at a glance whether two
# `FilterInterpreter` instances are equal and, when they are not, which part is
# responsible. Including just the hash of each initialize arg provides that.
def to_s
  "#<data #{FilterInterpreter.name} runtime_metadata=(hash: #{runtime_metadata.hash}) logger=(hash: #{logger.hash})>"
end
alias_method :inspect, :to_s
|
|
60
|
+
|
|
61
|
+
private
|
|
62
|
+
|
|
63
|
+
# Dispatches each `field_or_op => expression` entry of `filter_hash` to the
# handler for its expression type, accumulating query clauses onto `bool_node`.
# Unknown operators are logged and ignored rather than raising.
def process_filter_hash(bool_node, filter_hash, field_path)
  filter_hash.each do |field_or_op, expression|
    expression_type = identify_expression_type(field_or_op, expression)

    case expression_type
    when :empty
      # An "empty" filter predicate; there is nothing to do.
    when :not
      process_not_expression(bool_node, expression, field_path)
    when :list_any_filter
      process_list_any_filter_expression(bool_node, expression, field_path)
    when :any_of
      process_any_of_expression(bool_node, expression, field_path)
    when :all_of
      process_all_of_expression(bool_node, expression, field_path)
    when :operator
      process_operator_expression(bool_node, field_or_op, expression, field_path)
    when :list_count
      process_list_count_expression(bool_node, expression, field_path)
    when :sub_field
      # Descend into the sub-field, extending the field path with its name.
      process_sub_field_expression(bool_node, expression, field_path + field_or_op)
    else
      logger.warn("Ignoring unknown filtering operator (#{field_or_op}: #{expression.inspect}) on field `#{field_path.from_root.join(".")}`")
    end
  end
end
|
|
87
|
+
|
|
88
|
+
# Classifies a single `field_or_op => expression` filter entry into one of the
# symbolic types handled by `process_filter_hash`. The checks are ordered so
# that known operator names win over the generic hash (`:sub_field`) case.
def identify_expression_type(field_or_op, expression)
  if expression.nil?
    :empty
  elsif field_or_op == schema_names.not
    :not
  elsif field_or_op == schema_names.any_satisfy
    :list_any_filter
  elsif field_or_op == schema_names.all_of
    :all_of
  elsif field_or_op == schema_names.any_of
    :any_of
  elsif filter_operators.key?(field_or_op)
    :operator
  elsif field_or_op == LIST_COUNTS_FIELD
    :list_count
  elsif expression.is_a?(::Hash)
    :sub_field
  else
    :unknown
  end
end
|
|
99
|
+
|
|
100
|
+
# Indicates if the given `expression` applies filtering to subfields or just
# applies operators at the current field path.
def filters_on_sub_fields?(expression)
  expression.any? do |field_or_op, sub_expression|
    sub_type = identify_expression_type(field_or_op, sub_expression)

    if sub_type == :sub_field
      true
    elsif sub_type == :not || sub_type == :list_any_filter
      # Both wrap a single nested filter hash; recurse into it.
      filters_on_sub_fields?(sub_expression)
    elsif sub_type == :any_of || sub_type == :all_of
      # These are the only two cases where `sub_expression` is an array of filter
      # sub-expressions. Either way, the overall expression filters on sub fields
      # so long as at least one member does (true even for `all_of`).
      sub_expression.any? { |expr| filters_on_sub_fields?(expr) }
    else
      # :empty, :operator, :unknown, :list_count — none of these descend into sub fields.
      false
    end
  end
end
|
|
119
|
+
|
|
120
|
+
# Handles a `not` filter by negating the clauses built from `expression`.
def process_not_expression(bool_node, expression, field_path)
  negated = build_bool_hash do |inner_node|
    process_filter_hash(inner_node, expression, field_path)
  end

  return if negated.nil?

  inner_bool = negated[:bool]

  # Avoid unnecessarily double-negating any already-negated filters by hoisting
  # them up as positive clauses on the current node (i.e. !!A == A).
  if inner_bool.key?(:must_not)
    inner_bool[:must_not].each do |negated_clause|
      negated_clause[:bool].each { |occurrence, clauses| bool_node[occurrence].concat(clauses) }
    end
  end

  # Don't drop any of the remaining (non-negated) filters — negate them now.
  remaining = inner_bool.except(:must_not)
  bool_node[:must_not] << {bool: remaining} unless remaining.empty?
end
|
|
140
|
+
|
|
141
|
+
# There are two cases for `any_satisfy`, each of which is handled differently:
#
# - List-of-scalars
# - List-of-nested-objects
#
# We can detect which it is by checking whether `filter` filters on any subfields.
# If so, we know the filter is being applied to a `nested` list field. We can count
# on this because we do not generate `any_satisfy` filters on `object` list fields
# (instead, they get generated on their leaf fields).
def process_list_any_filter_expression(bool_node, filter, field_path)
  if filters_on_sub_fields?(filter)
    process_any_satisfy_filter_expression_on_nested_object_list(bool_node, filter, field_path)
  else
    process_any_satisfy_filter_expression_on_scalar_list(bool_node, filter, field_path)
  end
end
|
|
157
|
+
|
|
158
|
+
# Handles `any_satisfy` on a list of nested objects by wrapping the sub-filter
# in a datastore `nested` query rooted at the current field path.
def process_any_satisfy_filter_expression_on_nested_object_list(bool_node, filter, field_path)
  nested_query = build_bool_hash do |inner_node|
    process_filter_hash(inner_node, filter, field_path.nested)
  end

  return if nested_query.nil?

  bool_node[:filter] << {nested: {path: field_path.from_root.join("."), query: nested_query}}
end
|
|
167
|
+
|
|
168
|
+
# On a list-of-leaf-values field, `any_satisfy` doesn't _do_ anything: it just expresses
# the fact that documents with any list element values matching the predicates will match
# the overall filter.
def process_any_satisfy_filter_expression_on_scalar_list(bool_node, filter, field_path)
  processed = build_bool_hash { |node| process_filter_hash(node, filter, field_path) }
  return if processed.nil?

  processed_bool_query = processed.fetch(:bool)

  # The semantics we want for `any_satisfy` are that it matches when a value exists in the
  # list that satisfies all of the provided subfilter. That's what the datastore gives us
  # when the bool query requires only one clause to match. But when multiple clauses are
  # required there's a subtle issue: a document matches so long as each required clause
  # matches *some* value, without requiring them all to match the *same* value. A list of
  # N values, each matching a different required clause, would make the document a hit.
  #
  # Rather than behaving in that surprising way, we disallow filters that require multiple
  # clauses and return an error instead.
  if required_matching_clause_count(processed_bool_query) <= 1
    # Merge the processed clauses into `bool_node`, concatenating with any clauses
    # already present for the same occurrence.
    bool_node.update(processed_bool_query) do |_occurrence, existing_clauses, any_satisfy_clauses|
      existing_clauses + any_satisfy_clauses
    end
  else
    formatted_filter = Support::GraphQLFormatter.serialize(
      {schema_names.any_satisfy => filter},
      wrap_hash_with_braces: false
    )

    raise ::GraphQL::ExecutionError, "`#{formatted_filter}` is not supported because it produces " \
      "multiple filtering clauses under `#{schema_names.any_satisfy}`, which doesn't work as expected. " \
      "Remove one or more of your `#{schema_names.any_satisfy}` predicates and try again."
  end
end
|
|
200
|
+
|
|
201
|
+
# Handles `any_of` (logical OR) by building a `should` clause per sub-expression.
def process_any_of_expression(bool_node, expressions, field_path)
  should_clauses = expressions.filter_map do |expression|
    build_bool_hash { |inner_node| process_filter_hash(inner_node, expression, field_path) }
  end

  # When `should_clauses` is empty, the filtering semantics we want is to match no
  # documents. The datastore does not behave that way for an empty `should` array,
  # so instead we pass it filter criteria that evaluate to false for every document.
  bool_query =
    if should_clauses.empty?
      BooleanQuery::ALWAYS_FALSE_FILTER
    else
      BooleanQuery.should(*should_clauses)
    end

  bool_query.merge_into(bool_node)
end
|
|
215
|
+
|
|
216
|
+
# Handles `all_of` (logical AND). AND is already the default way that
# `process_filter_hash` combines filters, so each sub-expression is simply
# processed onto the same `bool_node`.
def process_all_of_expression(bool_node, expressions, field_path)
  expressions.each { |sub_expression| process_filter_hash(bool_node, sub_expression, field_path) }
end
|
|
223
|
+
|
|
224
|
+
# Applies a single filtering `operator` (e.g. gt/lt/equal_to_any_of) to the value
# in `expression`. The lambda looked up from `filter_operators` returns a boolean
# query object (or `nil` when there is nothing to apply), which is merged into
# `bool_node`.
def process_operator_expression(bool_node, operator, expression, field_path)
  op_applicator = filter_operators.fetch(operator)
  bool_query = op_applicator.call(field_path.from_root.join("."), expression)
  bool_query&.merge_into(bool_node)
end
|
|
231
|
+
|
|
232
|
+
# Applies a sub-field filter hash (`expression`), recursing with the sub-field's
# name already appended to `field_path` by the caller.
#
# Sub-field filters normally flatten into the parent's single `filter` list. For
# example:
#
#   filter: {
#     weight: {lt: 2000},
#     cost: {currency: {equal_to_any_of: ["USD"]}, amount: {gt: 1000}}
#   }
#
# ...needs no nested bool structure; it becomes one flat list of ANDed clauses
# with dot-separated field paths:
#
#   {bool: {filter: [
#     {range: {"weight": {lt: 2000}}},
#     {terms: {"cost.currency": ["USD"]}},
#     {range: {"amount": {gt: 1000}}}
#   ]}}
#
# However, when the sub-filter contains `any_of` (an OR), flattening would merge
# the `should` clauses of *different* sub-fields into a single OR list, so a match
# on any one clause from any sub-field would satisfy them all — clearly wrong. To
# keep each `any_of` scoped to its own sub-field, we build it on a nested bool
# node and append that as a single `filter` clause:
#
#   {bool: {filter: [
#     {bool: {should: [...weight clauses...]}},
#     {bool: {should: [...cost clauses...]}}
#   ]}}
def process_sub_field_expression(bool_node, expression, field_path)
  if expression.key?(schema_names.any_of)
    scoped_filter = build_bool_hash do |inner_node|
      process_filter_hash(inner_node, expression, field_path)
    end

    bool_node[:filter] << scoped_filter unless scoped_filter.nil?
  else
    process_filter_hash(bool_node, expression, field_path)
  end
end
|
|
310
|
+
|
|
311
|
+
# Handles a list-count filter such as `filter: {tags: {count: {gt: 2}}}`.
#
# Normally no special handling is needed. But when the count expression could
# match a count of 0 (e.g. `{count: {lt: 1}}`), a count of 0 is equivalent to the
# list field not existing at all. While we index an explicit count of 0, the count
# field will be missing from documents indexed before the list field was defined
# on the ElasticGraph schema. To properly match those documents, we convert the
# expression into an OR (via `any_of`) that also matches documents lacking the
# field entirely.
def process_list_count_expression(bool_node, expression, field_path)
  unless excludes_zero?(expression)
    expression = {schema_names.any_of => [
      expression,
      {schema_names.equal_to_any_of => [nil]}
    ]}
  end

  process_sub_field_expression(bool_node, expression, field_path.counts_path)
end
|
|
335
|
+
|
|
336
|
+
# Yields a fresh bool node (a hash whose values default to empty arrays) to the
# block, then wraps whatever clauses were accumulated in `{bool: ...}`.
# Returns `nil` when no clauses were added, so "empty" filter predicates are ignored.
def build_bool_hash(&block)
  bool_node = ::Hash.new { |hash, occurrence| hash[occurrence] = [] }
  block.call(bool_node)

  return nil if bool_node.empty?

  # Per https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-bool-query.html#bool-min-should-match,
  # `minimum_should_match` defaults to 1 only when the bool query has `should` clauses and
  # no `must`/`filter` clauses; otherwise it defaults to 0. Since we want `should` clauses
  # to remain required even alongside musts and filters, we set it explicitly to 1.
  bool_node[:minimum_should_match] = 1 if bool_node.key?(:should)

  {bool: bool_node}
end
|
|
349
|
+
|
|
350
|
+
# Determines if the given filter expression excludes the value `0`.
# Used by `process_list_count_expression` to decide whether a count filter could
# match documents whose count field is absent (equivalent to a count of 0).
def excludes_zero?(expression)
  expression.any? do |operator, operand|
    if operator == schema_names.equal_to_any_of
      !operand.include?(0)
    elsif operator == schema_names.lt
      operand <= 0 # `count < operand` can only match 0 when operand > 0
    elsif operator == schema_names.lte
      operand < 0
    elsif operator == schema_names.gt
      operand >= 0
    elsif operator == schema_names.gte
      operand > 0
    else
      false
    end
  end
end
|
|
366
|
+
|
|
367
|
+
# Memoized table of operator name => lambda; see `build_filter_operators`.
def filter_operators
  @filter_operators ||= build_filter_operators(runtime_metadata)
end
|
|
370
|
+
|
|
371
|
+
# Builds the frozen table mapping each schema operator name to a lambda that,
# given a dotted field name and the operator's value, returns a boolean query
# object (or `nil` when there is nothing to apply).
def build_filter_operators(runtime_metadata)
  schema_names = runtime_metadata.schema_element_names

  filter_by_time_of_day_script_id = runtime_metadata
    .static_script_ids_by_scoped_name
    .fetch("filter/by_time_of_day")

  {
    schema_names.equal_to_any_of => ->(field_name, value) {
      values = to_datastore_value(value.compact.uniq) # : ::Array[untyped]

      equality_sub_expression =
        if field_name == "id"
          # Use the specialized "ids" query when querying on the ID field.
          # See: https://www.elastic.co/guide/en/elasticsearch/reference/7.15/query-dsl-ids-query.html
          #
          # Empty strings are rejected because the datastore otherwise errors with:
          # "failed to create query: Ids can't be empty"
          {ids: {values: values - [""]}}
        else
          {terms: {field_name => values}}
        end

      exists_sub_expression = {exists: {"field" => field_name}}

      if !value.empty? && value.all?(&:nil?)
        # Only `nil` was requested: match documents where the field does not exist.
        BooleanQuery.new(:must_not, [{bool: {filter: [exists_sub_expression]}}])
      elsif value.include?(nil)
        # `nil` plus concrete values: match either the equality clause or field absence.
        BooleanQuery.filter({bool: {
          minimum_should_match: 1,
          should: [
            {bool: {filter: [equality_sub_expression]}},
            {bool: {must_not: [{bool: {filter: [exists_sub_expression]}}]}}
          ]
        }})
      else
        BooleanQuery.filter(equality_sub_expression)
      end
    },
    schema_names.gt => ->(field_name, value) { RangeQuery.new(field_name, :gt, value) },
    schema_names.gte => ->(field_name, value) { RangeQuery.new(field_name, :gte, value) },
    schema_names.lt => ->(field_name, value) { RangeQuery.new(field_name, :lt, value) },
    schema_names.lte => ->(field_name, value) { RangeQuery.new(field_name, :lte, value) },
    schema_names.matches => ->(field_name, value) { BooleanQuery.must({match: {field_name => value}}) },
    schema_names.matches_query => ->(field_name, value) do
      allowed_edits_per_term = value.fetch(schema_names.allowed_edits_per_term).runtime_metadata.datastore_abbreviation

      BooleanQuery.must(
        {
          match: {
            field_name => {
              query: value.fetch(schema_names.query),
              # `fuzziness` is always a string field, even though the value is often an integer.
              fuzziness: allowed_edits_per_term.to_s,
              operator: value[schema_names.require_all_terms] ? "AND" : "OR"
            }
          }
        }
      )
    end,
    schema_names.matches_phrase => ->(field_name, value) {
      BooleanQuery.must(
        {
          match_phrase_prefix: {
            field_name => {
              query: value.fetch(schema_names.phrase)
            }
          }
        }
      )
    },

    # This filter operator wraps a geo distance query:
    # https://www.elastic.co/guide/en/elasticsearch/reference/7.10/query-dsl-geo-distance-query.html
    schema_names.near => ->(field_name, value) do
      unit_abbreviation = value.fetch(schema_names.unit).runtime_metadata.datastore_abbreviation

      BooleanQuery.filter({geo_distance: {
        "distance" => "#{value.fetch(schema_names.max_distance)}#{unit_abbreviation}",
        field_name => {
          "lat" => value.fetch(schema_names.latitude),
          "lon" => value.fetch(schema_names.longitude)
        }
      }})
    end,

    schema_names.time_of_day => ->(field_name, value) do
      # To filter on time of day, we use the `filter/by_time_of_day` script. We need a
      # script because Elasticsearch/OpenSearch do not support this natively, and it's
      # incredibly hard to implement correctly with respect to time zones without one.
      # We considered indexing `time_of_day` as a separate index field that we could
      # filter on directly, but since the time of day must be relative to a specific
      # time zone, daylight savings time makes that unworkable: `America/Los_Angeles`,
      # for example, has a -07:00 UTC offset for part of the year and a -08:00 UTC
      # offset for the rest. In a script we can use Java time zone APIs to handle
      # this correctly.
      params = {
        field: field_name,
        equal_to_any_of: list_of_nanos_of_day_from(value, schema_names.equal_to_any_of),
        gt: nano_of_day_from(value, schema_names.gt),
        gte: nano_of_day_from(value, schema_names.gte),
        lt: nano_of_day_from(value, schema_names.lt),
        lte: nano_of_day_from(value, schema_names.lte),
        time_zone: value[schema_names.time_zone]
      }.compact

      # If there are no comparison operators, return `nil` instead of a query so that we
      # avoid invoking the script for no reason. Note that `field` and `time_zone` will
      # always be in `params`, so we can't just check for an empty hash here.
      if (params.keys - [:field, :time_zone]).any?
        BooleanQuery.filter({script: {script: {id: filter_by_time_of_day_script_id, params: params}}})
      end
    end
  }.freeze
end
|
|
486
|
+
|
|
487
|
+
# Recursively converts a GraphQL filter value to its datastore representation:
# arrays convert element-wise, enum values become their name strings, and all
# other values pass through unchanged.
def to_datastore_value(value)
  if value.is_a?(::Array)
    value.map { |element| to_datastore_value(element) }
  elsif value.is_a?(Schema::EnumValue)
    value.name.to_s
  else
    value
  end
end
|
|
497
|
+
|
|
498
|
+
# Extracts the local time stored under `field` (if any) and converts it to
# nanoseconds-of-day; returns `nil` when the field is absent.
def nano_of_day_from(value, field)
  value[field]&.then { |local_time| Support::TimeUtil.nano_of_day_from_local_time(local_time) }
end
|
|
502
|
+
|
|
503
|
+
# Converts the list of local times stored under `field` (if any) to a list of
# nanoseconds-of-day values; returns `nil` when the field is absent.
def list_of_nanos_of_day_from(value, field)
  local_times = value[field]
  local_times&.map { |local_time| Support::TimeUtil.nano_of_day_from_local_time(local_time) }
end
|
|
506
|
+
|
|
507
|
+
# Counts how many clauses in `bool_query` are required to match for a document to be a search hit.
def required_matching_clause_count(bool_query)
  bool_query.sum do |occurrence, clauses|
    case occurrence
    when :should
      # The number of required matching clauses imposed by `:should` depends on the
      # `:minimum_should_match` value.
      # https://www.elastic.co/guide/en/elasticsearch/reference/8.9/query-dsl-bool-query.html#bool-min-should-match
      bool_query.fetch(:minimum_should_match)
    when :minimum_should_match
      # Contributes no clauses of its own; it just controls how many `:should` clauses are required.
      0
    else
      # For all other occurrences, each clause must match.
      clauses.size
    end
  end
end
|
|
523
|
+
end
|
|
524
|
+
end
|
|
525
|
+
end
|
|
526
|
+
end
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
module ElasticGraph
  class GraphQL
    module Filtering
      # Responsible for extracting a set of values from query filters, based on using a custom
      # set type that is able to efficiently model the "all values" case.
      class FilterValueSetExtractor
        # `schema_names` must respond to `not`, `any_of`, and `canonical_name_for`.
        # `all_values_set` is the set representation used when a filter places no constraint at all.
        # `build_set_for_filter` maps an `(operator_name, filter_value)` pair to a set (or `nil`
        # when the operator provides no constraint).
        def initialize(schema_names, all_values_set, &build_set_for_filter)
          @schema_names = schema_names
          @all_values_set = all_values_set
          @build_set_for_filter = build_set_for_filter
        end

        # Given a list of `filter_hashes` and a list of `target_field_paths`, returns a representation
        # of a set that includes all values that could be matched by the given filters.
        #
        # Essentially, this method guarantees that the following pseudo code is always satisfied:
        #
        # ``` ruby
        # filter_value_set = extract_filter_value_set(filter_hashes, target_field_paths)
        # Datastore.all_documents_matching(filter_hashes).each do |document|
        #   target_field_paths.each do |field_path|
        #     expect(filter_value_set).to include(document.value_at(field_path))
        #   end
        # end
        # ```
        def extract_filter_value_set(filter_hashes, target_field_paths)
          # We union the filter values together in cases where we have multiple target field paths
          # to make sure we cover all the values we need to. We generally do not have multiple
          # `target_field_paths` except for specialized cases, such as when searching multiple
          # indices in one query, where those indices are configured to use differing `routing_field_paths`.
          # In such a situation we must use the set union of values. Remember: including additional
          # routing values causes no adverse behavior (although it may introduce an inefficiency)
          # but if we fail to route to a shard that contains a matching document, the search results
          # will be incorrect.
          map_reduce_sets(target_field_paths, :union, negate: false) do |target_field_path|
            filter_value_set_for_target_field_path(target_field_path, filter_hashes)
          end
        end

        private

        # Determines a set of filter values for one of our `target_field_paths`,
        # based on a list of `filter_hashes`.
        def filter_value_set_for_target_field_path(target_field_path, filter_hashes)
          # Pre-split the `target_field_path` to make it easy to compare as an array,
          # since we build up the `traversed_field_path_parts` as an array as we recurse. We do this here
          # outside the `map_reduce_sets` block below so we only do it once instead of N times.
          target_field_path_parts = target_field_path.split(".")

          # Here we intersect the filter value sets because when we have multiple `filter_hashes`,
          # the filters are ANDed together. Only documents that match ALL the filters will be
          # returned. Therefore, we want the intersection of filter value sets.
          map_reduce_sets(filter_hashes, :intersection, negate: false) do |filter_hash|
            filter_value_set_for_filter_hash(filter_hash, target_field_path_parts, negate: false)
          end
        end

        # Determines the set of filter values for one of our `target_field_paths` values and one
        # `filter_hash` from a list of filter hashes. Note that this method is called recursively,
        # with `traversed_field_path_parts` as an accumulator that accumulates that path to a nested
        # field we are filtering on.
        def filter_value_set_for_filter_hash(filter_hash, target_field_path_parts, traversed_field_path_parts = [], negate:)
          # Here we intersect the filter value sets because when we have multiple entries in a filter hash,
          # the filters are ANDed together. Only documents that match ALL the filters will be
          # returned. Therefore, we want the intersection of filter value sets.
          map_reduce_sets(filter_hash, :intersection, negate: negate) do |key, value|
            filter_value_set_for_filter_hash_entry(key, value, target_field_path_parts, traversed_field_path_parts, negate: negate)
          end
        end

        # Determines the set of filter values for one of our `target_field_paths` and one
        # entry from one `filter_hash`. The key/value pair from a single entry is passed as the
        # first two arguments. Depending on where we are at in recursing through the nested structure,
        # the key could identify either a field we are filtering on or a filtering operator to apply
        # to a particular field.
        def filter_value_set_for_filter_hash_entry(field_or_op, filter_value, target_field_path_parts, traversed_field_path_parts, negate:)
          if filter_value.nil?
            # Any filter with a `nil` value is effectively ignored by our filtering logic, so we need
            # to return our `@all_values_set` to indicate this filter matches all documents.
            @all_values_set
          elsif field_or_op == @schema_names.not
            filter_value_set_for_filter_hash(filter_value, target_field_path_parts, traversed_field_path_parts, negate: !negate)
          elsif filter_value.is_a?(::Hash)
            # the only time `value` is a hash is when `field_or_op` is a field name.
            # In that case, `value` is a hash of filters that apply to that field.
            filter_value_set_for_filter_hash(filter_value, target_field_path_parts, traversed_field_path_parts + [field_or_op], negate: negate)
          elsif field_or_op == @schema_names.any_of
            filter_value_set_for_any_of(filter_value, target_field_path_parts, traversed_field_path_parts, negate: negate)
          elsif target_field_path_parts == traversed_field_path_parts
            set = filter_value_set_for_field_filter(field_or_op, filter_value)
            negate ? set.negate : set
          else
            # Otherwise, we have no information in this clause to limit our filter value set.
            @all_values_set
          end
        end

        # Determines the set of filter values for an `any_of` clause, which is used for ORing multiple filters together.
        def filter_value_set_for_any_of(filter_hashes, target_field_path_parts, traversed_field_path_parts, negate:)
          # Here we union the filter value sets because `any_of` represents an OR. If we can determine specific
          # filter values for all `any_of` clauses, we will OR them together. Alternately, if we cannot
          # determine specific filter values for any clauses, we will union `@all_values_set`,
          # which will result in a return value of `@all_values_set`. This is correct because if there
          # is an `any_of` clause that does not match on the `target_field_path_parts` then the filter
          # excludes no documents on the basis of the target filter.
          map_reduce_sets(filter_hashes, :union, negate: negate) do |filter_hash|
            filter_value_set_for_filter_hash(filter_hash, target_field_path_parts, traversed_field_path_parts, negate: negate)
          end
        end

        # Determines the set of filter values for a single filter on a single field.
        def filter_value_set_for_field_filter(filter_op, filter_value)
          operator_name = @schema_names.canonical_name_for(filter_op)
          @build_set_for_filter.call(operator_name, filter_value) || @all_values_set
        end

        # Maps over the provided `collection` by applying the given `map_transform`
        # (which must transform a collection entry to an instance of our set representation), then reduces
        # the resulting collection to a single set value. `reduction` will be either `:union` or `:intersection`.
        #
        # If the collection is empty, we return `@all_values_set` because it's the only "safe" value
        # we can return. We don't have any information that would allow us to limit the set of filter
        # values in any way.
        def map_reduce_sets(collection, reduction, negate:, &map_transform)
          return @all_values_set if collection.empty?

          # In the case where `negate` is true (`not` is present somewhere in the filtering expression),
          # we negate the reduction operator. Utilizing De Morgan’s Law (¬(A ∪ B) <-> (¬A) ∩ (¬B)),
          # the negation of the union of two sets is the intersection of the negation of each set (the negation
          # of each set is the difference between @all_values_set and the given set)--and vice versa.
          reduction = REDUCTION_INVERSIONS.fetch(reduction) if negate

          collection.map(&map_transform).reduce(reduction)
        end

        # Frozen so this shared lookup table cannot be mutated at runtime.
        REDUCTION_INVERSIONS = {union: :intersection, intersection: :union}.freeze
      end
    end
  end
end
|