elasticgraph-graphql 0.19.3.0 → 1.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +6 -38
- data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +2 -2
- data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +8 -7
- data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +3 -3
- data/lib/elastic_graph/graphql/config.rb +0 -15
- data/lib/elastic_graph/graphql/datastore_query.rb +53 -13
- data/lib/elastic_graph/graphql/datastore_response/document.rb +4 -0
- data/lib/elastic_graph/graphql/filtering/boolean_query.rb +23 -6
- data/lib/elastic_graph/graphql/filtering/filter_node_interpreter.rb +62 -22
- data/lib/elastic_graph/graphql/query_adapter/filters.rb +10 -11
- data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +21 -21
- data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +1 -2
- data/lib/elastic_graph/graphql/resolvers/graphql_adapter_builder.rb +5 -9
- data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +1 -3
- data/lib/elastic_graph/graphql/resolvers/nested_relationships_source.rb +7 -74
- data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +17 -16
- data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +6 -4
- data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +66 -2
- data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +3 -3
- data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +3 -3
- data/lib/elastic_graph/graphql/schema/field.rb +1 -1
- data/lib/elastic_graph/graphql/schema/type.rb +4 -0
- data/lib/elastic_graph/graphql.rb +1 -7
- metadata +25 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 24f8c6670bb21bcf8936b9bd55cc2e3d6964e78ef9e3742881e720dc0e212404
|
4
|
+
data.tar.gz: 8b1de56fd5b6732905fb07fc4d2ece0480784104e6b860d42b0f62027463cef2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 399538281bc06bcea0a5ede7fc1c63990658def8aca9e9d2d22994e59d57d157fb0ef6b7d6c6b0270b528b278e8b77b3e20303bb9cbec2a5894bfccdc209e9fa
|
7
|
+
data.tar.gz: '072188ff912c10f81da236a0f5c9faab646976ad0bd33e57a0008bb2fc82add9e38896aa4e55331c308520145c4e79e6a5eba942e8f8b903862d05b29b2c82b6'
|
@@ -209,29 +209,9 @@ module ElasticGraph
|
|
209
209
|
# New date/time grouping API (DateGroupedBy, DateTimeGroupedBy)
|
210
210
|
if field.type.elasticgraph_category == :date_grouped_by_object
|
211
211
|
date_time_groupings_from(field_path: field_path, node: node)
|
212
|
-
|
213
212
|
elsif !field.type.object?
|
214
|
-
case field.type.name
|
215
|
-
# Legacy date grouping API
|
216
|
-
when "Date"
|
217
|
-
legacy_date_histogram_groupings_from(
|
218
|
-
field_path: field_path,
|
219
|
-
node: node,
|
220
|
-
get_time_zone: ->(args) {},
|
221
|
-
get_offset: ->(args) { args[element_names.offset_days]&.then { |days| "#{days}d" } }
|
222
|
-
)
|
223
|
-
# Legacy datetime grouping API
|
224
|
-
when "DateTime"
|
225
|
-
legacy_date_histogram_groupings_from(
|
226
|
-
field_path: field_path,
|
227
|
-
node: node,
|
228
|
-
get_time_zone: ->(args) { args.fetch(element_names.time_zone) },
|
229
|
-
get_offset: ->(args) { datetime_offset_from(node, args) }
|
230
|
-
)
|
231
213
|
# Non-date/time grouping
|
232
|
-
|
233
|
-
[FieldTermGrouping.new(field_path: field_path)]
|
234
|
-
end
|
214
|
+
[FieldTermGrouping.new(field_path: field_path)]
|
235
215
|
end
|
236
216
|
end
|
237
217
|
end
|
@@ -264,7 +244,7 @@ module ElasticGraph
|
|
264
244
|
field_path: child_field_path,
|
265
245
|
script_id: runtime_metadata.static_script_ids_by_scoped_name.fetch("field/as_time_of_day"),
|
266
246
|
params: {
|
267
|
-
"interval" => interval_from(child_node, schema_args
|
247
|
+
"interval" => interval_from(child_node, schema_args),
|
268
248
|
"offset_ms" => datetime_offset_as_ms_from(child_node, schema_args),
|
269
249
|
"time_zone" => time_zone
|
270
250
|
}
|
@@ -272,7 +252,7 @@ module ElasticGraph
|
|
272
252
|
else
|
273
253
|
DateHistogramGrouping.new(
|
274
254
|
field_path: child_field_path,
|
275
|
-
interval: interval_from(child_node, schema_args
|
255
|
+
interval: interval_from(child_node, schema_args),
|
276
256
|
offset: datetime_offset_from(child_node, schema_args),
|
277
257
|
time_zone: time_zone
|
278
258
|
)
|
@@ -280,22 +260,10 @@ module ElasticGraph
|
|
280
260
|
end
|
281
261
|
end
|
282
262
|
|
283
|
-
def legacy_date_histogram_groupings_from(field_path:, node:, get_time_zone:, get_offset:)
|
284
|
-
schema_args = Schema::Arguments.to_schema_form(node.arguments, node.field)
|
285
|
-
|
286
|
-
[DateHistogramGrouping.new(
|
287
|
-
field_path: field_path,
|
288
|
-
interval: interval_from(node, schema_args, interval_unit_key: element_names.granularity),
|
289
|
-
time_zone: get_time_zone.call(schema_args),
|
290
|
-
offset: get_offset.call(schema_args)
|
291
|
-
)]
|
292
|
-
end
|
293
|
-
|
294
263
|
# Figure out the Date histogram grouping interval for the given node based on the `grouped_by` argument.
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
enum_value_name = schema_args.fetch(interval_unit_key)
|
264
|
+
def interval_from(node, schema_args)
|
265
|
+
enum_type_name = node.field.arguments.fetch(element_names.truncation_unit).type.unwrap.graphql_name
|
266
|
+
enum_value_name = schema_args.fetch(element_names.truncation_unit)
|
299
267
|
enum_value = schema.type_named(enum_type_name).enum_value_named(enum_value_name)
|
300
268
|
|
301
269
|
_ = enum_value.runtime_metadata.datastore_value
|
@@ -29,7 +29,7 @@ module ElasticGraph
|
|
29
29
|
|
30
30
|
def sub_aggregations
|
31
31
|
@sub_aggregations ||= SubAggregations.new(
|
32
|
-
|
32
|
+
schema,
|
33
33
|
query.sub_aggregations,
|
34
34
|
parent_queries + [query],
|
35
35
|
bucket,
|
@@ -42,7 +42,7 @@ module ElasticGraph
|
|
42
42
|
end
|
43
43
|
|
44
44
|
def count_detail
|
45
|
-
@count_detail ||= CountDetail.new(
|
45
|
+
@count_detail ||= CountDetail.new(schema, bucket)
|
46
46
|
end
|
47
47
|
|
48
48
|
def cursor
|
@@ -15,18 +15,19 @@ module ElasticGraph
|
|
15
15
|
module Aggregation
|
16
16
|
module Resolvers
|
17
17
|
module RelayConnectionBuilder
|
18
|
-
def self.build_from_search_response(query:, search_response:,
|
19
|
-
build_from_buckets(query: query, parent_queries: [],
|
18
|
+
def self.build_from_search_response(query:, search_response:, schema:)
|
19
|
+
build_from_buckets(query: query, parent_queries: [], schema: schema) do
|
20
20
|
extract_buckets_from(search_response, for_query: query)
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
-
def self.build_from_buckets(query:, parent_queries:,
|
24
|
+
def self.build_from_buckets(query:, parent_queries:, schema:, field_path: [], &build_buckets)
|
25
25
|
GraphQL::Resolvers::RelayConnection::GenericAdapter.new(
|
26
|
-
|
27
|
-
raw_nodes: raw_nodes_for(query, parent_queries,
|
26
|
+
schema: schema,
|
27
|
+
raw_nodes: raw_nodes_for(query, parent_queries, schema, field_path, &build_buckets),
|
28
28
|
paginator: query.paginator,
|
29
29
|
get_total_edge_count: -> {},
|
30
|
+
edge_class: (_ = GraphQL::Resolvers::RelayConnection::GenericAdapter::Edge),
|
30
31
|
to_sort_value: ->(node, decoded_cursor) do
|
31
32
|
query.groupings.map do |grouping|
|
32
33
|
DatastoreQuery::Paginator::SortValue.new(
|
@@ -39,13 +40,13 @@ module ElasticGraph
|
|
39
40
|
)
|
40
41
|
end
|
41
42
|
|
42
|
-
private_class_method def self.raw_nodes_for(query, parent_queries,
|
43
|
+
private_class_method def self.raw_nodes_for(query, parent_queries, schema, field_path)
|
43
44
|
# The `DecodedCursor::SINGLETON` is a special case, so handle it here.
|
44
45
|
return [] if query.paginator.paginated_from_singleton_cursor?
|
45
46
|
|
46
47
|
yield.map do |bucket|
|
47
48
|
Node.new(
|
48
|
-
|
49
|
+
schema: schema,
|
49
50
|
query: query,
|
50
51
|
parent_queries: parent_queries,
|
51
52
|
bucket: bucket,
|
@@ -19,7 +19,7 @@ module ElasticGraph
|
|
19
19
|
class GraphQL
|
20
20
|
module Aggregation
|
21
21
|
module Resolvers
|
22
|
-
class SubAggregations < ::Data.define(:
|
22
|
+
class SubAggregations < ::Data.define(:schema, :sub_aggregations, :parent_queries, :sub_aggs_by_agg_key, :field_path)
|
23
23
|
def resolve(field:, object:, args:, context:, lookahead:)
|
24
24
|
path_segment = PathSegment.for(field: field, lookahead: lookahead)
|
25
25
|
new_field_path = field_path + [path_segment]
|
@@ -31,7 +31,7 @@ module ElasticGraph
|
|
31
31
|
RelayConnectionBuilder.build_from_buckets(
|
32
32
|
query: sub_agg_query,
|
33
33
|
parent_queries: parent_queries,
|
34
|
-
|
34
|
+
schema: schema,
|
35
35
|
field_path: new_field_path
|
36
36
|
) { extract_buckets(sub_agg_key, args) }
|
37
37
|
end
|
@@ -41,7 +41,7 @@ module ElasticGraph
|
|
41
41
|
def extract_buckets(aggregation_field_path, args)
|
42
42
|
# When the client passes `first: 0`, we omit the sub-aggregation from the query body entirely,
|
43
43
|
# and it wont' be in `sub_aggs_by_agg_key`. Instead, we can just return an empty list of buckets.
|
44
|
-
return [] if args[
|
44
|
+
return [] if args[schema.element_names.first] == 0
|
45
45
|
|
46
46
|
sub_agg_key = Key.encode(parent_queries.map(&:name) + [aggregation_field_path])
|
47
47
|
sub_agg = Support::HashUtil.verbose_fetch(sub_aggs_by_agg_key, sub_agg_key)
|
@@ -20,12 +20,6 @@ module ElasticGraph
|
|
20
20
|
:max_page_size,
|
21
21
|
# Queries that take longer than this configured threshold will have a sanitized version logged.
|
22
22
|
:slow_query_latency_warning_threshold_in_ms,
|
23
|
-
# How to resolve nested relationships:
|
24
|
-
#
|
25
|
-
# - `optimized` (default): uses the new (in ElasticGraph 0.19.2.0) optimized resolver logic.
|
26
|
-
# - `original`: uses the resolver logic from ElasticGraph v0.19.1.1 and before.
|
27
|
-
# - `comparison`: runs both versions of the logic in serial, to compare them for correctness and performance. Results are logged.
|
28
|
-
:nested_relationship_resolver_mode,
|
29
23
|
# Object used to identify the client of a GraphQL query based on the HTTP request.
|
30
24
|
:client_resolver,
|
31
25
|
# Array of modules that will be extended onto the `GraphQL` instance to support extension libraries.
|
@@ -51,17 +45,10 @@ module ElasticGraph
|
|
51
45
|
end
|
52
46
|
end
|
53
47
|
|
54
|
-
nested_relationship_resolver_mode = parsed_yaml["nested_relationship_resolver_mode"]&.to_sym || :optimized
|
55
|
-
unless VALID_NESTED_RELATIONSHIP_RESOLVER_MODES.include?(nested_relationship_resolver_mode)
|
56
|
-
raise Errors::ConfigError, "Invalid value for `nested_relationship_resolver_mode`: #{nested_relationship_resolver_mode}. " \
|
57
|
-
"Valid values: #{VALID_NESTED_RELATIONSHIP_RESOLVER_MODES.join(", ")}."
|
58
|
-
end
|
59
|
-
|
60
48
|
new(
|
61
49
|
default_page_size: parsed_yaml.fetch("default_page_size"),
|
62
50
|
max_page_size: parsed_yaml.fetch("max_page_size"),
|
63
51
|
slow_query_latency_warning_threshold_in_ms: parsed_yaml["slow_query_latency_warning_threshold_in_ms"] || 5000,
|
64
|
-
nested_relationship_resolver_mode: nested_relationship_resolver_mode,
|
65
52
|
client_resolver: load_client_resolver(parsed_yaml),
|
66
53
|
extension_modules: extension_mods,
|
67
54
|
extension_settings: entire_parsed_yaml.except(*ELASTICGRAPH_CONFIG_KEYS)
|
@@ -74,8 +61,6 @@ module ElasticGraph
|
|
74
61
|
# The standard ElasticGraph root config setting keys; anything else is assumed to be extension settings.
|
75
62
|
ELASTICGRAPH_CONFIG_KEYS = %w[graphql indexer logger datastore schema_artifacts]
|
76
63
|
|
77
|
-
VALID_NESTED_RELATIONSHIP_RESOLVER_MODES = [:optimized, :original, :comparison]
|
78
|
-
|
79
64
|
private_class_method def self.load_client_resolver(parsed_yaml)
|
80
65
|
config = parsed_yaml.fetch("client_resolver") do
|
81
66
|
return Client::DefaultResolver.new({})
|
@@ -29,7 +29,8 @@ module ElasticGraph
|
|
29
29
|
class DatastoreQuery < Support::MemoizableData.define(
|
30
30
|
:total_document_count_needed, :aggregations, :logger, :filter_interpreter, :routing_picker,
|
31
31
|
:index_expression_builder, :default_page_size, :search_index_definitions, :max_page_size,
|
32
|
-
:
|
32
|
+
:client_filters, :internal_filters, :sort, :document_pagination,
|
33
|
+
:requested_fields, :request_all_fields, :requested_highlights, :request_all_highlights,
|
33
34
|
:individual_docs_needed, :size_multiplier, :monotonic_clock_deadline, :schema_element_names
|
34
35
|
)
|
35
36
|
# Load these files after the `Query` class has been defined, to avoid
|
@@ -96,22 +97,34 @@ module ElasticGraph
|
|
96
97
|
def merge_with(
|
97
98
|
individual_docs_needed: false,
|
98
99
|
total_document_count_needed: false,
|
99
|
-
|
100
|
+
client_filters: [],
|
101
|
+
internal_filters: [],
|
100
102
|
sort: [],
|
101
103
|
requested_fields: [],
|
102
104
|
request_all_fields: false,
|
105
|
+
requested_highlights: [],
|
106
|
+
request_all_highlights: false,
|
103
107
|
document_pagination: {},
|
104
108
|
size_multiplier: 1,
|
105
109
|
monotonic_clock_deadline: nil,
|
106
110
|
aggregations: {}
|
107
111
|
)
|
112
|
+
individual_docs_needed ||= self.individual_docs_needed ||
|
113
|
+
!requested_fields.empty? || request_all_fields ||
|
114
|
+
!requested_highlights.empty? || request_all_highlights
|
115
|
+
|
116
|
+
total_document_count_needed ||= self.total_document_count_needed || aggregations.values.any?(&:needs_total_doc_count?)
|
117
|
+
|
108
118
|
with(
|
109
|
-
individual_docs_needed:
|
110
|
-
total_document_count_needed:
|
111
|
-
|
119
|
+
individual_docs_needed: individual_docs_needed,
|
120
|
+
total_document_count_needed: total_document_count_needed,
|
121
|
+
client_filters: self.client_filters + client_filters,
|
122
|
+
internal_filters: self.internal_filters + internal_filters,
|
112
123
|
sort: merge_attribute(:sort, sort),
|
113
124
|
requested_fields: self.requested_fields + requested_fields,
|
114
125
|
request_all_fields: self.request_all_fields || request_all_fields,
|
126
|
+
requested_highlights: self.requested_highlights + requested_highlights,
|
127
|
+
request_all_highlights: self.request_all_highlights || request_all_highlights,
|
115
128
|
document_pagination: merge_attribute(:document_pagination, document_pagination),
|
116
129
|
size_multiplier: self.size_multiplier * size_multiplier,
|
117
130
|
monotonic_clock_deadline: [self.monotonic_clock_deadline, monotonic_clock_deadline].compact.min,
|
@@ -130,7 +143,7 @@ module ElasticGraph
|
|
130
143
|
# https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-index.html
|
131
144
|
def search_index_expression
|
132
145
|
@search_index_expression ||= index_expression_builder.determine_search_index_expression(
|
133
|
-
|
146
|
+
all_filters,
|
134
147
|
search_index_definitions,
|
135
148
|
# When we have aggregations, we must require indices to search. When we search no indices, the datastore does not return
|
136
149
|
# the standard aggregations response structure, which causes problems.
|
@@ -167,7 +180,7 @@ module ElasticGraph
|
|
167
180
|
# `[]` means that we are routing to no shards.
|
168
181
|
def shard_routing_values
|
169
182
|
return @shard_routing_values if defined?(@shard_routing_values)
|
170
|
-
routing_values = routing_picker.extract_eligible_routing_values(
|
183
|
+
routing_values = routing_picker.extract_eligible_routing_values(all_filters, route_with_field_paths)
|
171
184
|
|
172
185
|
@shard_routing_values ||=
|
173
186
|
if routing_values&.empty? && !aggregations_datastore_body.empty?
|
@@ -240,6 +253,10 @@ module ElasticGraph
|
|
240
253
|
document_paginator.effective_size
|
241
254
|
end
|
242
255
|
|
256
|
+
def all_filters
|
257
|
+
client_filters + internal_filters
|
258
|
+
end
|
259
|
+
|
243
260
|
private
|
244
261
|
|
245
262
|
def merge_attribute(attribute, other_value)
|
@@ -286,8 +303,7 @@ module ElasticGraph
|
|
286
303
|
def to_datastore_body
|
287
304
|
@to_datastore_body ||= aggregations_datastore_body
|
288
305
|
.merge(document_paginator.to_datastore_body)
|
289
|
-
.merge({query: filter_interpreter.build_query(
|
290
|
-
.merge({_source: source})
|
306
|
+
.merge({highlight: highlight, query: filter_interpreter.build_query(all_filters), _source: source}.compact)
|
291
307
|
end
|
292
308
|
|
293
309
|
def aggregations_datastore_body
|
@@ -314,6 +330,19 @@ module ElasticGraph
|
|
314
330
|
{includes: requested_source_fields.to_a}
|
315
331
|
end
|
316
332
|
|
333
|
+
def highlight
|
334
|
+
return nil if !request_all_highlights && requested_highlights.empty?
|
335
|
+
|
336
|
+
# If there are no filters, there's nothing to highlight.
|
337
|
+
return nil if client_filters.empty?
|
338
|
+
|
339
|
+
field_paths = request_all_highlights ? ["*"] : requested_highlights
|
340
|
+
fields = field_paths.to_h { |field| [field, {}] }
|
341
|
+
highlight_query = filter_interpreter.build_query(client_filters) unless internal_filters.empty?
|
342
|
+
|
343
|
+
{fields:, highlight_query:}.compact
|
344
|
+
end
|
345
|
+
|
317
346
|
# Encapsulates dependencies of `Query`, giving us something we can expose off of `application`
|
318
347
|
# to build queries when desired.
|
319
348
|
class Builder < Support::MemoizableData.define(:runtime_metadata, :logger, :filter_interpreter, :filter_node_interpreter, :default_page_size, :max_page_size)
|
@@ -333,13 +362,16 @@ module ElasticGraph
|
|
333
362
|
|
334
363
|
def new_query(
|
335
364
|
search_index_definitions:,
|
336
|
-
|
365
|
+
client_filters: [],
|
366
|
+
internal_filters: [],
|
337
367
|
sort: [],
|
338
368
|
document_pagination: {},
|
339
369
|
size_multiplier: 1,
|
340
370
|
aggregations: {},
|
341
371
|
requested_fields: [],
|
342
372
|
request_all_fields: false,
|
373
|
+
requested_highlights: [],
|
374
|
+
request_all_highlights: false,
|
343
375
|
individual_docs_needed: false,
|
344
376
|
total_document_count_needed: false,
|
345
377
|
monotonic_clock_deadline: nil
|
@@ -348,21 +380,29 @@ module ElasticGraph
|
|
348
380
|
raise Errors::SearchFailedError, "Query is invalid, since it contains no `search_index_definitions`."
|
349
381
|
end
|
350
382
|
|
383
|
+
individual_docs_needed ||= !requested_fields.empty? || request_all_fields ||
|
384
|
+
!requested_highlights.empty? || request_all_highlights
|
385
|
+
|
386
|
+
total_document_count_needed ||= aggregations.values.any?(&:needs_total_doc_count?)
|
387
|
+
|
351
388
|
DatastoreQuery.new(
|
352
389
|
routing_picker: routing_picker,
|
353
390
|
index_expression_builder: index_expression_builder,
|
354
391
|
logger: logger,
|
355
392
|
schema_element_names: runtime_metadata.schema_element_names,
|
356
393
|
search_index_definitions: search_index_definitions,
|
357
|
-
|
394
|
+
client_filters: client_filters.to_set,
|
395
|
+
internal_filters: internal_filters.to_set,
|
358
396
|
sort: sort,
|
359
397
|
document_pagination: document_pagination,
|
360
398
|
size_multiplier: size_multiplier,
|
361
399
|
aggregations: aggregations,
|
362
400
|
requested_fields: requested_fields.to_set,
|
401
|
+
requested_highlights: requested_highlights.to_set,
|
363
402
|
request_all_fields: request_all_fields,
|
364
|
-
|
365
|
-
|
403
|
+
request_all_highlights: request_all_highlights,
|
404
|
+
individual_docs_needed: individual_docs_needed,
|
405
|
+
total_document_count_needed: total_document_count_needed,
|
366
406
|
monotonic_clock_deadline: monotonic_clock_deadline,
|
367
407
|
filter_interpreter: filter_interpreter,
|
368
408
|
default_page_size: default_page_size,
|
@@ -13,16 +13,33 @@ module ElasticGraph
|
|
13
13
|
# https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-bool-query.html
|
14
14
|
#
|
15
15
|
# It is composed of:
|
16
|
-
# 1) The occurrence type (:
|
16
|
+
# 1) The occurrence type (:filter, :should, or :must_not)
|
17
17
|
# 2) A list of query clauses evaluated by the given occurrence type
|
18
18
|
# 3) An optional flag indicating whether the occurrence should be negated
|
19
|
+
#
|
20
|
+
# Note: since we never do anything with the score, we always prefer `filter` over `must`. If we ever
|
21
|
+
# decide to do something with the score (such as sorting by it), then we'll want to introduce `must`.
|
19
22
|
class BooleanQuery < ::Data.define(:occurrence, :clauses)
|
20
|
-
def self.must(*clauses)
|
21
|
-
new(:must, clauses)
|
22
|
-
end
|
23
|
-
|
24
23
|
def self.filter(*clauses)
|
25
|
-
|
24
|
+
unwrapped_clauses = clauses.map do |clause|
|
25
|
+
__skip__ = case clause
|
26
|
+
in {bool: {minimum_should_match: 1, should: [::Hash => single_should], **nil}, **nil}
|
27
|
+
# This case represents an `anyOf` with a single subfilter (`filter: {anyOf: [X]}`).
|
28
|
+
# Such an expression is semantically equivalent to `filter: X`, and we can unwrap the
|
29
|
+
# should clause in this case since there is only a single one.
|
30
|
+
#
|
31
|
+
# While it adds a bit of complexity to do this unwrapping, we believe it's worth it because
|
32
|
+
# it preserves the datastore's ability to apply caching. As the Elasticsearch documentation[^1]
|
33
|
+
# explains, the results of `filter` clauses can be cached, but not `should` clauses.
|
34
|
+
#
|
35
|
+
# [^1]: https://www.elastic.co/docs/reference/query-languages/query-dsl/query-dsl-bool-query
|
36
|
+
single_should
|
37
|
+
else
|
38
|
+
clause
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
new(:filter, unwrapped_clauses)
|
26
43
|
end
|
27
44
|
|
28
45
|
def self.should(*clauses)
|
@@ -90,33 +90,64 @@ module ElasticGraph
|
|
90
90
|
schema_names.gte => ->(field_name, value) { RangeQuery.new(field_name, :gte, value) },
|
91
91
|
schema_names.lt => ->(field_name, value) { RangeQuery.new(field_name, :lt, value) },
|
92
92
|
schema_names.lte => ->(field_name, value) { RangeQuery.new(field_name, :lte, value) },
|
93
|
-
|
93
|
+
|
94
94
|
schema_names.matches_query => ->(field_name, value) do
|
95
95
|
allowed_edits_per_term = value.fetch(schema_names.allowed_edits_per_term).runtime_metadata.datastore_abbreviation
|
96
96
|
|
97
|
-
BooleanQuery.
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
fuzziness: allowed_edits_per_term.to_s,
|
104
|
-
operator: value[schema_names.require_all_terms] ? "AND" : "OR"
|
105
|
-
}
|
106
|
-
}
|
107
|
-
}
|
108
|
-
)
|
97
|
+
BooleanQuery.filter({match: {field_name => {
|
98
|
+
query: value.fetch(schema_names.query),
|
99
|
+
# This is always a string field, even though the value is often an integer
|
100
|
+
fuzziness: allowed_edits_per_term.to_s,
|
101
|
+
operator: value[schema_names.require_all_terms] ? "AND" : "OR"
|
102
|
+
}}})
|
109
103
|
end,
|
104
|
+
|
110
105
|
schema_names.matches_phrase => ->(field_name, value) {
|
111
|
-
BooleanQuery.
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
106
|
+
BooleanQuery.filter({match_phrase_prefix: {field_name => {
|
107
|
+
query: value.fetch(schema_names.phrase)
|
108
|
+
}}})
|
109
|
+
},
|
110
|
+
|
111
|
+
schema_names.contains => ->(field_name, value) {
|
112
|
+
case_insensitive = value[schema_names.ignore_case] || false
|
113
|
+
anded_substrings = value[schema_names.all_substrings_of] || []
|
114
|
+
ored_substrings = value[schema_names.any_substring_of]
|
115
|
+
|
116
|
+
sub_expressions = anded_substrings.map do |substring|
|
117
|
+
substring_clause(field_name, substring, case_insensitive)
|
118
|
+
end
|
119
|
+
|
120
|
+
if ored_substrings
|
121
|
+
should_sub_expressions = ored_substrings.map do |substring|
|
122
|
+
substring_clause(field_name, substring, case_insensitive)
|
123
|
+
end
|
124
|
+
|
125
|
+
sub_expressions << {bool: {minimum_should_match: 1, should: should_sub_expressions}}
|
126
|
+
end
|
127
|
+
|
128
|
+
if ored_substrings&.empty?
|
129
|
+
BooleanQuery::ALWAYS_FALSE_FILTER
|
130
|
+
elsif sub_expressions.size > 0
|
131
|
+
BooleanQuery.filter(*sub_expressions)
|
132
|
+
end
|
133
|
+
},
|
134
|
+
|
135
|
+
schema_names.starts_with => ->(field_name, value) {
|
136
|
+
case_insensitive = value[schema_names.ignore_case] || false
|
137
|
+
ored_prefixes = value[schema_names.any_prefix_of]
|
138
|
+
|
139
|
+
sub_expressions = (ored_prefixes || []).map do |prefix|
|
140
|
+
{prefix: {field_name => {
|
141
|
+
value: prefix,
|
142
|
+
case_insensitive: case_insensitive
|
143
|
+
}}}
|
144
|
+
end
|
145
|
+
|
146
|
+
if ored_prefixes&.empty?
|
147
|
+
BooleanQuery::ALWAYS_FALSE_FILTER
|
148
|
+
elsif sub_expressions.size > 0
|
149
|
+
BooleanQuery.filter({bool: {minimum_should_match: 1, should: sub_expressions}})
|
150
|
+
end
|
120
151
|
},
|
121
152
|
|
122
153
|
# This filter operator wraps a geo distance query:
|
@@ -163,6 +194,15 @@ module ElasticGraph
|
|
163
194
|
}.freeze
|
164
195
|
end
|
165
196
|
|
197
|
+
def substring_clause(field_name, substring, case_insensitive)
|
198
|
+
{wildcard: {field_name => {
|
199
|
+
# We squeeze("*") to convert "**" to "*", which is not needed for correctness but is a bit simpler.
|
200
|
+
# There's no point in two consecutive "*" wildcards.
|
201
|
+
value: "*#{substring}*".squeeze("*"),
|
202
|
+
case_insensitive: case_insensitive
|
203
|
+
}}}
|
204
|
+
end
|
205
|
+
|
166
206
|
def to_datastore_value(value)
|
167
207
|
case value
|
168
208
|
when ::Array
|
@@ -15,31 +15,30 @@ module ElasticGraph
|
|
15
15
|
class QueryAdapter
|
16
16
|
class Filters < Support::MemoizableData.define(:schema_element_names, :filter_args_translator, :filter_node_interpreter)
|
17
17
|
def call(field:, query:, args:, lookahead:, context:)
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
return query if filters.empty?
|
18
|
+
client_filter = filter_args_translator.translate_filter_args(field: field, args: args)
|
19
|
+
internal_filter = build_automatic_filter(client_filter: client_filter, query: query)
|
20
|
+
return query if client_filter.nil? && internal_filter.nil?
|
22
21
|
|
23
|
-
query.merge_with(
|
22
|
+
query.merge_with(client_filters: [client_filter].compact, internal_filters: [internal_filter].compact)
|
24
23
|
end
|
25
24
|
|
26
25
|
private
|
27
26
|
|
28
|
-
def build_automatic_filter(
|
27
|
+
def build_automatic_filter(client_filter:, query:)
|
29
28
|
# If an incomplete document could be hit by a search with our filters against any of the
|
30
29
|
# index definitions, we must add a filter that will exclude incomplete documents.
|
31
30
|
exclude_incomplete_docs_filter if query
|
32
31
|
.search_index_definitions
|
33
|
-
.any? { |index_def| search_could_hit_incomplete_docs?(index_def,
|
32
|
+
.any? { |index_def| search_could_hit_incomplete_docs?(index_def, client_filter || {}) }
|
34
33
|
end
|
35
34
|
|
36
35
|
def exclude_incomplete_docs_filter
|
37
36
|
{"__sources" => {schema_element_names.equal_to_any_of => [SELF_RELATIONSHIP_NAME]}}
|
38
37
|
end
|
39
38
|
|
40
|
-
# Indicates if a search against the given `index_def` using the given `
|
39
|
+
# Indicates if a search against the given `index_def` using the given `client_filter`
|
41
40
|
# could hit an incomplete document.
|
42
|
-
def search_could_hit_incomplete_docs?(index_def,
|
41
|
+
def search_could_hit_incomplete_docs?(index_def, client_filter)
|
43
42
|
# If the index definition doesn't allow any searches to hit incomplete documents, we
|
44
43
|
# can immediately return `false` without checking the filters.
|
45
44
|
return false unless index_def.searches_could_hit_incomplete_docs?
|
@@ -53,7 +52,7 @@ module ElasticGraph
|
|
53
52
|
#
|
54
53
|
# Here we determine what field paths we need to check (e.g. only those field paths that are against
|
55
54
|
# self-sourced fields).
|
56
|
-
paths_to_check = determine_paths_to_check(
|
55
|
+
paths_to_check = determine_paths_to_check(client_filter, index_def.fields_by_path)
|
57
56
|
|
58
57
|
# If we have no paths to check, then our filters don't exclude incomplete documents and we must return `true`.
|
59
58
|
return true if paths_to_check.empty?
|
@@ -61,7 +60,7 @@ module ElasticGraph
|
|
61
60
|
# Finally, we look over each path. If all our filters allow the search to match documents that have `nil`
|
62
61
|
# at that path, then the search can hit incomplete documents. But if even one path excludes documents
|
63
62
|
# that have a `null` value for the field, we can safely return `false` for a more efficient query.
|
64
|
-
paths_to_check.all? { |path| can_match_nil_values_at?(path,
|
63
|
+
paths_to_check.all? { |path| can_match_nil_values_at?(path, client_filter) }
|
65
64
|
end
|
66
65
|
|
67
66
|
# Figures out which field paths we need to check to see if a filter on it could match an incomplete document.
|