elasticgraph-graphql 0.19.3.0 → 1.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. checksums.yaml +4 -4
  2. data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +6 -38
  3. data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +2 -2
  4. data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +8 -7
  5. data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +3 -3
  6. data/lib/elastic_graph/graphql/config.rb +0 -15
  7. data/lib/elastic_graph/graphql/datastore_query.rb +53 -13
  8. data/lib/elastic_graph/graphql/datastore_response/document.rb +4 -0
  9. data/lib/elastic_graph/graphql/filtering/boolean_query.rb +23 -6
  10. data/lib/elastic_graph/graphql/filtering/filter_node_interpreter.rb +62 -22
  11. data/lib/elastic_graph/graphql/query_adapter/filters.rb +10 -11
  12. data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +21 -21
  13. data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +1 -2
  14. data/lib/elastic_graph/graphql/resolvers/graphql_adapter_builder.rb +5 -9
  15. data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +1 -3
  16. data/lib/elastic_graph/graphql/resolvers/nested_relationships_source.rb +7 -74
  17. data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +17 -16
  18. data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +6 -4
  19. data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +66 -2
  20. data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +3 -3
  21. data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +3 -3
  22. data/lib/elastic_graph/graphql/schema/field.rb +1 -1
  23. data/lib/elastic_graph/graphql/schema/type.rb +4 -0
  24. data/lib/elastic_graph/graphql.rb +1 -7
  25. metadata +25 -25
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 22324b1b8acd027bd6836c86194e398e8684cdaf85df7e98cd213482b451146f
4
- data.tar.gz: 82b689cf8dda30d2988dc0a775123fb549929ca0b0fb3c3ff042da9b5dbf3944
3
+ metadata.gz: 24f8c6670bb21bcf8936b9bd55cc2e3d6964e78ef9e3742881e720dc0e212404
4
+ data.tar.gz: 8b1de56fd5b6732905fb07fc4d2ece0480784104e6b860d42b0f62027463cef2
5
5
  SHA512:
6
- metadata.gz: fecaef223e61e420e4111391e236e6f8e7f2aceb26039fa8bd070704c5f2dec52321b3877c85a6aefa24ad87cca286780dbf8dc033615410885cff7b229a18ef
7
- data.tar.gz: f5bdbe3017ad833d6c5cc7e847812a3af051179c2becdc11f39755f98c5f0eadca9dc0d5ce86ad6a711c3fecfa50c2b745be70c2e914350ec6b8ed82a8ee2284
6
+ metadata.gz: 399538281bc06bcea0a5ede7fc1c63990658def8aca9e9d2d22994e59d57d157fb0ef6b7d6c6b0270b528b278e8b77b3e20303bb9cbec2a5894bfccdc209e9fa
7
+ data.tar.gz: '072188ff912c10f81da236a0f5c9faab646976ad0bd33e57a0008bb2fc82add9e38896aa4e55331c308520145c4e79e6a5eba942e8f8b903862d05b29b2c82b6'
@@ -209,29 +209,9 @@ module ElasticGraph
209
209
  # New date/time grouping API (DateGroupedBy, DateTimeGroupedBy)
210
210
  if field.type.elasticgraph_category == :date_grouped_by_object
211
211
  date_time_groupings_from(field_path: field_path, node: node)
212
-
213
212
  elsif !field.type.object?
214
- case field.type.name
215
- # Legacy date grouping API
216
- when "Date"
217
- legacy_date_histogram_groupings_from(
218
- field_path: field_path,
219
- node: node,
220
- get_time_zone: ->(args) {},
221
- get_offset: ->(args) { args[element_names.offset_days]&.then { |days| "#{days}d" } }
222
- )
223
- # Legacy datetime grouping API
224
- when "DateTime"
225
- legacy_date_histogram_groupings_from(
226
- field_path: field_path,
227
- node: node,
228
- get_time_zone: ->(args) { args.fetch(element_names.time_zone) },
229
- get_offset: ->(args) { datetime_offset_from(node, args) }
230
- )
231
213
  # Non-date/time grouping
232
- else
233
- [FieldTermGrouping.new(field_path: field_path)]
234
- end
214
+ [FieldTermGrouping.new(field_path: field_path)]
235
215
  end
236
216
  end
237
217
  end
@@ -264,7 +244,7 @@ module ElasticGraph
264
244
  field_path: child_field_path,
265
245
  script_id: runtime_metadata.static_script_ids_by_scoped_name.fetch("field/as_time_of_day"),
266
246
  params: {
267
- "interval" => interval_from(child_node, schema_args, interval_unit_key: element_names.truncation_unit),
247
+ "interval" => interval_from(child_node, schema_args),
268
248
  "offset_ms" => datetime_offset_as_ms_from(child_node, schema_args),
269
249
  "time_zone" => time_zone
270
250
  }
@@ -272,7 +252,7 @@ module ElasticGraph
272
252
  else
273
253
  DateHistogramGrouping.new(
274
254
  field_path: child_field_path,
275
- interval: interval_from(child_node, schema_args, interval_unit_key: element_names.truncation_unit),
255
+ interval: interval_from(child_node, schema_args),
276
256
  offset: datetime_offset_from(child_node, schema_args),
277
257
  time_zone: time_zone
278
258
  )
@@ -280,22 +260,10 @@ module ElasticGraph
280
260
  end
281
261
  end
282
262
 
283
- def legacy_date_histogram_groupings_from(field_path:, node:, get_time_zone:, get_offset:)
284
- schema_args = Schema::Arguments.to_schema_form(node.arguments, node.field)
285
-
286
- [DateHistogramGrouping.new(
287
- field_path: field_path,
288
- interval: interval_from(node, schema_args, interval_unit_key: element_names.granularity),
289
- time_zone: get_time_zone.call(schema_args),
290
- offset: get_offset.call(schema_args)
291
- )]
292
- end
293
-
294
263
  # Figure out the Date histogram grouping interval for the given node based on the `grouped_by` argument.
295
- # Until `legacy_grouping_schema` is removed, we need to check both `granularity` and `truncation_unit`.
296
- def interval_from(node, schema_args, interval_unit_key:)
297
- enum_type_name = node.field.arguments.fetch(interval_unit_key).type.unwrap.graphql_name
298
- enum_value_name = schema_args.fetch(interval_unit_key)
264
+ def interval_from(node, schema_args)
265
+ enum_type_name = node.field.arguments.fetch(element_names.truncation_unit).type.unwrap.graphql_name
266
+ enum_value_name = schema_args.fetch(element_names.truncation_unit)
299
267
  enum_value = schema.type_named(enum_type_name).enum_value_named(enum_value_name)
300
268
 
301
269
  _ = enum_value.runtime_metadata.datastore_value
@@ -29,7 +29,7 @@ module ElasticGraph
29
29
 
30
30
  def sub_aggregations
31
31
  @sub_aggregations ||= SubAggregations.new(
32
- schema_element_names,
32
+ schema,
33
33
  query.sub_aggregations,
34
34
  parent_queries + [query],
35
35
  bucket,
@@ -42,7 +42,7 @@ module ElasticGraph
42
42
  end
43
43
 
44
44
  def count_detail
45
- @count_detail ||= CountDetail.new(schema_element_names, bucket)
45
+ @count_detail ||= CountDetail.new(schema, bucket)
46
46
  end
47
47
 
48
48
  def cursor
@@ -15,18 +15,19 @@ module ElasticGraph
15
15
  module Aggregation
16
16
  module Resolvers
17
17
  module RelayConnectionBuilder
18
- def self.build_from_search_response(query:, search_response:, schema_element_names:)
19
- build_from_buckets(query: query, parent_queries: [], schema_element_names: schema_element_names) do
18
+ def self.build_from_search_response(query:, search_response:, schema:)
19
+ build_from_buckets(query: query, parent_queries: [], schema: schema) do
20
20
  extract_buckets_from(search_response, for_query: query)
21
21
  end
22
22
  end
23
23
 
24
- def self.build_from_buckets(query:, parent_queries:, schema_element_names:, field_path: [], &build_buckets)
24
+ def self.build_from_buckets(query:, parent_queries:, schema:, field_path: [], &build_buckets)
25
25
  GraphQL::Resolvers::RelayConnection::GenericAdapter.new(
26
- schema_element_names: schema_element_names,
27
- raw_nodes: raw_nodes_for(query, parent_queries, schema_element_names, field_path, &build_buckets),
26
+ schema: schema,
27
+ raw_nodes: raw_nodes_for(query, parent_queries, schema, field_path, &build_buckets),
28
28
  paginator: query.paginator,
29
29
  get_total_edge_count: -> {},
30
+ edge_class: (_ = GraphQL::Resolvers::RelayConnection::GenericAdapter::Edge),
30
31
  to_sort_value: ->(node, decoded_cursor) do
31
32
  query.groupings.map do |grouping|
32
33
  DatastoreQuery::Paginator::SortValue.new(
@@ -39,13 +40,13 @@ module ElasticGraph
39
40
  )
40
41
  end
41
42
 
42
- private_class_method def self.raw_nodes_for(query, parent_queries, schema_element_names, field_path)
43
+ private_class_method def self.raw_nodes_for(query, parent_queries, schema, field_path)
43
44
  # The `DecodedCursor::SINGLETON` is a special case, so handle it here.
44
45
  return [] if query.paginator.paginated_from_singleton_cursor?
45
46
 
46
47
  yield.map do |bucket|
47
48
  Node.new(
48
- schema_element_names: schema_element_names,
49
+ schema: schema,
49
50
  query: query,
50
51
  parent_queries: parent_queries,
51
52
  bucket: bucket,
@@ -19,7 +19,7 @@ module ElasticGraph
19
19
  class GraphQL
20
20
  module Aggregation
21
21
  module Resolvers
22
- class SubAggregations < ::Data.define(:schema_element_names, :sub_aggregations, :parent_queries, :sub_aggs_by_agg_key, :field_path)
22
+ class SubAggregations < ::Data.define(:schema, :sub_aggregations, :parent_queries, :sub_aggs_by_agg_key, :field_path)
23
23
  def resolve(field:, object:, args:, context:, lookahead:)
24
24
  path_segment = PathSegment.for(field: field, lookahead: lookahead)
25
25
  new_field_path = field_path + [path_segment]
@@ -31,7 +31,7 @@ module ElasticGraph
31
31
  RelayConnectionBuilder.build_from_buckets(
32
32
  query: sub_agg_query,
33
33
  parent_queries: parent_queries,
34
- schema_element_names: schema_element_names,
34
+ schema: schema,
35
35
  field_path: new_field_path
36
36
  ) { extract_buckets(sub_agg_key, args) }
37
37
  end
@@ -41,7 +41,7 @@ module ElasticGraph
41
41
  def extract_buckets(aggregation_field_path, args)
42
42
  # When the client passes `first: 0`, we omit the sub-aggregation from the query body entirely,
43
43
  # and it wont' be in `sub_aggs_by_agg_key`. Instead, we can just return an empty list of buckets.
44
- return [] if args[schema_element_names.first] == 0
44
+ return [] if args[schema.element_names.first] == 0
45
45
 
46
46
  sub_agg_key = Key.encode(parent_queries.map(&:name) + [aggregation_field_path])
47
47
  sub_agg = Support::HashUtil.verbose_fetch(sub_aggs_by_agg_key, sub_agg_key)
@@ -20,12 +20,6 @@ module ElasticGraph
20
20
  :max_page_size,
21
21
  # Queries that take longer than this configured threshold will have a sanitized version logged.
22
22
  :slow_query_latency_warning_threshold_in_ms,
23
- # How to resolve nested relationships:
24
- #
25
- # - `optimized` (default): uses the new (in ElasticGraph 0.19.2.0) optimized resolver logic.
26
- # - `original`: uses the resolver logic from ElasticGraph v0.19.1.1 and before.
27
- # - `comparison`: runs both versions of the logic in serial, to compare them for correctness and performance. Results are logged.
28
- :nested_relationship_resolver_mode,
29
23
  # Object used to identify the client of a GraphQL query based on the HTTP request.
30
24
  :client_resolver,
31
25
  # Array of modules that will be extended onto the `GraphQL` instance to support extension libraries.
@@ -51,17 +45,10 @@ module ElasticGraph
51
45
  end
52
46
  end
53
47
 
54
- nested_relationship_resolver_mode = parsed_yaml["nested_relationship_resolver_mode"]&.to_sym || :optimized
55
- unless VALID_NESTED_RELATIONSHIP_RESOLVER_MODES.include?(nested_relationship_resolver_mode)
56
- raise Errors::ConfigError, "Invalid value for `nested_relationship_resolver_mode`: #{nested_relationship_resolver_mode}. " \
57
- "Valid values: #{VALID_NESTED_RELATIONSHIP_RESOLVER_MODES.join(", ")}."
58
- end
59
-
60
48
  new(
61
49
  default_page_size: parsed_yaml.fetch("default_page_size"),
62
50
  max_page_size: parsed_yaml.fetch("max_page_size"),
63
51
  slow_query_latency_warning_threshold_in_ms: parsed_yaml["slow_query_latency_warning_threshold_in_ms"] || 5000,
64
- nested_relationship_resolver_mode: nested_relationship_resolver_mode,
65
52
  client_resolver: load_client_resolver(parsed_yaml),
66
53
  extension_modules: extension_mods,
67
54
  extension_settings: entire_parsed_yaml.except(*ELASTICGRAPH_CONFIG_KEYS)
@@ -74,8 +61,6 @@ module ElasticGraph
74
61
  # The standard ElasticGraph root config setting keys; anything else is assumed to be extension settings.
75
62
  ELASTICGRAPH_CONFIG_KEYS = %w[graphql indexer logger datastore schema_artifacts]
76
63
 
77
- VALID_NESTED_RELATIONSHIP_RESOLVER_MODES = [:optimized, :original, :comparison]
78
-
79
64
  private_class_method def self.load_client_resolver(parsed_yaml)
80
65
  config = parsed_yaml.fetch("client_resolver") do
81
66
  return Client::DefaultResolver.new({})
@@ -29,7 +29,8 @@ module ElasticGraph
29
29
  class DatastoreQuery < Support::MemoizableData.define(
30
30
  :total_document_count_needed, :aggregations, :logger, :filter_interpreter, :routing_picker,
31
31
  :index_expression_builder, :default_page_size, :search_index_definitions, :max_page_size,
32
- :filters, :sort, :document_pagination, :requested_fields, :request_all_fields,
32
+ :client_filters, :internal_filters, :sort, :document_pagination,
33
+ :requested_fields, :request_all_fields, :requested_highlights, :request_all_highlights,
33
34
  :individual_docs_needed, :size_multiplier, :monotonic_clock_deadline, :schema_element_names
34
35
  )
35
36
  # Load these files after the `Query` class has been defined, to avoid
@@ -96,22 +97,34 @@ module ElasticGraph
96
97
  def merge_with(
97
98
  individual_docs_needed: false,
98
99
  total_document_count_needed: false,
99
- filters: [],
100
+ client_filters: [],
101
+ internal_filters: [],
100
102
  sort: [],
101
103
  requested_fields: [],
102
104
  request_all_fields: false,
105
+ requested_highlights: [],
106
+ request_all_highlights: false,
103
107
  document_pagination: {},
104
108
  size_multiplier: 1,
105
109
  monotonic_clock_deadline: nil,
106
110
  aggregations: {}
107
111
  )
112
+ individual_docs_needed ||= self.individual_docs_needed ||
113
+ !requested_fields.empty? || request_all_fields ||
114
+ !requested_highlights.empty? || request_all_highlights
115
+
116
+ total_document_count_needed ||= self.total_document_count_needed || aggregations.values.any?(&:needs_total_doc_count?)
117
+
108
118
  with(
109
- individual_docs_needed: self.individual_docs_needed || individual_docs_needed || !requested_fields.empty? || request_all_fields,
110
- total_document_count_needed: self.total_document_count_needed || total_document_count_needed || aggregations.values.any?(&:needs_total_doc_count?),
111
- filters: self.filters + filters,
119
+ individual_docs_needed: individual_docs_needed,
120
+ total_document_count_needed: total_document_count_needed,
121
+ client_filters: self.client_filters + client_filters,
122
+ internal_filters: self.internal_filters + internal_filters,
112
123
  sort: merge_attribute(:sort, sort),
113
124
  requested_fields: self.requested_fields + requested_fields,
114
125
  request_all_fields: self.request_all_fields || request_all_fields,
126
+ requested_highlights: self.requested_highlights + requested_highlights,
127
+ request_all_highlights: self.request_all_highlights || request_all_highlights,
115
128
  document_pagination: merge_attribute(:document_pagination, document_pagination),
116
129
  size_multiplier: self.size_multiplier * size_multiplier,
117
130
  monotonic_clock_deadline: [self.monotonic_clock_deadline, monotonic_clock_deadline].compact.min,
@@ -130,7 +143,7 @@ module ElasticGraph
130
143
  # https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-index.html
131
144
  def search_index_expression
132
145
  @search_index_expression ||= index_expression_builder.determine_search_index_expression(
133
- filters,
146
+ all_filters,
134
147
  search_index_definitions,
135
148
  # When we have aggregations, we must require indices to search. When we search no indices, the datastore does not return
136
149
  # the standard aggregations response structure, which causes problems.
@@ -167,7 +180,7 @@ module ElasticGraph
167
180
  # `[]` means that we are routing to no shards.
168
181
  def shard_routing_values
169
182
  return @shard_routing_values if defined?(@shard_routing_values)
170
- routing_values = routing_picker.extract_eligible_routing_values(filters, route_with_field_paths)
183
+ routing_values = routing_picker.extract_eligible_routing_values(all_filters, route_with_field_paths)
171
184
 
172
185
  @shard_routing_values ||=
173
186
  if routing_values&.empty? && !aggregations_datastore_body.empty?
@@ -240,6 +253,10 @@ module ElasticGraph
240
253
  document_paginator.effective_size
241
254
  end
242
255
 
256
+ def all_filters
257
+ client_filters + internal_filters
258
+ end
259
+
243
260
  private
244
261
 
245
262
  def merge_attribute(attribute, other_value)
@@ -286,8 +303,7 @@ module ElasticGraph
286
303
  def to_datastore_body
287
304
  @to_datastore_body ||= aggregations_datastore_body
288
305
  .merge(document_paginator.to_datastore_body)
289
- .merge({query: filter_interpreter.build_query(filters)}.compact)
290
- .merge({_source: source})
306
+ .merge({highlight: highlight, query: filter_interpreter.build_query(all_filters), _source: source}.compact)
291
307
  end
292
308
 
293
309
  def aggregations_datastore_body
@@ -314,6 +330,19 @@ module ElasticGraph
314
330
  {includes: requested_source_fields.to_a}
315
331
  end
316
332
 
333
+ def highlight
334
+ return nil if !request_all_highlights && requested_highlights.empty?
335
+
336
+ # If there are no filters, there's nothing to highlight.
337
+ return nil if client_filters.empty?
338
+
339
+ field_paths = request_all_highlights ? ["*"] : requested_highlights
340
+ fields = field_paths.to_h { |field| [field, {}] }
341
+ highlight_query = filter_interpreter.build_query(client_filters) unless internal_filters.empty?
342
+
343
+ {fields:, highlight_query:}.compact
344
+ end
345
+
317
346
  # Encapsulates dependencies of `Query`, giving us something we can expose off of `application`
318
347
  # to build queries when desired.
319
348
  class Builder < Support::MemoizableData.define(:runtime_metadata, :logger, :filter_interpreter, :filter_node_interpreter, :default_page_size, :max_page_size)
@@ -333,13 +362,16 @@ module ElasticGraph
333
362
 
334
363
  def new_query(
335
364
  search_index_definitions:,
336
- filters: [],
365
+ client_filters: [],
366
+ internal_filters: [],
337
367
  sort: [],
338
368
  document_pagination: {},
339
369
  size_multiplier: 1,
340
370
  aggregations: {},
341
371
  requested_fields: [],
342
372
  request_all_fields: false,
373
+ requested_highlights: [],
374
+ request_all_highlights: false,
343
375
  individual_docs_needed: false,
344
376
  total_document_count_needed: false,
345
377
  monotonic_clock_deadline: nil
@@ -348,21 +380,29 @@ module ElasticGraph
348
380
  raise Errors::SearchFailedError, "Query is invalid, since it contains no `search_index_definitions`."
349
381
  end
350
382
 
383
+ individual_docs_needed ||= !requested_fields.empty? || request_all_fields ||
384
+ !requested_highlights.empty? || request_all_highlights
385
+
386
+ total_document_count_needed ||= aggregations.values.any?(&:needs_total_doc_count?)
387
+
351
388
  DatastoreQuery.new(
352
389
  routing_picker: routing_picker,
353
390
  index_expression_builder: index_expression_builder,
354
391
  logger: logger,
355
392
  schema_element_names: runtime_metadata.schema_element_names,
356
393
  search_index_definitions: search_index_definitions,
357
- filters: filters.to_set,
394
+ client_filters: client_filters.to_set,
395
+ internal_filters: internal_filters.to_set,
358
396
  sort: sort,
359
397
  document_pagination: document_pagination,
360
398
  size_multiplier: size_multiplier,
361
399
  aggregations: aggregations,
362
400
  requested_fields: requested_fields.to_set,
401
+ requested_highlights: requested_highlights.to_set,
363
402
  request_all_fields: request_all_fields,
364
- individual_docs_needed: individual_docs_needed || !requested_fields.empty? || request_all_fields,
365
- total_document_count_needed: total_document_count_needed || aggregations.values.any?(&:needs_total_doc_count?),
403
+ request_all_highlights: request_all_highlights,
404
+ individual_docs_needed: individual_docs_needed,
405
+ total_document_count_needed: total_document_count_needed,
366
406
  monotonic_clock_deadline: monotonic_clock_deadline,
367
407
  filter_interpreter: filter_interpreter,
368
408
  default_page_size: default_page_size,
@@ -58,6 +58,10 @@ module ElasticGraph
58
58
  payload["version"]
59
59
  end
60
60
 
61
+ def highlights
62
+ raw_data["highlight"] || {}
63
+ end
64
+
61
65
  def cursor
62
66
  @cursor ||= decoded_cursor_factory.build(raw_data.fetch("sort"))
63
67
  end
@@ -13,16 +13,33 @@ module ElasticGraph
13
13
  # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-bool-query.html
14
14
  #
15
15
  # It is composed of:
16
- # 1) The occurrence type (:must, :filter, :should, or :must_not)
16
+ # 1) The occurrence type (:filter, :should, or :must_not)
17
17
  # 2) A list of query clauses evaluated by the given occurrence type
18
18
  # 3) An optional flag indicating whether the occurrence should be negated
19
+ #
20
+ # Note: since we never do anything with the score, we always prefer `filter` over `must`. If we ever
21
+ # decide to do something with the score (such as sorting by it), then we'll want to introduce `must`.
19
22
  class BooleanQuery < ::Data.define(:occurrence, :clauses)
20
- def self.must(*clauses)
21
- new(:must, clauses)
22
- end
23
-
24
23
  def self.filter(*clauses)
25
- new(:filter, clauses)
24
+ unwrapped_clauses = clauses.map do |clause|
25
+ __skip__ = case clause
26
+ in {bool: {minimum_should_match: 1, should: [::Hash => single_should], **nil}, **nil}
27
+ # This case represents an `anyOf` with a single subfilter (`filter: {anyOf: [X]}`).
28
+ # Such an expression is semantically equivalent to `filter: X`, and we can unwrap the
29
+ # should clause in this case since there is only a single one.
30
+ #
31
+ # While it adds a bit of complexity to do this unwrapping, we believe it's worth it because
32
+ # it preserves the datastore's ability to apply caching. As the Elasticsearch documentation[^1]
33
+ # explains, the results of `filter` clauses can be cached, but not `should` clauses.
34
+ #
35
+ # [^1]: https://www.elastic.co/docs/reference/query-languages/query-dsl/query-dsl-bool-query
36
+ single_should
37
+ else
38
+ clause
39
+ end
40
+ end
41
+
42
+ new(:filter, unwrapped_clauses)
26
43
  end
27
44
 
28
45
  def self.should(*clauses)
@@ -90,33 +90,64 @@ module ElasticGraph
90
90
  schema_names.gte => ->(field_name, value) { RangeQuery.new(field_name, :gte, value) },
91
91
  schema_names.lt => ->(field_name, value) { RangeQuery.new(field_name, :lt, value) },
92
92
  schema_names.lte => ->(field_name, value) { RangeQuery.new(field_name, :lte, value) },
93
- schema_names.matches => ->(field_name, value) { BooleanQuery.must({match: {field_name => value}}) },
93
+
94
94
  schema_names.matches_query => ->(field_name, value) do
95
95
  allowed_edits_per_term = value.fetch(schema_names.allowed_edits_per_term).runtime_metadata.datastore_abbreviation
96
96
 
97
- BooleanQuery.must(
98
- {
99
- match: {
100
- field_name => {
101
- query: value.fetch(schema_names.query),
102
- # This is always a string field, even though the value is often an integer
103
- fuzziness: allowed_edits_per_term.to_s,
104
- operator: value[schema_names.require_all_terms] ? "AND" : "OR"
105
- }
106
- }
107
- }
108
- )
97
+ BooleanQuery.filter({match: {field_name => {
98
+ query: value.fetch(schema_names.query),
99
+ # This is always a string field, even though the value is often an integer
100
+ fuzziness: allowed_edits_per_term.to_s,
101
+ operator: value[schema_names.require_all_terms] ? "AND" : "OR"
102
+ }}})
109
103
  end,
104
+
110
105
  schema_names.matches_phrase => ->(field_name, value) {
111
- BooleanQuery.must(
112
- {
113
- match_phrase_prefix: {
114
- field_name => {
115
- query: value.fetch(schema_names.phrase)
116
- }
117
- }
118
- }
119
- )
106
+ BooleanQuery.filter({match_phrase_prefix: {field_name => {
107
+ query: value.fetch(schema_names.phrase)
108
+ }}})
109
+ },
110
+
111
+ schema_names.contains => ->(field_name, value) {
112
+ case_insensitive = value[schema_names.ignore_case] || false
113
+ anded_substrings = value[schema_names.all_substrings_of] || []
114
+ ored_substrings = value[schema_names.any_substring_of]
115
+
116
+ sub_expressions = anded_substrings.map do |substring|
117
+ substring_clause(field_name, substring, case_insensitive)
118
+ end
119
+
120
+ if ored_substrings
121
+ should_sub_expressions = ored_substrings.map do |substring|
122
+ substring_clause(field_name, substring, case_insensitive)
123
+ end
124
+
125
+ sub_expressions << {bool: {minimum_should_match: 1, should: should_sub_expressions}}
126
+ end
127
+
128
+ if ored_substrings&.empty?
129
+ BooleanQuery::ALWAYS_FALSE_FILTER
130
+ elsif sub_expressions.size > 0
131
+ BooleanQuery.filter(*sub_expressions)
132
+ end
133
+ },
134
+
135
+ schema_names.starts_with => ->(field_name, value) {
136
+ case_insensitive = value[schema_names.ignore_case] || false
137
+ ored_prefixes = value[schema_names.any_prefix_of]
138
+
139
+ sub_expressions = (ored_prefixes || []).map do |prefix|
140
+ {prefix: {field_name => {
141
+ value: prefix,
142
+ case_insensitive: case_insensitive
143
+ }}}
144
+ end
145
+
146
+ if ored_prefixes&.empty?
147
+ BooleanQuery::ALWAYS_FALSE_FILTER
148
+ elsif sub_expressions.size > 0
149
+ BooleanQuery.filter({bool: {minimum_should_match: 1, should: sub_expressions}})
150
+ end
120
151
  },
121
152
 
122
153
  # This filter operator wraps a geo distance query:
@@ -163,6 +194,15 @@ module ElasticGraph
163
194
  }.freeze
164
195
  end
165
196
 
197
+ def substring_clause(field_name, substring, case_insensitive)
198
+ {wildcard: {field_name => {
199
+ # We squeeze("*") to convert "**" to "*", which is not needed for correctness but is a bit simpler.
200
+ # There's no point in two consecutive "*" wildcards.
201
+ value: "*#{substring}*".squeeze("*"),
202
+ case_insensitive: case_insensitive
203
+ }}}
204
+ end
205
+
166
206
  def to_datastore_value(value)
167
207
  case value
168
208
  when ::Array
@@ -15,31 +15,30 @@ module ElasticGraph
15
15
  class QueryAdapter
16
16
  class Filters < Support::MemoizableData.define(:schema_element_names, :filter_args_translator, :filter_node_interpreter)
17
17
  def call(field:, query:, args:, lookahead:, context:)
18
- filter_from_args = filter_args_translator.translate_filter_args(field: field, args: args)
19
- automatic_filter = build_automatic_filter(filter_from_args: filter_from_args, query: query)
20
- filters = [filter_from_args, automatic_filter].compact
21
- return query if filters.empty?
18
+ client_filter = filter_args_translator.translate_filter_args(field: field, args: args)
19
+ internal_filter = build_automatic_filter(client_filter: client_filter, query: query)
20
+ return query if client_filter.nil? && internal_filter.nil?
22
21
 
23
- query.merge_with(filters: filters)
22
+ query.merge_with(client_filters: [client_filter].compact, internal_filters: [internal_filter].compact)
24
23
  end
25
24
 
26
25
  private
27
26
 
28
- def build_automatic_filter(filter_from_args:, query:)
27
+ def build_automatic_filter(client_filter:, query:)
29
28
  # If an incomplete document could be hit by a search with our filters against any of the
30
29
  # index definitions, we must add a filter that will exclude incomplete documents.
31
30
  exclude_incomplete_docs_filter if query
32
31
  .search_index_definitions
33
- .any? { |index_def| search_could_hit_incomplete_docs?(index_def, filter_from_args || {}) }
32
+ .any? { |index_def| search_could_hit_incomplete_docs?(index_def, client_filter || {}) }
34
33
  end
35
34
 
36
35
  def exclude_incomplete_docs_filter
37
36
  {"__sources" => {schema_element_names.equal_to_any_of => [SELF_RELATIONSHIP_NAME]}}
38
37
  end
39
38
 
40
- # Indicates if a search against the given `index_def` using the given `filter_from_args`
39
+ # Indicates if a search against the given `index_def` using the given `client_filter`
41
40
  # could hit an incomplete document.
42
- def search_could_hit_incomplete_docs?(index_def, filter_from_args)
41
+ def search_could_hit_incomplete_docs?(index_def, client_filter)
43
42
  # If the index definition doesn't allow any searches to hit incomplete documents, we
44
43
  # can immediately return `false` without checking the filters.
45
44
  return false unless index_def.searches_could_hit_incomplete_docs?
@@ -53,7 +52,7 @@ module ElasticGraph
53
52
  #
54
53
  # Here we determine what field paths we need to check (e.g. only those field paths that are against
55
54
  # self-sourced fields).
56
- paths_to_check = determine_paths_to_check(filter_from_args, index_def.fields_by_path)
55
+ paths_to_check = determine_paths_to_check(client_filter, index_def.fields_by_path)
57
56
 
58
57
  # If we have no paths to check, then our filters don't exclude incomplete documents and we must return `true`.
59
58
  return true if paths_to_check.empty?
@@ -61,7 +60,7 @@ module ElasticGraph
61
60
  # Finally, we look over each path. If all our filters allow the search to match documents that have `nil`
62
61
  # at that path, then the search can hit incomplete documents. But if even one path excludes documents
63
62
  # that have a `null` value for the field, we can safely return `false` for a more efficient query.
64
- paths_to_check.all? { |path| can_match_nil_values_at?(path, filter_from_args) }
63
+ paths_to_check.all? { |path| can_match_nil_values_at?(path, client_filter) }
65
64
  end
66
65
 
67
66
  # Figures out which field paths we need to check to see if a filter on it could match an incomplete document.