elasticgraph-graphql 0.18.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +3 -0
- data/elasticgraph-graphql.gemspec +23 -0
- data/lib/elastic_graph/graphql/aggregation/composite_grouping_adapter.rb +79 -0
- data/lib/elastic_graph/graphql/aggregation/computation.rb +39 -0
- data/lib/elastic_graph/graphql/aggregation/date_histogram_grouping.rb +83 -0
- data/lib/elastic_graph/graphql/aggregation/field_path_encoder.rb +47 -0
- data/lib/elastic_graph/graphql/aggregation/field_term_grouping.rb +26 -0
- data/lib/elastic_graph/graphql/aggregation/key.rb +87 -0
- data/lib/elastic_graph/graphql/aggregation/nested_sub_aggregation.rb +37 -0
- data/lib/elastic_graph/graphql/aggregation/non_composite_grouping_adapter.rb +129 -0
- data/lib/elastic_graph/graphql/aggregation/path_segment.rb +31 -0
- data/lib/elastic_graph/graphql/aggregation/query.rb +172 -0
- data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +345 -0
- data/lib/elastic_graph/graphql/aggregation/query_optimizer.rb +187 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/aggregated_values.rb +41 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/count_detail.rb +44 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/grouped_by.rb +30 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +64 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +83 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +82 -0
- data/lib/elastic_graph/graphql/aggregation/script_term_grouping.rb +32 -0
- data/lib/elastic_graph/graphql/aggregation/term_grouping.rb +118 -0
- data/lib/elastic_graph/graphql/client.rb +43 -0
- data/lib/elastic_graph/graphql/config.rb +81 -0
- data/lib/elastic_graph/graphql/datastore_query/document_paginator.rb +100 -0
- data/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +142 -0
- data/lib/elastic_graph/graphql/datastore_query/paginator.rb +199 -0
- data/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +239 -0
- data/lib/elastic_graph/graphql/datastore_query.rb +372 -0
- data/lib/elastic_graph/graphql/datastore_response/document.rb +78 -0
- data/lib/elastic_graph/graphql/datastore_response/search_response.rb +79 -0
- data/lib/elastic_graph/graphql/datastore_search_router.rb +151 -0
- data/lib/elastic_graph/graphql/decoded_cursor.rb +120 -0
- data/lib/elastic_graph/graphql/filtering/boolean_query.rb +45 -0
- data/lib/elastic_graph/graphql/filtering/field_path.rb +81 -0
- data/lib/elastic_graph/graphql/filtering/filter_args_translator.rb +58 -0
- data/lib/elastic_graph/graphql/filtering/filter_interpreter.rb +526 -0
- data/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +148 -0
- data/lib/elastic_graph/graphql/filtering/range_query.rb +56 -0
- data/lib/elastic_graph/graphql/http_endpoint.rb +229 -0
- data/lib/elastic_graph/graphql/monkey_patches/schema_field.rb +56 -0
- data/lib/elastic_graph/graphql/monkey_patches/schema_object.rb +48 -0
- data/lib/elastic_graph/graphql/query_adapter/filters.rb +161 -0
- data/lib/elastic_graph/graphql/query_adapter/pagination.rb +27 -0
- data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +124 -0
- data/lib/elastic_graph/graphql/query_adapter/sort.rb +32 -0
- data/lib/elastic_graph/graphql/query_details_tracker.rb +60 -0
- data/lib/elastic_graph/graphql/query_executor.rb +200 -0
- data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +49 -0
- data/lib/elastic_graph/graphql/resolvers/graphql_adapter.rb +114 -0
- data/lib/elastic_graph/graphql/resolvers/list_records.rb +29 -0
- data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +74 -0
- data/lib/elastic_graph/graphql/resolvers/query_adapter.rb +85 -0
- data/lib/elastic_graph/graphql/resolvers/query_source.rb +46 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +71 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +65 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/page_info.rb +82 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +40 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +42 -0
- data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +56 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/cursor.rb +35 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date.rb +64 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date_time.rb +60 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/local_time.rb +30 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/longs.rb +47 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/no_op.rb +24 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/time_zone.rb +44 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/untyped.rb +32 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones.rb +634 -0
- data/lib/elastic_graph/graphql/schema/arguments.rb +78 -0
- data/lib/elastic_graph/graphql/schema/enum_value.rb +30 -0
- data/lib/elastic_graph/graphql/schema/field.rb +147 -0
- data/lib/elastic_graph/graphql/schema/relation_join.rb +103 -0
- data/lib/elastic_graph/graphql/schema/type.rb +263 -0
- data/lib/elastic_graph/graphql/schema.rb +164 -0
- data/lib/elastic_graph/graphql.rb +253 -0
- data/script/dump_time_zones +81 -0
- data/script/dump_time_zones.java +17 -0
- metadata +503 -0
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/graphql/aggregation/field_path_encoder"
|
|
10
|
+
require "elastic_graph/support/memoizable_data"
|
|
11
|
+
|
|
12
|
+
module ElasticGraph
|
|
13
|
+
class GraphQL
|
|
14
|
+
module Aggregation
|
|
15
|
+
# Represents a grouping on a term.
|
|
16
|
+
# For the relevant Elasticsearch docs, see:
|
|
17
|
+
# https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-bucket-terms-aggregation.html
|
|
18
|
+
# https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-bucket-composite-aggregation.html#_terms
|
|
19
|
+
module TermGrouping
|
|
20
|
+
# Returns the key for this grouping: the result of `FieldPathEncoder.encode` applied to the
# `name_in_graphql_query` of every segment in `field_path`. Memoized in `@key`.
def key
  @key ||= begin
    graphql_names = field_path.map { |segment| segment.name_in_graphql_query }
    FieldPathEncoder.encode(graphql_names)
  end
end
|
|
23
|
+
|
|
24
|
+
# Returns the result of `FieldPathEncoder.join` applied to the `name_in_index` of each segment
# in `field_path`. Segments whose `name_in_index` is `nil` or `false` are skipped (that is what
# `filter_map` does). Memoized in `@encoded_index_field_path`.
def encoded_index_field_path
  @encoded_index_field_path ||= begin
    index_names = field_path.filter_map { |segment| segment.name_in_index }
    FieldPathEncoder.join(index_names)
  end
end
|
|
27
|
+
|
|
28
|
+
# Builds the `terms` clause for the composite form of this grouping.
#
# @param grouping_options [Hash] extra entries merged on top of `terms_subclause`; when a key
#   appears in both, the value given here wins.
# @return [Hash] a single-entry hash keyed by "terms"
def composite_clause(grouping_options: {})
  terms = terms_subclause.merge(grouping_options)
  {"terms" => terms}
end
|
|
31
|
+
|
|
32
|
+
# Builds the `terms` clause for the non-composite form of this grouping.
#
# Starts from `terms_subclause` (passed through `work_around_elasticsearch_bug`) and then sets
# `size` and `show_term_doc_count_error` from the given query.
#
# @param query [#paginator, #needs_doc_count_error] the query this clause is built for
# @return [Hash] a single-entry hash keyed by "terms"
def non_composite_clause_for(query)
  base_clause = work_around_elasticsearch_bug(terms_subclause)

  query_specific_options = {
    "size" => query.paginator.desired_page_size,
    "show_term_doc_count_error" => query.needs_doc_count_error
  }

  {"terms" => base_clause.merge(query_specific_options)}
end
|
|
41
|
+
|
|
42
|
+
# The inner meta hash used by term groupings. It is not frozen and the very same instance is
# returned by every `inner_meta` call, so callers should treat it as read-only.
INNER_META = {
  "key_path" => ["key"],
  "merge_into_bucket" => {}
}

# Returns the shared `INNER_META` hash.
def inner_meta = INNER_META
|
|
47
|
+
|
|
48
|
+
private
|
|
49
|
+
|
|
50
|
+
# Here we force the `collect_mode` to `depth_first`. Without doing that, we've observed that some of our acceptance
|
|
51
|
+
# specs fail on CI when running against Elasticsearch 8.11 with an error like:
|
|
52
|
+
#
|
|
53
|
+
# ```
|
|
54
|
+
# {
|
|
55
|
+
# "root_cause": [
|
|
56
|
+
# {
|
|
57
|
+
# "type": "runtime_exception",
|
|
58
|
+
# "reason": "score for different docid, nesting an aggregation under a children aggregation and terms aggregation with collect mode breadth_first isn't possible"
|
|
59
|
+
# }
|
|
60
|
+
# ],
|
|
61
|
+
# "type": "search_phase_execution_exception",
|
|
62
|
+
# "reason": "all shards failed",
|
|
63
|
+
# "phase": "query",
|
|
64
|
+
# "grouped": true,
|
|
65
|
+
# "failed_shards": [
|
|
66
|
+
# {
|
|
67
|
+
# "shard": 0,
|
|
68
|
+
# "index": "teams_camel",
|
|
69
|
+
# "node": "pDXJzLTsRJCRjKe83DqipA",
|
|
70
|
+
# "reason": {
|
|
71
|
+
# "type": "runtime_exception",
|
|
72
|
+
# "reason": "score for different docid, nesting an aggregation under a children aggregation and terms aggregation with collect mode breadth_first isn't possible"
|
|
73
|
+
# }
|
|
74
|
+
# }
|
|
75
|
+
# ],
|
|
76
|
+
# "caused_by": {
|
|
77
|
+
# "type": "runtime_exception",
|
|
78
|
+
# "reason": "score for different docid, nesting an aggregation under a children aggregation and terms aggregation with collect mode breadth_first isn't possible",
|
|
79
|
+
# "caused_by": {
|
|
80
|
+
# "type": "runtime_exception",
|
|
81
|
+
# "reason": "score for different docid, nesting an aggregation under a children aggregation and terms aggregation with collect mode breadth_first isn't possible"
|
|
82
|
+
# }
|
|
83
|
+
# }
|
|
84
|
+
# }
|
|
85
|
+
# ```
|
|
86
|
+
#
|
|
87
|
+
# This specific exception message was introduced in https://github.com/elastic/elasticsearch/pull/89993, but that was done to provide
|
|
88
|
+
# a better error than a NullPointerException (which is what used to happen). This error also appears to be non-deterministic; I wasn't
|
|
89
|
+
# able to reproduce the CI failure locally until I forced `"collect_mode" => "breadth_first"`, at which point I did see the same error
|
|
90
|
+
# locally. The Elasticsearch docs[^1] mention that a heuristic (partially based on if a field's cardinality is known!) is used to pick
|
|
91
|
+
# whether `breadth_first` or `depth_first` is used when `collect_mode` is not specified:
|
|
92
|
+
#
|
|
93
|
+
# > The `breadth_first` is the default mode for fields with a cardinality bigger than the requested size or when the cardinality is unknown
|
|
94
|
+
# > (numeric fields or scripts for instance).
|
|
95
|
+
#
|
|
96
|
+
# In addition, the docs[^2] make it clear that `depth_first` is usually what you want:
|
|
97
|
+
#
|
|
98
|
+
# > The strategy we outlined previously—building the tree fully and then pruning—is called depth-first and it is the default.
|
|
99
|
+
# > Depth-first works well for the majority of aggregations, but can fall apart in situations like our actors and costars example.
|
|
100
|
+
# >
|
|
101
|
+
# > ...
|
|
102
|
+
# >
|
|
103
|
+
# > Breadth-first should be used only when you expect more buckets to be generated than documents landing in the buckets.
|
|
104
|
+
#
|
|
105
|
+
# So, for now we are forcing the collect mode to `depth_first`, as it avoids an issue with Elasticsearch and is a generally
|
|
106
|
+
# sane default. It may fall over in the case breadth-first is intended for, but we can cross that bridge when it comes.
|
|
107
|
+
#
|
|
108
|
+
# Long term, we're hoping to switch sub-aggregations to use a `composite` aggregation instead of `terms`, rendering this moot.
|
|
109
|
+
#
|
|
110
|
+
# [^1]: https://www.elastic.co/guide/en/elasticsearch/reference/8.11/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-collect
|
|
111
|
+
# [^2]: https://www.elastic.co/guide/en/elasticsearch/guide/current/_preventing_combinatorial_explosions.html#_depth_first_versus_breadth_first
|
|
112
|
+
# Returns a copy of `terms_clause` with `collect_mode` forced to `depth_first`
# (overriding any `collect_mode` already present). The argument is not mutated.
def work_around_elasticsearch_bug(terms_clause)
  forced_collect_mode = {"collect_mode" => "depth_first"}
  terms_clause.merge(forced_collect_mode)
end
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
end
|
|
118
|
+
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
module ElasticGraph
|
|
10
|
+
class GraphQL
|
|
11
|
+
# Represents a client of an ElasticGraph GraphQL endpoint.
|
|
12
|
+
# `name` and `source_description` can really be any string, but `name` is
|
|
13
|
+
# meant to be a friendly/human readable string (such as a service name)
|
|
14
|
+
# whereas `source_description` is meant to be an opaque string describing
|
|
15
|
+
# where `name` came from.
|
|
16
|
+
class Client < Data.define(:source_description, :name)
  # `Data.define` provides the following methods:
  # @dynamic initialize, name, source_description, with

  # Shared client instance returned by `DefaultResolver#resolve`.
  ANONYMOUS = new(source_description: "(anonymous)", name: "(anonymous)")
  # Shared client instance labelled as ElasticGraph-internal (per its name and description).
  ELASTICGRAPH_INTERNAL = new(source_description: "(ElasticGraphInternal)", name: "(ElasticGraphInternal)")

  # Human-readable description of the client: just `name` when it equals `source_description`,
  # otherwise `name` followed by `source_description` in parentheses.
  def description
    return name if source_description == name

    "#{name} (#{source_description})"
  end

  # The resolver used by default to pick the client for an HTTP request. It also documents the
  # interface a client resolver has to offer: `initialize(config)` and `resolve(http_request)`,
  # which is why an otherwise-empty `initialize` is defined.
  class DefaultResolver
    # @param config [Hash] resolver settings; ignored by this implementation
    def initialize(config)
    end

    # @param http_request [Object] the incoming request; ignored by this implementation
    # @return [Client] always `Client::ANONYMOUS`
    def resolve(http_request)
      ANONYMOUS
    end
  end
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/error"
|
|
10
|
+
require "elastic_graph/graphql/client"
|
|
11
|
+
require "elastic_graph/schema_artifacts/runtime_metadata/extension_loader"
|
|
12
|
+
|
|
13
|
+
module ElasticGraph
|
|
14
|
+
class GraphQL
|
|
15
|
+
# Immutable value object holding the settings read from the `graphql` section of a parsed
# settings file, plus any extension settings found at the root of that file.
class Config < Data.define(
  # Determines the size of our datastore search requests if the query does not specify.
  :default_page_size,
  # Determines the maximum size of a requested page. If the client requests a page larger
  # than this value, `max_page_size` elements will be returned instead.
  :max_page_size,
  # Queries that take longer than this configured threshold will have a sanitized version logged.
  :slow_query_latency_warning_threshold_in_ms,
  # Object used to identify the client of a GraphQL query based on the HTTP request.
  :client_resolver,
  # Array of modules that will be extended onto the `GraphQL` instance to support extension libraries.
  :extension_modules,
  # Contains any additional settings that were in the settings file beyond settings that are expected as part of ElasticGraph
  # itself. Extensions are free to use these extra settings.
  :extension_settings
)
  # The keys we expect under `graphql`.
  EXPECTED_KEYS = members.map(&:to_s)

  # The standard ElasticGraph root config setting keys; anything else is assumed to be extension settings.
  ELASTICGRAPH_CONFIG_KEYS = %w[graphql indexer logger datastore schema_artifacts]

  # Builds a `Config` from the entire parsed settings hash.
  #
  # @param entire_parsed_yaml [Hash] the whole parsed settings file; must contain a "graphql" key
  # @return [Config]
  # @raise [ConfigError] when `graphql` contains a key outside `EXPECTED_KEYS`, or when a
  #   configured extension module is not a plain module
  # @raise [KeyError] when "graphql", "default_page_size" or "max_page_size" is missing
  def self.from_parsed_yaml(entire_parsed_yaml)
    graphql_settings = entire_parsed_yaml.fetch("graphql")

    unknown_keys = graphql_settings.keys - EXPECTED_KEYS
    raise ConfigError, "Unknown `graphql` config settings: #{unknown_keys.join(", ")}" unless unknown_keys.empty?

    # Extension modules are loaded before any other setting is read.
    extension_mods = load_extension_modules(graphql_settings)

    new(
      default_page_size: graphql_settings.fetch("default_page_size"),
      max_page_size: graphql_settings.fetch("max_page_size"),
      slow_query_latency_warning_threshold_in_ms: graphql_settings["slow_query_latency_warning_threshold_in_ms"] || 5000,
      client_resolver: load_client_resolver(graphql_settings),
      extension_modules: extension_mods,
      extension_settings: entire_parsed_yaml.except(*ELASTICGRAPH_CONFIG_KEYS)
    )
  end

  # Loads every entry listed under `extension_modules` (none when the key is absent) and
  # returns the loaded modules in order.
  private_class_method def self.load_extension_modules(graphql_settings)
    loader = SchemaArtifacts::RuntimeMetadata::ExtensionLoader.new(::Module.new)

    graphql_settings.fetch("extension_modules", []).map do |mod_hash|
      extension_name = mod_hash.fetch("extension_name")
      mod = loader.load(extension_name, from: mod_hash.fetch("require_path"), config: {}).extension_class

      # `instance_of?` rather than `is_a?`: a class is also a `Module`, and classes must be rejected.
      unless mod.instance_of?(::Module)
        raise ConfigError, "`#{extension_name}` is not a module, but all application extension modules must be modules."
      end

      mod
    end
  end

  # Instantiates the configured client resolver, or a `Client::DefaultResolver` when no
  # `client_resolver` setting is present. Every setting other than `extension_name` and
  # `require_path` is handed to the loader as the resolver's config.
  private_class_method def self.load_client_resolver(parsed_yaml)
    return Client::DefaultResolver.new({}) unless parsed_yaml.key?("client_resolver")

    resolver_settings = parsed_yaml.fetch("client_resolver")
    resolver_loader = SchemaArtifacts::RuntimeMetadata::ExtensionLoader.new(Client::DefaultResolver)
    extension = resolver_loader.load(
      resolver_settings.fetch("extension_name"),
      from: resolver_settings.fetch("require_path"),
      config: resolver_settings.except("extension_name", "require_path")
    )
    extension_class = extension.extension_class # : ::Class

    __skip__ = extension_class.new(extension.extension_config)
  end
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/error"
|
|
10
|
+
require "graphql"
|
|
11
|
+
|
|
12
|
+
module ElasticGraph
|
|
13
|
+
class GraphQL
|
|
14
|
+
class DatastoreQuery
|
|
15
|
+
# Contains query logic related to pagination. Mostly delegates to `Paginator`, which
|
|
16
|
+
# contains most of the logic. This merely adapts the `Paginator` to the needs of document
|
|
17
|
+
# pagination. (Paginator also supports aggregation bucket pagination.)
|
|
18
|
+
class DocumentPaginator < Support::MemoizableData.define(
|
|
19
|
+
:sort_clauses, :paginator, :decoded_cursor_factory, :schema_element_names,
|
|
20
|
+
# `individual_docs_needed`: when false, we request a `size` of 0. Set to `true` when the client is
|
|
21
|
+
# requesting any document fields, or if we need documents to compute any parts of the `PageInfo`.
|
|
22
|
+
:individual_docs_needed,
|
|
23
|
+
# `total_document_count_needed`: when false, `track_total_hits` will be 0 in our datastore query.
|
|
24
|
+
# This will prevent the datastore from doing extra work to get an accurate count
|
|
25
|
+
:total_document_count_needed
|
|
26
|
+
)
|
|
27
|
+
# Builds a hash containing the portions of a datastore search body related to pagination.
|
|
28
|
+
# Returns the pagination-related entries of a datastore search body.
#
# Entries whose value is `nil` or an empty collection are left out: `Array(value)` is empty
# for those, while scalars such as `0` and `false` wrap to a one-element array and are kept.
def to_datastore_body
  body = {
    size: effective_size,
    sort: effective_sort,
    search_after: search_after,
    track_total_hits: total_document_count_needed
  }

  body.reject { |_key, value| Array(value).empty? }
end
|
|
36
|
+
|
|
37
|
+
# Returns `sort_clauses` with an explicit `missing` option added to the options of every
# clause: `_first` when the clause's `order` is `asc`, `_last` otherwise. Memoized in `@sort`.
def sort
  @sort ||= sort_clauses.map do |sort_clause|
    sort_clause.transform_values do |field_options|
      # By default the datastore sorts missing/null values last regardless of direction[^1].
      # We set `missing` explicitly for two reasons:
      #
      # 1. Document sorting should behave like aggregation sorting, which puts missing-value
      #    buckets first when ascending and last when descending[^2]. (Elasticsearch 7.16[^3]
      #    and above let you control that for buckets; earlier versions do not.)
      # 2. Clients are likely to expect a descending sort to be the exact reverse of the
      #    ascending sort, which only holds when the `missing` placement flips along with
      #    the `order`.
      #
      # [^1]: https://www.elastic.co/guide/en/elasticsearch/reference/7.10/sort-search-results.html#_missing_values
      # [^2]: https://www.elastic.co/guide/en/elasticsearch/reference/7.10/search-aggregations-bucket-composite-aggregation.html#_missing_bucket
      # [^3]: https://www.elastic.co/guide/en/elasticsearch/reference/7.16/search-aggregations-bucket-composite-aggregation.html#_missing_bucket
      ascending = field_options.fetch("order") == "asc"
      field_options.merge({"missing" => ascending ? "_first" : "_last"})
    end
  end
end
|
|
59
|
+
|
|
60
|
+
private
|
|
61
|
+
|
|
62
|
+
# The `size` to request: the paginator's requested page size when individual documents are
# needed, otherwise 0.
def effective_size
  return 0 unless individual_docs_needed

  paginator.requested_page_size
end
|
|
65
|
+
|
|
66
|
+
# The sort clauses to send: none when no documents are requested (`effective_size` is 0),
# otherwise `reverse_sort` when the paginator searches in reverse and `sort` when it does not.
def effective_sort
  if effective_size > 0
    paginator.search_in_reverse? ? reverse_sort : sort
  else
    []
  end
end
|
|
70
|
+
|
|
71
|
+
# Maps each sort direction to its opposite.
DIRECTION_OPPOSITES = {"asc" => "desc", "desc" => "asc"}.freeze
# Maps each `missing` placement to its opposite.
MISSING_OPPOSITES = {"_first" => "_last", "_last" => "_first"}.freeze

# Returns `sort` with every clause's `order` and `missing` flipped to their opposites.
# Memoized in `@reverse_sort`.
#
# Any other options present on a clause are carried over unchanged, so the reversed clause
# differs from the forward one only in direction and missing-value placement.
#
# @raise [KeyError] when a clause lacks `order`/`missing` or holds a value that has no
#   entry in `DIRECTION_OPPOSITES`/`MISSING_OPPOSITES`
def reverse_sort
  @reverse_sort ||= sort.map do |sort_clause|
    sort_clause.transform_values do |options|
      options.merge({
        "order" => DIRECTION_OPPOSITES.fetch(options.fetch("order")),
        "missing" => MISSING_OPPOSITES.fetch(options.fetch("missing"))
      })
    end
  end
end
|
|
84
|
+
|
|
85
|
+
# Returns the `search_after` values for the datastore request, or `nil` when the paginator
# has no `search_after` cursor.
#
# The values are read from the cursor's `sort_values`, one per entry of
# `decoded_cursor_factory.sort_fields` and in that order.
#
# @raise [::GraphQL::ExecutionError] when the cursor has no value for one of the sort fields
def search_after
  cursor = paginator.search_after
  return nil if cursor.nil?

  decoded_cursor_factory.sort_fields.map do |field|
    cursor.sort_values.fetch(field) do
      raise ::GraphQL::ExecutionError, "`#{cursor.encode}` is not a valid cursor for the current `#{schema_element_names.order_by}` argument."
    end
  end
end
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
# `Query::DocumentPaginator` exists only for use by `Query` and is effectively private.
|
|
97
|
+
private_constant :DocumentPaginator
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
end
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/graphql/filtering/filter_value_set_extractor"
|
|
10
|
+
require "elastic_graph/support/time_set"
|
|
11
|
+
|
|
12
|
+
module ElasticGraph
|
|
13
|
+
class GraphQL
|
|
14
|
+
class DatastoreQuery
|
|
15
|
+
# Responsible for building a search index expression for a specific query based on the filters.
|
|
16
|
+
class IndexExpressionBuilder
|
|
17
|
+
# Sets up the `FilterValueSetExtractor` used to turn timestamp filters into a `TimeSet`.
#
# The block given to the extractor converts a single `(operator, filter_value)` pair into a
# `Support::TimeSet`. It handles the range operators and `equal_to_any_of`, and evaluates to
# `nil` for any other operator.
#
# @param schema_names [Object] passed straight through to `Filtering::FilterValueSetExtractor.new`
def initialize(schema_names:)
  @filter_value_set_extractor = Filtering::FilterValueSetExtractor.new(schema_names, Support::TimeSet::ALL) do |operator, filter_value|
    case operator
    when :gt, :gte, :lt, :lte
      boundary = filter_value

      if date_string?(filter_value)
        # A bare date has to be widened to a time before it can bound a time range, and the
        # suffix depends on which edge of the day the operator refers to:
        #
        # - `> 2024-04-01` == `> 2024-04-01T23:59:59.999Z`
        # - `≥ 2024-04-01` == `≥ 2024-04-01T00:00:00Z`
        # - `< 2024-04-01` == `< 2024-04-01T00:00:00Z`
        # - `≤ 2024-04-01` == `≤ 2024-04-01T23:59:59.999Z`
        time_suffix = %i[gt lte].include?(operator) ? "T23:59:59.999Z" : "T00:00:00Z"
        boundary = filter_value + time_suffix
      end

      Support::TimeSet.of_range(operator => ::Time.iso8601(boundary))
    when :equal_to_any_of
      # `nil` entries carry no timestamp, so they are dropped before parsing.
      ranges = filter_value.compact.map do |iso8601_string|
        if date_string?(iso8601_string)
          # A bare date matches the whole day.
          (::Time.iso8601("#{iso8601_string}T00:00:00Z")..::Time.iso8601("#{iso8601_string}T23:59:59.999Z"))
        else
          # A full timestamp matches only that exact instant.
          instant = ::Time.iso8601(iso8601_string)
          (instant..instant)
        end
      end

      Support::TimeSet.of_range_objects(ranges)
    end
  end
end
|
|
52
|
+
|
|
53
|
+
# Returns an index_definition expression string to use for searches. This string can specify
|
|
54
|
+
# multiple indices, use wildcards, etc. For info about what is supported, see:
|
|
55
|
+
# https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-index.html
|
|
56
|
+
# Combines the index expressions of all given index definitions into one.
#
# @param filter_hashes [Object] the filters, passed through to `index_expression_for`
# @param search_index_definitions [Enumerable] index definitions responding to `name`
# @param require_indices [Boolean] passed through to `index_expression_for`
# @return the sum (via `+`) of the per-definition expressions, starting from `IndexExpression::EMPTY`
def determine_search_index_expression(filter_hashes, search_index_definitions, require_indices:)
  # Sorting by name does not change what the datastore searches; it only makes the result
  # deterministic, which keeps it easy to assert on in tests.
  search_index_definitions
    .sort_by(&:name)
    .map { |index_def| index_expression_for(filter_hashes, index_def, require_indices: require_indices) }
    .sum(IndexExpression::EMPTY)
end
|
|
63
|
+
|
|
64
|
+
private
|
|
65
|
+
|
|
66
|
+
# Builds the index expression contributed by a single index definition.
#
# A definition that is not a rollover index template contributes just its own search
# expression. For a rollover template, the timestamp filters are reduced to a time set, and
# every known rollover index whose own time set does not intersect it is excluded.
#
# @param filter_hashes [Object] the filters, handed to the filter value set extractor
# @param maybe_rollover_index_def [Object] the index definition to build an expression for
# @param require_indices [Boolean] when true, the result is built so that not every known
#   index is excluded
def index_expression_for(filter_hashes, maybe_rollover_index_def, require_indices:)
  return IndexExpression.only(maybe_rollover_index_def.index_expression_for_search) unless maybe_rollover_index_def.rollover_index_template?

  # @type var index_def: DatastoreCore::IndexDefinition::RolloverIndexTemplate
  index_def = _ = maybe_rollover_index_def

  time_set = @filter_value_set_extractor.extract_filter_value_set(filter_hashes, [index_def.timestamp_field_path])

  if time_set.empty?
    # An empty time set means no document can possibly match. When no index is required,
    # an empty index expression is enough.
    return IndexExpression::EMPTY unless require_indices

    # An index is required, but which one is irrelevant since nothing can match. We just
    # pick the first one, alphabetically.
    first_known_index = index_def.known_related_query_rollover_indices.map(&:index_expression_for_search).min
    return IndexExpression.only(first_known_index)
  end

  indices_to_exclude = index_def.known_related_query_rollover_indices.reject { |index| time_set.intersect?(index.time_set) }

  # When indices are required but every known index has been excluded, exclude one index
  # fewer. That beats the alternatives:
  #
  # - Returning an expression with no exclusions would search across all indices, which is
  #   less efficient.
  # - Searching only the first known index (as is done for an empty time set) could miss
  #   matching documents, because they may live in a just-created index that is not yet in
  #   `known_related_query_rollover_indices`. So the rollover wildcard expression must still
  #   be searched, with all but one of the known indices excluded.
  indices_to_exclude = indices_to_exclude.drop(1) if require_indices && (index_def.known_related_query_rollover_indices - indices_to_exclude).empty?

  included_names = ::Set[index_def.index_expression_for_search]
  excluded_names = ::Set.new(indices_to_exclude.map(&:index_expression_for_search))
  IndexExpression.new(names_to_include: included_names, names_to_exclude: excluded_names)
end
|
|
106
|
+
|
|
107
|
+
# True when `string` is exactly a `YYYY-MM-DD` shaped value (four digits, two digits, two
# digits, separated by hyphens), with nothing before or after it.
def date_string?(string)
  date_only_pattern = /\A\d{4}-\d{2}-\d{2}\z/
  date_only_pattern.match?(string)
end
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# Value object describing which index names a search should include and which it should
# exclude. Both members are sets of names.
class IndexExpression < ::Data.define(:names_to_include, :names_to_exclude)
  # An expression that includes nothing and excludes nothing.
  EMPTY = new(names_to_include: ::Set.new, names_to_exclude: ::Set.new)

  # Builds an expression that includes just `name` and excludes nothing. A `nil` name
  # produces an expression with no included names.
  def self.only(name)
    included = name.nil? ? ::Set.new : ::Set[name]
    IndexExpression.new(names_to_include: included, names_to_exclude: ::Set.new)
  end

  # Renders the expression as a comma-separated string: the sorted included names followed by
  # the sorted excluded names, each of the latter prefixed with `-`.
  def to_s
    # Note: exclusions must come after inclusions. Nothing in the Elasticsearch or OpenSearch
    # docs appears to mention this, but putting exclusions first was observed to produce errors.
    exclusions = names_to_exclude.sort.map { |name| "-#{name}" }
    [*names_to_include.sort, *exclusions].join(",")
  end

  # Returns a new expression whose included and excluded names are the unions of the
  # corresponding sets of `self` and `other`.
  def +(other)
    with(
      names_to_include: names_to_include | other.names_to_include,
      names_to_exclude: names_to_exclude | other.names_to_exclude
    )
  end
end
|
|
133
|
+
|
|
134
|
+
# `Query::IndexExpressionBuilder` exists only for use by `Query` and is effectively private.
|
|
135
|
+
private_constant :IndexExpressionBuilder
|
|
136
|
+
|
|
137
|
+
# Steep is complaining that it can't find some `Query` methods, but they are not defined in this file...
|
|
138
|
+
# @dynamic aggregations, shard_routing_values, search_index_definitions, merge_with, search_index_expression
|
|
139
|
+
# @dynamic with, to_datastore_msearch_header_and_body, document_paginator
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
end
|