elasticgraph-graphql 0.18.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +3 -0
- data/elasticgraph-graphql.gemspec +23 -0
- data/lib/elastic_graph/graphql/aggregation/composite_grouping_adapter.rb +79 -0
- data/lib/elastic_graph/graphql/aggregation/computation.rb +39 -0
- data/lib/elastic_graph/graphql/aggregation/date_histogram_grouping.rb +83 -0
- data/lib/elastic_graph/graphql/aggregation/field_path_encoder.rb +47 -0
- data/lib/elastic_graph/graphql/aggregation/field_term_grouping.rb +26 -0
- data/lib/elastic_graph/graphql/aggregation/key.rb +87 -0
- data/lib/elastic_graph/graphql/aggregation/nested_sub_aggregation.rb +37 -0
- data/lib/elastic_graph/graphql/aggregation/non_composite_grouping_adapter.rb +129 -0
- data/lib/elastic_graph/graphql/aggregation/path_segment.rb +31 -0
- data/lib/elastic_graph/graphql/aggregation/query.rb +172 -0
- data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +345 -0
- data/lib/elastic_graph/graphql/aggregation/query_optimizer.rb +187 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/aggregated_values.rb +41 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/count_detail.rb +44 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/grouped_by.rb +30 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +64 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +83 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +82 -0
- data/lib/elastic_graph/graphql/aggregation/script_term_grouping.rb +32 -0
- data/lib/elastic_graph/graphql/aggregation/term_grouping.rb +118 -0
- data/lib/elastic_graph/graphql/client.rb +43 -0
- data/lib/elastic_graph/graphql/config.rb +81 -0
- data/lib/elastic_graph/graphql/datastore_query/document_paginator.rb +100 -0
- data/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +142 -0
- data/lib/elastic_graph/graphql/datastore_query/paginator.rb +199 -0
- data/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +239 -0
- data/lib/elastic_graph/graphql/datastore_query.rb +372 -0
- data/lib/elastic_graph/graphql/datastore_response/document.rb +78 -0
- data/lib/elastic_graph/graphql/datastore_response/search_response.rb +79 -0
- data/lib/elastic_graph/graphql/datastore_search_router.rb +151 -0
- data/lib/elastic_graph/graphql/decoded_cursor.rb +120 -0
- data/lib/elastic_graph/graphql/filtering/boolean_query.rb +45 -0
- data/lib/elastic_graph/graphql/filtering/field_path.rb +81 -0
- data/lib/elastic_graph/graphql/filtering/filter_args_translator.rb +58 -0
- data/lib/elastic_graph/graphql/filtering/filter_interpreter.rb +526 -0
- data/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +148 -0
- data/lib/elastic_graph/graphql/filtering/range_query.rb +56 -0
- data/lib/elastic_graph/graphql/http_endpoint.rb +229 -0
- data/lib/elastic_graph/graphql/monkey_patches/schema_field.rb +56 -0
- data/lib/elastic_graph/graphql/monkey_patches/schema_object.rb +48 -0
- data/lib/elastic_graph/graphql/query_adapter/filters.rb +161 -0
- data/lib/elastic_graph/graphql/query_adapter/pagination.rb +27 -0
- data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +124 -0
- data/lib/elastic_graph/graphql/query_adapter/sort.rb +32 -0
- data/lib/elastic_graph/graphql/query_details_tracker.rb +60 -0
- data/lib/elastic_graph/graphql/query_executor.rb +200 -0
- data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +49 -0
- data/lib/elastic_graph/graphql/resolvers/graphql_adapter.rb +114 -0
- data/lib/elastic_graph/graphql/resolvers/list_records.rb +29 -0
- data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +74 -0
- data/lib/elastic_graph/graphql/resolvers/query_adapter.rb +85 -0
- data/lib/elastic_graph/graphql/resolvers/query_source.rb +46 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +71 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +65 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/page_info.rb +82 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +40 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +42 -0
- data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +56 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/cursor.rb +35 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date.rb +64 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date_time.rb +60 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/local_time.rb +30 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/longs.rb +47 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/no_op.rb +24 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/time_zone.rb +44 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/untyped.rb +32 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones.rb +634 -0
- data/lib/elastic_graph/graphql/schema/arguments.rb +78 -0
- data/lib/elastic_graph/graphql/schema/enum_value.rb +30 -0
- data/lib/elastic_graph/graphql/schema/field.rb +147 -0
- data/lib/elastic_graph/graphql/schema/relation_join.rb +103 -0
- data/lib/elastic_graph/graphql/schema/type.rb +263 -0
- data/lib/elastic_graph/graphql/schema.rb +164 -0
- data/lib/elastic_graph/graphql.rb +253 -0
- data/script/dump_time_zones +81 -0
- data/script/dump_time_zones.java +17 -0
- metadata +503 -0
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/error"
|
|
10
|
+
require "elastic_graph/graphql/aggregation/query"
|
|
11
|
+
require "elastic_graph/graphql/aggregation/query_optimizer"
|
|
12
|
+
require "elastic_graph/graphql/decoded_cursor"
|
|
13
|
+
require "elastic_graph/graphql/datastore_response/search_response"
|
|
14
|
+
require "elastic_graph/graphql/filtering/filter_interpreter"
|
|
15
|
+
require "elastic_graph/support/memoizable_data"
|
|
16
|
+
|
|
17
|
+
module ElasticGraph
  class GraphQL
    # An immutable class that represents a datastore query. Since this represents
    # a datastore query, and not a GraphQL query, all the data in it is modeled
    # in datastore terms, not GraphQL terms. For example, any field names in a
    # `Query` should be references to index fields, not GraphQL fields.
    #
    # Filters are modeled as a `Set` of filtering hashes. While we usually expect only
    # a single `filter` hash, modeling it as a set makes it easy for us to support
    # merging queries. The datastore knows how to apply multiple `must` clauses that
    # apply to the same field, giving us the exact semantics we want in such a situation
    # with minimal effort.
    class DatastoreQuery < Support::MemoizableData.define(
      :total_document_count_needed, :aggregations, :logger, :filter_interpreter, :routing_picker,
      :index_expression_builder, :default_page_size, :search_index_definitions, :max_page_size,
      :filters, :sort, :document_pagination, :requested_fields, :individual_docs_needed,
      :monotonic_clock_deadline, :schema_element_names
    ) {
      # Normalizes constructor arguments and applies defaults before delegating to the
      # `MemoizableData`-generated initializer.
      #
      # Note: `individual_docs_needed` is forced on when any fields are requested, and
      # `total_document_count_needed` is forced on when any aggregation needs the total
      # doc count, so those flags never under-report what the datastore must return.
      def initialize(
        filter: nil,
        filters: nil,
        sort: nil,
        document_pagination: nil,
        aggregations: nil,
        requested_fields: nil,
        individual_docs_needed: false,
        total_document_count_needed: false,
        monotonic_clock_deadline: nil,
        **kwargs
      )
        # Deal with `:filter` vs `:filters` input and normalize it to a single `filters` set.
        filters = ::Set.new(filters || [])
        filters << filter if filter && !filter.empty?
        filters.freeze

        aggregations ||= {}
        requested_fields ||= []

        super(
          filters: filters,
          sort: sort || [],
          document_pagination: document_pagination || {},
          aggregations: aggregations,
          requested_fields: requested_fields.to_set,
          individual_docs_needed: individual_docs_needed || !requested_fields.empty?,
          total_document_count_needed: total_document_count_needed || aggregations.values.any?(&:needs_total_doc_count?),
          monotonic_clock_deadline: monotonic_clock_deadline,
          **kwargs
        )

        # A query with no indices to search is unexecutable; fail fast at construction time.
        if search_index_definitions.empty?
          raise SearchFailedError, "Query is invalid, since it contains no `search_index_definitions`."
        end
      end
    }
    # Load these files after the `Query` class has been defined, to avoid
    # `TypeError: superclass mismatch for class Query`
    require "elastic_graph/graphql/datastore_query/document_paginator"
    require "elastic_graph/graphql/datastore_query/index_expression_builder"
    require "elastic_graph/graphql/datastore_query/paginator"
    require "elastic_graph/graphql/datastore_query/routing_picker"

    # Performs a list of queries by building a hash of datastore msearch header/body tuples (keyed
    # by query), yielding them to the caller, and then post-processing the results. The caller is
    # responsible for returning a hash of responses by query from its block.
    #
    # Note that some of the passed queries may not be yielded to the caller; when we can tell
    # that a query does not have to be sent to the datastore we avoid yielding it from here.
    # Therefore, the caller should not assume that all queries passed to this method will be
    # yielded back.
    #
    # The return value is a hash of `DatastoreResponse::SearchResponse` objects by query.
    #
    # Note: this method uses `send` to work around ruby visibility rules. We do not want
    # `#decoded_cursor_factory` to be public, as we only need it here, but we cannot access
    # it from a class method without using `send`.
    def self.perform(queries)
      # Queries that we can tell will match nothing never need to hit the datastore.
      empty_queries, present_queries = queries.partition(&:empty?)

      responses_by_query = Aggregation::QueryOptimizer.optimize_queries(present_queries) do |optimized_queries|
        header_body_tuples_by_query = optimized_queries.each_with_object({}) do |query, hash|
          hash[query] = query.to_datastore_msearch_header_and_body
        end

        yield(header_body_tuples_by_query)
      end

      # Empty queries get a canned empty response rather than a real datastore round trip.
      empty_responses = empty_queries.each_with_object({}) do |query, hash|
        hash[query] = DatastoreResponse::SearchResponse::RAW_EMPTY
      end

      empty_responses.merge(responses_by_query).each_with_object({}) do |(query, response), hash|
        hash[query] = DatastoreResponse::SearchResponse.build(response, decoded_cursor_factory: query.send(:decoded_cursor_factory))
      end.tap do |responses_hash|
        # Callers expect this `perform` method to provide an invariant: the returned hash MUST contain one entry
        # for each of the `queries` passed in the args. In practice, violating this invariant primarily causes a
        # problem when the caller uses the `GraphQL::Dataloader` (which happens for every GraphQL request in production...).
        # However, our tests do not always run queries end-to-end, so this is an added check we want to do, so that
        # anytime our logic here fails to include a query in the response in any test, we'll be notified of the
        # problem.
        expected_queries = queries.to_set
        actual_queries = responses_hash.keys.to_set

        if expected_queries != actual_queries
          missing_queries = expected_queries - actual_queries
          extra_queries = actual_queries - expected_queries

          raise SearchFailedError, "The `responses_hash` does not have the expected set of queries as keys. " \
            "This can cause problems for the `GraphQL::Dataloader` and suggests a bug in the logic that should be fixed.\n\n" \
            "Missing queries (#{missing_queries.size}):\n#{missing_queries.map(&:inspect).join("\n")}.\n\n" \
            "Extra queries (#{extra_queries.size}): #{extra_queries.map(&:inspect).join("\n")}"
        end
      end
    end

    # Merges the provided query, returning a new combined query object.
    # Both query objects are left unchanged.
    def merge(other_query)
      # Merging queries that target different indices has no sensible semantics.
      if search_index_definitions != other_query.search_index_definitions
        raise ElasticGraph::InvalidMergeError, "`search_index_definitions` conflict while merging between " \
          "#{search_index_definitions} and #{other_query.search_index_definitions}"
      end

      with(
        individual_docs_needed: individual_docs_needed || other_query.individual_docs_needed,
        total_document_count_needed: total_document_count_needed || other_query.total_document_count_needed,
        filters: filters + other_query.filters,
        sort: merge_attribute(other_query, :sort),
        requested_fields: requested_fields + other_query.requested_fields,
        document_pagination: merge_attribute(other_query, :document_pagination),
        # The stricter (earlier) deadline of the two queries wins.
        monotonic_clock_deadline: [monotonic_clock_deadline, other_query.monotonic_clock_deadline].compact.min,
        aggregations: aggregations.merge(other_query.aggregations)
      )
    end

    # Convenience method for merging when you do not have access to an
    # `DatastoreQuery::Builder`. Allows you to pass the query options you
    # would like to merge. As with `#merge`, leaves the original query unchanged
    # and returns a combined query object.
    def merge_with(**query_options)
      merge(with(**query_options))
    end

    # Pairs the multi-search headers and body into a tuple, as per the format required by the datastore:
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-multi-search.html#search-multi-search-api-desc
    def to_datastore_msearch_header_and_body
      @to_datastore_msearch_header_and_body ||= [to_datastore_msearch_header, to_datastore_body]
    end

    # Returns an index_definition expression string to use for searches. This string can specify
    # multiple indices, use wildcards, etc. For info about what is supported, see:
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-index.html
    def search_index_expression
      @search_index_expression ||= index_expression_builder.determine_search_index_expression(
        filters,
        search_index_definitions,
        # When we have aggregations, we must require indices to search. When we search no indices, the datastore does not return
        # the standard aggregations response structure, which causes problems.
        require_indices: !aggregations_datastore_body.empty?
      ).to_s
    end

    # Returns the name of the datastore cluster as a String where this query should be sent.
    # Unless exactly 1 cluster name is found, this method raises a ConfigError.
    def cluster_name
      cluster_name = search_index_definitions.map(&:cluster_to_query).uniq
      return cluster_name.first if cluster_name.size == 1
      raise ConfigError, "Found different datastore clusters (#{cluster_name}) to query " \
        "for query targeting indices: #{search_index_definitions}"
    end

    # Returns a list of unique field paths that should be used for shard routing during searches.
    #
    # If a search is filtering on one of these fields, we can optimize the search by routing
    # it to only the shards containing documents for that routing value.
    #
    # Note that this returns a list due to our support for type unions. A unioned type
    # can be composed of subtypes that use different shard routing; this will return
    # the set union of them all.
    def route_with_field_paths
      search_index_definitions.map(&:route_with).uniq
    end

    # The shard routing values used for this search. Can be `nil` if the query will hit all shards.
    # `[]` means that we are routing to no shards.
    def shard_routing_values
      # Memoized with `defined?` (rather than `||=` alone) so a legitimately `nil` result is also cached.
      return @shard_routing_values if defined?(@shard_routing_values)
      routing_values = routing_picker.extract_eligible_routing_values(filters, route_with_field_paths)

      @shard_routing_values ||=
        if routing_values&.empty? && !aggregations_datastore_body.empty?
          # If we return an empty array of routing values, no shards will get searched, which causes a problem for aggregations.
          # When a query includes aggregations, there are normally aggregation structures on the response (even when there are no
          # search hits to aggregate over!) but if there are no routing values, those aggregation structures will be missing from
          # the response. It's complex to handle that in our downstream response handling code, so we prefer to force a "fallback"
          # routing value here to ensure that at least one shard gets searched. Which shard gets searched doesn't matter; the search
          # filter that led to an empty set of routing values will match on documents on any shard.
          ["fallback_shard_routing_value"]
        elsif contains_ignored_values_for_routing?(routing_values)
          nil
        else
          routing_values&.sort # order doesn't matter, but sorting it makes it easier to assert on in our tests.
        end
    end

    # Indicates if the query does not need any results from the datastore. As an optimization,
    # we can reply with a default "empty" response for an empty query.
    def empty?
      # If we are searching no indices or routing to an empty set of shards, there is no need to query the datastore at all.
      # This only happens when our filter processing has deduced that the query will match no results.
      return true if search_index_expression.empty? || shard_routing_values&.empty?

      datastore_body = to_datastore_body
      datastore_body.fetch(:size) == 0 && !datastore_body.fetch(:track_total_hits) && aggregations_datastore_body.empty?
    end

    # Human-readable summary of the query; the `query` payload is redacted since
    # filter values may contain sensitive data.
    def inspect
      description = to_datastore_msearch_header.merge(to_datastore_body).map do |key, value|
        "#{key}=#{(key == :query) ? "<REDACTED>" : value.inspect}"
      end.join(" ")

      "#<#{self.class.name} #{description}>"
    end

    # The msearch header line: the index expression plus (optionally) comma-joined shard routing values.
    def to_datastore_msearch_header
      @to_datastore_msearch_header ||= {index: search_index_expression, routing: shard_routing_values&.join(",")}.compact
    end

    # `DatastoreQuery` objects are used as keys in a hash. Computing `#hash` can be expensive (given how many fields
    # an `DatastoreQuery` has) and it's safe to cache since `DatastoreQuery` instances are immutable, so we memoize it
    # here. We've observed this making a very noticeable difference in our test suite runtime.
    def hash
      @hash ||= super
    end

    # Builds the `DocumentPaginator` responsible for the sort/pagination portion of the datastore body.
    def document_paginator
      @document_paginator ||= DocumentPaginator.new(
        sort_clauses: sort_with_tiebreaker,
        individual_docs_needed: individual_docs_needed,
        total_document_count_needed: total_document_count_needed,
        decoded_cursor_factory: decoded_cursor_factory,
        schema_element_names: schema_element_names,
        paginator: Paginator.new(
          default_page_size: default_page_size,
          max_page_size: max_page_size,
          first: document_pagination[:first],
          after: document_pagination[:after],
          last: document_pagination[:last],
          before: document_pagination[:before],
          schema_element_names: schema_element_names
        )
      )
    end

    private

    # Merges a single attribute from `other_query`, preferring whichever side is non-empty.
    # When both sides define differing non-empty values, the merged-in query wins (with a warning).
    def merge_attribute(other_query, attribute)
      value = public_send(attribute)
      other_value = other_query.public_send(attribute)

      if value.empty?
        other_value
      elsif other_value.empty?
        value
      elsif value == other_value
        value
      else
        logger.warn("Tried to merge two queries that both define `#{attribute}`, using the value from the query being merged: #{value}, #{other_value}")
        other_value
      end
    end

    TIEBREAKER_SORT_CLAUSES = [{"id" => {"order" => "asc"}}].freeze

    # We want to use `id` as a tiebreaker ONLY when `id` isn't explicitly specified as a sort field
    def sort_with_tiebreaker
      @sort_with_tiebreaker ||= remove_duplicate_sort_clauses(sort + TIEBREAKER_SORT_CLAUSES)
    end

    # Keeps only the first sort clause for each field, so the appended tiebreaker
    # is dropped when the caller already sorts on `id`.
    def remove_duplicate_sort_clauses(sort_clauses)
      seen_fields = Set.new
      sort_clauses.select do |clause|
        clause.keys.all? { |key| seen_fields.add?(key) }
      end
    end

    # Factory for decoding pagination cursors against this query's effective sort list.
    def decoded_cursor_factory
      @decoded_cursor_factory ||= DecodedCursor::Factory.from_sort_list(sort_with_tiebreaker)
    end

    # Returns truthy when any extracted routing value is one the index config tells us to ignore
    # (in which case we must search all shards). Returns `nil` when `routing_values` is `nil`.
    def contains_ignored_values_for_routing?(routing_values)
      ignored_values_for_routing.intersect?(routing_values.to_set) if routing_values
    end

    # Union of the ignored routing values configured across all searched index definitions.
    def ignored_values_for_routing
      @ignored_values_for_routing ||= search_index_definitions.flat_map { |i| i.ignored_values_for_routing.to_a }.to_set
    end

    # Assembles the full msearch body: aggregations + pagination/sort + query filter + `_source` selection.
    def to_datastore_body
      @to_datastore_body ||= aggregations_datastore_body
        .merge(document_paginator.to_datastore_body)
        .merge({query: filter_interpreter.build_query(filters)}.compact)
        .merge({_source: source})
    end

    # The `aggs` portion of the datastore body; `{}` when this query has no aggregations.
    def aggregations_datastore_body
      @aggregations_datastore_body ||= begin
        aggs = aggregations
          .values
          .map { |agg| agg.build_agg_hash(filter_interpreter) }
          .reduce({}, :merge)

        aggs.empty? ? {} : {aggs: aggs}
      end
    end

    # Make our query as efficient as possible by limiting what parts of `_source` we fetch.
    # For an id-only query (or a query that has no requested fields) we don't need to fetch `_source`
    # at all--which means the datastore can avoid decompressing the _source field. Otherwise,
    # we only ask for the fields we need to return.
    def source
      requested_source_fields = requested_fields - ["id"]
      return false if requested_source_fields.empty?
      # Merging in requested_fields as _source:{includes:} based on Elasticsearch documentation:
      # https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-source-field.html#include-exclude
      {includes: requested_source_fields.to_a}
    end

    # Encapsulates dependencies of `Query`, giving us something we can expose off of `application`
    # to build queries when desired.
    class Builder < Support::MemoizableData.define(:runtime_metadata, :logger, :query_defaults)
      # Alternate constructor that captures all extra keyword args as `query_defaults`.
      def self.with(runtime_metadata:, logger:, **query_defaults)
        new(runtime_metadata: runtime_metadata, logger: logger, query_defaults: query_defaults)
      end

      def routing_picker
        @routing_picker ||= RoutingPicker.new(schema_names: runtime_metadata.schema_element_names)
      end

      def index_expression_builder
        @index_expression_builder ||= IndexExpressionBuilder.new(schema_names: runtime_metadata.schema_element_names)
      end

      # Builds a `DatastoreQuery`, layering the given `options` over this builder's defaults.
      def new_query(**options)
        DatastoreQuery.new(
          routing_picker: routing_picker,
          index_expression_builder: index_expression_builder,
          logger: logger,
          schema_element_names: runtime_metadata.schema_element_names,
          **query_defaults.merge(options)
        )
      end
    end
  end
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/graphql/decoded_cursor"
|
|
10
|
+
require "elastic_graph/support/memoizable_data"
|
|
11
|
+
require "forwardable"
|
|
12
|
+
|
|
13
|
+
module ElasticGraph
  class GraphQL
    module DatastoreResponse
      # Wraps a single document fetched from the datastore. Exposes the raw hit
      # metadata alongside the document payload itself, and can be treated like
      # a plain payload hash via `#[]` and `#fetch`.
      Document = Support::MemoizableData.define(:raw_data, :payload, :decoded_cursor_factory) do
        # @implements Document
        extend Forwardable
        def_delegators :payload, :[], :fetch

        # Builds a `Document` from a raw datastore hit hash.
        def self.build(raw_data, decoded_cursor_factory: DecodedCursor::Factory::Null)
          source = raw_data.fetch("_source") { {} }

          # `_source` is skipped entirely for id-only queries, so the id is merged
          # into the payload here to keep it available in that case.
          payload = source.merge("id" => raw_data["_id"])

          new(raw_data: raw_data, payload: payload, decoded_cursor_factory: decoded_cursor_factory)
        end

        # Convenience constructor for when only the payload matters.
        def self.with_payload(payload)
          build({"_source" => payload})
        end

        def index_name
          raw_data["_index"]
        end

        def index_definition_name
          index_name.split(ROLLOVER_INDEX_INFIX_MARKER).first # : ::String
        end

        def id
          raw_data["_id"]
        end

        def sort
          raw_data["sort"]
        end

        def version
          payload["version"]
        end

        # Lazily decodes this document's pagination cursor from its sort values.
        def cursor
          @cursor ||= decoded_cursor_factory.build(raw_data.fetch("sort"))
        end

        # Path based on this API:
        # https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-get.html
        def datastore_path
          "/#{index_name}/_doc/#{id}".squeeze("/")
        end

        def to_s
          "#<#{self.class.name} #{datastore_path}>"
        end
        alias_method :inspect, :to_s
      end
    end
  end
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/error"
|
|
10
|
+
require "elastic_graph/graphql/decoded_cursor"
|
|
11
|
+
require "elastic_graph/graphql/datastore_response/document"
|
|
12
|
+
require "forwardable"
|
|
13
|
+
|
|
14
|
+
module ElasticGraph
  class GraphQL
    module DatastoreResponse
      # Wraps a search response returned by the datastore. Exposes the raw
      # response metadata plus the collection of `Document`s, and acts as an
      # enumerable of documents when the metadata is not needed.
      class SearchResponse < ::Data.define(:raw_data, :metadata, :documents, :total_document_count)
        include Enumerable
        extend Forwardable

        def_delegators :documents, :each, :to_a, :size, :empty?

        EXCLUDED_METADATA_KEYS = %w[hits aggregations].freeze

        # Builds a `SearchResponse` from the raw datastore response hash.
        def self.build(raw_data, decoded_cursor_factory: DecodedCursor::Factory::Null)
          hits = raw_data.fetch("hits")

          docs = hits.fetch("hits").map do |doc|
            Document.build(doc, decoded_cursor_factory: decoded_cursor_factory)
          end

          meta = raw_data.except(*EXCLUDED_METADATA_KEYS)
          meta["hits"] = hits.except("hits")

          # `hits.total` looks like `{"value" => 200, "relation" => "eq"}` (the relation
          # may also be "gte", making the value a lower bound rather than an exact count;
          # for now the value is used as-is either way).
          #
          # When the query set `track_total_hits` to `false`, `hits.total` is absent
          # entirely and `total_document_count` stays `nil`; accessing it later then
          # raises (see the override below).
          new(
            raw_data: raw_data,
            metadata: meta,
            documents: docs,
            total_document_count: meta.dig("hits", "total", "value")
          )
        end

        # Benign empty response that can be used in place of datastore response errors as needed.
        RAW_EMPTY = {"hits" => {"hits" => [], "total" => {"value" => 0}}}.freeze
        EMPTY = build(RAW_EMPTY)

        # Compact textual summary of the documents, abbreviated when there are 3 or more.
        def docs_description
          if documents.size < 3
            documents.inspect
          else
            "[#{documents.first}, ..., #{documents.last}]"
          end
        end

        # Raises when the count was never requested (see `.build` above for when that happens).
        def total_document_count
          super || raise(CountUnavailableError, "#{__method__} is unavailable; set `query.total_document_count_needed = true` to make it available")
        end

        def to_s
          "#<#{self.class.name} size=#{documents.size} #{docs_description}>"
        end
        alias_method :inspect, :to_s
      end
    end
  end
end
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/constants"
|
|
10
|
+
require "elastic_graph/error"
|
|
11
|
+
require "elastic_graph/graphql/datastore_response/search_response"
|
|
12
|
+
require "elastic_graph/graphql/query_details_tracker"
|
|
13
|
+
require "elastic_graph/support/threading"
|
|
14
|
+
|
|
15
|
+
module ElasticGraph
  class GraphQL
    # Responsible for routing datastore search requests to the appropriate cluster and index.
    class DatastoreSearchRouter
      # @param datastore_clients_by_name [Hash] datastore clients keyed by cluster name
      # @param logger [Object] logger used to report shard failures (must respond to `warn`)
      # @param monotonic_clock [Object] clock used for deadlines/durations (must respond to `now_in_ms`)
      # @param config [Object] GraphQL config object (retained for future use)
      def initialize(
        datastore_clients_by_name:,
        logger:,
        monotonic_clock:,
        config:
      )
        @datastore_clients_by_name = datastore_clients_by_name
        @logger = logger
        @monotonic_clock = monotonic_clock
        @config = config
      end

      # Sends the datastore a multi-search request based on the given queries.
      # Returns a hash of responses keyed by the query.
      #
      # Raises `SearchFailedError` if any individual search in the msearch batch failed,
      # and `RequestExceededDeadlineError` if the deadline has already passed.
      def msearch(queries, query_tracker: QueryDetailsTracker.empty)
        DatastoreQuery.perform(queries) do |header_body_tuples_by_query|
          # Here we set a client-side timeout, which causes the client to give up and close the connection.
          # According to [1]--"We have a new way to cancel search requests efficiently from the client
          # in 7.4 (by closing the underlying http channel)"--this should cause the server to stop
          # executing the search, and more importantly, gives us a strictly enforced timeout.
          #
          # In addition, the datastore supports a `timeout` option on a search body, but this timeout is
          # "best effort", applies to each shard (and not to the overall search request), and only interrupts
          # certain kinds of operations. [2] and [3] below have more info.
          #
          # Note that I have not been able to observe this `timeout` on a search body ever working
          # as documented. In our test suite, none of the slow queries I have tried (both via
          # slow aggregation query and a slow script) have ever aborted early when that option is
          # set. In Kibana in production, @bsorbo observed it aborting a `search` request early
          # (but not necessarily an `msearch` request...), but even then, the response said `timed_out: false`!
          # Other people ([4]) have reported observing timeout having no effect on msearch requests.
          #
          # So, the client-side timeout is the main one we want here, and for now we are not using the
          # datastore search `timeout` option at all.
          #
          # For more info, see:
          #
          # [1] https://github.com/elastic/elasticsearch/issues/47716
          # [2] https://github.com/elastic/elasticsearch/pull/51858
          # [3] https://www.elastic.co/guide/en/elasticsearch/guide/current/_search_options.html#_timeout_2
          # [4] https://discuss.elastic.co/t/timeouts-ignored-in-multisearch/23673

          # Unfortunately, the Elasticsearch/OpenSearch clients don't support setting a per-request client-side timeout,
          # even though Faraday (the underlying HTTP client) does. To work around this, we pass our desired
          # timeout in a specific header that the `SupportTimeouts` Faraday middleware will use.
          headers = {TIMEOUT_MS_HEADER => msearch_request_timeout_from(queries)}.compact

          # Partition the queries by target cluster so each cluster receives a single msearch call.
          queries_and_header_body_tuples_by_datastore_client = header_body_tuples_by_query.group_by do |(query, _header_body_tuples)|
            @datastore_clients_by_name.fetch(query.cluster_name)
          end

          datastore_query_started_at = @monotonic_clock.now_in_ms

          # Issue the per-cluster msearch calls in parallel; each thread returns the server-reported
          # `took` duration plus the queries zipped with their ordered responses (msearch guarantees
          # responses come back in request order).
          server_took_and_results = Support::Threading.parallel_map(queries_and_header_body_tuples_by_datastore_client) do |datastore_client, query_and_header_body_tuples_for_cluster|
            queries_for_cluster, header_body_tuples = query_and_header_body_tuples_for_cluster.transpose
            msearch_body = header_body_tuples.flatten(1)
            response = datastore_client.msearch(body: msearch_body, headers: headers)
            debug_query(query: msearch_body, response: response)
            ordered_responses = response.fetch("responses")
            [response["took"], queries_for_cluster.zip(ordered_responses)]
          end

          # Record client-side wall time, and the slowest cluster's server-side `took` time.
          query_tracker.record_datastore_query_duration_ms(
            client: @monotonic_clock.now_in_ms - datastore_query_started_at,
            server: server_took_and_results.map(&:first).compact.max
          )

          server_took_and_results.flat_map(&:last).to_h.tap do |responses_by_query|
            log_shard_failure_if_necessary(responses_by_query)
            raise_search_failed_if_any_failures(responses_by_query)
          end
        end
      end

      private

      # Prefix tests with `DEBUG_QUERY=1 ...` or run `export DEBUG_QUERY=1` to print the actual
      # Elasticsearch/OpenSearch query and response. This is particularly useful for adding new specs.
      def debug_query(**debug_messages)
        return unless ::ENV["DEBUG_QUERY"]

        formatted_messages = debug_messages.map do |key, msg|
          "#{key.to_s.upcase}:\n#{::JSON.pretty_generate(msg)}\n"
        end.join("\n")
        puts "\n#{formatted_messages}\n\n"
      end

      # Computes the client-side timeout (in ms) for an msearch request from the earliest
      # deadline among the given queries. Returns `nil` when no query has a deadline.
      #
      # Raises `RequestExceededDeadlineError` if the earliest deadline has already passed.
      def msearch_request_timeout_from(queries)
        return nil unless (min_query_deadline = queries.map(&:monotonic_clock_deadline).compact.min)

        (min_query_deadline - @monotonic_clock.now_in_ms).tap do |timeout|
          if timeout <= 0
            raise RequestExceededDeadlineError, "It is already #{timeout.abs} ms past the search deadline."
          end
        end
      end

      # Raises `SearchFailedError` if any response in the batch reported an error.
      def raise_search_failed_if_any_failures(responses_by_query)
        failures = responses_by_query.each_with_index.select { |(_query, response), _index| response["error"] }
        return if failures.empty?

        formatted_failures = failures.map do |(query, response), index|
          # Note: we intentionally omit the body of the request here, because it could contain PII
          # or other sensitive values that we don't want logged.
          <<~ERROR
            #{index + 1}) Header: #{::JSON.generate(query.to_datastore_msearch_header)}
            #{response.fetch("error").inspect}
            On cluster: #{query.cluster_name}
          ERROR
        end.join("\n\n")

        raise SearchFailedError, "Got #{failures.size} search failure(s):\n\n#{formatted_failures}"
      end

      # Examine successful query responses and log any shard failure they encounter
      def log_shard_failure_if_necessary(responses_by_query)
        shard_failures = responses_by_query.each_with_index.select do |(_query, response), _query_numeric_index|
          (200..299).cover?(response["status"]) && response["_shards"]["failed"] != 0
        end

        unless shard_failures.empty?
          formatted_failures = shard_failures.map do |(query, response), query_numeric_index|
            "Query #{query_numeric_index + 1} against index `#{query.search_index_expression}` on cluster `#{query.cluster_name}`: " +
              ::JSON.pretty_generate(response["_shards"])
          end.join("\n\n")

          formatted_shard_failures = "The following queries have failed shards: \n\n#{formatted_failures}"
          @logger.warn(formatted_shard_failures)
        end
      end
    end
  end
end
|