elasticgraph-graphql 0.18.0.0
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +3 -0
- data/elasticgraph-graphql.gemspec +23 -0
- data/lib/elastic_graph/graphql/aggregation/composite_grouping_adapter.rb +79 -0
- data/lib/elastic_graph/graphql/aggregation/computation.rb +39 -0
- data/lib/elastic_graph/graphql/aggregation/date_histogram_grouping.rb +83 -0
- data/lib/elastic_graph/graphql/aggregation/field_path_encoder.rb +47 -0
- data/lib/elastic_graph/graphql/aggregation/field_term_grouping.rb +26 -0
- data/lib/elastic_graph/graphql/aggregation/key.rb +87 -0
- data/lib/elastic_graph/graphql/aggregation/nested_sub_aggregation.rb +37 -0
- data/lib/elastic_graph/graphql/aggregation/non_composite_grouping_adapter.rb +129 -0
- data/lib/elastic_graph/graphql/aggregation/path_segment.rb +31 -0
- data/lib/elastic_graph/graphql/aggregation/query.rb +172 -0
- data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +345 -0
- data/lib/elastic_graph/graphql/aggregation/query_optimizer.rb +187 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/aggregated_values.rb +41 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/count_detail.rb +44 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/grouped_by.rb +30 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +64 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +83 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +82 -0
- data/lib/elastic_graph/graphql/aggregation/script_term_grouping.rb +32 -0
- data/lib/elastic_graph/graphql/aggregation/term_grouping.rb +118 -0
- data/lib/elastic_graph/graphql/client.rb +43 -0
- data/lib/elastic_graph/graphql/config.rb +81 -0
- data/lib/elastic_graph/graphql/datastore_query/document_paginator.rb +100 -0
- data/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +142 -0
- data/lib/elastic_graph/graphql/datastore_query/paginator.rb +199 -0
- data/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +239 -0
- data/lib/elastic_graph/graphql/datastore_query.rb +372 -0
- data/lib/elastic_graph/graphql/datastore_response/document.rb +78 -0
- data/lib/elastic_graph/graphql/datastore_response/search_response.rb +79 -0
- data/lib/elastic_graph/graphql/datastore_search_router.rb +151 -0
- data/lib/elastic_graph/graphql/decoded_cursor.rb +120 -0
- data/lib/elastic_graph/graphql/filtering/boolean_query.rb +45 -0
- data/lib/elastic_graph/graphql/filtering/field_path.rb +81 -0
- data/lib/elastic_graph/graphql/filtering/filter_args_translator.rb +58 -0
- data/lib/elastic_graph/graphql/filtering/filter_interpreter.rb +526 -0
- data/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +148 -0
- data/lib/elastic_graph/graphql/filtering/range_query.rb +56 -0
- data/lib/elastic_graph/graphql/http_endpoint.rb +229 -0
- data/lib/elastic_graph/graphql/monkey_patches/schema_field.rb +56 -0
- data/lib/elastic_graph/graphql/monkey_patches/schema_object.rb +48 -0
- data/lib/elastic_graph/graphql/query_adapter/filters.rb +161 -0
- data/lib/elastic_graph/graphql/query_adapter/pagination.rb +27 -0
- data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +124 -0
- data/lib/elastic_graph/graphql/query_adapter/sort.rb +32 -0
- data/lib/elastic_graph/graphql/query_details_tracker.rb +60 -0
- data/lib/elastic_graph/graphql/query_executor.rb +200 -0
- data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +49 -0
- data/lib/elastic_graph/graphql/resolvers/graphql_adapter.rb +114 -0
- data/lib/elastic_graph/graphql/resolvers/list_records.rb +29 -0
- data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +74 -0
- data/lib/elastic_graph/graphql/resolvers/query_adapter.rb +85 -0
- data/lib/elastic_graph/graphql/resolvers/query_source.rb +46 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +71 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +65 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/page_info.rb +82 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +40 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +42 -0
- data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +56 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/cursor.rb +35 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date.rb +64 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date_time.rb +60 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/local_time.rb +30 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/longs.rb +47 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/no_op.rb +24 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/time_zone.rb +44 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/untyped.rb +32 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones.rb +634 -0
- data/lib/elastic_graph/graphql/schema/arguments.rb +78 -0
- data/lib/elastic_graph/graphql/schema/enum_value.rb +30 -0
- data/lib/elastic_graph/graphql/schema/field.rb +147 -0
- data/lib/elastic_graph/graphql/schema/relation_join.rb +103 -0
- data/lib/elastic_graph/graphql/schema/type.rb +263 -0
- data/lib/elastic_graph/graphql/schema.rb +164 -0
- data/lib/elastic_graph/graphql.rb +253 -0
- data/script/dump_time_zones +81 -0
- data/script/dump_time_zones.java +17 -0
- metadata +503 -0
data/lib/elastic_graph/graphql/datastore_query.rb
@@ -0,0 +1,372 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "elastic_graph/error"
+require "elastic_graph/graphql/aggregation/query"
+require "elastic_graph/graphql/aggregation/query_optimizer"
+require "elastic_graph/graphql/decoded_cursor"
+require "elastic_graph/graphql/datastore_response/search_response"
+require "elastic_graph/graphql/filtering/filter_interpreter"
+require "elastic_graph/support/memoizable_data"
+
+module ElasticGraph
+  class GraphQL
+    # An immutable class that represents a datastore query. Since this represents
+    # a datastore query, and not a GraphQL query, all the data in it is modeled
+    # in datastore terms, not GraphQL terms. For example, any field names in a
+    # `Query` should be references to index fields, not GraphQL fields.
+    #
+    # Filters are modeled as a `Set` of filtering hashes. While we usually expect only
+    # a single `filter` hash, modeling it as a set makes it easy for us to support
+    # merging queries. The datastore knows how to apply multiple `must` clauses that
+    # apply to the same field, giving us the exact semantics we want in such a situation
+    # with minimal effort.
+    class DatastoreQuery < Support::MemoizableData.define(
+      :total_document_count_needed, :aggregations, :logger, :filter_interpreter, :routing_picker,
+      :index_expression_builder, :default_page_size, :search_index_definitions, :max_page_size,
+      :filters, :sort, :document_pagination, :requested_fields, :individual_docs_needed,
+      :monotonic_clock_deadline, :schema_element_names
+    ) {
+      def initialize(
+        filter: nil,
+        filters: nil,
+        sort: nil,
+        document_pagination: nil,
+        aggregations: nil,
+        requested_fields: nil,
+        individual_docs_needed: false,
+        total_document_count_needed: false,
+        monotonic_clock_deadline: nil,
+        **kwargs
+      )
+        # Deal with `:filter` vs `:filters` input and normalize it to a single `filters` set.
+        filters = ::Set.new(filters || [])
+        filters << filter if filter && !filter.empty?
+        filters.freeze
+
+        aggregations ||= {}
+        requested_fields ||= []
+
+        super(
+          filters: filters,
+          sort: sort || [],
+          document_pagination: document_pagination || {},
+          aggregations: aggregations,
+          requested_fields: requested_fields.to_set,
+          individual_docs_needed: individual_docs_needed || !requested_fields.empty?,
+          total_document_count_needed: total_document_count_needed || aggregations.values.any?(&:needs_total_doc_count?),
+          monotonic_clock_deadline: monotonic_clock_deadline,
+          **kwargs
+        )
+
+        if search_index_definitions.empty?
+          raise SearchFailedError, "Query is invalid, since it contains no `search_index_definitions`."
+        end
+      end
+    }
+      # Load these files after the `Query` class has been defined, to avoid
+      # `TypeError: superclass mismatch for class Query`
+      require "elastic_graph/graphql/datastore_query/document_paginator"
+      require "elastic_graph/graphql/datastore_query/index_expression_builder"
+      require "elastic_graph/graphql/datastore_query/paginator"
+      require "elastic_graph/graphql/datastore_query/routing_picker"
+
+      # Performs a list of queries by building a hash of datastore msearch header/body tuples (keyed
+      # by query), yielding them to the caller, and then post-processing the results. The caller is
+      # responsible for returning a hash of responses by query from its block.
+      #
+      # Note that some of the passed queries may not be yielded to the caller; when we can tell
+      # that a query does not have to be sent to the datastore we avoid yielding it from here.
+      # Therefore, the caller should not assume that all queries passed to this method will be
+      # yielded back.
+      #
+      # The return value is a hash of `DatastoreResponse::SearchResponse` objects by query.
+      #
+      # Note: this method uses `send` to work around Ruby visibility rules. We do not want
+      # `#decoded_cursor_factory` to be public, as we only need it here, but we cannot access
+      # it from a class method without using `send`.
+      def self.perform(queries)
+        empty_queries, present_queries = queries.partition(&:empty?)
+
+        responses_by_query = Aggregation::QueryOptimizer.optimize_queries(present_queries) do |optimized_queries|
+          header_body_tuples_by_query = optimized_queries.each_with_object({}) do |query, hash|
+            hash[query] = query.to_datastore_msearch_header_and_body
+          end
+
+          yield(header_body_tuples_by_query)
+        end
+
+        empty_responses = empty_queries.each_with_object({}) do |query, hash|
+          hash[query] = DatastoreResponse::SearchResponse::RAW_EMPTY
+        end
+
+        empty_responses.merge(responses_by_query).each_with_object({}) do |(query, response), hash|
+          hash[query] = DatastoreResponse::SearchResponse.build(response, decoded_cursor_factory: query.send(:decoded_cursor_factory))
+        end.tap do |responses_hash|
+          # Callers expect this `perform` method to provide an invariant: the returned hash MUST contain one entry
+          # for each of the `queries` passed in the args. In practice, violating this invariant primarily causes a
+          # problem when the caller uses the `GraphQL::Dataloader` (which happens for every GraphQL request in production...).
+          # However, our tests do not always run queries end-to-end, so this is an added check we want to do, so that
+          # anytime our logic here fails to include a query in the response in any test, we'll be notified of the
+          # problem.
+          expected_queries = queries.to_set
+          actual_queries = responses_hash.keys.to_set
+
+          if expected_queries != actual_queries
+            missing_queries = expected_queries - actual_queries
+            extra_queries = actual_queries - expected_queries
+
+            raise SearchFailedError, "The `responses_hash` does not have the expected set of queries as keys. " \
+              "This can cause problems for the `GraphQL::Dataloader` and suggests a bug in the logic that should be fixed.\n\n" \
+              "Missing queries (#{missing_queries.size}):\n#{missing_queries.map(&:inspect).join("\n")}.\n\n" \
+              "Extra queries (#{extra_queries.size}): #{extra_queries.map(&:inspect).join("\n")}"
+          end
+        end
+      end
+
+      # Merges the provided query, returning a new combined query object.
+      # Both query objects are left unchanged.
+      def merge(other_query)
+        if search_index_definitions != other_query.search_index_definitions
+          raise ElasticGraph::InvalidMergeError, "`search_index_definitions` conflict while merging between " \
+            "#{search_index_definitions} and #{other_query.search_index_definitions}"
+        end
+
+        with(
+          individual_docs_needed: individual_docs_needed || other_query.individual_docs_needed,
+          total_document_count_needed: total_document_count_needed || other_query.total_document_count_needed,
+          filters: filters + other_query.filters,
+          sort: merge_attribute(other_query, :sort),
+          requested_fields: requested_fields + other_query.requested_fields,
+          document_pagination: merge_attribute(other_query, :document_pagination),
+          monotonic_clock_deadline: [monotonic_clock_deadline, other_query.monotonic_clock_deadline].compact.min,
+          aggregations: aggregations.merge(other_query.aggregations)
+        )
+      end
+
+      # Convenience method for merging when you do not have access to a
+      # `DatastoreQuery::Builder`. Allows you to pass the query options you
+      # would like to merge. As with `#merge`, leaves the original query unchanged
+      # and returns a combined query object.
+      def merge_with(**query_options)
+        merge(with(**query_options))
+      end
+
+      # Pairs the multi-search headers and body into a tuple, as per the format required by the datastore:
+      # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-multi-search.html#search-multi-search-api-desc
+      def to_datastore_msearch_header_and_body
+        @to_datastore_msearch_header_and_body ||= [to_datastore_msearch_header, to_datastore_body]
+      end
+
+      # Returns an index_definition expression string to use for searches. This string can specify
+      # multiple indices, use wildcards, etc. For info about what is supported, see:
+      # https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-index.html
+      def search_index_expression
+        @search_index_expression ||= index_expression_builder.determine_search_index_expression(
+          filters,
+          search_index_definitions,
+          # When we have aggregations, we must require indices to search. When we search no indices, the datastore does not return
+          # the standard aggregations response structure, which causes problems.
+          require_indices: !aggregations_datastore_body.empty?
+        ).to_s
+      end
+
+      # Returns the name of the datastore cluster as a String where this query should be sent.
+      # Unless exactly 1 cluster name is found, this method raises a ConfigError.
+      def cluster_name
+        cluster_name = search_index_definitions.map(&:cluster_to_query).uniq
+        return cluster_name.first if cluster_name.size == 1
+        raise ConfigError, "Found different datastore clusters (#{cluster_name}) to query " \
+          "for query targeting indices: #{search_index_definitions}"
+      end
+
+      # Returns a list of unique field paths that should be used for shard routing during searches.
+      #
+      # If a search is filtering on one of these fields, we can optimize the search by routing
+      # it to only the shards containing documents for that routing value.
+      #
+      # Note that this returns a list due to our support for type unions. A unioned type
+      # can be composed of subtypes that use different shard routing; this will return
+      # the set union of them all.
+      def route_with_field_paths
+        search_index_definitions.map(&:route_with).uniq
+      end
+
+      # The shard routing values used for this search. Can be `nil` if the query will hit all shards.
+      # `[]` means that we are routing to no shards.
+      def shard_routing_values
+        return @shard_routing_values if defined?(@shard_routing_values)
+        routing_values = routing_picker.extract_eligible_routing_values(filters, route_with_field_paths)
+
+        @shard_routing_values ||=
+          if routing_values&.empty? && !aggregations_datastore_body.empty?
+            # If we return an empty array of routing values, no shards will get searched, which causes a problem for aggregations.
+            # When a query includes aggregations, there are normally aggregation structures on the response (even when there are no
+            # search hits to aggregate over!) but if there are no routing values, those aggregation structures will be missing from
+            # the response. It's complex to handle that in our downstream response handling code, so we prefer to force a "fallback"
+            # routing value here to ensure that at least one shard gets searched. Which shard gets searched doesn't matter; the search
+            # filter that led to an empty set of routing values will match on documents on any shard.
+            ["fallback_shard_routing_value"]
+          elsif contains_ignored_values_for_routing?(routing_values)
+            nil
+          else
+            routing_values&.sort # order doesn't matter, but sorting it makes it easier to assert on in our tests.
+          end
+      end
+
+      # Indicates if the query does not need any results from the datastore. As an optimization,
+      # we can reply with a default "empty" response for an empty query.
+      def empty?
+        # If we are searching no indices or routing to an empty set of shards, there is no need to query the datastore at all.
+        # This only happens when our filter processing has deduced that the query will match no results.
+        return true if search_index_expression.empty? || shard_routing_values&.empty?
+
+        datastore_body = to_datastore_body
+        datastore_body.fetch(:size) == 0 && !datastore_body.fetch(:track_total_hits) && aggregations_datastore_body.empty?
+      end
+
+      def inspect
+        description = to_datastore_msearch_header.merge(to_datastore_body).map do |key, value|
+          "#{key}=#{(key == :query) ? "<REDACTED>" : value.inspect}"
+        end.join(" ")
+
+        "#<#{self.class.name} #{description}>"
+      end
+
+      def to_datastore_msearch_header
+        @to_datastore_msearch_header ||= {index: search_index_expression, routing: shard_routing_values&.join(",")}.compact
+      end
+
+      # `DatastoreQuery` objects are used as keys in a hash. Computing `#hash` can be expensive (given how many fields
+      # a `DatastoreQuery` has) and it's safe to cache since `DatastoreQuery` instances are immutable, so we memoize it
+      # here. We've observed this making a very noticeable difference in our test suite runtime.
+      def hash
+        @hash ||= super
+      end
+
+      def document_paginator
+        @document_paginator ||= DocumentPaginator.new(
+          sort_clauses: sort_with_tiebreaker,
+          individual_docs_needed: individual_docs_needed,
+          total_document_count_needed: total_document_count_needed,
+          decoded_cursor_factory: decoded_cursor_factory,
+          schema_element_names: schema_element_names,
+          paginator: Paginator.new(
+            default_page_size: default_page_size,
+            max_page_size: max_page_size,
+            first: document_pagination[:first],
+            after: document_pagination[:after],
+            last: document_pagination[:last],
+            before: document_pagination[:before],
+            schema_element_names: schema_element_names
+          )
+        )
+      end
+
+      private
+
+      def merge_attribute(other_query, attribute)
+        value = public_send(attribute)
+        other_value = other_query.public_send(attribute)
+
+        if value.empty?
+          other_value
+        elsif other_value.empty?
+          value
+        elsif value == other_value
+          value
+        else
+          logger.warn("Tried to merge two queries that both define `#{attribute}`, using the value from the query being merged: #{value}, #{other_value}")
+          other_value
+        end
+      end
+
+      TIEBREAKER_SORT_CLAUSES = [{"id" => {"order" => "asc"}}].freeze
+
+      # We want to use `id` as a tiebreaker ONLY when `id` isn't explicitly specified as a sort field
+      def sort_with_tiebreaker
+        @sort_with_tiebreaker ||= remove_duplicate_sort_clauses(sort + TIEBREAKER_SORT_CLAUSES)
+      end
+
+      def remove_duplicate_sort_clauses(sort_clauses)
+        seen_fields = Set.new
+        sort_clauses.select do |clause|
+          clause.keys.all? { |key| seen_fields.add?(key) }
+        end
+      end
+
+      def decoded_cursor_factory
+        @decoded_cursor_factory ||= DecodedCursor::Factory.from_sort_list(sort_with_tiebreaker)
+      end
+
+      def contains_ignored_values_for_routing?(routing_values)
+        ignored_values_for_routing.intersect?(routing_values.to_set) if routing_values
+      end
+
+      def ignored_values_for_routing
+        @ignored_values_for_routing ||= search_index_definitions.flat_map { |i| i.ignored_values_for_routing.to_a }.to_set
+      end
+
+      def to_datastore_body
+        @to_datastore_body ||= aggregations_datastore_body
+          .merge(document_paginator.to_datastore_body)
+          .merge({query: filter_interpreter.build_query(filters)}.compact)
+          .merge({_source: source})
+      end
+
+      def aggregations_datastore_body
+        @aggregations_datastore_body ||= begin
+          aggs = aggregations
+            .values
+            .map { |agg| agg.build_agg_hash(filter_interpreter) }
+            .reduce({}, :merge)
+
+          aggs.empty? ? {} : {aggs: aggs}
+        end
+      end
+
+      # Make our query as efficient as possible by limiting what parts of `_source` we fetch.
+      # For an id-only query (or a query that has no requested fields) we don't need to fetch `_source`
+      # at all--which means the datastore can avoid decompressing the _source field. Otherwise,
+      # we only ask for the fields we need to return.
+      def source
+        requested_source_fields = requested_fields - ["id"]
+        return false if requested_source_fields.empty?
+        # Merging in requested_fields as _source:{includes:} based on Elasticsearch documentation:
+        # https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-source-field.html#include-exclude
+        {includes: requested_source_fields.to_a}
+      end
+
+      # Encapsulates dependencies of `Query`, giving us something we can expose off of `application`
+      # to build queries when desired.
+      class Builder < Support::MemoizableData.define(:runtime_metadata, :logger, :query_defaults)
+        def self.with(runtime_metadata:, logger:, **query_defaults)
+          new(runtime_metadata: runtime_metadata, logger: logger, query_defaults: query_defaults)
+        end
+
+        def routing_picker
+          @routing_picker ||= RoutingPicker.new(schema_names: runtime_metadata.schema_element_names)
+        end
+
+        def index_expression_builder
+          @index_expression_builder ||= IndexExpressionBuilder.new(schema_names: runtime_metadata.schema_element_names)
+        end
+
+        def new_query(**options)
+          DatastoreQuery.new(
+            routing_picker: routing_picker,
+            index_expression_builder: index_expression_builder,
+            logger: logger,
+            schema_element_names: runtime_metadata.schema_element_names,
+            **query_defaults.merge(options)
+          )
+        end
+      end
+    end
+  end
+end
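
To make the `perform` block contract above concrete, here is a minimal sketch of a caller. The `queries` array and the `raw_msearch_response_for` helper are hypothetical stand-ins; within this gem the real caller is `DatastoreSearchRouter#msearch` (shown further down in this diff), which fulfills the same contract: receive a hash of msearch header/body tuples keyed by query, and return a hash of raw datastore responses keyed by the same queries.

    # Hypothetical sketch of the `DatastoreQuery.perform` block contract.
    responses_by_query = ElasticGraph::GraphQL::DatastoreQuery.perform(queries) do |header_body_tuples_by_query|
      header_body_tuples_by_query.each_with_object({}) do |(query, (header, body)), hash|
        # `header` comes from `to_datastore_msearch_header`, e.g. `{index: "widgets*", routing: "abc"}`,
        # and `body` is the search body hash. The block must return raw responses keyed by query.
        hash[query] = raw_msearch_response_for(header, body) # hypothetical transport helper
      end
    end

    # Every query passed in gets an entry back, even ones `perform` never yielded
    # because it could tell they were empty; those get responses built from `RAW_EMPTY`.
    responses_by_query.each do |query, search_response|
      puts "#{query.search_index_expression}: #{search_response.size} docs"
    end
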
data/lib/elastic_graph/graphql/datastore_response/document.rb
@@ -0,0 +1,78 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "elastic_graph/graphql/decoded_cursor"
+require "elastic_graph/support/memoizable_data"
+require "forwardable"
+
+module ElasticGraph
+  class GraphQL
+    module DatastoreResponse
+      # Represents a document fetched from the datastore. Exposes both the raw metadata
+      # provided by the datastore and the doc payload itself. In addition, you can treat
+      # it just like a document hash using `#[]` or `#fetch`.
+      Document = Support::MemoizableData.define(:raw_data, :payload, :decoded_cursor_factory) do
+        # @implements Document
+        extend Forwardable
+        def_delegators :payload, :[], :fetch
+
+        def self.build(raw_data, decoded_cursor_factory: DecodedCursor::Factory::Null)
+          source = raw_data.fetch("_source") do
+            {} # : ::Hash[::String, untyped]
+          end
+
+          new(
+            raw_data: raw_data,
+            # Since we no longer fetch _source for id only queries, merge id into _source to take care of that case
+            payload: source.merge("id" => raw_data["_id"]),
+            decoded_cursor_factory: decoded_cursor_factory
+          )
+        end
+
+        def self.with_payload(payload)
+          build({"_source" => payload})
+        end
+
+        def index_name
+          raw_data["_index"]
+        end
+
+        def index_definition_name
+          index_name.split(ROLLOVER_INDEX_INFIX_MARKER).first # : ::String
+        end
+
+        def id
+          raw_data["_id"]
+        end
+
+        def sort
+          raw_data["sort"]
+        end
+
+        def version
+          payload["version"]
+        end
+
+        def cursor
+          @cursor ||= decoded_cursor_factory.build(raw_data.fetch("sort"))
+        end
+
+        def datastore_path
+          # Path based on this API:
+          # https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-get.html
+          "/#{index_name}/_doc/#{id}".squeeze("/")
+        end
+
+        def to_s
+          "#<#{self.class.name} #{datastore_path}>"
+        end
+        alias_method :inspect, :to_s
+      end
+    end
+  end
+end
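
As a quick illustration of the `Document` API above, here is a hedged sketch built from a hypothetical raw hit (all field values are invented for the example):

    raw_hit = {
      "_index" => "widgets",
      "_id" => "widget-123",
      "sort" => [1704067200000, "widget-123"],
      "_source" => {"name" => "Thingamajig"}
    }

    doc = ElasticGraph::GraphQL::DatastoreResponse::Document.build(raw_hit)
    doc["name"]        # => "Thingamajig" -- `#[]` is delegated to the payload
    doc.fetch("id")    # => "widget-123"  -- `build` merges `_id` into the payload
    doc.datastore_path # => "/widgets/_doc/widget-123"
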
data/lib/elastic_graph/graphql/datastore_response/search_response.rb
@@ -0,0 +1,79 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "elastic_graph/error"
+require "elastic_graph/graphql/decoded_cursor"
+require "elastic_graph/graphql/datastore_response/document"
+require "forwardable"
+
+module ElasticGraph
+  class GraphQL
+    module DatastoreResponse
+      # Represents a search response from the datastore. Exposes both the raw metadata
+      # provided by the datastore and the collection of documents. Can be treated as a
+      # collection of documents when you don't care about the metadata.
+      class SearchResponse < ::Data.define(:raw_data, :metadata, :documents, :total_document_count)
+        include Enumerable
+        extend Forwardable
+
+        def_delegators :documents, :each, :to_a, :size, :empty?
+
+        EXCLUDED_METADATA_KEYS = %w[hits aggregations].freeze
+
+        def self.build(raw_data, decoded_cursor_factory: DecodedCursor::Factory::Null)
+          documents = raw_data.fetch("hits").fetch("hits").map do |doc|
+            Document.build(doc, decoded_cursor_factory: decoded_cursor_factory)
+          end
+
+          metadata = raw_data.except(*EXCLUDED_METADATA_KEYS)
+          metadata["hits"] = raw_data.fetch("hits").except("hits")
+
+          # `hits.total` is exposed as an object like:
+          #
+          # {
+          #   "value" => 200,
+          #   "relation" => "eq", # or "gte"
+          # }
+          #
+          # This allows it to provide a lower bound on the number of hits, rather than having
+          # to give an exact count. We may want to handle the `gte` case differently at some
+          # point but for now we just use the value as-is.
+          #
+          # When the `track_total_hits` flag is set to `false`, the `hits.total` field will be completely absent.
+          # This means the client intentionally chose not to query the total doc count, and `total_document_count` will be nil.
+          # In this case, we will raise an exception if the client later tries to access `total_document_count`.
+          total_document_count = metadata.dig("hits", "total", "value")
+
+          new(
+            raw_data: raw_data,
+            metadata: metadata,
+            documents: documents,
+            total_document_count: total_document_count
+          )
+        end
+
+        # Benign empty response that can be used in place of datastore response errors as needed.
+        RAW_EMPTY = {"hits" => {"hits" => [], "total" => {"value" => 0}}}.freeze
+        EMPTY = build(RAW_EMPTY)
+
+        def docs_description
+          (documents.size < 3) ? documents.inspect : "[#{documents.first}, ..., #{documents.last}]"
+        end
+
+        def total_document_count
+          super || raise(CountUnavailableError, "#{__method__} is unavailable; set `query.total_document_count_needed = true` to make it available")
+        end
+
+        def to_s
+          "#<#{self.class.name} size=#{documents.size} #{docs_description}>"
+        end
+        alias_method :inspect, :to_s
+      end
+    end
+  end
+end
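
The `hits.total` handling above is easiest to see with a sketch. Assuming a hypothetical raw response from a query where `track_total_hits` was disabled (so `hits.total` is absent):

    raw = {"took" => 5, "hits" => {"hits" => [{"_id" => "w1", "_source" => {}}]}}

    response = ElasticGraph::GraphQL::DatastoreResponse::SearchResponse.build(raw)
    response.size                 # => 1 (delegated to `documents`)
    response.map(&:id)            # => ["w1"] -- it is an `Enumerable` of `Document`s
    response.total_document_count # raises `CountUnavailableError`, since `hits.total` was absent
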
data/lib/elastic_graph/graphql/datastore_search_router.rb
@@ -0,0 +1,151 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "elastic_graph/constants"
+require "elastic_graph/error"
+require "elastic_graph/graphql/datastore_response/search_response"
+require "elastic_graph/graphql/query_details_tracker"
+require "elastic_graph/support/threading"
+
+module ElasticGraph
+  class GraphQL
+    # Responsible for routing datastore search requests to the appropriate cluster and index.
+    class DatastoreSearchRouter
+      def initialize(
+        datastore_clients_by_name:,
+        logger:,
+        monotonic_clock:,
+        config:
+      )
+        @datastore_clients_by_name = datastore_clients_by_name
+        @logger = logger
+        @monotonic_clock = monotonic_clock
+        @config = config
+      end
+
+      # Sends the datastore a multi-search request based on the given queries.
+      # Returns a hash of responses keyed by the query.
+      def msearch(queries, query_tracker: QueryDetailsTracker.empty)
+        DatastoreQuery.perform(queries) do |header_body_tuples_by_query|
+          # Here we set a client-side timeout, which causes the client to give up and close the connection.
+          # According to [1]--"We have a new way to cancel search requests efficiently from the client
+          # in 7.4 (by closing the underlying http channel)"--this should cause the server to stop
+          # executing the search, and more importantly, gives us a strictly enforced timeout.
+          #
+          # In addition, the datastore supports a `timeout` option on a search body, but this timeout is
+          # "best effort", applies to each shard (and not to the overall search request), and only interrupts
+          # certain kinds of operations. [2] and [3] below have more info.
+          #
+          # Note that I have not been able to observe this `timeout` on a search body ever working
+          # as documented. In our test suite, none of the slow queries I have tried (both via a
+          # slow aggregation query and a slow script) have ever aborted early when that option is
+          # set. In Kibana in production, @bsorbo observed it aborting a `search` request early
+          # (but not necessarily an `msearch` request...), but even then, the response said `timed_out: false`!
+          # Other people ([4]) have reported observing the timeout having no effect on msearch requests.
+          #
+          # So, the client-side timeout is the main one we want here, and for now we are not using the
+          # datastore search `timeout` option at all.
+          #
+          # For more info, see:
+          #
+          # [1] https://github.com/elastic/elasticsearch/issues/47716
+          # [2] https://github.com/elastic/elasticsearch/pull/51858
+          # [3] https://www.elastic.co/guide/en/elasticsearch/guide/current/_search_options.html#_timeout_2
+          # [4] https://discuss.elastic.co/t/timeouts-ignored-in-multisearch/23673
+
+          # Unfortunately, the Elasticsearch/OpenSearch clients don't support setting a per-request client-side timeout,
+          # even though Faraday (the underlying HTTP client) does. To work around this, we pass our desired
+          # timeout in a specific header that the `SupportTimeouts` Faraday middleware will use.
+          headers = {TIMEOUT_MS_HEADER => msearch_request_timeout_from(queries)}.compact
+
+          queries_and_header_body_tuples_by_datastore_client = header_body_tuples_by_query.group_by do |(query, header_body_tuples)|
+            @datastore_clients_by_name.fetch(query.cluster_name)
+          end
+
+          datastore_query_started_at = @monotonic_clock.now_in_ms
+
+          server_took_and_results = Support::Threading.parallel_map(queries_and_header_body_tuples_by_datastore_client) do |datastore_client, query_and_header_body_tuples_for_cluster|
+            queries_for_cluster, header_body_tuples = query_and_header_body_tuples_for_cluster.transpose
+            msearch_body = header_body_tuples.flatten(1)
+            response = datastore_client.msearch(body: msearch_body, headers: headers)
+            debug_query(query: msearch_body, response: response)
+            ordered_responses = response.fetch("responses")
+            [response["took"], queries_for_cluster.zip(ordered_responses)]
+          end
+
+          query_tracker.record_datastore_query_duration_ms(
+            client: @monotonic_clock.now_in_ms - datastore_query_started_at,
+            server: server_took_and_results.map(&:first).compact.max
+          )
+
+          server_took_and_results.flat_map(&:last).to_h.tap do |responses_by_query|
+            log_shard_failure_if_necessary(responses_by_query)
+            raise_search_failed_if_any_failures(responses_by_query)
+          end
+        end
+      end
+
+      private
+
+      # Prefix tests with `DEBUG_QUERY=1 ...` or run `export DEBUG_QUERY=1` to print the actual
+      # Elasticsearch/OpenSearch query and response. This is particularly useful for adding new specs.
+      def debug_query(**debug_messages)
+        return unless ::ENV["DEBUG_QUERY"]
+
+        formatted_messages = debug_messages.map do |key, msg|
+          "#{key.to_s.upcase}:\n#{::JSON.pretty_generate(msg)}\n"
+        end.join("\n")
+        puts "\n#{formatted_messages}\n\n"
+      end
+
+      def msearch_request_timeout_from(queries)
+        return nil unless (min_query_deadline = queries.map(&:monotonic_clock_deadline).compact.min)
+
+        (min_query_deadline - @monotonic_clock.now_in_ms).tap do |timeout|
+          if timeout <= 0
+            raise RequestExceededDeadlineError, "It is already #{timeout.abs} ms past the search deadline."
+          end
+        end
+      end
+
+      def raise_search_failed_if_any_failures(responses_by_query)
+        failures = responses_by_query.each_with_index.select { |(_query, response), _index| response["error"] }
+        return if failures.empty?
+
+        formatted_failures = failures.map do |(query, response), index|
+          # Note: we intentionally omit the body of the request here, because it could contain PII
+          # or other sensitive values that we don't want logged.
+          <<~ERROR
+            #{index + 1}) Header: #{::JSON.generate(query.to_datastore_msearch_header)}
+            #{response.fetch("error").inspect}
+            On cluster: #{query.cluster_name}
+          ERROR
+        end.join("\n\n")
+
+        raise SearchFailedError, "Got #{failures.size} search failure(s):\n\n#{formatted_failures}"
+      end
+
+      # Examine successful query responses and log any shard failure they encounter.
+      def log_shard_failure_if_necessary(responses_by_query)
+        shard_failures = responses_by_query.each_with_index.select do |(query, response), query_numeric_index|
+          (200..299).cover?(response["status"]) && response["_shards"]["failed"] != 0
+        end
+
+        unless shard_failures.empty?
+          formatted_failures = shard_failures.map do |(query, response), query_numeric_index|
+            "Query #{query_numeric_index + 1} against index `#{query.search_index_expression}` on cluster `#{query.cluster_name}`: " +
+              JSON.pretty_generate(response["_shards"])
+          end.join("\n\n")
+
+          formatted_shard_failures = "The following queries have failed shards: \n\n#{formatted_failures}"
+          @logger.warn(formatted_shard_failures)
+        end
+      end
+    end
+  end
+end
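
The msearch body assembly in `msearch` above hinges on `transpose` and `flatten(1)`. A small sketch (the two queries and their header/body hashes are hypothetical) shows the shape of the data at each step:

    query_and_header_body_tuples_for_cluster = [
      [query_a, [{index: "widgets*"}, {size: 10, query: {match_all: {}}}]],
      [query_b, [{index: "parts*", routing: "p1"}, {size: 0, track_total_hits: true}]]
    ]

    queries_for_cluster, header_body_tuples = query_and_header_body_tuples_for_cluster.transpose
    msearch_body = header_body_tuples.flatten(1)
    # => [{index: "widgets*"}, {size: 10, query: {match_all: {}}},
    #     {index: "parts*", routing: "p1"}, {size: 0, track_total_hits: true}]
    # The client serializes each element as one line of the msearch request, and the
    # datastore returns `responses` in the same order, which is why
    # `queries_for_cluster.zip(ordered_responses)` can re-associate each response with its query.
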