elasticgraph-graphql 0.19.1.1 → 0.19.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/lib/elastic_graph/graphql/aggregation/composite_grouping_adapter.rb +1 -1
- data/lib/elastic_graph/graphql/aggregation/computation.rb +1 -1
- data/lib/elastic_graph/graphql/aggregation/date_histogram_grouping.rb +1 -1
- data/lib/elastic_graph/graphql/aggregation/field_path_encoder.rb +1 -1
- data/lib/elastic_graph/graphql/aggregation/field_term_grouping.rb +1 -1
- data/lib/elastic_graph/graphql/aggregation/key.rb +1 -1
- data/lib/elastic_graph/graphql/aggregation/nested_sub_aggregation.rb +1 -1
- data/lib/elastic_graph/graphql/aggregation/non_composite_grouping_adapter.rb +2 -2
- data/lib/elastic_graph/graphql/aggregation/path_segment.rb +2 -2
- data/lib/elastic_graph/graphql/aggregation/query.rb +1 -1
- data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +33 -6
- data/lib/elastic_graph/graphql/aggregation/query_optimizer.rb +1 -1
- data/lib/elastic_graph/graphql/aggregation/resolvers/aggregated_values.rb +2 -6
- data/lib/elastic_graph/graphql/aggregation/resolvers/count_detail.rb +1 -1
- data/lib/elastic_graph/graphql/aggregation/resolvers/grouped_by.rb +26 -6
- data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +1 -1
- data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +5 -6
- data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +10 -8
- data/lib/elastic_graph/graphql/aggregation/script_term_grouping.rb +1 -1
- data/lib/elastic_graph/graphql/aggregation/term_grouping.rb +2 -2
- data/lib/elastic_graph/graphql/client.rb +1 -1
- data/lib/elastic_graph/graphql/config.rb +21 -6
- data/lib/elastic_graph/graphql/datastore_query/document_paginator.rb +10 -5
- data/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +2 -3
- data/lib/elastic_graph/graphql/datastore_query/paginator.rb +1 -1
- data/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +2 -3
- data/lib/elastic_graph/graphql/datastore_query.rb +66 -74
- data/lib/elastic_graph/graphql/datastore_response/document.rb +1 -1
- data/lib/elastic_graph/graphql/datastore_response/search_response.rb +83 -9
- data/lib/elastic_graph/graphql/datastore_search_router.rb +19 -4
- data/lib/elastic_graph/graphql/decoded_cursor.rb +1 -1
- data/lib/elastic_graph/graphql/filtering/boolean_query.rb +1 -1
- data/lib/elastic_graph/graphql/filtering/field_path.rb +1 -1
- data/lib/elastic_graph/graphql/filtering/filter_args_translator.rb +2 -2
- data/lib/elastic_graph/graphql/filtering/filter_interpreter.rb +10 -5
- data/lib/elastic_graph/graphql/filtering/filter_node_interpreter.rb +2 -2
- data/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +17 -2
- data/lib/elastic_graph/graphql/filtering/range_query.rb +1 -1
- data/lib/elastic_graph/graphql/http_endpoint.rb +2 -2
- data/lib/elastic_graph/graphql/monkey_patches/schema_field.rb +1 -1
- data/lib/elastic_graph/graphql/monkey_patches/schema_object.rb +1 -1
- data/lib/elastic_graph/graphql/query_adapter/filters.rb +1 -1
- data/lib/elastic_graph/graphql/query_adapter/pagination.rb +1 -1
- data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +18 -3
- data/lib/elastic_graph/graphql/query_adapter/sort.rb +1 -1
- data/lib/elastic_graph/graphql/query_details_tracker.rb +13 -4
- data/lib/elastic_graph/graphql/query_executor.rb +12 -5
- data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +6 -12
- data/lib/elastic_graph/graphql/resolvers/graphql_adapter_builder.rb +123 -0
- data/lib/elastic_graph/graphql/resolvers/list_records.rb +4 -4
- data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +57 -27
- data/lib/elastic_graph/graphql/resolvers/nested_relationships_source.rb +324 -0
- data/lib/elastic_graph/graphql/resolvers/object.rb +36 -0
- data/lib/elastic_graph/graphql/resolvers/query_adapter.rb +2 -2
- data/lib/elastic_graph/graphql/resolvers/query_source.rb +6 -3
- data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +1 -1
- data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +1 -1
- data/lib/elastic_graph/graphql/resolvers/relay_connection/page_info.rb +1 -1
- data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +1 -1
- data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +1 -1
- data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +2 -7
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/cursor.rb +1 -1
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date.rb +1 -1
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date_time.rb +1 -1
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/local_time.rb +1 -1
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/longs.rb +1 -1
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/no_op.rb +1 -1
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/time_zone.rb +1 -1
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/untyped.rb +1 -1
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones.rb +1 -1
- data/lib/elastic_graph/graphql/schema/arguments.rb +1 -1
- data/lib/elastic_graph/graphql/schema/enum_value.rb +1 -1
- data/lib/elastic_graph/graphql/schema/field.rb +12 -27
- data/lib/elastic_graph/graphql/schema/relation_join.rb +17 -9
- data/lib/elastic_graph/graphql/schema/type.rb +15 -7
- data/lib/elastic_graph/graphql/schema.rb +11 -31
- data/lib/elastic_graph/graphql.rb +38 -40
- data/script/dump_time_zones +1 -1
- metadata +25 -27
- data/lib/elastic_graph/graphql/resolvers/graphql_adapter.rb +0 -114
data/lib/elastic_graph/graphql/resolvers/nested_relationships_source.rb (new file)
@@ -0,0 +1,324 @@
+# Copyright 2024 - 2025 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "elastic_graph/graphql/resolvers/query_source"
+
+module ElasticGraph
+  class GraphQL
+    module Resolvers
+      # A GraphQL dataloader responsible for solving a thorny N+1 query problem related to our `NestedRelationships` resolver.
+      # The `QuerySource` dataloader implements a basic batching optimization: multiple datastore queries are batched up into
+      # a single `msearch` call against the datastore. This is significantly better than submitting a separate request per
+      # query, but is still not optimal--the datastore still must execute N different queries, which could cause significant load.
+      #
+      # A significantly improved optimization is possible in one particular situation from our `NestedRelationships` resolver.
+      # Here's an example of that situation:
+      #
+      # - `Part` documents are indexed in a `parts` index and `Manufacturer` documents are indexed in a `manufacturers` index.
+      # - `Part.manufacturer` is defined as: `t.relates_to_one "manufacturer", "Manufacturer", via: "manufacturer_id", dir: :out`.
+      # - We are processing a GraphQL query like this: `parts(first: 10) { nodes { manufacturer { name } } }`.
+      # - For each of the 10 parts, the `NestedRelationships` resolver has to resolve its related `Part.manufacturer`.
+      # - Without the optimization provided by this class, `NestedRelationships` would have to execute 10 different queries,
+      #   each of which is identical except for a different filter: `{id: {equal_to_any_of: [part.manufacturer_id]}}`.
+      # - Instead of executing this as 10 different queries, we can instead execute it as one query with this combined filter:
+      #   `{id: {equal_to_any_of: [part1.manufacturer_id, ..., part10.manufacturer_id]}}`
+      # - When we do this, we get a single response, but `NestedRelationships` expects a separate response for each one.
+      # - To satisfy that, we can split the single response into 10 different responses (one per filter).
+      #
+      # This optimization, when we can apply it, results in much less load on the datastore. In addition, it also helps to reduce
+      # the amount of overhead imposed by ElasticGraph. Profiling has shown that significant overhead is incurred when we repeatedly
+      # merge filters into a query (e.g. `query.merge_with(filters: [{id: {equal_to_any_of: [part.manufacturer_id]}}])` 10 times to
+      # produce 10 different queries). This optimization also avoids that overhead.
+      #
+      # Note: while the comments discuss the examples in terms of _parent objects_, in the implementation, we deal with id sets.
+      # A set of ids is contributed by each parent object.
+      class NestedRelationshipsSource < ::GraphQL::Dataloader::Source
+        # The optimization implemented by this class is not guaranteed to get all expected results in a single query for cases where
+        # the sorted search results are not well-distributed among each of the parent objects while we're resolving a `relates_to_many`
+        # field. (See the comments on `fetch_via_single_query_with_merged_filters` for a detailed description of when this occurs.)
+        #
+        # To deal with this situation, we retry the query for just the parent objects which may have incomplete results. However,
+        # each attempt is run in serial, and we want to put a strict upper bound on how many attempts are made. This constant defines
+        # the maximum number of optimized attempts we allow.
+        #
+        # When exceeded, we fall back to building and executing a separate query (via a single `msearch` request) for each parent object.
+        MAX_OPTIMIZED_ATTEMPTS = 3
+
+        # Reattempts are less likely to be needed when we execute the query with a larger `size`, because we are more likely to get back
+        # complete results for each parent object. This multiplier is applied to the requested size to achieve that.
+        #
+        # 4 was chosen somewhat arbitrarily, but should make reattempts needed much less often while avoiding asking for an unreasonably
+        # large number of results.
+        #
+        # Note: asking the datastore for a larger `size` is quite a bit more efficient than needing to execute more queries.
+        # Once the datastore has gone to the spot in its inverted index with the matching documents, asking for more results
+        # isn't particularly expensive, compared to needing to re-run an extra query.
+        EXTRA_SIZE_MULTIPLIER = 4
+
+        def initialize(query:, join:, context:, monotonic_clock:, mode:)
+          @query = query
+          @join = join
+          @filter_id_field_name_path = @join.filter_id_field_name.split(".")
+          @context = context
+          @schema_element_names = @context.fetch(:schema_element_names)
+          @logger = context.fetch(:logger)
+          @monotonic_clock = monotonic_clock
+          @mode = mode
+        end
+
+        def fetch(id_sets)
+          return fetch_original(id_sets) unless can_merge_filters?
+
+          case @mode
+          when :original
+            fetch_original(id_sets)
+          when :comparison
+            fetch_comparison(id_sets)
+          else
+            fetch_optimized(id_sets)
+          end
+        end
+
+        def self.execute_one(ids, query:, join:, context:, monotonic_clock:, mode:)
+          context.dataloader.with(self, query:, join:, context:, monotonic_clock:, mode:).load(ids)
+        end
+
+        private
+
+        def fetch_optimized(id_sets)
+          attempt_count = 0
+          duration_ms, responses_by_id_set = time_duration do
+            fetch_via_single_query_with_merged_filters(id_sets) { attempt_count += 1 }
+          end
+
+          if id_sets.size > 1
+            @logger.info({
+              "message_type" => "NestedRelationshipsMergedQueries",
+              "field" => @join.field.description,
+              "optimized_attempt_count" => [attempt_count, MAX_OPTIMIZED_ATTEMPTS].min,
+              "degraded_to_separate_queries" => (attempt_count > MAX_OPTIMIZED_ATTEMPTS),
+              "id_set_count" => id_sets.size,
+              "total_id_count" => id_sets.reduce(:union).size,
+              "duration_ms" => duration_ms
+            })
+          end
+
+          id_sets.map { |id_set| responses_by_id_set.fetch(id_set) }
+        end
+
+        def fetch_original(id_sets, requested_fields: [])
+          fetch_via_separate_queries(id_sets, requested_fields: requested_fields)
+        end
+
+        def fetch_comparison(id_sets)
+          # Note: we'd ideally run both versions of the logic in parallel, but our attempts to do that resulted in errors
+          # because of the fiber context in which dataloaders run.
+          original_duration_ms, original_results = time_duration do
+            # In the `fetch_optimized` implementation, we request this extra field. We don't need it for
+            # the original implementation (so `fetch_original` doesn't also request that field...) but for
+            # the purposes of comparison we need to request it so that the document payloads will have the
+            # same fields.
+            #
+            # Note: we don't add the requested field if we have only a single id set, in order to align with
+            # the short-circuiting logic in `fetch_via_single_query_with_merged_filters`. Otherwise, any time
+            # we have a single id set we always get reported differences which are not actually real!
+            requested_fields = (id_sets.size > 1) ? [@join.filter_id_field_name] : [] # : ::Array[::String]
+            fetch_original(id_sets, requested_fields: requested_fields)
+          end
+
+          optimized_duration_ms, optimized_results = time_duration do
+            fetch_optimized(id_sets)
+          end
+
+          # To see if we got the same results we only look at the documents, because we expect differences outside
+          # of the documents--for example, the `SearchResponse#metadata` will report different `took` values.
+          got_same_results = original_results.map(&:documents) == optimized_results.map(&:documents)
+          message = {
+            "message_type" => "NestedRelationshipsComparisonResults",
+            "field" => @join.field.description,
+            "original_duration_ms" => original_duration_ms,
+            "optimized_duration_ms" => optimized_duration_ms,
+            "optimized_faster" => (optimized_duration_ms < original_duration_ms),
+            "id_set_count" => id_sets.size,
+            "total_id_count" => id_sets.reduce(:union).size,
+            "got_same_results" => got_same_results
+          }
+
+          if got_same_results
+            @logger.info(message)
+          else
+            @logger.error(message.merge({
+              "original_documents" => loggable_results(original_results),
+              "optimized_documents" => loggable_results(optimized_results)
+            }))
+          end
+
+          original_results
+        end
+
+        # For "simple", document-based queries, we can safely merge filters. However, this cannot be done safely when the response
+        # cannot safely be "pulled apart" into the bits that apply to a particular set of ids for a parent object. Specifically:
+        #
+        # - If `total_document_count_needed` is true, we can't merge filters, because there's no way to get a separate count
+        #   for each parent object unless we execute separate queries (or combine them into a grouped aggregation count query,
+        #   but that requires a much more challenging transformation of the query and response).
+        # - If the query has any `aggregations`, we likewise can't merge the filters, because we have no way to "pull apart"
+        #   the aggregations response.
+        def can_merge_filters?
+          !@query.total_document_count_needed && @query.aggregations.empty?
+        end
+
+        # Executes a single query that contains a merged filter from the set union of the given `id_sets`.
+        # This merged query is (theoretically) capable of getting all the results we're looking for in a
+        # single query, which is much more efficient than building and performing a separate query for each
+        # id set. We can use `search_response.filter_results(id_set)` with each id set to get a
+        # response with the documents filtered down to just the ones that match the id set. (Essentially,
+        # this is the response we would have gotten if we had executed a separate query for the id set.)
+        #
+        # However, it is not guaranteed that we will get back complete results with this approach. Consider this example:
+        #
+        # - The datastore has 50 documents that match `id_set_1`, and 50 that match `id_set_2`.
+        # - The requested size of `@query` is 10 (meaning the client expects the first 10 results matching `id_set_1` and
+        #   the first 10 results matching `id_set_2`).
+        # - All 50 documents that match `id_set_1` sort before all 50 documents that match `id_set_2`.
+        # - When we execute our merged query filtering on the `union(id_set_1, id_set_2)` set, we ask for
+        #   20 documents (since we want 10 for `id_set_1` and 10 for `id_set_2`).
+        # - ...but we get back 20 documents for `id_set_1` and 0 documents for `id_set_2`.
+        #
+        # There is no way to guarantee that we get back the desired number of results for each id set unless we build and
+        # execute a separate query per id set, which is inefficient (in some situations, it causes one GraphQL query to
+        # execute hundreds of queries against the datastore!).
+        #
+        # To deal with this possibility, this method takes an iterative approach:
+        #
+        # - It builds and executes an initial optimized merged query, with a large `size_multiplier` which gives us a good bit of
+        #   "headroom" for this kind of situation. In the example above, if we requested 60 results from the datastore, we'd be
+        #   able to get the 10 results for both id sets we are looking for--50 for `id_set_1` and 10 for `id_set_2`.
+        # - It then inspects the response. If the datastore returned fewer results than we asked for, then there are no missing
+        #   results and we can trust that we got all the results we would have gotten if we had executed a separate query per
+        #   id set.
+        # - If we got back the number of results we asked for, then it's possible that we've run into this situation. We need
+        #   to inspect each filtered response produced for each id set to see if more results were expected.
+        # - Note: the fact that more results were expected doesn't necessarily mean there are more results. But we have no way
+        #   to tell for sure without querying the datastore again, so we err on the side of safety and treat this kind of response
+        #   as being incomplete.
+        # - For each id set that appears to be incomplete, we try again. But on the next attempt, we exclude the id sets
+        #   which got a complete set of results.
+        # - This may cause us to iterate a couple of times (which could make the single GraphQL query we are processing slower than
+        #   it would have been without this optimization, particularly if the datastore was not under any other load...) but we expect
+        #   it to make a big difference in the amount of load we put on the datastore, and that helps _all_ query traffic to be more
+        #   performant overall.
+        def fetch_via_single_query_with_merged_filters(id_sets, remaining_attempts: MAX_OPTIMIZED_ATTEMPTS)
+          yield # yield to signal an attempt
+
+          # Fall back to executing separate queries when one of the following occurs:
+          #
+          # - We lack multiple sets of ids.
+          # - We have exhausted our MAX_OPTIMIZED_ATTEMPTS.
+          if id_sets.size < 2 || remaining_attempts < 1
+            return id_sets.zip(fetch_via_separate_queries(id_sets)).to_h
+          end
+
+          # First, we build a combined query with filters that account for all ids we are filtering on from all `id_sets`.
+          filtered_query = @query.merge_with(
+            filters: filters_for(id_sets.reduce(:union)),
+            requested_fields: [@join.filter_id_field_name],
+            # We need to request a larger size than `@query` originally had. If the original size was `10` and we have
+            # 5 sets of ids, then, at a minimum, we need to request 50 results (10 results for each id set).
+            #
+            # In addition, we apply `EXTRA_SIZE_MULTIPLIER` to increase the size further and make it less likely that
+            # we get incomplete results and have to retry.
+            size_multiplier: id_sets.size * EXTRA_SIZE_MULTIPLIER
+          )
+
+          # Then we execute that combined query.
+          response = QuerySource.execute_one(filtered_query, for_context: @context)
+
+          # Next, we produce a separate response for each id set by filtering the results to the ones that match the ids in the set.
+          filtered_responses_by_id_set = id_sets.to_h do |id_set|
+            filtered_response = response.filter_results(@filter_id_field_name_path, id_set, @query.effective_size)
+            [id_set, filtered_response]
+          end

+          # If our merged/filtered query got back fewer results than we requested, then no matching results are missing,
+          # and we know that we've gotten complete results for all id sets.
+          if response.size < filtered_query.effective_size
+            return filtered_responses_by_id_set
+          end
+
+          # Since our `filtered_query` got back as many results as we asked for, there may be additional matching results that
+          # were not returned, and some id sets may have gotten fewer results than requested by the client.
+          # Here we determine which id sets that applies to.
+          id_sets_with_apparently_incomplete_results = filtered_responses_by_id_set.filter_map do |id_set, filtered_response|
+            id_set if filtered_response.size < @query.effective_size
+          end
+
+          # Then we try again, excluding the id sets which have already gotten complete results.
+          another_attempt_results = fetch_via_single_query_with_merged_filters(
+            id_sets_with_apparently_incomplete_results,
+            remaining_attempts: remaining_attempts - 1
+          ) { yield }
+
+          # Finally, we merge the results.
+          filtered_responses_by_id_set.merge(another_attempt_results)
+        end
+
+        def fetch_via_separate_queries(id_sets, requested_fields: [])
+          queries = id_sets.map do |ids|
+            @query.merge_with(filters: filters_for(ids), requested_fields: requested_fields)
+          end
+
+          results = QuerySource.execute_many(queries, for_context: @context)
+          queries.map { |q| results.fetch(q) }
+        end
+
+        def filters_for(ids)
+          join_filter = build_filter(@join.filter_id_field_name, nil, @join.foreign_key_nested_paths, ids.to_a)
+
+          if @join.additional_filter.empty?
+            [join_filter]
+          else
+            [join_filter, @join.additional_filter]
+          end
+        end
+
+        def build_filter(path, previous_nested_path, nested_paths, ids)
+          next_nested_path, *rest_nested_paths = nested_paths
+
+          if next_nested_path.nil?
+            path = path.delete_prefix("#{previous_nested_path}.") if previous_nested_path
+            {path => {@schema_element_names.equal_to_any_of => ids}}
+          else
+            sub_filter = build_filter(path, next_nested_path, rest_nested_paths, ids)
+            next_nested_path = next_nested_path.delete_prefix("#{previous_nested_path}.") if previous_nested_path
+            {next_nested_path => {@schema_element_names.any_satisfy => sub_filter}}
+          end
+        end
+
+        def time_duration
+          start_time = @monotonic_clock.now_in_ms
+          result = yield
+          stop_time = @monotonic_clock.now_in_ms
+          [stop_time - start_time, result]
+        end
+
+        # Converts the given list of responses into a format we can safely log when we are logging
+        # response differences. We include the `id` (to identify the document) and the `hash` (so
+        # we can tell if the payload of a document differed, without logging the contents of that
+        # payload).
+        def loggable_results(responses)
+          responses.map do |response|
+            response.documents.map do |doc|
+              "#{doc.id} (hash: #{doc.hash})"
+            end
+          end
+        end
+      end
+    end
+  end
+end
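The heart of this new class is: filter once on the union of all parent ids, then split the single response back into per-parent responses. Below is a minimal, self-contained sketch of that idea in plain Ruby; `run_merged_query` and `fake_index` are hypothetical stand-ins for the datastore call, not ElasticGraph's actual API.

    require "set"

    # Two parent objects contributed these id sets (e.g. two parts' manufacturer ids).
    id_sets = [Set["m1", "m2"], Set["m3"]]

    # Hypothetical stand-in for one datastore query filtered on the union of all ids.
    fake_index = [
      {"id" => "m1", "name" => "Acme"},
      {"id" => "m3", "name" => "Globex"}
    ]
    run_merged_query = ->(all_ids) { fake_index.select { |doc| all_ids.include?(doc["id"]) } }

    # One query over the union, instead of one query per parent.
    response = run_merged_query.call(id_sets.reduce(:union))

    # Split the single response back into one response per id set, mirroring what
    # `SearchResponse#filter_results` does for `NestedRelationshipsSource`.
    responses_by_id_set = id_sets.to_h do |id_set|
      [id_set, response.select { |doc| id_set.include?(doc["id"]) }]
    end

    responses_by_id_set.each do |ids, docs|
      puts "#{ids.to_a.inspect} => #{docs.map { |d| d["name"] }.inspect}"
    end

The real implementation layers retries on top of this split because, as the comments above explain, a sorted, size-limited merged query can starve some id sets of results.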
data/lib/elastic_graph/graphql/resolvers/object.rb (new file)
@@ -0,0 +1,36 @@
+# Copyright 2024 - 2025 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+module ElasticGraph
+  class GraphQL
+    module Resolvers
+      # Resolvers which just delegate to `object` for resolving.
+      module Object
+        class WithLookahead
+          def initialize(elasticgraph_graphql:, config:)
+            # Nothing to initialize, but needs to be defined to satisfy the resolver interface.
+          end
+
+          def resolve(field:, object:, args:, context:, lookahead:)
+            object.resolve(field: field, object: object, args: args, context: context, lookahead: lookahead)
+          end
+        end
+
+        class WithoutLookahead
+          def initialize(elasticgraph_graphql:, config:)
+            # Nothing to initialize, but needs to be defined to satisfy the resolver interface.
+          end
+
+          def resolve(field:, object:, args:, context:)
+            object.resolve(field: field, object: object, args: args, context: context)
+          end
+        end
+      end
+    end
+  end
+end
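These resolvers assume the resolved parent `object` knows how to resolve its own fields. A small sketch of that delegation pattern follows; `DelegatingResolver` and `GreetingValue` are made-up names that mirror the shape of `Object::WithoutLookahead`, not ElasticGraph's wiring.

    # Mirrors the shape of Object::WithoutLookahead above.
    class DelegatingResolver
      def resolve(field:, object:, args:, context:)
        # All the resolver does is forward the arguments to the object itself.
        object.resolve(field: field, object: object, args: args, context: context)
      end
    end

    # A hypothetical value object that resolves its own fields.
    class GreetingValue
      def resolve(field:, object:, args:, context:)
        "Hello, #{args.fetch(:name, "world")}!"
      end
    end

    puts DelegatingResolver.new.resolve(
      field: nil, object: GreetingValue.new, args: {name: "ElasticGraph"}, context: {}
    )
    # => Hello, ElasticGraph!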
data/lib/elastic_graph/graphql/resolvers/query_adapter.rb
@@ -1,4 +1,4 @@
-# Copyright 2024 Block, Inc.
+# Copyright 2024 - 2025 Block, Inc.
 #
 # Use of this source code is governed by an MIT-style
 # license that can be found in the LICENSE file or at
@@ -17,7 +17,7 @@ module ElasticGraph
         @datastore_query_adapters = datastore_query_adapters
       end
 
-      def build_query_from(field:, args:, lookahead:, context:
+      def build_query_from(field:, args:, lookahead:, context:)
        monotonic_clock_deadline = context[:monotonic_clock_deadline]
 
        # Building an `DatastoreQuery` is not cheap; we do a lot of work to:
data/lib/elastic_graph/graphql/resolvers/query_source.rb
@@ -1,4 +1,4 @@
-# Copyright 2024 Block, Inc.
+# Copyright 2024 - 2025 Block, Inc.
 #
 # Use of this source code is governed by an MIT-style
 # license that can be found in the LICENSE file or at
@@ -11,11 +11,14 @@ require "graphql"
 module ElasticGraph
   class GraphQL
     module Resolvers
-      # Provides a way to avoid N+1
-      # datastore queries into one `msearch`
+      # Provides a way to avoid N+1 request problems by batching up multiple
+      # datastore queries into one `msearch` request. In general, it is recommended
       # that you use this from any resolver that needs to query the datastore, to
       # maximize our ability to combine multiple datastore requests. Importantly,
       # this should never be instantiated directly; instead use the `execute` method from below.
+      #
+      # Note: `NestedRelationshipsSource` implements further optimizations on top of this, and should
+      # be used rather than this class when applicable.
       class QuerySource < ::GraphQL::Dataloader::Source
         def initialize(datastore_router, query_tracker)
           @datastore_router = datastore_router
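For reference, this is the general `GraphQL::Dataloader` batching pattern that `QuerySource` builds on: every key requested during one dataloader pass is handed to `fetch` as a single batch. A toy sketch using the graphql gem follows; `BatchingSource` and its "backend" are made up, and the standalone-run API is used here purely for illustration.

    require "graphql"

    # A toy source: all keys loaded in one dataloader pass arrive in a single
    # `fetch` call, analogous to QuerySource combining queries into one msearch.
    class BatchingSource < GraphQL::Dataloader::Source
      def fetch(keys)
        puts "one backend call for: #{keys.inspect}"
        keys.map { |key| "result for #{key}" }
      end
    end

    GraphQL::Dataloader.with_dataloading do |dataloader|
      request_a = dataloader.with(BatchingSource).request(:a)
      request_b = dataloader.with(BatchingSource).request(:b)

      # Both results come from a single `fetch([:a, :b])` call.
      puts request_a.load
      puts request_b.load
    end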
data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb
@@ -1,4 +1,4 @@
-# Copyright 2024 Block, Inc.
+# Copyright 2024 - 2025 Block, Inc.
 #
 # Use of this source code is governed by an MIT-style
 # license that can be found in the LICENSE file or at
@@ -27,16 +27,11 @@ module ElasticGraph
        end
      end
 
-     def resolve(field:, object:, context:, args
+     def resolve(field:, object:, context:, args:)
        method_name = canonical_name_for(field.name, "Field")
        public_send(method_name, **args_to_canonical_form(args))
      end
 
-     def can_resolve?(field:, object:)
-       method_name = schema_element_names.canonical_name_for(field.name)
-       !!method_name && respond_to?(method_name)
-     end
-
      private
 
      def args_to_canonical_form(args)
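The retained `resolve` method dispatches dynamically: the GraphQL field name is translated to a canonical Ruby method name, then invoked via `public_send`. A simplified, self-contained sketch of that pattern follows; the `CANONICAL_METHOD_NAMES` mapping is a hypothetical stand-in for `canonical_name_for`.

    class ExampleValue
      # Hypothetical stand-in for ElasticGraph's schema-element name mapping.
      CANONICAL_METHOD_NAMES = {"pageInfo" => :page_info, "totalEdgeCount" => :total_edge_count}

      def resolve(field_name:, args:)
        method_name = CANONICAL_METHOD_NAMES.fetch(field_name)
        public_send(method_name, **args)
      end

      def page_info
        "a PageInfo value"
      end

      def total_edge_count
        42
      end
    end

    puts ExampleValue.new.resolve(field_name: "totalEdgeCount", args: {})
    # => 42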
data/lib/elastic_graph/graphql/schema/field.rb
@@ -1,4 +1,4 @@
-# Copyright 2024 Block, Inc.
+# Copyright 2024 - 2025 Block, Inc.
 #
 # Use of this source code is governed by an MIT-style
 # license that can be found in the LICENSE file or at
@@ -18,37 +18,18 @@ module ElasticGraph
      # The type in which the field resides.
      attr_reader :parent_type
 
-     attr_reader :schema, :schema_element_names, :graphql_field, :name_in_index, :relation, :computation_detail
+     attr_reader :schema, :schema_element_names, :graphql_field, :name_in_index, :relation, :computation_detail, :resolver
 
-     def initialize(schema, parent_type, graphql_field, runtime_metadata)
+     def initialize(schema, parent_type, graphql_field, runtime_metadata, resolvers_needing_lookahead)
        @schema = schema
        @schema_element_names = schema.element_names
        @parent_type = parent_type
        @graphql_field = graphql_field
        @relation = runtime_metadata&.relation
        @computation_detail = runtime_metadata&.computation_detail
-       @
-
-
-       # to each field so that we have access to what the child selections are, as described here:
-       #
-       # https://graphql-ruby.org/queries/lookahead
-       #
-       # Currently we only need this when building an `DatastoreQuery` (which is not done for all
-       # fields) so a future optimization may only add this to fields where we actually need it.
-       # For now we add it to all fields because it's simplest and it's not clear if there is
-       # any performance benefit to not adding it when we do not use it.
-       #
-       # Note: input fields do not respond to `extras`, which is why we do it conditionally here.
-       #
-       # Note: on GraphQL gem introspection types (e.g. `__Field`), the fields respond to `:extras`,
-       # but that later causes a weird error (`ArgumentError: unknown keyword: :lookahead`)
-       # when those types are accessed in a Query. We don't really want to mutate the fields on the
-       # built-in types by adding `:lookahead` so it's best to avoid setting that extra on the built
-       # in types.
-       if @graphql_field.respond_to?(:extras) && !BUILT_IN_TYPE_NAMES.include?(parent_type.name.to_s)
-         @graphql_field.extras([:lookahead])
-       end
+       @resolver = runtime_metadata&.resolver
+       @name_in_index = runtime_metadata&.name_in_index || name
+       @graphql_field.extras([:lookahead]) if resolvers_needing_lookahead.include?(@resolver)
      end
 
      def type
@@ -56,7 +37,11 @@ module ElasticGraph
      end
 
      def name
-       @name ||= @graphql_field.name
+       @name ||= @graphql_field.name
+     end
+
+     def path_in_index
+       @path_in_index ||= name_in_index.split(".")
      end
 
      # Returns an object that knows how this field joins to its relation.
@@ -93,7 +78,7 @@ module ElasticGraph
        return [] if parent_type.relay_connection? || parent_type.relay_edge?
        return index_id_field_names_for_relation if relation_join
 
-       [name_in_index
+       [name_in_index]
      end
 
      # Indicates this field should be hidden in the GraphQL schema so as to not be queryable.