elasticgraph-graphql 0.18.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +3 -0
  4. data/elasticgraph-graphql.gemspec +23 -0
  5. data/lib/elastic_graph/graphql/aggregation/composite_grouping_adapter.rb +79 -0
  6. data/lib/elastic_graph/graphql/aggregation/computation.rb +39 -0
  7. data/lib/elastic_graph/graphql/aggregation/date_histogram_grouping.rb +83 -0
  8. data/lib/elastic_graph/graphql/aggregation/field_path_encoder.rb +47 -0
  9. data/lib/elastic_graph/graphql/aggregation/field_term_grouping.rb +26 -0
  10. data/lib/elastic_graph/graphql/aggregation/key.rb +87 -0
  11. data/lib/elastic_graph/graphql/aggregation/nested_sub_aggregation.rb +37 -0
  12. data/lib/elastic_graph/graphql/aggregation/non_composite_grouping_adapter.rb +129 -0
  13. data/lib/elastic_graph/graphql/aggregation/path_segment.rb +31 -0
  14. data/lib/elastic_graph/graphql/aggregation/query.rb +172 -0
  15. data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +345 -0
  16. data/lib/elastic_graph/graphql/aggregation/query_optimizer.rb +187 -0
  17. data/lib/elastic_graph/graphql/aggregation/resolvers/aggregated_values.rb +41 -0
  18. data/lib/elastic_graph/graphql/aggregation/resolvers/count_detail.rb +44 -0
  19. data/lib/elastic_graph/graphql/aggregation/resolvers/grouped_by.rb +30 -0
  20. data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +64 -0
  21. data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +83 -0
  22. data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +82 -0
  23. data/lib/elastic_graph/graphql/aggregation/script_term_grouping.rb +32 -0
  24. data/lib/elastic_graph/graphql/aggregation/term_grouping.rb +118 -0
  25. data/lib/elastic_graph/graphql/client.rb +43 -0
  26. data/lib/elastic_graph/graphql/config.rb +81 -0
  27. data/lib/elastic_graph/graphql/datastore_query/document_paginator.rb +100 -0
  28. data/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +142 -0
  29. data/lib/elastic_graph/graphql/datastore_query/paginator.rb +199 -0
  30. data/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +239 -0
  31. data/lib/elastic_graph/graphql/datastore_query.rb +372 -0
  32. data/lib/elastic_graph/graphql/datastore_response/document.rb +78 -0
  33. data/lib/elastic_graph/graphql/datastore_response/search_response.rb +79 -0
  34. data/lib/elastic_graph/graphql/datastore_search_router.rb +151 -0
  35. data/lib/elastic_graph/graphql/decoded_cursor.rb +120 -0
  36. data/lib/elastic_graph/graphql/filtering/boolean_query.rb +45 -0
  37. data/lib/elastic_graph/graphql/filtering/field_path.rb +81 -0
  38. data/lib/elastic_graph/graphql/filtering/filter_args_translator.rb +58 -0
  39. data/lib/elastic_graph/graphql/filtering/filter_interpreter.rb +526 -0
  40. data/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +148 -0
  41. data/lib/elastic_graph/graphql/filtering/range_query.rb +56 -0
  42. data/lib/elastic_graph/graphql/http_endpoint.rb +229 -0
  43. data/lib/elastic_graph/graphql/monkey_patches/schema_field.rb +56 -0
  44. data/lib/elastic_graph/graphql/monkey_patches/schema_object.rb +48 -0
  45. data/lib/elastic_graph/graphql/query_adapter/filters.rb +161 -0
  46. data/lib/elastic_graph/graphql/query_adapter/pagination.rb +27 -0
  47. data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +124 -0
  48. data/lib/elastic_graph/graphql/query_adapter/sort.rb +32 -0
  49. data/lib/elastic_graph/graphql/query_details_tracker.rb +60 -0
  50. data/lib/elastic_graph/graphql/query_executor.rb +200 -0
  51. data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +49 -0
  52. data/lib/elastic_graph/graphql/resolvers/graphql_adapter.rb +114 -0
  53. data/lib/elastic_graph/graphql/resolvers/list_records.rb +29 -0
  54. data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +74 -0
  55. data/lib/elastic_graph/graphql/resolvers/query_adapter.rb +85 -0
  56. data/lib/elastic_graph/graphql/resolvers/query_source.rb +46 -0
  57. data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +71 -0
  58. data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +65 -0
  59. data/lib/elastic_graph/graphql/resolvers/relay_connection/page_info.rb +82 -0
  60. data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +40 -0
  61. data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +42 -0
  62. data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +56 -0
  63. data/lib/elastic_graph/graphql/scalar_coercion_adapters/cursor.rb +35 -0
  64. data/lib/elastic_graph/graphql/scalar_coercion_adapters/date.rb +64 -0
  65. data/lib/elastic_graph/graphql/scalar_coercion_adapters/date_time.rb +60 -0
  66. data/lib/elastic_graph/graphql/scalar_coercion_adapters/local_time.rb +30 -0
  67. data/lib/elastic_graph/graphql/scalar_coercion_adapters/longs.rb +47 -0
  68. data/lib/elastic_graph/graphql/scalar_coercion_adapters/no_op.rb +24 -0
  69. data/lib/elastic_graph/graphql/scalar_coercion_adapters/time_zone.rb +44 -0
  70. data/lib/elastic_graph/graphql/scalar_coercion_adapters/untyped.rb +32 -0
  71. data/lib/elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones.rb +634 -0
  72. data/lib/elastic_graph/graphql/schema/arguments.rb +78 -0
  73. data/lib/elastic_graph/graphql/schema/enum_value.rb +30 -0
  74. data/lib/elastic_graph/graphql/schema/field.rb +147 -0
  75. data/lib/elastic_graph/graphql/schema/relation_join.rb +103 -0
  76. data/lib/elastic_graph/graphql/schema/type.rb +263 -0
  77. data/lib/elastic_graph/graphql/schema.rb +164 -0
  78. data/lib/elastic_graph/graphql.rb +253 -0
  79. data/script/dump_time_zones +81 -0
  80. data/script/dump_time_zones.java +17 -0
  81. metadata +503 -0
@@ -0,0 +1,372 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/error"
10
+ require "elastic_graph/graphql/aggregation/query"
11
+ require "elastic_graph/graphql/aggregation/query_optimizer"
12
+ require "elastic_graph/graphql/decoded_cursor"
13
+ require "elastic_graph/graphql/datastore_response/search_response"
14
+ require "elastic_graph/graphql/filtering/filter_interpreter"
15
+ require "elastic_graph/support/memoizable_data"
16
+
17
+ module ElasticGraph
18
+ class GraphQL
19
+ # An immutable class that represents a datastore query. Since this represents
20
+ # a datastore query, and not a GraphQL query, all the data in it is modeled
21
+ # in datastore terms, not GraphQL terms. For example, any field names in a
22
+ # `Query` should be references to index fields, not GraphQL fields.
23
+ #
24
+ # Filters are modeled as a `Set` of filtering hashes. While we usually expect only
25
+ # a single `filter` hash, modeling it as a set makes it easy for us to support
26
+ # merging queries. The datastore knows how to apply multiple `must` clauses that
27
+ # apply to the same field, giving us the exact semantics we want in such a situation
28
+ # with minimal effort.
29
+ class DatastoreQuery < Support::MemoizableData.define(
30
+ :total_document_count_needed, :aggregations, :logger, :filter_interpreter, :routing_picker,
31
+ :index_expression_builder, :default_page_size, :search_index_definitions, :max_page_size,
32
+ :filters, :sort, :document_pagination, :requested_fields, :individual_docs_needed,
33
+ :monotonic_clock_deadline, :schema_element_names
34
+ ) {
35
+ def initialize(
36
+ filter: nil,
37
+ filters: nil,
38
+ sort: nil,
39
+ document_pagination: nil,
40
+ aggregations: nil,
41
+ requested_fields: nil,
42
+ individual_docs_needed: false,
43
+ total_document_count_needed: false,
44
+ monotonic_clock_deadline: nil,
45
+ **kwargs
46
+ )
47
+ # Deal with `:filter` vs `:filters` input and normalize it to a single `filters` set.
48
+ filters = ::Set.new(filters || [])
49
+ filters << filter if filter && !filter.empty?
50
+ filters.freeze
51
+
52
+ aggregations ||= {}
53
+ requested_fields ||= []
54
+
55
+ super(
56
+ filters: filters,
57
+ sort: sort || [],
58
+ document_pagination: document_pagination || {},
59
+ aggregations: aggregations,
60
+ requested_fields: requested_fields.to_set,
61
+ individual_docs_needed: individual_docs_needed || !requested_fields.empty?,
62
+ total_document_count_needed: total_document_count_needed || aggregations.values.any?(&:needs_total_doc_count?),
63
+ monotonic_clock_deadline: monotonic_clock_deadline,
64
+ **kwargs
65
+ )
66
+
67
+ if search_index_definitions.empty?
68
+ raise SearchFailedError, "Query is invalid, since it contains no `search_index_definitions`."
69
+ end
70
+ end
71
+ }
72
+ # Load these files after the `Query` class has been defined, to avoid
73
+ # `TypeError: superclass mismatch for class Query`
74
+ require "elastic_graph/graphql/datastore_query/document_paginator"
75
+ require "elastic_graph/graphql/datastore_query/index_expression_builder"
76
+ require "elastic_graph/graphql/datastore_query/paginator"
77
+ require "elastic_graph/graphql/datastore_query/routing_picker"
78
+
79
# Performs a list of queries by building a hash of datastore msearch header/body tuples (keyed
# by query), yielding them to the caller, and then post-processing the results. The caller is
# responsible for returning a hash of responses by query from its block.
#
# Note that some of the passed queries may not be yielded to the caller; when we can tell
# that a query does not have to be sent to the datastore we avoid yielding it from here.
# Therefore, the caller should not assume that all queries passed to this method will be
# yielded back.
#
# The return value is a hash of `DatastoreResponse::SearchResponse` objects by query.
#
# Note: this method uses `send` to work around ruby visibility rules. We do not want
# `#decoded_cursor_factory` to be public, as we only need it here, but we cannot access
# it from a class method without using `send`.
def self.perform(queries)
  empty_queries, present_queries = queries.partition(&:empty?)

  raw_responses_by_query = Aggregation::QueryOptimizer.optimize_queries(present_queries) do |optimized_queries|
    yield(optimized_queries.to_h { |query| [query, query.to_datastore_msearch_header_and_body] })
  end

  # Empty queries never hit the datastore; give them a canned empty response instead.
  empty_raw_responses = empty_queries.to_h { |query| [query, DatastoreResponse::SearchResponse::RAW_EMPTY] }

  responses_hash = empty_raw_responses.merge(raw_responses_by_query).to_h do |query, raw_response|
    [query, DatastoreResponse::SearchResponse.build(raw_response, decoded_cursor_factory: query.send(:decoded_cursor_factory))]
  end

  # Callers expect this `perform` method to provide an invariant: the returned hash MUST contain one entry
  # for each of the `queries` passed in the args. In practice, violating this invariant primarily causes a
  # problem when the caller uses the `GraphQL::Dataloader` (which happens for every GraphQL request in production...).
  # However, our tests do not always run queries end-to-end, so this is an added check we want to do, so that
  # anytime our logic here fails to include a query in the response in any test, we'll be notified of the
  # problem.
  expected_queries = queries.to_set
  actual_queries = responses_hash.keys.to_set

  if expected_queries != actual_queries
    missing_queries = expected_queries - actual_queries
    extra_queries = actual_queries - expected_queries

    raise SearchFailedError, "The `responses_hash` does not have the expected set of queries as keys. " \
      "This can cause problems for the `GraphQL::Dataloader` and suggests a bug in the logic that should be fixed.\n\n" \
      "Missing queries (#{missing_queries.size}):\n#{missing_queries.map(&:inspect).join("\n")}.\n\n" \
      "Extra queries (#{extra_queries.size}): #{extra_queries.map(&:inspect).join("\n")}"
  end

  responses_hash
end
131
+
132
# Merges the provided query, returning a new combined query object.
# Both query objects are left unchanged.
def merge(other_query)
  unless search_index_definitions == other_query.search_index_definitions
    raise ElasticGraph::InvalidMergeError, "`search_index_definitions` conflict while merging between " \
      "#{search_index_definitions} and #{other_query.search_index_definitions}"
  end

  with(
    individual_docs_needed: individual_docs_needed || other_query.individual_docs_needed,
    total_document_count_needed: total_document_count_needed || other_query.total_document_count_needed,
    filters: filters + other_query.filters,
    sort: merge_attribute(other_query, :sort),
    requested_fields: requested_fields + other_query.requested_fields,
    document_pagination: merge_attribute(other_query, :document_pagination),
    # The tighter (smaller) of the two deadlines wins.
    monotonic_clock_deadline: [monotonic_clock_deadline, other_query.monotonic_clock_deadline].compact.min,
    aggregations: aggregations.merge(other_query.aggregations)
  )
end

# Convenience method for merging when you do not have access to an
# `DatastoreQuery::Builder`. Allows you to pass the query options you
# would like to merge. As with `#merge`, leaves the original query unchanged
# and returns a combined query object.
def merge_with(**query_options)
  merge(with(**query_options))
end
159
+
160
# Pairs the multi-search headers and body into a tuple, as per the format required by the datastore:
# https://www.elastic.co/guide/en/elasticsearch/reference/current/search-multi-search.html#search-multi-search-api-desc
def to_datastore_msearch_header_and_body
  return @to_datastore_msearch_header_and_body if defined?(@to_datastore_msearch_header_and_body)
  @to_datastore_msearch_header_and_body = [to_datastore_msearch_header, to_datastore_body]
end
165
+
166
# Returns an index_definition expression string to use for searches. This string can specify
# multiple indices, use wildcards, etc. For info about what is supported, see:
# https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-index.html
def search_index_expression
  @search_index_expression ||= begin
    expression = index_expression_builder.determine_search_index_expression(
      filters,
      search_index_definitions,
      # When we have aggregations, we must require indices to search. When we search no indices, the datastore does not return
      # the standard aggregations response structure, which causes problems.
      require_indices: !aggregations_datastore_body.empty?
    )
    expression.to_s
  end
end
178
+
179
# Returns the name of the datastore cluster as a String where this query should be sent.
# Unless exactly 1 cluster name is found, this method raises a ConfigError.
def cluster_name
  cluster_names = search_index_definitions.map(&:cluster_to_query).uniq
  return cluster_names.first if cluster_names.size == 1

  # Zero or multiple distinct clusters means we cannot pick a single one to send the query to.
  raise ConfigError, "Found different datastore clusters (#{cluster_names}) to query " \
    "for query targeting indices: #{search_index_definitions}"
end
187
+
188
# Returns a list of unique field paths that should be used for shard routing during searches.
#
# If a search is filtering on one of these fields, we can optimize the search by routing
# it to only the shards containing documents for that routing value.
#
# Note that this returns a list due to our support for type unions. A unioned type
# can be composed of subtypes that use different shard routing; this will return
# the set union of them all.
def route_with_field_paths
  search_index_definitions.map { |index_def| index_def.route_with }.uniq
end
199
+
200
# The shard routing values used for this search. Can be `nil` if the query will hit all shards.
# `[]` means that we are routing to no shards.
def shard_routing_values
  return @shard_routing_values if defined?(@shard_routing_values)
  routing_values = routing_picker.extract_eligible_routing_values(filters, route_with_field_paths)

  # Plain `=` (rather than `||=`) is sufficient here: the `defined?` guard above handles memoization,
  # including the case where the computed value is `nil`.
  @shard_routing_values =
    if routing_values&.empty? && !aggregations_datastore_body.empty?
      # If we return an empty array of routing values, no shards will get searched, which causes a problem for aggregations.
      # When a query includes aggregations, there are normally aggregation structures on the response (even when there are no
      # search hits to aggregate over!) but if there are no routing values, those aggregation structures will be missing from
      # the response. It's complex to handle that in our downstream response handling code, so we prefer to force a "fallback"
      # routing value here to ensure that at least one shard gets searched. Which shard gets searched doesn't matter; the search
      # filter that led to an empty set of routing values will match on documents on any shard.
      ["fallback_shard_routing_value"]
    elsif contains_ignored_values_for_routing?(routing_values)
      nil
    else
      routing_values&.sort # order doesn't matter, but sorting it makes it easier to assert on in our tests.
    end
end
221
+
222
# Indicates if the query does not need any results from the datastore. As an optimization,
# we can reply with a default "empty" response for an empty query.
def empty?
  # If we are searching no indices or routing to an empty set of shards, there is no need to query the datastore at all.
  # This only happens when our filter processing has deduced that the query will match no results.
  return true if search_index_expression.empty?
  return true if shard_routing_values&.empty?

  body = to_datastore_body
  body.fetch(:size) == 0 && !body.fetch(:track_total_hits) && aggregations_datastore_body.empty?
end
232
+
233
+ def inspect
234
+ description = to_datastore_msearch_header.merge(to_datastore_body).map do |key, value|
235
+ "#{key}=#{(key == :query) ? "<REDACTED>" : value.inspect}"
236
+ end.join(" ")
237
+
238
+ "#<#{self.class.name} #{description}>"
239
+ end
240
+
241
+ def to_datastore_msearch_header
242
+ @to_datastore_msearch_header ||= {index: search_index_expression, routing: shard_routing_values&.join(",")}.compact
243
+ end
244
+
245
# `DatastoreQuery` objects are used as keys in a hash. Computing `#hash` can be expensive (given how many fields
# an `DatastoreQuery` has) and it's safe to cache since `DatastoreQuery` instances are immutable, so we memoize it
# here. We've observed this making a very noticeable difference in our test suite runtime.
def hash
  return @hash if @hash
  @hash = super
end
251
+
252
# Builds (and memoizes) the `DocumentPaginator` used to translate this query's pagination
# arguments into datastore request body parameters.
def document_paginator
  @document_paginator ||= begin
    paginator = Paginator.new(
      default_page_size: default_page_size,
      max_page_size: max_page_size,
      first: document_pagination[:first],
      after: document_pagination[:after],
      last: document_pagination[:last],
      before: document_pagination[:before],
      schema_element_names: schema_element_names
    )

    DocumentPaginator.new(
      sort_clauses: sort_with_tiebreaker,
      individual_docs_needed: individual_docs_needed,
      total_document_count_needed: total_document_count_needed,
      decoded_cursor_factory: decoded_cursor_factory,
      schema_element_names: schema_element_names,
      paginator: paginator
    )
  end
end
270
+
271
private

# Merges a single attribute from `other_query` into this query's value for it.
# An empty value on either side defers to the other; when both sides define
# different non-empty values, the merged-in query wins (with a warning logged).
def merge_attribute(other_query, attribute)
  value = public_send(attribute)
  other_value = other_query.public_send(attribute)

  return other_value if value.empty?
  return value if other_value.empty? || value == other_value

  logger.warn("Tried to merge two queries that both define `#{attribute}`, using the value from the query being merged: #{value}, #{other_value}")
  other_value
end
288
+
289
TIEBREAKER_SORT_CLAUSES = [{"id" => {"order" => "asc"}}].freeze

# We want to use `id` as a tiebreaker ONLY when `id` isn't explicitly specified as a sort field
def sort_with_tiebreaker
  @sort_with_tiebreaker ||= remove_duplicate_sort_clauses(sort + TIEBREAKER_SORT_CLAUSES)
end

# Keeps only the first clause mentioning each sort field, preserving order.
def remove_duplicate_sort_clauses(sort_clauses)
  seen = ::Set.new
  sort_clauses.select do |clause|
    clause.keys.all? { |field| seen.add?(field) }
  end
end
302
+
303
# Factory for decoding/encoding pagination cursors, derived from the effective sort list.
def decoded_cursor_factory
  @decoded_cursor_factory ||= DecodedCursor::Factory.from_sort_list(sort_with_tiebreaker)
end

# Returns a truthy value when any of the given routing values is one we are configured
# to ignore for routing purposes; returns `nil` when `routing_values` is `nil`.
def contains_ignored_values_for_routing?(routing_values)
  routing_values && ignored_values_for_routing.intersect?(routing_values.to_set)
end

# The set union of ignored routing values across all search index definitions.
def ignored_values_for_routing
  @ignored_values_for_routing ||= search_index_definitions.flat_map { |index_def| index_def.ignored_values_for_routing.to_a }.to_set
end
314
+
315
# Assembles (and memoizes) the full msearch body: aggregations, pagination parameters,
# the (optional) filter query, and the `_source` directive.
def to_datastore_body
  @to_datastore_body ||= begin
    body = aggregations_datastore_body.merge(document_paginator.to_datastore_body)
    query = filter_interpreter.build_query(filters)
    body = body.merge({query: query}.compact)
    body.merge(_source: source)
  end
end

# The `aggs` portion of the datastore body, or `{}` when this query has no aggregations.
def aggregations_datastore_body
  @aggregations_datastore_body ||= begin
    merged_aggs = aggregations.values
      .map { |aggregation| aggregation.build_agg_hash(filter_interpreter) }
      .reduce({}, :merge)

    merged_aggs.empty? ? {} : {aggs: merged_aggs}
  end
end

# Make our query as efficient as possible by limiting what parts of `_source` we fetch.
# For an id-only query (or a query that has no requested fields) we don't need to fetch `_source`
# at all--which means the datastore can avoid decompressing the _source field. Otherwise,
# we only ask for the fields we need to return.
def source
  # Merging in requested_fields as _source:{includes:} based on Elasticsearch documentation:
  # https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-source-field.html#include-exclude
  fields_from_source = requested_fields - ["id"]
  return false if fields_from_source.empty?
  {includes: fields_from_source.to_a}
end
344
+
345
# Encapsulates dependencies of `Query`, giving us something we can expose off of `application`
# to build queries when desired.
class Builder < Support::MemoizableData.define(:runtime_metadata, :logger, :query_defaults)
  # Convenience constructor that gathers all extra keyword args into `query_defaults`.
  def self.with(runtime_metadata:, logger:, **query_defaults)
    new(runtime_metadata: runtime_metadata, logger: logger, query_defaults: query_defaults)
  end

  def routing_picker
    @routing_picker ||= RoutingPicker.new(schema_names: runtime_metadata.schema_element_names)
  end

  def index_expression_builder
    @index_expression_builder ||= IndexExpressionBuilder.new(schema_names: runtime_metadata.schema_element_names)
  end

  # Builds a new `DatastoreQuery`, layering the builder's dependencies under the
  # configured `query_defaults` and then the caller-provided `options` (which win).
  def new_query(**options)
    dependency_args = {
      routing_picker: routing_picker,
      index_expression_builder: index_expression_builder,
      logger: logger,
      schema_element_names: runtime_metadata.schema_element_names
    }

    DatastoreQuery.new(**dependency_args, **query_defaults.merge(options))
  end
end
370
+ end
371
+ end
372
+ end
@@ -0,0 +1,78 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/graphql/decoded_cursor"
10
+ require "elastic_graph/support/memoizable_data"
11
+ require "forwardable"
12
+
13
+ module ElasticGraph
14
+ class GraphQL
15
+ module DatastoreResponse
16
# Represents a document fetched from the datastore. Exposes both the raw metadata
# provided by the datastore and the doc payload itself. In addition, you can treat
# it just like a document hash using `#[]` or `#fetch`.
Document = Support::MemoizableData.define(:raw_data, :payload, :decoded_cursor_factory) do
  # @implements Document
  extend Forwardable
  def_delegators :payload, :[], :fetch

  # Builds a `Document` from a raw datastore hit.
  def self.build(raw_data, decoded_cursor_factory: DecodedCursor::Factory::Null)
    source = raw_data.fetch("_source") do
      {} # : ::Hash[::String, untyped]
    end

    # Since we no longer fetch _source for id only queries, merge id into _source to take care of that case
    payload = source.merge("id" => raw_data["_id"])

    new(raw_data: raw_data, payload: payload, decoded_cursor_factory: decoded_cursor_factory)
  end

  # Builds a `Document` directly from a payload hash (wrapped as `_source`).
  def self.with_payload(payload)
    build({"_source" => payload})
  end

  def index_name
    raw_data["_index"]
  end

  # The index definition name: the index name up to (but excluding) any rollover infix marker.
  def index_definition_name
    index_name.split(ROLLOVER_INDEX_INFIX_MARKER).first # : ::String
  end

  def id
    raw_data["_id"]
  end

  def sort
    raw_data["sort"]
  end

  def version
    payload["version"]
  end

  def cursor
    @cursor ||= decoded_cursor_factory.build(raw_data.fetch("sort"))
  end

  def datastore_path
    # Path based on this API:
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-get.html
    "/#{index_name}/_doc/#{id}".squeeze("/")
  end

  def to_s
    "#<#{self.class.name} #{datastore_path}>"
  end
  alias_method :inspect, :to_s
end
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,79 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/error"
10
+ require "elastic_graph/graphql/decoded_cursor"
11
+ require "elastic_graph/graphql/datastore_response/document"
12
+ require "forwardable"
13
+
14
+ module ElasticGraph
15
+ class GraphQL
16
+ module DatastoreResponse
17
# Represents a search response from the datastore. Exposes both the raw metadata
# provided by the datastore and the collection of documents. Can be treated as a
# collection of documents when you don't care about the metadata.
class SearchResponse < ::Data.define(:raw_data, :metadata, :documents, :total_document_count)
  include Enumerable
  extend Forwardable

  def_delegators :documents, :each, :to_a, :size, :empty?

  EXCLUDED_METADATA_KEYS = %w[hits aggregations].freeze

  def self.build(raw_data, decoded_cursor_factory: DecodedCursor::Factory::Null)
    hits = raw_data.fetch("hits")

    documents = hits.fetch("hits").map do |doc|
      Document.build(doc, decoded_cursor_factory: decoded_cursor_factory)
    end

    metadata = raw_data.except(*EXCLUDED_METADATA_KEYS)
    metadata["hits"] = hits.except("hits")

    # `hits.total` is exposed as an object like:
    #
    # {
    #   "value" => 200,
    #   "relation" => "eq", # or "gte"
    # }
    #
    # This allows it to provide a lower bound on the number of hits, rather than having
    # to give an exact count. We may want to handle the `gte` case differently at some
    # point but for now we just use the value as-is.
    #
    # In the case where `track_total_hits` flag is set to `false`, `hits.total` field will be completely absent.
    # This means the client intentionally chose not to query the total doc count, and `total_document_count` will be nil.
    # In this case, we will throw an exception if the client later tries to access `total_document_count`.
    total_document_count = metadata.dig("hits", "total", "value")

    new(
      raw_data: raw_data,
      metadata: metadata,
      documents: documents,
      total_document_count: total_document_count
    )
  end

  # Benign empty response that can be used in place of datastore response errors as needed.
  RAW_EMPTY = {"hits" => {"hits" => [], "total" => {"value" => 0}}}.freeze
  EMPTY = build(RAW_EMPTY)

  # Short human-readable summary of the documents (abbreviated when there are 3 or more).
  def docs_description
    return documents.inspect if documents.size < 3
    "[#{documents.first}, ..., #{documents.last}]"
  end

  def total_document_count
    super || raise(CountUnavailableError, "#{__method__} is unavailable; set `query.total_document_count_needed = true` to make it available")
  end

  def to_s
    "#<#{self.class.name} size=#{documents.size} #{docs_description}>"
  end
  alias_method :inspect, :to_s
end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,151 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/constants"
10
+ require "elastic_graph/error"
11
+ require "elastic_graph/graphql/datastore_response/search_response"
12
+ require "elastic_graph/graphql/query_details_tracker"
13
+ require "elastic_graph/support/threading"
14
+
15
+ module ElasticGraph
16
+ class GraphQL
17
+ # Responsible for routing datastore search requests to the appropriate cluster and index.
18
+ class DatastoreSearchRouter
19
# Stores the collaborators used to route msearch requests:
# - `datastore_clients_by_name`: datastore clients keyed by cluster name (looked up via `query.cluster_name`)
# - `logger`: receives warnings (e.g. shard failures)
# - `monotonic_clock`: provides `now_in_ms` for deadlines and duration tracking
# - `config`: GraphQL config (stored for later use — not read in this class's visible code)
def initialize(datastore_clients_by_name:, logger:, monotonic_clock:, config:)
  @datastore_clients_by_name = datastore_clients_by_name
  @logger = logger
  @monotonic_clock = monotonic_clock
  @config = config
end
30
+
31
# Sends the datastore a multi-search request based on the given queries.
# Returns a hash of responses keyed by the query. Queries are grouped by
# datastore cluster and the per-cluster msearch requests run in parallel.
def msearch(queries, query_tracker: QueryDetailsTracker.empty)
  DatastoreQuery.perform(queries) do |header_body_tuples_by_query|
    # Here we set a client-side timeout, which causes the client to give up and close the connection.
    # According to [1]--"We have a new way to cancel search requests efficiently from the client
    # in 7.4 (by closing the underlying http channel)"--this should cause the server to stop
    # executing the search, and more importantly, gives us a strictly enforced timeout.
    #
    # In addition, the datastore supports a `timeout` option on a search body, but this timeout is
    # "best effort", applies to each shard (and not to the overall search request), and only interrupts
    # certain kinds of operations. [2] and [3] below have more info.
    #
    # Note that I have not been able to observe this `timeout` on a search body ever working
    # as documented. In our test suite, none of the slow queries I have tried (both via
    # slow aggregation query and a slow script) have ever aborted early when that option is
    # set. In Kibana in production, @bsorbo observed it aborting a `search` request early
    # (but not necessarily an `msearch` request...), but even then, the response said `timed_out: false`!
    # Other people ([4]) have reported observing timeout having no effect on msearch requests.
    #
    # So, the client-side timeout is the main one we want here, and for now we are not using the
    # datastore search `timeout` option at all.
    #
    # For more info, see:
    #
    # [1] https://github.com/elastic/elasticsearch/issues/47716
    # [2] https://github.com/elastic/elasticsearch/pull/51858
    # [3] https://www.elastic.co/guide/en/elasticsearch/guide/current/_search_options.html#_timeout_2
    # [4] https://discuss.elastic.co/t/timeouts-ignored-in-multisearch/23673

    # Unfortunately, the Elasticsearch/OpenSearch clients don't support setting a per-request client-side timeout,
    # even though Faraday (the underlying HTTP client) does. To work around this, we pass our desired
    # timeout in a specific header that the `SupportTimeouts` Faraday middleware will use.
    headers = {TIMEOUT_MS_HEADER => msearch_request_timeout_from(queries)}.compact

    # Group by datastore client so each cluster gets a single msearch request.
    # (The header/body tuples aren't needed for grouping, hence the `_` prefix.)
    queries_and_header_body_tuples_by_datastore_client = header_body_tuples_by_query.group_by do |(query, _header_body_tuples)|
      @datastore_clients_by_name.fetch(query.cluster_name)
    end

    datastore_query_started_at = @monotonic_clock.now_in_ms

    server_took_and_results = Support::Threading.parallel_map(queries_and_header_body_tuples_by_datastore_client) do |datastore_client, query_and_header_body_tuples_for_cluster|
      queries_for_cluster, header_body_tuples = query_and_header_body_tuples_for_cluster.transpose
      msearch_body = header_body_tuples.flatten(1)
      response = datastore_client.msearch(body: msearch_body, headers: headers)
      debug_query(query: msearch_body, response: response)
      # The datastore returns responses in the same order as the submitted searches,
      # so zipping against `queries_for_cluster` pairs each query with its response.
      ordered_responses = response.fetch("responses")
      [response["took"], queries_for_cluster.zip(ordered_responses)]
    end

    query_tracker.record_datastore_query_duration_ms(
      client: @monotonic_clock.now_in_ms - datastore_query_started_at,
      server: server_took_and_results.map(&:first).compact.max
    )

    server_took_and_results.flat_map(&:last).to_h.tap do |responses_by_query|
      log_shard_failure_if_necessary(responses_by_query)
      raise_search_failed_if_any_failures(responses_by_query)
    end
  end
end
92
+
93
+ private
94
+
95
# Prefix tests with `DEBUG_QUERY=1 ...` or run `export DEBUG_QUERY=1` to print the actual
# Elasticsearch/OpenSearch query and response. This is particularly useful for adding new specs.
# No-op unless the `DEBUG_QUERY` environment variable is set.
def debug_query(**debug_messages)
  return unless ::ENV["DEBUG_QUERY"]

  rendered = debug_messages
    .map { |label, payload| "#{label.to_s.upcase}:\n#{::JSON.pretty_generate(payload)}\n" }
    .join("\n")
  puts "\n#{rendered}\n\n"
end
105
+
106
# Computes the client-side timeout (in ms) for an msearch request: the time remaining
# until the nearest `monotonic_clock_deadline` among the given queries.
# Returns nil when no query carries a deadline; raises `RequestExceededDeadlineError`
# when the nearest deadline has already passed.
def msearch_request_timeout_from(queries)
  deadlines = queries.filter_map(&:monotonic_clock_deadline)
  return nil if deadlines.empty?

  remaining_ms = deadlines.min - @monotonic_clock.now_in_ms
  if remaining_ms <= 0
    raise RequestExceededDeadlineError, "It is already #{remaining_ms.abs} ms past the search deadline."
  end
  remaining_ms
end
115
+
116
# Raises `SearchFailedError` if any datastore response contains an `error`,
# identifying each failed query by its msearch header, error payload, and cluster.
# Returns nil when there are no failures.
#
# Fix: the error line previously had a stray trailing `"` after
# `response.fetch("error").inspect`, which leaked a literal quote into every
# failure message. It has been removed.
def raise_search_failed_if_any_failures(responses_by_query)
  failures = responses_by_query.each_with_index.select { |(_query, response), _index| response["error"] }
  return if failures.empty?

  formatted_failures = failures.map do |(query, response), index|
    # Note: we intentionally omit the body of the request here, because it could contain PII
    # or other sensitive values that we don't want logged.
    <<~ERROR
      #{index + 1}) Header: #{::JSON.generate(query.to_datastore_msearch_header)}
      #{response.fetch("error").inspect}
      On cluster: #{query.cluster_name}
    ERROR
  end.join("\n\n")

  raise SearchFailedError, "Got #{failures.size} search failure(s):\n\n#{formatted_failures}"
end
132
+
133
# Examine successful (2xx) query responses and log a warning for any that report
# failed shards (`_shards.failed != 0`). Errored responses are handled separately
# by `raise_search_failed_if_any_failures`.
#
# Fixes: stray `}` removed from the log message after the cluster name; `JSON`
# qualified as `::JSON` for consistency with the rest of the file; unused block
# parameter in the `select` is `_`-prefixed.
def log_shard_failure_if_necessary(responses_by_query)
  shard_failures = responses_by_query.each_with_index.select do |(_query, response), _query_numeric_index|
    (200..299).cover?(response["status"]) && response["_shards"]["failed"] != 0
  end

  unless shard_failures.empty?
    formatted_failures = shard_failures.map do |(query, response), query_numeric_index|
      "Query #{query_numeric_index + 1} against index `#{query.search_index_expression}` on cluster `#{query.cluster_name}`: " +
        ::JSON.pretty_generate(response["_shards"])
    end.join("\n\n")

    formatted_shard_failures = "The following queries have failed shards: \n\n#{formatted_failures}"
    @logger.warn(formatted_shard_failures)
  end
end
149
+ end
150
+ end
151
+ end