elasticgraph-graphql 0.18.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +3 -0
  4. data/elasticgraph-graphql.gemspec +23 -0
  5. data/lib/elastic_graph/graphql/aggregation/composite_grouping_adapter.rb +79 -0
  6. data/lib/elastic_graph/graphql/aggregation/computation.rb +39 -0
  7. data/lib/elastic_graph/graphql/aggregation/date_histogram_grouping.rb +83 -0
  8. data/lib/elastic_graph/graphql/aggregation/field_path_encoder.rb +47 -0
  9. data/lib/elastic_graph/graphql/aggregation/field_term_grouping.rb +26 -0
  10. data/lib/elastic_graph/graphql/aggregation/key.rb +87 -0
  11. data/lib/elastic_graph/graphql/aggregation/nested_sub_aggregation.rb +37 -0
  12. data/lib/elastic_graph/graphql/aggregation/non_composite_grouping_adapter.rb +129 -0
  13. data/lib/elastic_graph/graphql/aggregation/path_segment.rb +31 -0
  14. data/lib/elastic_graph/graphql/aggregation/query.rb +172 -0
  15. data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +345 -0
  16. data/lib/elastic_graph/graphql/aggregation/query_optimizer.rb +187 -0
  17. data/lib/elastic_graph/graphql/aggregation/resolvers/aggregated_values.rb +41 -0
  18. data/lib/elastic_graph/graphql/aggregation/resolvers/count_detail.rb +44 -0
  19. data/lib/elastic_graph/graphql/aggregation/resolvers/grouped_by.rb +30 -0
  20. data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +64 -0
  21. data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +83 -0
  22. data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +82 -0
  23. data/lib/elastic_graph/graphql/aggregation/script_term_grouping.rb +32 -0
  24. data/lib/elastic_graph/graphql/aggregation/term_grouping.rb +118 -0
  25. data/lib/elastic_graph/graphql/client.rb +43 -0
  26. data/lib/elastic_graph/graphql/config.rb +81 -0
  27. data/lib/elastic_graph/graphql/datastore_query/document_paginator.rb +100 -0
  28. data/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +142 -0
  29. data/lib/elastic_graph/graphql/datastore_query/paginator.rb +199 -0
  30. data/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +239 -0
  31. data/lib/elastic_graph/graphql/datastore_query.rb +372 -0
  32. data/lib/elastic_graph/graphql/datastore_response/document.rb +78 -0
  33. data/lib/elastic_graph/graphql/datastore_response/search_response.rb +79 -0
  34. data/lib/elastic_graph/graphql/datastore_search_router.rb +151 -0
  35. data/lib/elastic_graph/graphql/decoded_cursor.rb +120 -0
  36. data/lib/elastic_graph/graphql/filtering/boolean_query.rb +45 -0
  37. data/lib/elastic_graph/graphql/filtering/field_path.rb +81 -0
  38. data/lib/elastic_graph/graphql/filtering/filter_args_translator.rb +58 -0
  39. data/lib/elastic_graph/graphql/filtering/filter_interpreter.rb +526 -0
  40. data/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +148 -0
  41. data/lib/elastic_graph/graphql/filtering/range_query.rb +56 -0
  42. data/lib/elastic_graph/graphql/http_endpoint.rb +229 -0
  43. data/lib/elastic_graph/graphql/monkey_patches/schema_field.rb +56 -0
  44. data/lib/elastic_graph/graphql/monkey_patches/schema_object.rb +48 -0
  45. data/lib/elastic_graph/graphql/query_adapter/filters.rb +161 -0
  46. data/lib/elastic_graph/graphql/query_adapter/pagination.rb +27 -0
  47. data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +124 -0
  48. data/lib/elastic_graph/graphql/query_adapter/sort.rb +32 -0
  49. data/lib/elastic_graph/graphql/query_details_tracker.rb +60 -0
  50. data/lib/elastic_graph/graphql/query_executor.rb +200 -0
  51. data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +49 -0
  52. data/lib/elastic_graph/graphql/resolvers/graphql_adapter.rb +114 -0
  53. data/lib/elastic_graph/graphql/resolvers/list_records.rb +29 -0
  54. data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +74 -0
  55. data/lib/elastic_graph/graphql/resolvers/query_adapter.rb +85 -0
  56. data/lib/elastic_graph/graphql/resolvers/query_source.rb +46 -0
  57. data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +71 -0
  58. data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +65 -0
  59. data/lib/elastic_graph/graphql/resolvers/relay_connection/page_info.rb +82 -0
  60. data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +40 -0
  61. data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +42 -0
  62. data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +56 -0
  63. data/lib/elastic_graph/graphql/scalar_coercion_adapters/cursor.rb +35 -0
  64. data/lib/elastic_graph/graphql/scalar_coercion_adapters/date.rb +64 -0
  65. data/lib/elastic_graph/graphql/scalar_coercion_adapters/date_time.rb +60 -0
  66. data/lib/elastic_graph/graphql/scalar_coercion_adapters/local_time.rb +30 -0
  67. data/lib/elastic_graph/graphql/scalar_coercion_adapters/longs.rb +47 -0
  68. data/lib/elastic_graph/graphql/scalar_coercion_adapters/no_op.rb +24 -0
  69. data/lib/elastic_graph/graphql/scalar_coercion_adapters/time_zone.rb +44 -0
  70. data/lib/elastic_graph/graphql/scalar_coercion_adapters/untyped.rb +32 -0
  71. data/lib/elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones.rb +634 -0
  72. data/lib/elastic_graph/graphql/schema/arguments.rb +78 -0
  73. data/lib/elastic_graph/graphql/schema/enum_value.rb +30 -0
  74. data/lib/elastic_graph/graphql/schema/field.rb +147 -0
  75. data/lib/elastic_graph/graphql/schema/relation_join.rb +103 -0
  76. data/lib/elastic_graph/graphql/schema/type.rb +263 -0
  77. data/lib/elastic_graph/graphql/schema.rb +164 -0
  78. data/lib/elastic_graph/graphql.rb +253 -0
  79. data/script/dump_time_zones +81 -0
  80. data/script/dump_time_zones.java +17 -0
  81. metadata +503 -0
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/error"
require "elastic_graph/graphql/aggregation/query"
require "elastic_graph/graphql/aggregation/query_optimizer"
require "elastic_graph/graphql/decoded_cursor"
require "elastic_graph/graphql/datastore_response/search_response"
require "elastic_graph/graphql/filtering/filter_interpreter"
require "elastic_graph/support/memoizable_data"

module ElasticGraph
  class GraphQL
    # An immutable class that represents a datastore query. Since this represents
    # a datastore query, and not a GraphQL query, all the data in it is modeled
    # in datastore terms, not GraphQL terms. For example, any field names in a
    # `Query` should be references to index fields, not GraphQL fields.
    #
    # Filters are modeled as a `Set` of filtering hashes. While we usually expect only
    # a single `filter` hash, modeling it as a set makes it easy for us to support
    # merging queries. The datastore knows how to apply multiple `must` clauses that
    # apply to the same field, giving us the exact semantics we want in such a situation
    # with minimal effort.
    class DatastoreQuery < Support::MemoizableData.define(
      :total_document_count_needed, :aggregations, :logger, :filter_interpreter, :routing_picker,
      :index_expression_builder, :default_page_size, :search_index_definitions, :max_page_size,
      :filters, :sort, :document_pagination, :requested_fields, :individual_docs_needed,
      :monotonic_clock_deadline, :schema_element_names
    ) {
      # Normalizes constructor arguments before delegating to the generated initializer.
      # Accepts either a single `filter:` hash or a `filters:` collection and folds them
      # into one frozen `Set`; fills in empty defaults for the other collection-valued
      # attributes; and derives `individual_docs_needed`/`total_document_count_needed`
      # from the requested fields and aggregations when not explicitly requested.
      #
      # Raises `SearchFailedError` if no `search_index_definitions` are provided, since
      # such a query could never be executed.
      def initialize(
        filter: nil,
        filters: nil,
        sort: nil,
        document_pagination: nil,
        aggregations: nil,
        requested_fields: nil,
        individual_docs_needed: false,
        total_document_count_needed: false,
        monotonic_clock_deadline: nil,
        **kwargs
      )
        # Deal with `:filter` vs `:filters` input and normalize it to a single `filters` set.
        filters = ::Set.new(filters || [])
        filters << filter if filter && !filter.empty?
        filters.freeze

        aggregations ||= {}
        requested_fields ||= []

        super(
          filters: filters,
          sort: sort || [],
          document_pagination: document_pagination || {},
          aggregations: aggregations,
          requested_fields: requested_fields.to_set,
          individual_docs_needed: individual_docs_needed || !requested_fields.empty?,
          total_document_count_needed: total_document_count_needed || aggregations.values.any?(&:needs_total_doc_count?),
          monotonic_clock_deadline: monotonic_clock_deadline,
          **kwargs
        )

        if search_index_definitions.empty?
          raise SearchFailedError, "Query is invalid, since it contains no `search_index_definitions`."
        end
      end
    }
    # Load these files after the `Query` class has been defined, to avoid
    # `TypeError: superclass mismatch for class Query`
    require "elastic_graph/graphql/datastore_query/document_paginator"
    require "elastic_graph/graphql/datastore_query/index_expression_builder"
    require "elastic_graph/graphql/datastore_query/paginator"
    require "elastic_graph/graphql/datastore_query/routing_picker"

    # Performs a list of queries by building a hash of datastore msearch header/body tuples (keyed
    # by query), yielding them to the caller, and then post-processing the results. The caller is
    # responsible for returning a hash of responses by query from its block.
    #
    # Note that some of the passed queries may not be yielded to the caller; when we can tell
    # that a query does not have to be sent to the datastore we avoid yielding it from here.
    # Therefore, the caller should not assume that all queries passed to this method will be
    # yielded back.
    #
    # The return value is a hash of `DatastoreResponse::SearchResponse` objects by query.
    #
    # Note: this method uses `send` to work around ruby visibility rules. We do not want
    # `#decoded_cursor_factory` to be public, as we only need it here, but we cannot access
    # it from a class method without using `send`.
    def self.perform(queries)
      empty_queries, present_queries = queries.partition(&:empty?)

      responses_by_query = Aggregation::QueryOptimizer.optimize_queries(present_queries) do |optimized_queries|
        header_body_tuples_by_query = optimized_queries.each_with_object({}) do |query, hash|
          hash[query] = query.to_datastore_msearch_header_and_body
        end

        yield(header_body_tuples_by_query)
      end

      empty_responses = empty_queries.each_with_object({}) do |query, hash|
        hash[query] = DatastoreResponse::SearchResponse::RAW_EMPTY
      end

      empty_responses.merge(responses_by_query).each_with_object({}) do |(query, response), hash|
        hash[query] = DatastoreResponse::SearchResponse.build(response, decoded_cursor_factory: query.send(:decoded_cursor_factory))
      end.tap do |responses_hash|
        # Callers expect this `perform` method to provide an invariant: the returned hash MUST contain one entry
        # for each of the `queries` passed in the args. In practice, violating this invariant primarily causes a
        # problem when the caller uses the `GraphQL::Dataloader` (which happens for every GraphQL request in production...).
        # However, our tests do not always run queries end-to-end, so this is an added check we want to do, so that
        # anytime our logic here fails to include a query in the response in any test, we'll be notified of the
        # problem.
        expected_queries = queries.to_set
        actual_queries = responses_hash.keys.to_set

        if expected_queries != actual_queries
          missing_queries = expected_queries - actual_queries
          extra_queries = actual_queries - expected_queries

          raise SearchFailedError, "The `responses_hash` does not have the expected set of queries as keys. " \
            "This can cause problems for the `GraphQL::Dataloader` and suggests a bug in the logic that should be fixed.\n\n" \
            "Missing queries (#{missing_queries.size}):\n#{missing_queries.map(&:inspect).join("\n")}.\n\n" \
            "Extra queries (#{extra_queries.size}): #{extra_queries.map(&:inspect).join("\n")}"
        end
      end
    end

    # Merges the provided query, returning a new combined query object.
    # Both query objects are left unchanged.
    def merge(other_query)
      if search_index_definitions != other_query.search_index_definitions
        raise ElasticGraph::InvalidMergeError, "`search_index_definitions` conflict while merging between " \
          "#{search_index_definitions} and #{other_query.search_index_definitions}"
      end

      with(
        individual_docs_needed: individual_docs_needed || other_query.individual_docs_needed,
        total_document_count_needed: total_document_count_needed || other_query.total_document_count_needed,
        filters: filters + other_query.filters,
        sort: merge_attribute(other_query, :sort),
        requested_fields: requested_fields + other_query.requested_fields,
        document_pagination: merge_attribute(other_query, :document_pagination),
        monotonic_clock_deadline: [monotonic_clock_deadline, other_query.monotonic_clock_deadline].compact.min,
        aggregations: aggregations.merge(other_query.aggregations)
      )
    end

    # Convenience method for merging when you do not have access to an
    # `DatastoreQuery::Builder`. Allows you to pass the query options you
    # would like to merge. As with `#merge`, leaves the original query unchanged
    # and returns a combined query object.
    def merge_with(**query_options)
      merge(with(**query_options))
    end

    # Pairs the multi-search headers and body into a tuple, as per the format required by the datastore:
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-multi-search.html#search-multi-search-api-desc
    def to_datastore_msearch_header_and_body
      @to_datastore_msearch_header_and_body ||= [to_datastore_msearch_header, to_datastore_body]
    end

    # Returns an index_definition expression string to use for searches. This string can specify
    # multiple indices, use wildcards, etc. For info about what is supported, see:
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-index.html
    def search_index_expression
      @search_index_expression ||= index_expression_builder.determine_search_index_expression(
        filters,
        search_index_definitions,
        # When we have aggregations, we must require indices to search. When we search no indices, the datastore does not return
        # the standard aggregations response structure, which causes problems.
        require_indices: !aggregations_datastore_body.empty?
      ).to_s
    end

    # Returns the name of the datastore cluster as a String where this query should be sent.
    # Unless exactly 1 cluster name is found, this method raises a ConfigError.
    def cluster_name
      cluster_name = search_index_definitions.map(&:cluster_to_query).uniq
      return cluster_name.first if cluster_name.size == 1
      raise ConfigError, "Found different datastore clusters (#{cluster_name}) to query " \
        "for query targeting indices: #{search_index_definitions}"
    end

    # Returns a list of unique field paths that should be used for shard routing during searches.
    #
    # If a search is filtering on one of these fields, we can optimize the search by routing
    # it to only the shards containing documents for that routing value.
    #
    # Note that this returns a list due to our support for type unions. A unioned type
    # can be composed of subtypes that use different shard routing; this will return
    # the set union of them all.
    def route_with_field_paths
      search_index_definitions.map(&:route_with).uniq
    end

    # The shard routing values used for this search. Can be `nil` if the query will hit all shards.
    # `[]` means that we are routing to no shards.
    def shard_routing_values
      return @shard_routing_values if defined?(@shard_routing_values)
      routing_values = routing_picker.extract_eligible_routing_values(filters, route_with_field_paths)

      @shard_routing_values ||=
        if routing_values&.empty? && !aggregations_datastore_body.empty?
          # If we return an empty array of routing values, no shards will get searched, which causes a problem for aggregations.
          # When a query includes aggregations, there are normally aggregation structures on the response (even when there are no
          # search hits to aggregate over!) but if there are no routing values, those aggregation structures will be missing from
          # the response. It's complex to handle that in our downstream response handling code, so we prefer to force a "fallback"
          # routing value here to ensure that at least one shard gets searched. Which shard gets searched doesn't matter; the search
          # filter that led to an empty set of routing values will match on documents on any shard.
          ["fallback_shard_routing_value"]
        elsif contains_ignored_values_for_routing?(routing_values)
          nil
        else
          routing_values&.sort # order doesn't matter, but sorting it makes it easier to assert on in our tests.
        end
    end

    # Indicates if the query does not need any results from the datastore. As an optimization,
    # we can reply with a default "empty" response for an empty query.
    def empty?
      # If we are searching no indices or routing to an empty set of shards, there is no need to query the datastore at all.
      # This only happens when our filter processing has deduced that the query will match no results.
      return true if search_index_expression.empty? || shard_routing_values&.empty?

      datastore_body = to_datastore_body
      datastore_body.fetch(:size) == 0 && !datastore_body.fetch(:track_total_hits) && aggregations_datastore_body.empty?
    end

    # Human-readable description of the query, with the `:query` payload redacted since it
    # may contain filter values derived from client input.
    def inspect
      description = to_datastore_msearch_header.merge(to_datastore_body).map do |key, value|
        "#{key}=#{(key == :query) ? "<REDACTED>" : value.inspect}"
      end.join(" ")

      "#<#{self.class.name} #{description}>"
    end

    # The msearch header line: the index expression plus (when present) the comma-joined shard routing values.
    def to_datastore_msearch_header
      @to_datastore_msearch_header ||= {index: search_index_expression, routing: shard_routing_values&.join(",")}.compact
    end

    # `DatastoreQuery` objects are used as keys in a hash. Computing `#hash` can be expensive (given how many fields
    # an `DatastoreQuery` has) and it's safe to cache since `DatastoreQuery` instances are immutable, so we memoize it
    # here. We've observed this making a very noticeable difference in our test suite runtime.
    def hash
      @hash ||= super
    end

    # Builds (and memoizes) the `DocumentPaginator` that translates this query's pagination
    # arguments and sort into the datastore search body.
    def document_paginator
      @document_paginator ||= DocumentPaginator.new(
        sort_clauses: sort_with_tiebreaker,
        individual_docs_needed: individual_docs_needed,
        total_document_count_needed: total_document_count_needed,
        decoded_cursor_factory: decoded_cursor_factory,
        schema_element_names: schema_element_names,
        paginator: Paginator.new(
          default_page_size: default_page_size,
          max_page_size: max_page_size,
          first: document_pagination[:first],
          after: document_pagination[:after],
          last: document_pagination[:last],
          before: document_pagination[:before],
          schema_element_names: schema_element_names
        )
      )
    end

    private

    # Merges a single attribute from `other_query` with this query's value. Empty values
    # defer to the non-empty side; equal values are kept as-is. When both sides define
    # different non-empty values we cannot honor both, so we log a warning and prefer
    # the value from the query being merged in (`other_query`).
    def merge_attribute(other_query, attribute)
      value = public_send(attribute)
      other_value = other_query.public_send(attribute)

      if value.empty?
        other_value
      elsif other_value.empty?
        value
      elsif value == other_value
        value
      else
        logger.warn("Tried to merge two queries that both define `#{attribute}`, using the value from the query being merged: #{value}, #{other_value}")
        other_value
      end
    end

    TIEBREAKER_SORT_CLAUSES = [{"id" => {"order" => "asc"}}].freeze

    # We want to use `id` as a tiebreaker ONLY when `id` isn't explicitly specified as a sort field
    def sort_with_tiebreaker
      @sort_with_tiebreaker ||= remove_duplicate_sort_clauses(sort + TIEBREAKER_SORT_CLAUSES)
    end

    # Keeps only the first sort clause for each field, so that the appended tiebreaker
    # clause is dropped when the caller already sorted on that field.
    def remove_duplicate_sort_clauses(sort_clauses)
      seen_fields = Set.new
      sort_clauses.select do |clause|
        clause.keys.all? { |key| seen_fields.add?(key) }
      end
    end

    # Factory for decoding/encoding pagination cursors, derived from the effective sort
    # (including the tiebreaker). Intentionally private; see the note on `self.perform`.
    def decoded_cursor_factory
      @decoded_cursor_factory ||= DecodedCursor::Factory.from_sort_list(sort_with_tiebreaker)
    end

    # Returns truthy if any extracted routing value is one the index definitions tell us
    # to ignore for routing purposes; returns `nil` when `routing_values` is `nil`.
    def contains_ignored_values_for_routing?(routing_values)
      ignored_values_for_routing.intersect?(routing_values.to_set) if routing_values
    end

    # Set union of the ignored routing values across all searched index definitions.
    def ignored_values_for_routing
      @ignored_values_for_routing ||= search_index_definitions.flat_map { |i| i.ignored_values_for_routing.to_a }.to_set
    end

    # Assembles the full msearch body: aggregations, pagination/sort, the translated
    # filter query (omitted when `nil`), and the `_source` filtering directive.
    def to_datastore_body
      @to_datastore_body ||= aggregations_datastore_body
        .merge(document_paginator.to_datastore_body)
        .merge({query: filter_interpreter.build_query(filters)}.compact)
        .merge({_source: source})
    end

    # Builds the `aggs` portion of the search body by merging the agg hash of each
    # aggregation; returns `{}` when the query has no aggregations.
    def aggregations_datastore_body
      @aggregations_datastore_body ||= begin
        aggs = aggregations
          .values
          .map { |agg| agg.build_agg_hash(filter_interpreter) }
          .reduce({}, :merge)

        aggs.empty? ? {} : {aggs: aggs}
      end
    end

    # Make our query as efficient as possible by limiting what parts of `_source` we fetch.
    # For an id-only query (or a query that has no requested fields) we don't need to fetch `_source`
    # at all--which means the datastore can avoid decompressing the _source field. Otherwise,
    # we only ask for the fields we need to return.
    def source
      requested_source_fields = requested_fields - ["id"]
      return false if requested_source_fields.empty?
      # Merging in requested_fields as _source:{includes:} based on Elasticsearch documentation:
      # https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-source-field.html#include-exclude
      {includes: requested_source_fields.to_a}
    end

    # Encapsulates dependencies of `Query`, giving us something we can expose off of `application`
    # to build queries when desired.
    class Builder < Support::MemoizableData.define(:runtime_metadata, :logger, :query_defaults)
      # Convenience constructor that captures any extra keyword args as `query_defaults`.
      def self.with(runtime_metadata:, logger:, **query_defaults)
        new(runtime_metadata: runtime_metadata, logger: logger, query_defaults: query_defaults)
      end

      # Memoized `RoutingPicker`, shared by every query built by this builder.
      def routing_picker
        @routing_picker ||= RoutingPicker.new(schema_names: runtime_metadata.schema_element_names)
      end

      # Memoized `IndexExpressionBuilder`, shared by every query built by this builder.
      def index_expression_builder
        @index_expression_builder ||= IndexExpressionBuilder.new(schema_names: runtime_metadata.schema_element_names)
      end

      # Builds a new `DatastoreQuery`, injecting this builder's dependencies and defaults.
      # Explicitly passed `options` override the configured `query_defaults`.
      def new_query(**options)
        DatastoreQuery.new(
          routing_picker: routing_picker,
          index_expression_builder: index_expression_builder,
          logger: logger,
          schema_element_names: runtime_metadata.schema_element_names,
          **query_defaults.merge(options)
        )
      end
    end
  end
end
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/graphql/decoded_cursor"
require "elastic_graph/support/memoizable_data"
require "forwardable"

module ElasticGraph
  class GraphQL
    module DatastoreResponse
      # Represents a document fetched from the datastore. Exposes both the raw metadata
      # provided by the datastore and the doc payload itself. In addition, you can treat
      # it just like a document hash using `#[]` or `#fetch`.
      Document = Support::MemoizableData.define(:raw_data, :payload, :decoded_cursor_factory) do
        # @implements Document
        extend Forwardable
        def_delegators :payload, :[], :fetch

        # Builds a `Document` from a raw datastore hit hash. `_source` may be absent
        # (e.g. for id-only queries), in which case an empty payload is used.
        def self.build(raw_data, decoded_cursor_factory: DecodedCursor::Factory::Null)
          source = raw_data.fetch("_source") do
            {} # : ::Hash[::String, untyped]
          end

          new(
            raw_data: raw_data,
            # Since we no longer fetch _source for id only queries, merge id into _source to take care of that case
            payload: source.merge("id" => raw_data["_id"]),
            decoded_cursor_factory: decoded_cursor_factory
          )
        end

        # Convenience constructor for a document that has only a payload (no datastore metadata).
        def self.with_payload(payload)
          build({"_source" => payload})
        end

        # The concrete index the document came from (the `_index` metadata field).
        def index_name
          raw_data["_index"]
        end

        # The logical index definition name: the index name with any rollover infix suffix stripped.
        def index_definition_name
          index_name.split(ROLLOVER_INDEX_INFIX_MARKER).first # : ::String
        end

        # The document's datastore id (the `_id` metadata field).
        def id
          raw_data["_id"]
        end

        # The raw sort values returned by the datastore for this hit (used for search_after pagination).
        def sort
          raw_data["sort"]
        end

        # The `version` value from the document payload, if present.
        def version
          payload["version"]
        end

        # Memoized pagination cursor built from this hit's sort values.
        def cursor
          @cursor ||= decoded_cursor_factory.build(raw_data.fetch("sort"))
        end

        def datastore_path
          # Path based on this API:
          # https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-get.html
          "/#{index_name}/_doc/#{id}".squeeze("/")
        end

        def to_s
          "#<#{self.class.name} #{datastore_path}>"
        end
        alias_method :inspect, :to_s
      end
    end
  end
end
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/error"
require "elastic_graph/graphql/decoded_cursor"
require "elastic_graph/graphql/datastore_response/document"
require "forwardable"

module ElasticGraph
  class GraphQL
    module DatastoreResponse
      # Represents a search response from the datastore. Exposes both the raw metadata
      # provided by the datastore and the collection of documents. Can be treated as a
      # collection of documents when you don't care about the metadata.
      class SearchResponse < ::Data.define(:raw_data, :metadata, :documents, :total_document_count)
        include Enumerable
        extend Forwardable

        def_delegators :documents, :each, :to_a, :size, :empty?

        # Top-level keys excluded from `metadata` because they are exposed separately
        # (`hits` is re-added below with the per-document entries stripped out).
        EXCLUDED_METADATA_KEYS = %w[hits aggregations].freeze

        # Builds a `SearchResponse` from the raw datastore response hash, converting each
        # hit into a `Document` using the given cursor factory.
        def self.build(raw_data, decoded_cursor_factory: DecodedCursor::Factory::Null)
          documents = raw_data.fetch("hits").fetch("hits").map do |doc|
            Document.build(doc, decoded_cursor_factory: decoded_cursor_factory)
          end

          metadata = raw_data.except(*EXCLUDED_METADATA_KEYS)
          metadata["hits"] = raw_data.fetch("hits").except("hits")

          # `hits.total` is exposed as an object like:
          #
          # {
          #   "value" => 200,
          #   "relation" => "eq", # or "gte"
          # }
          #
          # This allows it to provide a lower bound on the number of hits, rather than having
          # to give an exact count. We may want to handle the `gte` case differently at some
          # point but for now we just use the value as-is.
          #
          # In the case where `track_total_hits` flag is set to `false`, `hits.total` field will be completely absent.
          # This means the client intentionally chose not to query the total doc count, and `total_document_count` will be nil.
          # In this case, we will throw an exception if the client later tries to access `total_document_count`.
          total_document_count = metadata.dig("hits", "total", "value")

          new(
            raw_data: raw_data,
            metadata: metadata,
            documents: documents,
            total_document_count: total_document_count
          )
        end

        # Benign empty response that can be used in place of datastore response errors as needed.
        RAW_EMPTY = {"hits" => {"hits" => [], "total" => {"value" => 0}}}.freeze
        EMPTY = build(RAW_EMPTY)

        # Short, human-readable summary of the documents (abbreviated when there are 3 or more).
        def docs_description
          (documents.size < 3) ? documents.inspect : "[#{documents.first}, ..., #{documents.last}]"
        end

        # Overrides the generated reader to raise `CountUnavailableError` when the count was
        # not tracked (i.e. when `hits.total` was absent from the datastore response).
        def total_document_count
          super || raise(CountUnavailableError, "#{__method__} is unavailable; set `query.total_document_count_needed = true` to make it available")
        end

        def to_s
          "#<#{self.class.name} size=#{documents.size} #{docs_description}>"
        end
        alias_method :inspect, :to_s
      end
    end
  end
end
@@ -0,0 +1,151 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/constants"
10
+ require "elastic_graph/error"
11
+ require "elastic_graph/graphql/datastore_response/search_response"
12
+ require "elastic_graph/graphql/query_details_tracker"
13
+ require "elastic_graph/support/threading"
14
+
15
+ module ElasticGraph
16
+ class GraphQL
17
+ # Responsible for routing datastore search requests to the appropriate cluster and index.
18
+ class DatastoreSearchRouter
19
+ def initialize(
20
+ datastore_clients_by_name:,
21
+ logger:,
22
+ monotonic_clock:,
23
+ config:
24
+ )
25
+ @datastore_clients_by_name = datastore_clients_by_name
26
+ @logger = logger
27
+ @monotonic_clock = monotonic_clock
28
+ @config = config
29
+ end
30
+
31
+ # Sends the datastore a multi-search request based on the given queries.
32
+ # Returns a hash of responses keyed by the query.
33
+ def msearch(queries, query_tracker: QueryDetailsTracker.empty)
34
+ DatastoreQuery.perform(queries) do |header_body_tuples_by_query|
35
+ # Here we set a client-side timeout, which causes the client to give up and close the connection.
36
+ # According to [1]--"We have a new way to cancel search requests efficiently from the client
37
+ # in 7.4 (by closing the underlying http channel)"--this should cause the server to stop
38
+ # executing the search, and more importantly, gives us a strictly enforced timeout.
39
+ #
40
+ # In addition, the datastore supports a `timeout` option on a search body, but this timeout is
41
+ # "best effort", applies to each shard (and not to the overall search request), and only interrupts
42
+ # certain kinds of operations. [2] and [3] below have more info.
43
+ #
44
+ # Note that I have not been able to observe this `timeout` on a search body ever working
45
+ # as documented. In our test suite, none of the slow queries I have tried (both via
46
+ # slow aggregation query and a slow script) have ever aborted early when that option is
47
+ # set. In Kibana in production, @bsorbo observed it aborting a `search` request early
48
+ # (but not necessarily an `msearch` request...), but even then, the response said `timed_out: false`!
49
+ # Other people ([4]) have reported observing timeout having no effect on msearch requests.
50
+ #
51
+ # So, the client-side timeout is the main one we want here, and for now we are not using the
52
+ # datastore search `timeout` option at all.
53
+ #
54
+ # For more info, see:
55
+ #
56
+ # [1] https://github.com/elastic/elasticsearch/issues/47716
57
+ # [2] https://github.com/elastic/elasticsearch/pull/51858
58
+ # [3] https://www.elastic.co/guide/en/elasticsearch/guide/current/_search_options.html#_timeout_2
59
+ # [4] https://discuss.elastic.co/t/timeouts-ignored-in-multisearch/23673
60
+
61
+ # Unfortunately, the Elasticsearch/OpenSearch clients don't support setting a per-request client-side timeout,
62
+ # even though Faraday (the underlying HTTP client) does. To work around this, we pass our desired
63
+ # timeout in a specific header that the `SupportTimeouts` Faraday middleware will use.
64
+ headers = {TIMEOUT_MS_HEADER => msearch_request_timeout_from(queries)}.compact
65
+
66
+ queries_and_header_body_tuples_by_datastore_client = header_body_tuples_by_query.group_by do |(query, header_body_tuples)|
67
+ @datastore_clients_by_name.fetch(query.cluster_name)
68
+ end
69
+
70
+ datastore_query_started_at = @monotonic_clock.now_in_ms
71
+
72
+ server_took_and_results = Support::Threading.parallel_map(queries_and_header_body_tuples_by_datastore_client) do |datastore_client, query_and_header_body_tuples_for_cluster|
73
+ queries_for_cluster, header_body_tuples = query_and_header_body_tuples_for_cluster.transpose
74
+ msearch_body = header_body_tuples.flatten(1)
75
+ response = datastore_client.msearch(body: msearch_body, headers: headers)
76
+ debug_query(query: msearch_body, response: response)
77
+ ordered_responses = response.fetch("responses")
78
+ [response["took"], queries_for_cluster.zip(ordered_responses)]
79
+ end
80
+
81
+ query_tracker.record_datastore_query_duration_ms(
82
+ client: @monotonic_clock.now_in_ms - datastore_query_started_at,
83
+ server: server_took_and_results.map(&:first).compact.max
84
+ )
85
+
86
+ server_took_and_results.flat_map(&:last).to_h.tap do |responses_by_query|
87
+ log_shard_failure_if_necessary(responses_by_query)
88
+ raise_search_failed_if_any_failures(responses_by_query)
89
+ end
90
+ end
91
+ end
92
+
93
+ private
94
+
95
+ # Prefix tests with `DEBUG_QUERY=1 ...` or run `export DEBUG_QUERY=1` to print the actual
96
+ # Elasticsearch/OpenSearch query and response. This is particularly useful for adding new specs.
97
+ def debug_query(**debug_messages)
98
+ return unless ::ENV["DEBUG_QUERY"]
99
+
100
+ formatted_messages = debug_messages.map do |key, msg|
101
+ "#{key.to_s.upcase}:\n#{::JSON.pretty_generate(msg)}\n"
102
+ end.join("\n")
103
+ puts "\n#{formatted_messages}\n\n"
104
+ end
105
+
106
+ def msearch_request_timeout_from(queries)
107
+ return nil unless (min_query_deadline = queries.map(&:monotonic_clock_deadline).compact.min)
108
+
109
+ (min_query_deadline - @monotonic_clock.now_in_ms).tap do |timeout|
110
+ if timeout <= 0
111
+ raise RequestExceededDeadlineError, "It is already #{timeout.abs} ms past the search deadline."
112
+ end
113
+ end
114
+ end
115
+
116
+ def raise_search_failed_if_any_failures(responses_by_query)
117
+ failures = responses_by_query.each_with_index.select { |(_query, response), _index| response["error"] }
118
+ return if failures.empty?
119
+
120
+ formatted_failures = failures.map do |(query, response), index|
121
+ # Note: we intentionally omit the body of the request here, because it could contain PII
122
+ # or other sensitive values that we don't want logged.
123
+ <<~ERROR
124
+ #{index + 1}) Header: #{::JSON.generate(query.to_datastore_msearch_header)}
125
+ #{response.fetch("error").inspect}"
126
+ On cluster: #{query.cluster_name}
127
+ ERROR
128
+ end.join("\n\n")
129
+
130
+ raise SearchFailedError, "Got #{failures.size} search failure(s):\n\n#{formatted_failures}"
131
+ end
132
+
133
+ # Examine successful query responses and log any shard failure they encounter
134
+ def log_shard_failure_if_necessary(responses_by_query)
135
+ shard_failures = responses_by_query.each_with_index.select do |(query, response), query_numeric_index|
136
+ (200..299).cover?(response["status"]) && response["_shards"]["failed"] != 0
137
+ end
138
+
139
+ unless shard_failures.empty?
140
+ formatted_failures = shard_failures.map do |(query, response), query_numeric_index|
141
+ "Query #{query_numeric_index + 1} against index `#{query.search_index_expression}` on cluster `#{query.cluster_name}`}: " +
142
+ JSON.pretty_generate(response["_shards"])
143
+ end.join("\n\n")
144
+
145
+ formatted_shard_failures = "The following queries have failed shards: \n\n#{formatted_failures}"
146
+ @logger.warn(formatted_shard_failures)
147
+ end
148
+ end
149
+ end
150
+ end
151
+ end