elasticgraph-graphql 0.18.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +3 -0
  4. data/elasticgraph-graphql.gemspec +23 -0
  5. data/lib/elastic_graph/graphql/aggregation/composite_grouping_adapter.rb +79 -0
  6. data/lib/elastic_graph/graphql/aggregation/computation.rb +39 -0
  7. data/lib/elastic_graph/graphql/aggregation/date_histogram_grouping.rb +83 -0
  8. data/lib/elastic_graph/graphql/aggregation/field_path_encoder.rb +47 -0
  9. data/lib/elastic_graph/graphql/aggregation/field_term_grouping.rb +26 -0
  10. data/lib/elastic_graph/graphql/aggregation/key.rb +87 -0
  11. data/lib/elastic_graph/graphql/aggregation/nested_sub_aggregation.rb +37 -0
  12. data/lib/elastic_graph/graphql/aggregation/non_composite_grouping_adapter.rb +129 -0
  13. data/lib/elastic_graph/graphql/aggregation/path_segment.rb +31 -0
  14. data/lib/elastic_graph/graphql/aggregation/query.rb +172 -0
  15. data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +345 -0
  16. data/lib/elastic_graph/graphql/aggregation/query_optimizer.rb +187 -0
  17. data/lib/elastic_graph/graphql/aggregation/resolvers/aggregated_values.rb +41 -0
  18. data/lib/elastic_graph/graphql/aggregation/resolvers/count_detail.rb +44 -0
  19. data/lib/elastic_graph/graphql/aggregation/resolvers/grouped_by.rb +30 -0
  20. data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +64 -0
  21. data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +83 -0
  22. data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +82 -0
  23. data/lib/elastic_graph/graphql/aggregation/script_term_grouping.rb +32 -0
  24. data/lib/elastic_graph/graphql/aggregation/term_grouping.rb +118 -0
  25. data/lib/elastic_graph/graphql/client.rb +43 -0
  26. data/lib/elastic_graph/graphql/config.rb +81 -0
  27. data/lib/elastic_graph/graphql/datastore_query/document_paginator.rb +100 -0
  28. data/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +142 -0
  29. data/lib/elastic_graph/graphql/datastore_query/paginator.rb +199 -0
  30. data/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +239 -0
  31. data/lib/elastic_graph/graphql/datastore_query.rb +372 -0
  32. data/lib/elastic_graph/graphql/datastore_response/document.rb +78 -0
  33. data/lib/elastic_graph/graphql/datastore_response/search_response.rb +79 -0
  34. data/lib/elastic_graph/graphql/datastore_search_router.rb +151 -0
  35. data/lib/elastic_graph/graphql/decoded_cursor.rb +120 -0
  36. data/lib/elastic_graph/graphql/filtering/boolean_query.rb +45 -0
  37. data/lib/elastic_graph/graphql/filtering/field_path.rb +81 -0
  38. data/lib/elastic_graph/graphql/filtering/filter_args_translator.rb +58 -0
  39. data/lib/elastic_graph/graphql/filtering/filter_interpreter.rb +526 -0
  40. data/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +148 -0
  41. data/lib/elastic_graph/graphql/filtering/range_query.rb +56 -0
  42. data/lib/elastic_graph/graphql/http_endpoint.rb +229 -0
  43. data/lib/elastic_graph/graphql/monkey_patches/schema_field.rb +56 -0
  44. data/lib/elastic_graph/graphql/monkey_patches/schema_object.rb +48 -0
  45. data/lib/elastic_graph/graphql/query_adapter/filters.rb +161 -0
  46. data/lib/elastic_graph/graphql/query_adapter/pagination.rb +27 -0
  47. data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +124 -0
  48. data/lib/elastic_graph/graphql/query_adapter/sort.rb +32 -0
  49. data/lib/elastic_graph/graphql/query_details_tracker.rb +60 -0
  50. data/lib/elastic_graph/graphql/query_executor.rb +200 -0
  51. data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +49 -0
  52. data/lib/elastic_graph/graphql/resolvers/graphql_adapter.rb +114 -0
  53. data/lib/elastic_graph/graphql/resolvers/list_records.rb +29 -0
  54. data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +74 -0
  55. data/lib/elastic_graph/graphql/resolvers/query_adapter.rb +85 -0
  56. data/lib/elastic_graph/graphql/resolvers/query_source.rb +46 -0
  57. data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +71 -0
  58. data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +65 -0
  59. data/lib/elastic_graph/graphql/resolvers/relay_connection/page_info.rb +82 -0
  60. data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +40 -0
  61. data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +42 -0
  62. data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +56 -0
  63. data/lib/elastic_graph/graphql/scalar_coercion_adapters/cursor.rb +35 -0
  64. data/lib/elastic_graph/graphql/scalar_coercion_adapters/date.rb +64 -0
  65. data/lib/elastic_graph/graphql/scalar_coercion_adapters/date_time.rb +60 -0
  66. data/lib/elastic_graph/graphql/scalar_coercion_adapters/local_time.rb +30 -0
  67. data/lib/elastic_graph/graphql/scalar_coercion_adapters/longs.rb +47 -0
  68. data/lib/elastic_graph/graphql/scalar_coercion_adapters/no_op.rb +24 -0
  69. data/lib/elastic_graph/graphql/scalar_coercion_adapters/time_zone.rb +44 -0
  70. data/lib/elastic_graph/graphql/scalar_coercion_adapters/untyped.rb +32 -0
  71. data/lib/elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones.rb +634 -0
  72. data/lib/elastic_graph/graphql/schema/arguments.rb +78 -0
  73. data/lib/elastic_graph/graphql/schema/enum_value.rb +30 -0
  74. data/lib/elastic_graph/graphql/schema/field.rb +147 -0
  75. data/lib/elastic_graph/graphql/schema/relation_join.rb +103 -0
  76. data/lib/elastic_graph/graphql/schema/type.rb +263 -0
  77. data/lib/elastic_graph/graphql/schema.rb +164 -0
  78. data/lib/elastic_graph/graphql.rb +253 -0
  79. data/script/dump_time_zones +81 -0
  80. data/script/dump_time_zones.java +17 -0
  81. metadata +503 -0
@@ -0,0 +1,118 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/graphql/aggregation/field_path_encoder"
10
+ require "elastic_graph/support/memoizable_data"
11
+
12
+ module ElasticGraph
13
+ class GraphQL
14
+ module Aggregation
15
+ # Represents a grouping on a term.
16
+ # For the relevant Elasticsearch docs, see:
17
+ # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-bucket-terms-aggregation.html
18
+ # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-bucket-composite-aggregation.html#_terms
19
+ module TermGrouping
20
+ def key
21
+ @key ||= FieldPathEncoder.encode(field_path.map(&:name_in_graphql_query))
22
+ end
23
+
24
+ def encoded_index_field_path
25
+ @encoded_index_field_path ||= FieldPathEncoder.join(field_path.filter_map(&:name_in_index))
26
+ end
27
+
28
+ def composite_clause(grouping_options: {})
29
+ {"terms" => terms_subclause.merge(grouping_options)}
30
+ end
31
+
32
+ def non_composite_clause_for(query)
33
+ clause_value = work_around_elasticsearch_bug(terms_subclause)
34
+ {
35
+ "terms" => clause_value.merge({
36
+ "size" => query.paginator.desired_page_size,
37
+ "show_term_doc_count_error" => query.needs_doc_count_error
38
+ })
39
+ }
40
+ end
41
+
42
+ INNER_META = {"key_path" => ["key"], "merge_into_bucket" => {}}
43
+
44
+ def inner_meta
45
+ INNER_META
46
+ end
47
+
48
+ private
49
+
50
+ # Here we force the `collect_mode` to `depth_first`. Without doing that, we've observed that some of our acceptance
51
+ # specs fail on CI when running against Elasticsearch 8.11 with an error like:
52
+ #
53
+ # ```
54
+ # {
55
+ # "root_cause": [
56
+ # {
57
+ # "type": "runtime_exception",
58
+ # "reason": "score for different docid, nesting an aggregation under a children aggregation and terms aggregation with collect mode breadth_first isn't possible"
59
+ # }
60
+ # ],
61
+ # "type": "search_phase_execution_exception",
62
+ # "reason": "all shards failed",
63
+ # "phase": "query",
64
+ # "grouped": true,
65
+ # "failed_shards": [
66
+ # {
67
+ # "shard": 0,
68
+ # "index": "teams_camel",
69
+ # "node": "pDXJzLTsRJCRjKe83DqipA",
70
+ # "reason": {
71
+ # "type": "runtime_exception",
72
+ # "reason": "score for different docid, nesting an aggregation under a children aggregation and terms aggregation with collect mode breadth_first isn't possible"
73
+ # }
74
+ # }
75
+ # ],
76
+ # "caused_by": {
77
+ # "type": "runtime_exception",
78
+ # "reason": "score for different docid, nesting an aggregation under a children aggregation and terms aggregation with collect mode breadth_first isn't possible",
79
+ # "caused_by": {
80
+ # "type": "runtime_exception",
81
+ # "reason": "score for different docid, nesting an aggregation under a children aggregation and terms aggregation with collect mode breadth_first isn't possible"
82
+ # }
83
+ # }
84
+ # }
85
+ # ```
86
+ #
87
+ # This specific exception message was introduced in https://github.com/elastic/elasticsearch/pull/89993, but that was done to provide
88
+ # a better error than a NullPointerException (which is what used to happen). This error also appears to be non-deterministic; I wasn't
89
+ # able to reproduce the CI failure locally until I forced `"collect_mode" => "breadth_first"`, at which point I did see the same error
90
+ # locally. The Elasticsearch docs[^1] mention that a heuristic (partially based on if a field's cardinality is known!) is used to pick
91
+ # whether `breadth_first` or `depth_first` is used when `collect_mode` is not specified:
92
+ #
93
+ # > The `breadth_first` is the default mode for fields with a cardinality bigger than the requested size or when the cardinality is unknown
94
+ # > (numeric fields or scripts for instance).
95
+ #
96
+ # In addition, the docs[^2] make it clear that `depth_first` is usually what you want:
97
+ #
98
+ # > The strategy we outlined previously—building the tree fully and then pruning—is called depth-first and it is the default.
99
+ # > Depth-first works well for the majority of aggregations, but can fall apart in situations like our actors and costars example.
100
+ # >
101
+ # > ...
102
+ # >
103
+ # > Breadth-first should be used only when you expect more buckets to be generated than documents landing in the buckets.
104
+ #
105
+ # So, for now we are forcing the collect mode to `depth_first`, as it avoids an issue with Elasticsearch and is a generally
106
+ # sane default. It may fall over in the case breadth-first is intended for, but we can cross that bridge when it comes.
107
+ #
108
+ # Long term, we're hoping to switch sub-aggregations to use a `composite` aggregation instead of `terms`, rendering this moot.
109
+ #
110
+ # [^1]: https://www.elastic.co/guide/en/elasticsearch/reference/8.11/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-collect
111
+ # [^2]: https://www.elastic.co/guide/en/elasticsearch/guide/current/_preventing_combinatorial_explosions.html#_depth_first_versus_breadth_first
112
+ def work_around_elasticsearch_bug(terms_clause)
113
+ terms_clause.merge({"collect_mode" => "depth_first"})
114
+ end
115
+ end
116
+ end
117
+ end
118
+ end
@@ -0,0 +1,43 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ class GraphQL
11
+ # Represents a client of an ElasticGraph GraphQL endpoint.
12
+ # `name` and `source_description` can really be any string, but `name` is
13
+ # meant to be a friendly/human readable string (such as a service name)
14
+ # whereas `source_description` is meant to be an opaque string describing
15
+ # where `name` came from.
16
+ class Client < Data.define(:source_description, :name)
17
+ # `Data.define` provides the following methods:
18
+ # @dynamic initialize, name, source_description, with
19
+
20
+ ANONYMOUS = new("(anonymous)", "(anonymous)")
21
+ ELASTICGRAPH_INTERNAL = new("(ElasticGraphInternal)", "(ElasticGraphInternal)")
22
+
23
+ def description
24
+ if source_description == name
25
+ name
26
+ else
27
+ "#{name} (#{source_description})"
28
+ end
29
+ end
30
+
31
+ # Default resolver used to determine the client for a given HTTP request.
32
+ # Also defines the interface of a client resolver. (This is why we define `initialize`).
33
+ class DefaultResolver
34
+ def initialize(config)
35
+ end
36
+
37
+ def resolve(http_request)
38
+ Client::ANONYMOUS
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,81 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/error"
10
+ require "elastic_graph/graphql/client"
11
+ require "elastic_graph/schema_artifacts/runtime_metadata/extension_loader"
12
+
13
+ module ElasticGraph
14
+ class GraphQL
15
+ class Config < Data.define(
16
+ # Determines the size of our datastore search requests if the query does not specify.
17
+ :default_page_size,
18
+ # Determines the maximum size of a requested page. If the client requests a page larger
19
+ # than this value, `max_page_size` elements will be returned instead.
20
+ :max_page_size,
21
+ # Queries that take longer than this configured threshold will have a sanitized version logged.
22
+ :slow_query_latency_warning_threshold_in_ms,
23
+ # Object used to identify the client of a GraphQL query based on the HTTP request.
24
+ :client_resolver,
25
+ # Array of modules that will be extended onto the `GraphQL` instance to support extension libraries.
26
+ :extension_modules,
27
+ # Contains any additional settings that were in the settings file beyond settings that are expected as part of ElasticGraph
28
+ # itself. Extensions are free to use these extra settings.
29
+ :extension_settings
30
+ )
31
+ def self.from_parsed_yaml(entire_parsed_yaml)
32
+ parsed_yaml = entire_parsed_yaml.fetch("graphql")
33
+ extra_keys = parsed_yaml.keys - EXPECTED_KEYS
34
+
35
+ unless extra_keys.empty?
36
+ raise ConfigError, "Unknown `graphql` config settings: #{extra_keys.join(", ")}"
37
+ end
38
+
39
+ extension_loader = SchemaArtifacts::RuntimeMetadata::ExtensionLoader.new(::Module.new)
40
+ extension_mods = parsed_yaml.fetch("extension_modules", []).map do |mod_hash|
41
+ extension_loader.load(mod_hash.fetch("extension_name"), from: mod_hash.fetch("require_path"), config: {}).extension_class.tap do |mod|
42
+ unless mod.instance_of?(::Module)
43
+ raise ConfigError, "`#{mod_hash.fetch("extension_name")}` is not a module, but all application extension modules must be modules."
44
+ end
45
+ end
46
+ end
47
+
48
+ new(
49
+ default_page_size: parsed_yaml.fetch("default_page_size"),
50
+ max_page_size: parsed_yaml.fetch("max_page_size"),
51
+ slow_query_latency_warning_threshold_in_ms: parsed_yaml["slow_query_latency_warning_threshold_in_ms"] || 5000,
52
+ client_resolver: load_client_resolver(parsed_yaml),
53
+ extension_modules: extension_mods,
54
+ extension_settings: entire_parsed_yaml.except(*ELASTICGRAPH_CONFIG_KEYS)
55
+ )
56
+ end
57
+
58
+ # The keys we expect under `graphql`.
59
+ EXPECTED_KEYS = members.map(&:to_s)
60
+
61
+ # The standard ElasticGraph root config setting keys; anything else is assumed to be extension settings.
62
+ ELASTICGRAPH_CONFIG_KEYS = %w[graphql indexer logger datastore schema_artifacts]
63
+
64
+ private_class_method def self.load_client_resolver(parsed_yaml)
65
+ config = parsed_yaml.fetch("client_resolver") do
66
+ return Client::DefaultResolver.new({})
67
+ end
68
+
69
+ client_resolver_loader = SchemaArtifacts::RuntimeMetadata::ExtensionLoader.new(Client::DefaultResolver)
70
+ extension = client_resolver_loader.load(
71
+ config.fetch("extension_name"),
72
+ from: config.fetch("require_path"),
73
+ config: config.except("extension_name", "require_path")
74
+ )
75
+ extension_class = extension.extension_class # : ::Class
76
+
77
+ __skip__ = extension_class.new(extension.extension_config)
78
+ end
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,100 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/error"
10
+ require "graphql"
11
+
12
+ module ElasticGraph
13
+ class GraphQL
14
+ class DatastoreQuery
15
+ # Contains query logic related to pagination. Mostly delegates to `Paginator`, which
16
+ # contains most of the logic. This merely adapts the `Paginator` to the needs of document
17
+ # pagination. (Paginator also supports aggregation bucket pagination.)
18
+ class DocumentPaginator < Support::MemoizableData.define(
19
+ :sort_clauses, :paginator, :decoded_cursor_factory, :schema_element_names,
20
+ # `individual_docs_needed`: when false, we request a `size` of 0. Set to `true` when the client is
21
+ # requesting any document fields, or if we need documents to compute any parts of the `PageInfo`.
22
+ :individual_docs_needed,
23
+ # `total_document_count_needed`: when false, `track_total_hits` will be 0 in our datastore query.
24
+ # This will prevent the datastore from doing extra work to get an accurate count
25
+ :total_document_count_needed
26
+ )
27
+ # Builds a hash containing the portions of a datastore search body related to pagination.
28
+ def to_datastore_body
29
+ {
30
+ size: effective_size,
31
+ sort: effective_sort,
32
+ search_after: search_after,
33
+ track_total_hits: total_document_count_needed
34
+ }.reject { |key, value| Array(value).empty? }
35
+ end
36
+
37
+ def sort
38
+ @sort ||= sort_clauses.map do |clause|
39
+ clause.transform_values do |options|
40
+ # As per the Elasticsearch docs[^1] missing/null values get sorted last by default, but we can control
41
+ # it here. We want to control it here to make our sorting behavior more consistent in a couple ways:
42
+ #
43
+ # 1. We want _document_ sorting and _aggregation_ sorting to behave the same. Aggregation sorting puts
44
+ # missing value buckets first when sorting ascending and last when sorting descending[^2]. Note that in
45
+ # Elasticsearch 7.16[^3] and above, you can control if missing buckets go first or last, but below that
46
+ # version you have no control. Here we match that behavior.
47
+ # 2. Clients are likely to expect that descending sorting will produce a list in reverse order from what
48
+ # ascending sorting produces, but with the default behavior (missing/null values get sorted last), this
49
+ # is not the case. We have to use the opposite `missing` option when the `order` is the opposite.
50
+ #
51
+ # [^1]: https://www.elastic.co/guide/en/elasticsearch/reference/7.10/sort-search-results.html#_missing_values
52
+ # [^2]: https://www.elastic.co/guide/en/elasticsearch/reference/7.10/search-aggregations-bucket-composite-aggregation.html#_missing_bucket
53
+ # [^3]: https://www.elastic.co/guide/en/elasticsearch/reference/7.16/search-aggregations-bucket-composite-aggregation.html#_missing_bucket
54
+ missing = (options.fetch("order") == "asc") ? "_first" : "_last"
55
+ options.merge({"missing" => missing})
56
+ end
57
+ end
58
+ end
59
+
60
+ private
61
+
62
+ def effective_size
63
+ individual_docs_needed ? paginator.requested_page_size : 0
64
+ end
65
+
66
+ def effective_sort
67
+ return [] unless effective_size > 0
68
+ paginator.search_in_reverse? ? reverse_sort : sort
69
+ end
70
+
71
+ DIRECTION_OPPOSITES = {"asc" => "desc", "desc" => "asc"}.freeze
72
+ MISSING_OPPOSITES = {"_first" => "_last", "_last" => "_first"}.freeze
73
+
74
+ def reverse_sort
75
+ @reverse_sort ||= sort.map do |sort_clause|
76
+ sort_clause.transform_values do |options|
77
+ {
78
+ "order" => DIRECTION_OPPOSITES.fetch(options.fetch("order")),
79
+ "missing" => MISSING_OPPOSITES.fetch(options.fetch("missing"))
80
+ }
81
+ end
82
+ end
83
+ end
84
+
85
+ def search_after
86
+ paginator.search_after&.then do |cursor|
87
+ decoded_cursor_factory.sort_fields.map do |field|
88
+ cursor.sort_values.fetch(field) do
89
+ raise ::GraphQL::ExecutionError, "`#{cursor.encode}` is not a valid cursor for the current `#{schema_element_names.order_by}` argument."
90
+ end
91
+ end
92
+ end
93
+ end
94
+ end
95
+
96
+ # `DatastoreQuery::DocumentPaginator` exists only for use by `DatastoreQuery` and is effectively private.
97
+ private_constant :DocumentPaginator
98
+ end
99
+ end
100
+ end
@@ -0,0 +1,142 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/graphql/filtering/filter_value_set_extractor"
10
+ require "elastic_graph/support/time_set"
11
+
12
+ module ElasticGraph
13
+ class GraphQL
14
+ class DatastoreQuery
15
+ # Responsible for building a search index expression for a specific query based on the filters.
16
+ class IndexExpressionBuilder
17
+ def initialize(schema_names:)
18
+ @filter_value_set_extractor = Filtering::FilterValueSetExtractor.new(schema_names, Support::TimeSet::ALL) do |operator, filter_value|
19
+ case operator
20
+ when :gt, :gte, :lt, :lte
21
+ if date_string?(filter_value)
22
+ # Here we translate into a range of time objects. When translating dates to times,
23
+ # we need to use an appropriate time suffix:
24
+ #
25
+ # - `> 2024-04-01` == `> 2024-04-01T23:59:59.999Z`
26
+ # - `≥ 2024-04-01` == `≥ 2024-04-01T00:00:00Z`
27
+ # - `< 2024-04-01` == `< 2024-04-01T00:00:00Z`
28
+ # - `≤ 2024-04-01` == `≤ 2024-04-01T23:59:59.999Z`
29
+ time_suffix = (operator == :gt || operator == :lte) ? "T23:59:59.999Z" : "T00:00:00Z"
30
+ Support::TimeSet.of_range(operator => ::Time.iso8601(filter_value + time_suffix))
31
+ else
32
+ Support::TimeSet.of_range(operator => ::Time.iso8601(filter_value))
33
+ end
34
+ when :equal_to_any_of
35
+ # This calls `.compact` to remove `nil` timestamp values.
36
+ ranges = filter_value.compact.map do |iso8601_string|
37
+ if date_string?(iso8601_string)
38
+ # When we have a date string, build a range for the entire day.
39
+ start_of_day = ::Time.iso8601("#{iso8601_string}T00:00:00Z")
40
+ end_of_day = ::Time.iso8601("#{iso8601_string}T23:59:59.999Z")
41
+ ::Range.new(start_of_day, end_of_day)
42
+ else
43
+ value = ::Time.iso8601(iso8601_string)
44
+ ::Range.new(value, value)
45
+ end
46
+ end
47
+
48
+ Support::TimeSet.of_range_objects(ranges)
49
+ end
50
+ end
51
+ end
52
+
53
+ # Returns an index_definition expression string to use for searches. This string can specify
54
+ # multiple indices, use wildcards, etc. For info about what is supported, see:
55
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-index.html
56
+ def determine_search_index_expression(filter_hashes, search_index_definitions, require_indices:)
57
+ # Here we sort the index expressions. It won't change the behavior in the datastore, but
58
+ # makes the return value here deterministic which makes it easier to assert on in tests.
59
+ search_index_definitions.sort_by(&:name).reduce(IndexExpression::EMPTY) do |index_expression, index_def|
60
+ index_expression + index_expression_for(filter_hashes, index_def, require_indices: require_indices)
61
+ end
62
+ end
63
+
64
+ private
65
+
66
+ def index_expression_for(filter_hashes, maybe_rollover_index_def, require_indices:)
67
+ unless maybe_rollover_index_def.rollover_index_template?
68
+ return IndexExpression.only(maybe_rollover_index_def.index_expression_for_search)
69
+ end
70
+
71
+ # @type var index_def: DatastoreCore::IndexDefinition::RolloverIndexTemplate
72
+ index_def = _ = maybe_rollover_index_def
73
+
74
+ time_set = @filter_value_set_extractor.extract_filter_value_set(filter_hashes, [index_def.timestamp_field_path])
75
+
76
+ if time_set.empty?
77
+ return require_indices ?
78
+ # Indices are required. Given the time set is empty, it's impossible for any documents to match our search.
79
+ # Therefore, which index we use here doesn't matter. We just pick the first one, alphabetically.
80
+ IndexExpression.only(index_def.known_related_query_rollover_indices.map(&:index_expression_for_search).min) :
81
+ # No indices are required, so we can return an empty index expression.
82
+ IndexExpression::EMPTY
83
+ end
84
+
85
+ indices_to_exclude = index_def.known_related_query_rollover_indices.reject do |index|
86
+ time_set.intersect?(index.time_set)
87
+ end
88
+
89
+ if require_indices && (index_def.known_related_query_rollover_indices - indices_to_exclude).empty?
90
+ # Indices are required, but all known indices have been excluded. We satisfy the requirement for an index by excluding one
91
+ # less index. This is preferable to the alternative ways to satisfy the requirement.
92
+ #
93
+ # - We could return an `IndexExpression` with no exclusions, but that would search across all indices, which is less efficient.
94
+ # - We could pick the first index to search (as we do for the `time_set.empty?` case), but that could cause matching documents
95
+ # to be missed, because it's possible that matching documents exist in a just-created index that is not in
96
+ # `known_related_query_rollover_indices`. Therefore, it's important that we still search the rollover wildcard expression,
97
+ # and we want to exclude all but one of the known indices.
98
+ indices_to_exclude = indices_to_exclude.drop(1)
99
+ end
100
+
101
+ IndexExpression.new(
102
+ names_to_include: ::Set.new([index_def.index_expression_for_search]),
103
+ names_to_exclude: ::Set.new(indices_to_exclude.map(&:index_expression_for_search))
104
+ )
105
+ end
106
+
107
+ def date_string?(string)
108
+ /\A\d{4}-\d{2}-\d{2}\z/.match?(string)
109
+ end
110
+ end
111
+
112
+ class IndexExpression < ::Data.define(:names_to_include, :names_to_exclude)
113
+ EMPTY = new(names_to_include: ::Set.new, names_to_exclude: ::Set.new)
114
+
115
+ def self.only(name)
116
+ IndexExpression.new(names_to_include: ::Set.new([name].compact), names_to_exclude: ::Set.new)
117
+ end
118
+
119
+ def to_s
120
+ # Note: exclusions must come after inclusions. I can't find anything in the Elasticsearch or OpenSearch docs
121
+ # that mention this, but when exclusions come first I found that we got errors.
122
+ parts = names_to_include.sort + names_to_exclude.sort.map { |name| "-#{name}" }
123
+ parts.join(",")
124
+ end
125
+
126
+ def +(other)
127
+ with(
128
+ names_to_include: names_to_include.union(other.names_to_include),
129
+ names_to_exclude: names_to_exclude.union(other.names_to_exclude)
130
+ )
131
+ end
132
+ end
133
+
134
+ # `DatastoreQuery::IndexExpressionBuilder` exists only for use by `DatastoreQuery` and is effectively private.
135
+ private_constant :IndexExpressionBuilder
136
+
137
+ # Steep complains that it can't find some `DatastoreQuery` methods because they are not defined in this file...
138
+ # @dynamic aggregations, shard_routing_values, search_index_definitions, merge_with, search_index_expression
139
+ # @dynamic with, to_datastore_msearch_header_and_body, document_paginator
140
+ end
141
+ end
142
+ end