elasticgraph-datastore_core 0.18.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7bd50dac79cefe5466066b048ebb298df524a33a904082b0871af9429a7521ed
4
+ data.tar.gz: 666da201ff6ef33c81c2d7b4926068913188f735ec1f9777ed68255c3433bc2a
5
+ SHA512:
6
+ metadata.gz: 9ec51fffb49a31152bfa0718bfb16d0063c964ca24128710639d287dac4d10ccc4ea0fbe969caae6f7c39df28ddb80b3a2bc225c2c7af1c4bb7d6f5baf5c1459
7
+ data.tar.gz: 671573cb43b2880450bac584ba4edd21154188a2de245bd195569cc166a0e323038caba947cc7d74bbf0b403b78952b37a800bea81fe602f125188dffa529275
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2024 Block, Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # ElasticGraph::DatastoreCore
2
+
3
+ Contains the core datastore logic used by the rest of ElasticGraph.
@@ -0,0 +1,21 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require_relative "../gemspec_helper"
10
+
11
+ ElasticGraphGemspecHelper.define_elasticgraph_gem(gemspec_file: __FILE__, category: :core) do |spec, eg_version|
12
+ spec.summary = "ElasticGraph gem containing the core datastore support types and logic."
13
+
14
+ spec.add_dependency "elasticgraph-schema_artifacts", eg_version
15
+ spec.add_dependency "elasticgraph-support", eg_version
16
+
17
+ spec.add_development_dependency "elasticgraph-admin", eg_version
18
+ spec.add_development_dependency "elasticgraph-elasticsearch", eg_version
19
+ spec.add_development_dependency "elasticgraph-opensearch", eg_version
20
+ spec.add_development_dependency "elasticgraph-schema_definition", eg_version
21
+ end
@@ -0,0 +1,58 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/datastore_core/configuration/client_faraday_adapter"
10
+ require "elastic_graph/datastore_core/configuration/cluster_definition"
11
+ require "elastic_graph/datastore_core/configuration/index_definition"
12
+ require "elastic_graph/error"
13
+
14
+ module ElasticGraph
15
+ class DatastoreCore
16
+ # Defines the configuration related to datastores.
17
+ class Config < ::Data.define(
18
+ # Configuration of the faraday adapter to use with the datastore client.
19
+ :client_faraday_adapter,
20
+ # Map of datastore cluster definitions, keyed by cluster name. The names will be referenced within
21
+ # `index_definitions` by `query_cluster` and `index_into_clusters` to identify
22
+ # datastore clusters. Each definition has a `url` and `settings`. `settings` contains datastore
23
+ # settings in the flattened name form, e.g. `"cluster.max_shards_per_node": 2000`.
24
+ :clusters,
25
+ # Map of index definition names to `IndexDefinition` objects containing customizations
26
+ # for the named index definitions for this environment.
27
+ :index_definitions,
28
+ # Determines if we log requests/responses to/from the datastore.
29
+ # Defaults to `false`.
30
+ :log_traffic,
31
+ # Passed down to the datastore client, controls the number of times ElasticGraph attempts a call against
32
+ # the datastore before failing. Retrying a handful of times is generally advantageous, since some sporadic
33
+ # failures are expected during the course of operation, and better to retry than fail the entire call.
34
+ # Defaults to 3.
35
+ :max_client_retries
36
+ )
37
+ # Helper method to build an instance from parsed YAML config.
38
+ def self.from_parsed_yaml(parsed_yaml)
39
+ parsed_yaml = parsed_yaml.fetch("datastore")
40
+ extra_keys = parsed_yaml.keys - EXPECTED_KEYS
41
+
42
+ unless extra_keys.empty?
43
+ raise ConfigError, "Unknown `datastore` config settings: #{extra_keys.join(", ")}"
44
+ end
45
+
46
+ new(
47
+ client_faraday_adapter: Configuration::ClientFaradayAdapter.from_parsed_yaml(parsed_yaml),
48
+ clusters: Configuration::ClusterDefinition.definitions_by_name_hash_from(parsed_yaml.fetch("clusters")),
49
+ index_definitions: Configuration::IndexDefinition.definitions_by_name_hash_from(parsed_yaml.fetch("index_definitions")),
50
+ log_traffic: parsed_yaml.fetch("log_traffic", false),
51
+ max_client_retries: parsed_yaml.fetch("max_client_retries", 3)
52
+ )
53
+ end
54
+
55
+ EXPECTED_KEYS = members.map(&:to_s)
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,38 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ class DatastoreCore
11
+ module Configuration
12
+ class ClientFaradayAdapter < ::Data.define(
13
+ # The faraday adapter to use with the datastore client, such as `httpx` or `typhoeus`.
14
+ # For more info, see:
15
+ # https://github.com/elastic/elasticsearch-ruby/commit/a7bbdbf2a96168c1b33dca46ee160d2d4d75ada0
16
+ :name,
17
+ # A Ruby library to require which provides the named adapter (optional).
18
+ :require
19
+ )
20
+ def self.from_parsed_yaml(parsed_yaml)
21
+ parsed_yaml = parsed_yaml.fetch("client_faraday_adapter") || {}
22
+ extra_keys = parsed_yaml.keys - EXPECTED_KEYS
23
+
24
+ unless extra_keys.empty?
25
+ raise ConfigError, "Unknown `datastore.client_faraday_adapter` config settings: #{extra_keys.join(", ")}"
26
+ end
27
+
28
+ new(
29
+ name: parsed_yaml["name"]&.to_sym,
30
+ require: parsed_yaml["require"]
31
+ )
32
+ end
33
+
34
+ EXPECTED_KEYS = members.map(&:to_s)
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,52 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/error"
10
+
11
+ module ElasticGraph
12
+ class DatastoreCore
13
+ module Configuration
14
+ class ClusterDefinition < ::Data.define(:url, :backend_client_class, :settings)
15
+ def self.from_hash(hash)
16
+ extra_keys = hash.keys - EXPECTED_KEYS
17
+
18
+ unless extra_keys.empty?
19
+ raise ConfigError, "Unknown `datastore.clusters` config settings: #{extra_keys.join(", ")}"
20
+ end
21
+
22
+ backend_name = hash["backend"]
23
+ backend_client_class =
24
+ case backend_name
25
+ when "elasticsearch"
26
+ require "elastic_graph/elasticsearch/client"
27
+ Elasticsearch::Client
28
+ when "opensearch"
29
+ require "elastic_graph/opensearch/client"
30
+ OpenSearch::Client
31
+ else
32
+ raise ConfigError, "Unknown `datastore.clusters` backend: `#{backend_name}`. Valid backends are `elasticsearch` and `opensearch`."
33
+ end
34
+
35
+ new(
36
+ url: hash.fetch("url"),
37
+ backend_client_class: backend_client_class,
38
+ settings: hash.fetch("settings")
39
+ )
40
+ end
41
+
42
+ def self.definitions_by_name_hash_from(cluster_def_hash_by_name)
43
+ cluster_def_hash_by_name.transform_values do |cluster_def_hash|
44
+ from_hash(cluster_def_hash)
45
+ end
46
+ end
47
+
48
+ EXPECTED_KEYS = members.map(&:to_s) - ["backend_client_class"] + ["backend"]
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,110 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/support/time_set"
10
+ require "elastic_graph/error"
11
+ require "time"
12
+
13
+ module ElasticGraph
14
+ class DatastoreCore
15
+ module Configuration
16
+ # Defines environment-specific customizations for an index definition.
17
+ #
18
+ # - ignore_routing_values: routing values for which we will ignore routing as configured on the index.
19
+ # This is intended to be used when a single routing value contains such a large portion of the dataset that it creates lopsided shards.
20
+ # By including that routing value in this config setting, it'll spread that value's data across all shards instead of concentrating it on a single shard.
21
+ # - query_cluster: named search cluster to be used for queries on this index.
22
+ # - index_into_clusters: named search clusters to index data into.
23
+ # - setting_overrides: overrides for index (or index template) settings.
24
+ # - setting_overrides_by_timestamp: overrides for index template settings for specific dates,
25
+ # allowing us to have different settings than the template for some timestamp.
26
+ # - custom_timestamp_ranges: defines indices for a custom timestamp range (rather than relying
27
+ # on the configured rollover frequency).
28
+ # - use_updates_for_indexing: when `true`, opts the index into using the `update` API instead of the `index` API for indexing.
29
+ # (Defaults to `true`).
30
+ class IndexDefinition < ::Data.define(
31
+ :ignore_routing_values,
32
+ :query_cluster,
33
+ :index_into_clusters,
34
+ :setting_overrides,
35
+ :setting_overrides_by_timestamp,
36
+ :custom_timestamp_ranges,
37
+ :use_updates_for_indexing
38
+ )
39
+ def initialize(ignore_routing_values:, **rest)
40
+ __skip__ = super(ignore_routing_values: ignore_routing_values.to_set, **rest)
41
+
42
+ # Verify the custom ranges are disjoint.
43
+ # Yeah, this is O(N^2), which isn't great, but we expect a _very_ small number of custom
44
+ # ranges (0-2) so this should be ok.
45
+ return if custom_timestamp_ranges
46
+ .map(&:time_set)
47
+ .combination(2)
48
+ .none? do |s1_s2|
49
+ s1, s2 = s1_s2
50
+ s1.intersect?(s2)
51
+ end
52
+
53
+ raise ConfigError, "Your configured `custom_timestamp_ranges` are not disjoint, as required."
54
+ end
55
+
56
+ def without_env_overrides
57
+ with(setting_overrides: {}, setting_overrides_by_timestamp: {}, custom_timestamp_ranges: [])
58
+ end
59
+
60
+ def custom_timestamp_range_for(timestamp)
61
+ custom_timestamp_ranges.find do |range|
62
+ range.time_set.member?(timestamp)
63
+ end
64
+ end
65
+
66
+ def self.definitions_by_name_hash_from(index_def_hash_by_name)
67
+ index_def_hash_by_name.transform_values do |index_def_hash|
68
+ __skip__ = from(**index_def_hash.transform_keys(&:to_sym))
69
+ end
70
+ end
71
+
72
+ def self.from(custom_timestamp_ranges:, use_updates_for_indexing: true, **rest)
73
+ __skip__ = new(
74
+ custom_timestamp_ranges: CustomTimestampRange.ranges_from(custom_timestamp_ranges),
75
+ use_updates_for_indexing: use_updates_for_indexing,
76
+ **rest
77
+ )
78
+ end
79
+
80
+ # Represents an index definition that is based on a custom timestamp range.
81
+ class CustomTimestampRange < ::Data.define(:index_name_suffix, :setting_overrides, :time_set)
82
+ def initialize(index_name_suffix:, setting_overrides:, time_set:)
83
+ super
84
+
85
+ if time_set.empty?
86
+ raise ConfigError, "Custom timestamp range with suffix `#{index_name_suffix}` is invalid: no timestamps exist in it."
87
+ end
88
+ end
89
+
90
+ def self.ranges_from(range_hashes)
91
+ range_hashes.map do |range_hash|
92
+ __skip__ = from(**range_hash.transform_keys(&:to_sym))
93
+ end
94
+ end
95
+
96
+ private_class_method def self.from(index_name_suffix:, setting_overrides:, **predicates_hash)
97
+ if predicates_hash.empty?
98
+ raise ConfigSettingNotSetError, "Custom timestamp range with suffix `#{index_name_suffix}` lacks boundary definitions."
99
+ end
100
+
101
+ range_options = predicates_hash.transform_values { |iso8601_string| ::Time.iso8601(iso8601_string) }
102
+ time_set = Support::TimeSet.of_range(**range_options)
103
+
104
+ new(index_name_suffix: index_name_suffix, setting_overrides: setting_overrides, time_set: time_set)
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
110
+ end
@@ -0,0 +1,79 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ class DatastoreCore
11
+ module IndexConfigNormalizer
12
+ # These are settings that the datastore exposes when you fetch an index, but that you can
13
+ # never set. We need to ignore them when figuring out what settings to update.
14
+ #
15
+ # Note: `index.routing.allocation.include._tier_preference` is not a read-only setting, but
16
+ # we want to treat it as one, because (1) Elasticsearch 7.10+ sets it and (2) we do not want
17
+ # to ever write it at this time.
18
+ #
19
+ # Note: `index.history.uuid` is a weird setting that sometimes shows up in managed AWS OpenSearch
20
+ # clusters, but only on _some_ indices. It's not documented and we don't want to mess with it here,
21
+ # so we want to treat it as a read only setting.
22
+ READ_ONLY_SETTINGS = %w[
23
+ index.creation_date
24
+ index.history.uuid
25
+ index.provided_name
26
+ index.replication.type
27
+ index.routing.allocation.include._tier_preference
28
+ index.uuid
29
+ index.version.created
30
+ index.version.upgraded
31
+ ]
32
+
33
+ # Normalizes the provided index configuration so that it is in a stable form that we can compare to what
34
+ # the datastore returns when we query it for the configuration of an index. This includes:
35
+ #
36
+ # - Dropping read-only settings that we never interact with but that the datastore automatically sets on an index.
37
+ # Omitting them makes it easier for us to compare our desired configuration to what is in the datastore.
38
+ # - Converting setting values to a normalized string form. The datastore oddly returns setting values as strings
39
+ # (e.g. `"false"` or `"7"` instead of `false` or `7`), so this matches that behavior.
40
+ # - Drops `type: object` from a mapping when there are `properties` because the datastore omits it in that
41
+ # situation, treating it as the default type.
42
+ def self.normalize(index_config)
43
+ if (settings = index_config["settings"])
44
+ index_config = index_config.merge("settings" => normalize_settings(settings))
45
+ end
46
+
47
+ if (mappings = index_config["mappings"])
48
+ index_config = index_config.merge("mappings" => normalize_mappings(mappings))
49
+ end
50
+
51
+ index_config
52
+ end
53
+
54
+ def self.normalize_mappings(mappings)
55
+ return mappings unless (properties = mappings["properties"])
56
+
57
+ mappings = mappings.except("type") if mappings["type"] == "object"
58
+ mappings.merge("properties" => properties.transform_values { |prop| normalize_mappings(prop) })
59
+ end
60
+
61
+ def self.normalize_settings(settings)
62
+ settings
63
+ .except(*READ_ONLY_SETTINGS)
64
+ .to_h { |name, value| [name, normalize_setting_value(value)] }
65
+ end
66
+
67
+ private_class_method def self.normalize_setting_value(value)
68
+ case value
69
+ when nil
70
+ nil
71
+ when ::Array
72
+ value.map { |v| normalize_setting_value(v) }
73
+ else
74
+ value.to_s
75
+ end
76
+ end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,162 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/datastore_core/index_config_normalizer"
10
+ require "elastic_graph/error"
11
+ require "elastic_graph/support/hash_util"
12
+
13
+ module ElasticGraph
14
+ class DatastoreCore
15
+ module IndexDefinition
16
+ # This module contains common implementation logic for both the rollover and non-rollover
17
+ # implementations of the common IndexDefinition type.
18
+ module Base
19
+ # Returns any setting overrides for this index from the environment-specific config file,
20
+ # after flattening it so that it can be directly used in a create index request.
21
+ def flattened_env_setting_overrides
22
+ @flattened_env_setting_overrides ||= Support::HashUtil.flatten_and_stringify_keys(
23
+ env_index_config.setting_overrides,
24
+ prefix: "index"
25
+ )
26
+ end
27
+
28
+ # Gets the routing value for the given `prepared_record`. Notably, `prepared_record` must be previously
29
+ # prepared with an `Indexer::RecordPreparer` in order to ensure that it uses internal index
30
+ # field names (to align with `route_with_path`/`route_with` which also use the internal name) rather
31
+ # than the public field name (which can differ).
32
+ def routing_value_for_prepared_record(prepared_record, route_with_path: route_with, id_path: "id")
33
+ return nil unless has_custom_routing?
34
+
35
+ unless route_with_path
36
+ raise ConfigError, "`#{self}` uses custom routing, but `route_with_path` is misconfigured (was `nil`)"
37
+ end
38
+
39
+ config_routing_value = Support::HashUtil.fetch_value_at_path(prepared_record, route_with_path).to_s
40
+ return config_routing_value unless ignored_values_for_routing.include?(config_routing_value)
41
+
42
+ Support::HashUtil.fetch_value_at_path(prepared_record, id_path).to_s
43
+ end
44
+
45
+ def has_custom_routing?
46
+ route_with != "id"
47
+ end
48
+
49
+ # Indicates if a search on this index definition may hit incomplete documents. An incomplete document
50
+ # can occur when multiple event types flow into the same index. An index that has only one source type
51
+ # can never have incomplete documents, but an index that has 2 or more sources can have incomplete
52
+ # documents when the "primary" event type hasn't yet been received for a document.
53
+ #
54
+ # This case is notable because we need to apply automatic filtering in order to hide documents that are
55
+ # not yet complete.
56
+ #
57
+ # Note: determining this value sometimes requires that we query the datastore for the record of all
58
+ # sources that an index has ever had. This value changes very, very rarely, and we don't want to slow
59
+ # down every GraphQL query by adding the extra query against the datastore, so we cache the value here.
60
+ def searches_could_hit_incomplete_docs?
61
+ return @searches_could_hit_incomplete_docs if defined?(@searches_could_hit_incomplete_docs)
62
+
63
+ if current_sources.size > 1
64
+ # We know that incomplete docs are possible, without needing to check sources recorded in `_meta`.
65
+ @searches_could_hit_incomplete_docs = true
66
+ else
67
+ # While our current configuration can't produce incomplete documents, some may already exist in the index
68
+ # if we previously had some `sourced_from` fields (but no longer have them). Here we check for the sources
69
+ # we've recorded in `_meta` to account for that.
70
+ client = datastore_clients_by_name.fetch(cluster_to_query)
71
+ recorded_sources = mappings_in_datastore(client).dig("_meta", "ElasticGraph", "sources") || []
72
+ sources = recorded_sources.union(current_sources.to_a)
73
+
74
+ @searches_could_hit_incomplete_docs = sources.size > 1
75
+ end
76
+ end
77
+
78
+ def cluster_to_query
79
+ env_index_config.query_cluster
80
+ end
81
+
82
+ def clusters_to_index_into
83
+ env_index_config.index_into_clusters.tap do |clusters_to_index_into|
84
+ raise ConfigError, "No `index_into_clusters` defined for #{self} in env_index_config" unless clusters_to_index_into
85
+ end
86
+ end
87
+
88
+ def use_updates_for_indexing?
89
+ env_index_config.use_updates_for_indexing
90
+ end
91
+
92
+ def ignored_values_for_routing
93
+ env_index_config.ignore_routing_values
94
+ end
95
+
96
+ # Returns a list of all defined datastore clusters this index resides within.
97
+ def all_accessible_cluster_names
98
+ @all_accessible_cluster_names ||=
99
+ # Using `_` because steep doesn't understand that `compact` removes nils.
100
+ (clusters_to_index_into + [_ = cluster_to_query]).compact.uniq.select do |name|
101
+ defined_clusters.include?(name)
102
+ end
103
+ end
104
+
105
+ def accessible_cluster_names_to_index_into
106
+ @accessible_cluster_names_to_index_into ||= clusters_to_index_into.select do |name|
107
+ defined_clusters.include?(name)
108
+ end
109
+ end
110
+
111
+ # Indicates whether or not the index is accessible from GraphQL queries, by virtue of
112
+ # the `cluster_to_query` being a defined cluster or not. This will be used to
113
+ # hide GraphQL schema elements that can't be queried when our config omits the means
114
+ # to query an index (e.g. due to lacking a configured URL).
115
+ def accessible_from_queries?
116
+ return false unless (cluster = cluster_to_query)
117
+ defined_clusters.include?(cluster)
118
+ end
119
+
120
+ # Returns a list of indices related to this template in the datastore cluster this
121
+ # index definition is configured to query. Note that for performance reasons, this method
122
+ # memoizes the result of querying the datastore for its current list of indices, and as
123
+ # a result the return value may be out of date. If it is absolutely essential that you get
124
+ # an up-to-date list of related indices, use `related_rollover_indices(datastore_client)` instead of
125
+ # this method.
126
+ #
127
+ # Note, however, that indices generally change *very* rarely (say, monthly or yearly) and as such
128
+ # this will very rarely be out of date, even with the memoization.
129
+ def known_related_query_rollover_indices
130
+ @known_related_query_rollover_indices ||= cluster_to_query&.then do |name|
131
+ # For query purposes, we only want indices that exist. If we return a query that is defined in our configuration
132
+ # but does not exist, and that gets used in a search index expression (even for the purposes of excluding it!),
133
+ # the datastore will return an error.
134
+ related_rollover_indices(datastore_clients_by_name.fetch(name), only_if_exists: true)
135
+ end || []
136
+ end
137
+
138
+ # Returns a set of all of the field paths to subfields of the special `LIST_COUNTS_FIELD`
139
+ # that contains the element counts of all list fields. The returned set is filtered based
140
+ # on the provided `source` to only contain the paths of fields that are populated by the
141
+ # given source.
142
+ def list_counts_field_paths_for_source(source)
143
+ @list_counts_field_paths_for_source ||= {} # : ::Hash[::String, ::Set[::String]]
144
+ @list_counts_field_paths_for_source[source] ||= identify_list_counts_field_paths_for_source(source)
145
+ end
146
+
147
+ def to_s
148
+ "#<#{self.class.name} #{name}>"
149
+ end
150
+ alias_method :inspect, :to_s
151
+
152
+ private
153
+
154
+ def identify_list_counts_field_paths_for_source(source)
155
+ fields_by_path.filter_map do |path, field|
156
+ path if field.source == source && path.split(".").include?(LIST_COUNTS_FIELD)
157
+ end.to_set
158
+ end
159
+ end
160
+ end
161
+ end
162
+ end
@@ -0,0 +1,64 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/datastore_core/index_config_normalizer"
10
+ require "elastic_graph/datastore_core/index_definition/base"
11
+ require "elastic_graph/support/memoizable_data"
12
+
13
+ module ElasticGraph
14
+ class DatastoreCore
15
+ module IndexDefinition
16
+ class Index < Support::MemoizableData.define(
17
+ :name, :route_with, :default_sort_clauses, :current_sources, :fields_by_path,
18
+ :env_index_config, :defined_clusters, :datastore_clients_by_name
19
+ )
20
+ # `Data.define` provides all these methods:
21
+ # @dynamic name, route_with, default_sort_clauses, current_sources, fields_by_path, env_index_config, defined_clusters, datastore_clients_by_name, initialize
22
+
23
+ # `include IndexDefinition::Base` provides all these methods. Steep should be able to detect it
24
+ # but can't for some reason so we have to declare them with `@dynamic`.
25
+ # @dynamic flattened_env_setting_overrides, routing_value_for_prepared_record, has_custom_routing?, cluster_to_query, use_updates_for_indexing?
26
+ # @dynamic clusters_to_index_into, all_accessible_cluster_names, ignored_values_for_routing, searches_could_hit_incomplete_docs?
27
+ # @dynamic accessible_cluster_names_to_index_into, accessible_from_queries?, known_related_query_rollover_indices, list_counts_field_paths_for_source
28
+ include IndexDefinition::Base
29
+
30
+ def mappings_in_datastore(datastore_client)
31
+ IndexConfigNormalizer.normalize_mappings(datastore_client.get_index(name)["mappings"] || {})
32
+ end
33
+
34
+ # `ignore_unavailable: true` is needed to prevent errors when we delete non-existing non-rollover indices
35
+ def delete_from_datastore(datastore_client)
36
+ datastore_client.delete_indices(name)
37
+ end
38
+
39
+ # Indicates if this is a rollover index definition.
40
+ #
41
+ # Use of this is considered a mild code smell. When feasible, it's generally better to
42
+ # implement a new polymorphic API on the IndexDefinition interface, rather
43
+ # then branching on the value of this predicate.
44
+ def rollover_index_template?
45
+ false
46
+ end
47
+
48
+ def index_expression_for_search
49
+ name
50
+ end
51
+
52
+ # Returns an index name to use for write operations.
53
+ def index_name_for_writes(record, timestamp_field_path: nil)
54
+ name
55
+ end
56
+
57
+ # A concrete index has no related indices (really only rollover indices do).
58
+ def related_rollover_indices(datastore_client, only_if_exists: false)
59
+ []
60
+ end
61
+ end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,48 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "delegate"
10
+ require "elastic_graph/datastore_core/index_definition/index"
11
+
12
+ module ElasticGraph
13
+ class DatastoreCore
14
+ module IndexDefinition
15
+ # Represents a concrete index for specific time range, derived from a RolloverIndexTemplate.
16
+ class RolloverIndex < DelegateClass(Index)
17
+ # @dynamic time_set
18
+ attr_reader :time_set
19
+
20
+ def initialize(index, time_set)
21
+ super(index)
22
+ @time_set = time_set
23
+ end
24
+
25
+ # We need to override `==` so that two `RolloverIndex` objects that wrap the same `Index` object are
26
+ # considered equal. Oddly enough, the `DelegateClass` implementation of `==` returns `true` if `other`
27
+ # is the wrapped object, but not if it's another instance of the same `DelegateClass` wrapping the same
28
+ # instance.
29
+ #
30
+ # https://github.com/ruby/ruby/blob/v3_0_3/lib/delegate.rb#L156-L159
31
+ #
32
+ # We need this because we want two `RolloverIndex` instances that wrap the same
33
+ # underlying `Index` instance to be considered equal (something a test relies upon,
34
+ # but also generally useful and expected).
35
+ def ==(other)
36
+ if RolloverIndex === other
37
+ __getobj__ == other.__getobj__ && time_set == other.time_set
38
+ else
39
+ # :nocov: -- this method isn't explicitly covered by tests (not worth writing a test just to cover this line).
40
+ super
41
+ # :nocov:
42
+ end
43
+ end
44
+ alias_method :eql?, :==
45
+ end
46
+ end
47
+ end
48
+ end