elasticgraph-datastore_core 0.18.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7bd50dac79cefe5466066b048ebb298df524a33a904082b0871af9429a7521ed
4
+ data.tar.gz: 666da201ff6ef33c81c2d7b4926068913188f735ec1f9777ed68255c3433bc2a
5
+ SHA512:
6
+ metadata.gz: 9ec51fffb49a31152bfa0718bfb16d0063c964ca24128710639d287dac4d10ccc4ea0fbe969caae6f7c39df28ddb80b3a2bc225c2c7af1c4bb7d6f5baf5c1459
7
+ data.tar.gz: 671573cb43b2880450bac584ba4edd21154188a2de245bd195569cc166a0e323038caba947cc7d74bbf0b403b78952b37a800bea81fe602f125188dffa529275
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2024 Block, Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # ElasticGraph::DatastoreCore
2
+
3
+ Contains the core datastore logic used by the rest of ElasticGraph.
@@ -0,0 +1,21 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require_relative "../gemspec_helper"
10
+
11
# Gemspec for the `elasticgraph-datastore_core` gem. The shared helper (required above)
# fills in the common gem metadata; this block only declares what is unique to this gem.
ElasticGraphGemspecHelper.define_elasticgraph_gem(gemspec_file: __FILE__, category: :core) do |spec, eg_version|
  spec.summary = "ElasticGraph gem containing the core datastore support types and logic."

  # Runtime dependencies: other ElasticGraph gems this one needs, pinned to the same version.
  spec.add_dependency "elasticgraph-schema_artifacts", eg_version
  spec.add_dependency "elasticgraph-support", eg_version

  # Development-only dependencies (used by this gem's test suite, not installed by consumers).
  spec.add_development_dependency "elasticgraph-admin", eg_version
  spec.add_development_dependency "elasticgraph-elasticsearch", eg_version
  spec.add_development_dependency "elasticgraph-opensearch", eg_version
  spec.add_development_dependency "elasticgraph-schema_definition", eg_version
end
@@ -0,0 +1,58 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/datastore_core/configuration/client_faraday_adapter"
10
+ require "elastic_graph/datastore_core/configuration/cluster_definition"
11
+ require "elastic_graph/datastore_core/configuration/index_definition"
12
+ require "elastic_graph/error"
13
+
14
module ElasticGraph
  class DatastoreCore
    # Defines the configuration related to datastores.
    class Config < ::Data.define(
      # Configuration of the faraday adapter to use with the datastore client.
      :client_faraday_adapter,
      # Map of datastore cluster definitions, keyed by cluster name. The names are referenced within
      # `index_definitions` (via `query_cluster` and `index_into_clusters`) to identify datastore
      # clusters. Each definition has a `url` and `settings`; `settings` holds datastore settings
      # in flattened name form, e.g. `"cluster.max_shards_per_node": 2000`.
      :clusters,
      # Map of index definition names to `IndexDefinition` objects containing environment-specific
      # customizations for the named index definitions.
      :index_definitions,
      # Determines if we log requests/responses to/from the datastore. Defaults to `false`.
      :log_traffic,
      # Passed down to the datastore client; controls how many times ElasticGraph attempts a call
      # against the datastore before failing. Retrying a handful of times is generally advantageous
      # since some sporadic failures are expected during normal operation. Defaults to 3.
      :max_client_retries
    )
      # Builds a `Config` instance from parsed YAML config.
      #
      # Raises `ConfigError` if the `datastore` section contains any unrecognized settings.
      def self.from_parsed_yaml(parsed_yaml)
        datastore_yaml = parsed_yaml.fetch("datastore")
        unrecognized_keys = datastore_yaml.keys - EXPECTED_KEYS

        unless unrecognized_keys.empty?
          raise ConfigError, "Unknown `datastore` config settings: #{unrecognized_keys.join(", ")}"
        end

        new(
          client_faraday_adapter: Configuration::ClientFaradayAdapter.from_parsed_yaml(datastore_yaml),
          clusters: Configuration::ClusterDefinition.definitions_by_name_hash_from(datastore_yaml.fetch("clusters")),
          index_definitions: Configuration::IndexDefinition.definitions_by_name_hash_from(datastore_yaml.fetch("index_definitions")),
          log_traffic: datastore_yaml.fetch("log_traffic", false),
          max_client_retries: datastore_yaml.fetch("max_client_retries", 3)
        )
      end

      # The set of `datastore` config keys we accept, derived from the members defined above.
      EXPECTED_KEYS = members.map(&:to_s)
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
module ElasticGraph
  class DatastoreCore
    module Configuration
      # Faraday adapter settings for the datastore HTTP client.
      class ClientFaradayAdapter < ::Data.define(
        # The faraday adapter to use with the datastore client, such as `httpx` or `typhoeus`.
        # For more info, see:
        # https://github.com/elastic/elasticsearch-ruby/commit/a7bbdbf2a96168c1b33dca46ee160d2d4d75ada0
        :name,
        # A Ruby library to require which provides the named adapter (optional).
        :require
      )
        # Builds an instance from the `client_faraday_adapter` section of the parsed YAML config.
        # A `nil` section is treated the same as an empty one (both members default to `nil`).
        #
        # Raises `ConfigError` if the section contains any unrecognized settings.
        def self.from_parsed_yaml(parsed_yaml)
          adapter_yaml = parsed_yaml.fetch("client_faraday_adapter") || {}
          unrecognized_keys = adapter_yaml.keys - EXPECTED_KEYS

          unless unrecognized_keys.empty?
            raise ConfigError, "Unknown `datastore.client_faraday_adapter` config settings: #{unrecognized_keys.join(", ")}"
          end

          new(
            name: adapter_yaml["name"]&.to_sym,
            require: adapter_yaml["require"]
          )
        end

        # The set of config keys we accept, derived from the members defined above.
        EXPECTED_KEYS = members.map(&:to_s)
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/error"
10
+
11
module ElasticGraph
  class DatastoreCore
    module Configuration
      # A single named datastore cluster: its URL, the backend client class to talk to it
      # with, and its cluster-level settings.
      class ClusterDefinition < ::Data.define(:url, :backend_client_class, :settings)
        # Builds a `ClusterDefinition` from one entry of the `datastore.clusters` config hash.
        #
        # Raises `ConfigError` for unrecognized settings or an unknown `backend` value.
        def self.from_hash(hash)
          unrecognized_keys = hash.keys - EXPECTED_KEYS
          raise ConfigError, "Unknown `datastore.clusters` config settings: #{unrecognized_keys.join(", ")}" unless unrecognized_keys.empty?

          # Resolve the backend before fetching `url`/`settings` so an unknown backend is
          # reported ahead of any missing-key errors (matches prior behavior).
          backend_client_class = client_class_for(hash["backend"])

          new(
            url: hash.fetch("url"),
            backend_client_class: backend_client_class,
            settings: hash.fetch("settings")
          )
        end

        # Converts a hash of `cluster name => cluster config hash` into a hash of
        # `cluster name => ClusterDefinition`.
        def self.definitions_by_name_hash_from(cluster_def_hash_by_name)
          cluster_def_hash_by_name.transform_values { |cluster_def_hash| from_hash(cluster_def_hash) }
        end

        # Maps a configured backend name to its client class, lazily requiring the
        # gem that provides it so we only load the backend that is actually used.
        private_class_method def self.client_class_for(backend_name)
          case backend_name
          when "elasticsearch"
            require "elastic_graph/elasticsearch/client"
            Elasticsearch::Client
          when "opensearch"
            require "elastic_graph/opensearch/client"
            OpenSearch::Client
          else
            raise ConfigError, "Unknown `datastore.clusters` backend: `#{backend_name}`. Valid backends are `elasticsearch` and `opensearch`."
          end
        end

        # Config exposes `backend` (a name) rather than `backend_client_class` (the resolved class).
        EXPECTED_KEYS = members.map(&:to_s) - ["backend_client_class"] + ["backend"]
      end
    end
  end
end
@@ -0,0 +1,110 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/support/time_set"
10
+ require "elastic_graph/error"
11
+ require "time"
12
+
13
module ElasticGraph
  class DatastoreCore
    module Configuration
      # Defines environment-specific customizations for an index definition.
      #
      # - ignore_routing_values: routing values for which we will ignore routing as configured on the index.
      #   This is intended to be used when a single routing value contains such a large portion of the dataset that it creates lopsided shards.
      #   By including that routing value in this config setting, it'll spread that value's data across all shards instead of concentrating it on a single shard.
      # - query_cluster: named search cluster to be used for queries on this index.
      # - index_into_cluster: named search clusters to index data into.
      # - setting_overrides: overrides for index (or index template) settings.
      # - setting_overrides_by_timestamp: overrides for index template settings for specific dates,
      #   allowing us to have different settings than the template for some timestamp.
      # - custom_timestamp_ranges: defines indices for a custom timestamp range (rather than relying
      #   on the configured rollover frequency).
      # - use_updates_for_indexing: when `true`, opts the index into using the `update` API instead of the `index` API for indexing.
      #   (Defaults to `true`).
      class IndexDefinition < ::Data.define(
        :ignore_routing_values,
        :query_cluster,
        :index_into_clusters,
        :setting_overrides,
        :setting_overrides_by_timestamp,
        :custom_timestamp_ranges,
        :use_updates_for_indexing
      )
        # Normalizes `ignore_routing_values` to a `Set` and validates that the configured
        # `custom_timestamp_ranges` do not overlap one another.
        #
        # NOTE(review): the `__skip__ =` assignment appears to be a marker to skip static
        # type-checking (steep) on this line -- confirm against the project's type-check setup.
        def initialize(ignore_routing_values:, **rest)
          __skip__ = super(ignore_routing_values: ignore_routing_values.to_set, **rest)

          # Verify the custom ranges are disjoint.
          # Yeah, this is O(N^2), which isn't great, but we expect a _very_ small number of custom
          # ranges (0-2) so this should be ok.
          return if custom_timestamp_ranges
            .map(&:time_set)
            .combination(2)
            .none? do |s1_s2|
              s1, s2 = s1_s2
              s1.intersect?(s2)
            end

          raise ConfigError, "Your configured `custom_timestamp_ranges` are not disjoint, as required."
        end

        # Returns a copy of this definition with all environment-specific overrides cleared
        # (empty setting overrides and no custom timestamp ranges).
        def without_env_overrides
          with(setting_overrides: {}, setting_overrides_by_timestamp: {}, custom_timestamp_ranges: [])
        end

        # Returns the custom timestamp range containing the given `timestamp`, or `nil` if
        # no configured range covers it.
        def custom_timestamp_range_for(timestamp)
          custom_timestamp_ranges.find do |range|
            range.time_set.member?(timestamp)
          end
        end

        # Converts a hash of `index definition name => config hash` (string keys) into a hash
        # of `index definition name => IndexDefinition`.
        def self.definitions_by_name_hash_from(index_def_hash_by_name)
          index_def_hash_by_name.transform_values do |index_def_hash|
            __skip__ = from(**index_def_hash.transform_keys(&:to_sym))
          end
        end

        # Builds an instance from keyword args, converting the raw `custom_timestamp_ranges`
        # hashes into `CustomTimestampRange` objects and defaulting `use_updates_for_indexing`
        # to `true` when unspecified.
        def self.from(custom_timestamp_ranges:, use_updates_for_indexing: true, **rest)
          __skip__ = new(
            custom_timestamp_ranges: CustomTimestampRange.ranges_from(custom_timestamp_ranges),
            use_updates_for_indexing: use_updates_for_indexing,
            **rest
          )
        end

        # Represents an index definition that is based on a custom timestamp range.
        class CustomTimestampRange < ::Data.define(:index_name_suffix, :setting_overrides, :time_set)
          # Validates that the range actually contains at least one timestamp.
          def initialize(index_name_suffix:, setting_overrides:, time_set:)
            super

            if time_set.empty?
              raise ConfigError, "Custom timestamp range with suffix `#{index_name_suffix}` is invalid: no timestamps exist in it."
            end
          end

          # Converts an array of raw range config hashes (string keys) into
          # `CustomTimestampRange` instances.
          def self.ranges_from(range_hashes)
            range_hashes.map do |range_hash|
              __skip__ = from(**range_hash.transform_keys(&:to_sym))
            end
          end

          # Builds an instance from one range config hash. All keys other than
          # `index_name_suffix` and `setting_overrides` are treated as range boundary
          # predicates (ISO8601 timestamp strings) and converted into a `TimeSet`.
          private_class_method def self.from(index_name_suffix:, setting_overrides:, **predicates_hash)
            if predicates_hash.empty?
              raise ConfigSettingNotSetError, "Custom timestamp range with suffix `#{index_name_suffix}` lacks boundary definitions."
            end

            range_options = predicates_hash.transform_values { |iso8601_string| ::Time.iso8601(iso8601_string) }
            time_set = Support::TimeSet.of_range(**range_options)

            new(index_name_suffix: index_name_suffix, setting_overrides: setting_overrides, time_set: time_set)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,79 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
module ElasticGraph
  class DatastoreCore
    module IndexConfigNormalizer
      # These are settings that the datastore exposes when you fetch an index, but that you can
      # never set. We need to ignore them when figuring out what settings to update.
      #
      # Note: `index.routing.allocation.include._tier_preference` is not a read-only setting, but
      # we want to treat it as one, because (1) Elasticsearch 7.10+ sets it and (2) we do not want
      # to ever write it at this time.
      #
      # Note: `index.history.uuid` is a weird setting that sometimes shows up in managed AWS OpenSearch
      # clusters, but only on _some_ indices. It's not documented and we don't want to mess with it here,
      # so we want to treat it as a read only setting.
      #
      # Frozen so this module-level constant can't be mutated by accident.
      READ_ONLY_SETTINGS = %w[
        index.creation_date
        index.history.uuid
        index.provided_name
        index.replication.type
        index.routing.allocation.include._tier_preference
        index.uuid
        index.version.created
        index.version.upgraded
      ].freeze

      # Normalizes the provided index configuration so that it is in a stable form that we can compare to what
      # the datastore returns when we query it for the configuration of an index. This includes:
      #
      # - Dropping read-only settings that we never interact with but that the datastore automatically sets on an index.
      #   Omitting them makes it easier for us to compare our desired configuration to what is in the datastore.
      # - Converting setting values to a normalized string form. The datastore oddly returns setting values as strings
      #   (e.g. `"false"` or `"7"` instead of `false` or `7`), so this matches that behavior.
      # - Drops `type: object` from a mapping when there are `properties` because the datastore omits it in that
      #   situation, treating it as the default type.
      #
      # The input hash is not mutated; normalized copies are returned via `merge`.
      def self.normalize(index_config)
        if (settings = index_config["settings"])
          index_config = index_config.merge("settings" => normalize_settings(settings))
        end

        if (mappings = index_config["mappings"])
          index_config = index_config.merge("mappings" => normalize_mappings(mappings))
        end

        index_config
      end

      # Recursively normalizes a mappings hash: drops a redundant `type: object` when
      # `properties` are present, and normalizes each property the same way. Mappings
      # without `properties` are returned unchanged.
      def self.normalize_mappings(mappings)
        return mappings unless (properties = mappings["properties"])

        mappings = mappings.except("type") if mappings["type"] == "object"
        mappings.merge("properties" => properties.transform_values { |prop| normalize_mappings(prop) })
      end

      # Normalizes a settings hash: drops read-only settings and converts each remaining
      # value to its normalized string form.
      def self.normalize_settings(settings)
        settings
          .except(*READ_ONLY_SETTINGS)
          .to_h { |name, value| [name, normalize_setting_value(value)] }
      end

      # Converts a setting value to the string form the datastore returns: `nil` stays `nil`,
      # arrays are normalized element-wise, everything else becomes a string.
      private_class_method def self.normalize_setting_value(value)
        case value
        when nil
          nil
        when ::Array
          value.map { |v| normalize_setting_value(v) }
        else
          value.to_s
        end
      end
    end
  end
end
@@ -0,0 +1,162 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/datastore_core/index_config_normalizer"
10
+ require "elastic_graph/error"
11
+ require "elastic_graph/support/hash_util"
12
+
13
module ElasticGraph
  class DatastoreCore
    module IndexDefinition
      # This module contains common implementation logic for both the rollover and non-rollover
      # implementations of the common IndexDefinition type.
      #
      # NOTE(review): including classes must provide `name`, `route_with`, `current_sources`,
      # `fields_by_path`, `env_index_config`, `defined_clusters`, `datastore_clients_by_name`,
      # `mappings_in_datastore` and `related_rollover_indices` -- confirm against the
      # `Data.define` members of the including classes.
      module Base
        # Returns any setting overrides for this index from the environment-specific config file,
        # after flattening it so that it can be directly used in a create index request.
        # Memoized since the underlying config does not change at runtime.
        def flattened_env_setting_overrides
          @flattened_env_setting_overrides ||= Support::HashUtil.flatten_and_stringify_keys(
            env_index_config.setting_overrides,
            prefix: "index"
          )
        end

        # Gets the routing value for the given `prepared_record`. Notably, `prepared_record` must be previously
        # prepared with an `Indexer::RecordPreparer` in order to ensure that it uses internal index
        # field names (to align with `route_with_path`/`route_with` which also use the internal name) rather
        # than the public field name (which can differ).
        #
        # Returns `nil` when the index does not use custom routing. When the configured routing
        # value is in `ignored_values_for_routing`, falls back to the record's id so that
        # value's data spreads across shards.
        def routing_value_for_prepared_record(prepared_record, route_with_path: route_with, id_path: "id")
          return nil unless has_custom_routing?

          unless route_with_path
            raise ConfigError, "`#{self}` uses custom routing, but `route_with_path` is misconfigured (was `nil`)"
          end

          config_routing_value = Support::HashUtil.fetch_value_at_path(prepared_record, route_with_path).to_s
          return config_routing_value unless ignored_values_for_routing.include?(config_routing_value)

          Support::HashUtil.fetch_value_at_path(prepared_record, id_path).to_s
        end

        # Indicates if this index routes on something other than the default `id` field.
        def has_custom_routing?
          route_with != "id"
        end

        # Indicates if a search on this index definition may hit incomplete documents. An incomplete document
        # can occur when multiple event types flow into the same index. An index that has only one source type
        # can never have incomplete documents, but an index that has 2 or more sources can have incomplete
        # documents when the "primary" event type hasn't yet been received for a document.
        #
        # This case is notable because we need to apply automatic filtering in order to hide documents that are
        # not yet complete.
        #
        # Note: determining this value sometimes requires that we query the datastore for the record of all
        # sources that an index has ever had. This value changes very, very rarely, and we don't want to slow
        # down every GraphQL query by adding the extra query against the datastore, so we cache the value here.
        def searches_could_hit_incomplete_docs?
          # `defined?` (rather than `||=`) so a memoized `false` is not recomputed.
          return @searches_could_hit_incomplete_docs if defined?(@searches_could_hit_incomplete_docs)

          if current_sources.size > 1
            # We know that incomplete docs are possible, without needing to check sources recorded in `_meta`.
            @searches_could_hit_incomplete_docs = true
          else
            # While our current configuration can't produce incomplete documents, some may already exist in the index
            # if we previously had some `sourced_from` fields (but no longer have them). Here we check for the sources
            # we've recorded in `_meta` to account for that.
            client = datastore_clients_by_name.fetch(cluster_to_query)
            recorded_sources = mappings_in_datastore(client).dig("_meta", "ElasticGraph", "sources") || []
            sources = recorded_sources.union(current_sources.to_a)

            @searches_could_hit_incomplete_docs = sources.size > 1
          end
        end

        # The named cluster this index is queried against (from the env-specific config).
        def cluster_to_query
          env_index_config.query_cluster
        end

        # The named clusters data for this index is written to. Raises `ConfigError` if none
        # are configured.
        def clusters_to_index_into
          env_index_config.index_into_clusters.tap do |clusters_to_index_into|
            raise ConfigError, "No `index_into_clusters` defined for #{self} in env_index_config" unless clusters_to_index_into
          end
        end

        # Whether this index uses the datastore `update` API (vs the `index` API) for indexing.
        def use_updates_for_indexing?
          env_index_config.use_updates_for_indexing
        end

        # Routing values for which configured routing is ignored (see `routing_value_for_prepared_record`).
        def ignored_values_for_routing
          env_index_config.ignore_routing_values
        end

        # Returns a list of all defined datastore clusters this index resides within.
        def all_accessible_cluster_names
          @all_accessible_cluster_names ||=
            # Using `_` because steep doesn't understand that `compact` removes nils.
            (clusters_to_index_into + [_ = cluster_to_query]).compact.uniq.select do |name|
              defined_clusters.include?(name)
            end
        end

        # The subset of `clusters_to_index_into` that are actually defined in config.
        def accessible_cluster_names_to_index_into
          @accessible_cluster_names_to_index_into ||= clusters_to_index_into.select do |name|
            defined_clusters.include?(name)
          end
        end

        # Indicates whether not the index is be accessible from GraphQL queries, by virtue of
        # the `cluster_to_query` being a defined cluster or not. This will be used to
        # hide GraphQL schema elements that can't be queried when our config omits the means
        # to query an index (e.g. due to lacking a configured URL).
        def accessible_from_queries?
          return false unless (cluster = cluster_to_query)
          defined_clusters.include?(cluster)
        end

        # Returns a list of indices related to this template in the datastore cluster this
        # index definition is configured to query. Note that for performance reasons, this method
        # memoizes the result of querying the datastore for its current list of indices, and as
        # a result the return value may be out of date. If it is absolutely essential that you get
        # an up-to-date list of related indices, use `related_rollover_indices(datastore_client`) instead of
        # this method.
        #
        # Note, however, that indices generally change *very* rarely (say, monthly or yearly) and as such
        # this will very rarely be out of date, even with the memoization.
        def known_related_query_rollover_indices
          @known_related_query_rollover_indices ||= cluster_to_query&.then do |name|
            # For query purposes, we only want indices that exist. If we return a query that is defined in our configuration
            # but does not exist, and that gets used in a search index expression (even for the purposes of excluding it!),
            # the datastore will return an error.
            related_rollover_indices(datastore_clients_by_name.fetch(name), only_if_exists: true)
          end || []
        end

        # Returns a set of all of the field paths to subfields of the special `LIST_COUNTS_FIELD`
        # that contains the element counts of all list fields. The returned set is filtered based
        # on the provided `source` to only contain the paths of fields that are populated by the
        # given source. Memoized per source.
        def list_counts_field_paths_for_source(source)
          @list_counts_field_paths_for_source ||= {} # : ::Hash[::String, ::Set[::String]]
          @list_counts_field_paths_for_source[source] ||= identify_list_counts_field_paths_for_source(source)
        end

        def to_s
          "#<#{self.class.name} #{name}>"
        end
        alias_method :inspect, :to_s

        private

        # Scans `fields_by_path` for paths populated by `source` that pass through the
        # `LIST_COUNTS_FIELD` component.
        def identify_list_counts_field_paths_for_source(source)
          fields_by_path.filter_map do |path, field|
            path if field.source == source && path.split(".").include?(LIST_COUNTS_FIELD)
          end.to_set
        end
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/datastore_core/index_config_normalizer"
10
+ require "elastic_graph/datastore_core/index_definition/base"
11
+ require "elastic_graph/support/memoizable_data"
12
+
13
module ElasticGraph
  class DatastoreCore
    module IndexDefinition
      # A concrete (non-rollover) index definition.
      class Index < Support::MemoizableData.define(
        :name, :route_with, :default_sort_clauses, :current_sources, :fields_by_path,
        :env_index_config, :defined_clusters, :datastore_clients_by_name
      )
        # `Data.define` provides all these methods:
        # @dynamic name, route_with, default_sort_clauses, current_sources, fields_by_path, env_index_config, defined_clusters, datastore_clients_by_name, initialize

        # `include IndexDefinition::Base` provides all these methods. Steep should be able to detect it
        # but can't for some reason so we have to declare them with `@dynamic`.
        # @dynamic flattened_env_setting_overrides, routing_value_for_prepared_record, has_custom_routing?, cluster_to_query, use_updates_for_indexing?
        # @dynamic clusters_to_index_into, all_accessible_cluster_names, ignored_values_for_routing, searches_could_hit_incomplete_docs?
        # @dynamic accessible_cluster_names_to_index_into, accessible_from_queries?, known_related_query_rollover_indices, list_counts_field_paths_for_source
        include IndexDefinition::Base

        # Fetches this index's mappings from the datastore, normalized for stable comparison.
        def mappings_in_datastore(datastore_client)
          raw_mappings = datastore_client.get_index(name)["mappings"] || {}
          IndexConfigNormalizer.normalize_mappings(raw_mappings)
        end

        # Deletes this index from the datastore.
        # NOTE(review): the original comment here mentioned `ignore_unavailable: true` being needed
        # when deleting non-existing indices -- presumably handled inside `delete_indices`; confirm.
        def delete_from_datastore(datastore_client)
          datastore_client.delete_indices(name)
        end

        # Indicates if this is a rollover index definition (a concrete index never is).
        #
        # Use of this is considered a mild code smell. When feasible, it's generally better to
        # implement a new polymorphic API on the IndexDefinition interface, rather
        # then branching on the value of this predicate.
        def rollover_index_template? = false

        # A concrete index is searched by its own name.
        def index_expression_for_search = name

        # Returns an index name to use for write operations (always this index's name).
        def index_name_for_writes(record, timestamp_field_path: nil) = name

        # A concrete index has no related indices (really only rollover indices do).
        def related_rollover_indices(datastore_client, only_if_exists: false) = []
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "delegate"
10
+ require "elastic_graph/datastore_core/index_definition/index"
11
+
12
module ElasticGraph
  class DatastoreCore
    module IndexDefinition
      # Represents a concrete index for specific time range, derived from a RolloverIndexTemplate.
      class RolloverIndex < DelegateClass(Index)
        # @dynamic time_set
        attr_reader :time_set

        def initialize(index, time_set)
          super(index)
          @time_set = time_set
        end

        # We need to override `==` so that two `RolloverIndex` objects that wrap the same `Index` object are
        # considered equal. Oddly enough, the `DelegateClass` implementation of `==` returns `true` if `other`
        # is the wrapped object, but not if it's another instance of the same `DelegateClass` wrapping the same
        # instance.
        #
        # https://github.com/ruby/ruby/blob/v3_0_3/lib/delegate.rb#L156-L159
        #
        # We need this because we want two `RolloverIndex` instances that wrap the same
        # underlying `Index` instance to be considered equal (something a test relies upon,
        # but also generally useful and expected).
        def ==(other)
          unless RolloverIndex === other
            # :nocov: -- this method isn't explicitly covered by tests (not worth writing a test just to cover this line).
            return super
            # :nocov:
          end

          __getobj__ == other.__getobj__ && time_set == other.time_set
        end
        alias_method :eql?, :==
      end
    end
  end
end