elasticgraph-datastore_core 0.18.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +3 -0
- data/elasticgraph-datastore_core.gemspec +21 -0
- data/lib/elastic_graph/datastore_core/config.rb +58 -0
- data/lib/elastic_graph/datastore_core/configuration/client_faraday_adapter.rb +38 -0
- data/lib/elastic_graph/datastore_core/configuration/cluster_definition.rb +52 -0
- data/lib/elastic_graph/datastore_core/configuration/index_definition.rb +110 -0
- data/lib/elastic_graph/datastore_core/index_config_normalizer.rb +79 -0
- data/lib/elastic_graph/datastore_core/index_definition/base.rb +162 -0
- data/lib/elastic_graph/datastore_core/index_definition/index.rb +64 -0
- data/lib/elastic_graph/datastore_core/index_definition/rollover_index.rb +48 -0
- data/lib/elastic_graph/datastore_core/index_definition/rollover_index_template.rb +232 -0
- data/lib/elastic_graph/datastore_core/index_definition.rb +51 -0
- data/lib/elastic_graph/datastore_core.rb +100 -0
- metadata +404 -0
@@ -0,0 +1,232 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "date"
|
10
|
+
require "elastic_graph/datastore_core/index_config_normalizer"
|
11
|
+
require "elastic_graph/datastore_core/index_definition/base"
|
12
|
+
require "elastic_graph/datastore_core/index_definition/index"
|
13
|
+
require "elastic_graph/datastore_core/index_definition/rollover_index"
|
14
|
+
require "elastic_graph/error"
|
15
|
+
require "elastic_graph/support/memoizable_data"
|
16
|
+
require "elastic_graph/support/time_set"
|
17
|
+
require "elastic_graph/support/time_util"
|
18
|
+
require "time"
|
19
|
+
|
20
|
+
module ElasticGraph
|
21
|
+
class DatastoreCore
|
22
|
+
module IndexDefinition
|
23
|
+
class RolloverIndexTemplate < Support::MemoizableData.define(
|
24
|
+
:name, :route_with, :default_sort_clauses, :current_sources, :fields_by_path, :env_index_config,
|
25
|
+
:index_args, :defined_clusters, :datastore_clients_by_name, :timestamp_field_path, :frequency
|
26
|
+
)
|
27
|
+
# `Data.define` provides all these methods:
|
28
|
+
# @dynamic name, route_with, default_sort_clauses, current_sources, fields_by_path, env_index_config,
|
29
|
+
# @dynamic index_args, defined_clusters, datastore_clients_by_name, timestamp_field_path, frequency, initialize
|
30
|
+
|
31
|
+
# `include IndexDefinition::Base` provides all these methods. Steep should be able to detect it
|
32
|
+
# but can't for some reason so we have to declare them with `@dynamic`.
|
33
|
+
# @dynamic flattened_env_setting_overrides, routing_value_for_prepared_record, has_custom_routing?, cluster_to_query, use_updates_for_indexing?
|
34
|
+
# @dynamic clusters_to_index_into, all_accessible_cluster_names, ignored_values_for_routing, searches_could_hit_incomplete_docs?
|
35
|
+
# @dynamic accessible_cluster_names_to_index_into, accessible_from_queries?, known_related_query_rollover_indices, list_counts_field_paths_for_source
|
36
|
+
include IndexDefinition::Base
|
37
|
+
|
38
|
+
# Reads this index template from the datastore via `get_index_template(name)` and returns its
# `template.mappings` entry after passing it through `IndexConfigNormalizer.normalize_mappings`.
# An empty hash is normalized when the response has no such entry.
def mappings_in_datastore(datastore_client)
  template_response = datastore_client.get_index_template(name)
  raw_mappings = template_response.dig("template", "mappings") || {}

  IndexConfigNormalizer.normalize_mappings(raw_mappings)
end
|
43
|
+
|
44
|
+
# We need to delete both the template and the actual indices for rollover indices
|
45
|
+
def delete_from_datastore(datastore_client)
  matching_indices_expression = index_expression_for_search

  # The template is deleted first, followed by every index matching the wildcard expression.
  datastore_client.delete_index_template(name)
  datastore_client.delete_indices(matching_indices_expression)
end
|
49
|
+
|
50
|
+
# Indicates if this is a rollover index definition.
|
51
|
+
#
|
52
|
+
# Use of this is considered a mild code smell. When feasible, it's generally better to
|
53
|
+
# implement a new polymorphic API on the IndexDefinition interface, rather
|
54
|
+
# than branching on the value of this predicate.
|
55
|
+
# Always `true`: this class is the rollover flavor of an index definition.
def rollover_index_template? = true
|
58
|
+
|
59
|
+
# Two underscores used to avoid collisions
|
60
|
+
# with other types (e.g. payments_2020 and payments_xyz_2020), though regardless shouldn't
|
61
|
+
# happen if types follow naming conventions.
|
62
|
+
# Builds the index expression by using `*` as the suffix passed to `index_name_with_suffix`.
def index_expression_for_search = index_name_with_suffix("*")
|
65
|
+
|
66
|
+
# Returns an index name to use for write operations. The index_definition selection is a function of
|
67
|
+
# the index_definition's rollover configuration and the record's timestamp.
|
68
|
+
def index_name_for_writes(record, timestamp_field_path: nil)
  # An explicitly passed path takes precedence over the one this template was configured with.
  effective_field_path = timestamp_field_path || self.timestamp_field_path
  suffix = rollover_index_suffix_for_record(record, timestamp_field_path: effective_field_path)

  index_name_with_suffix(suffix)
end
|
74
|
+
|
75
|
+
# Returns a list of indices related to this template. This includes both indices that are
|
76
|
+
# specified in our configuration settings (e.g. via `setting_overrides_by_timestamp` and
|
77
|
+
# `custom_time_sets`) and also indices that have been auto-created from the template.
|
78
|
+
#
|
79
|
+
# Note that there can be discrepancies between the configuration settings and the indices in
|
80
|
+
# the database. Sometimes this is planned/expected (e.g. such as when invoking `elasticgraph-admin`
|
81
|
+
# to configure an index newly defined in configuration) and in other cases it's not.
|
82
|
+
#
|
83
|
+
# The `only_if_exists` argument controls how a discrepancy is treated.
|
84
|
+
#
|
85
|
+
# - When `false` (the default), indices that are defined in config but do not exist in the datastore are still returned.
|
86
|
+
# This is generally what we want for indexing and cluster administration.
|
87
|
+
# - When `true`, any indices in our configuration that do not exist are ignored, and not included in the returned list.
|
88
|
+
# This is appropriate for searching the datastore: if we attempt to exclude an index which is defined in config but does
|
89
|
+
# not exist (e.g. via `-[index_name]` in the search index expression), the datastore will return an error, but we can
|
90
|
+
# safely ignore the index. Likewise, if we have an index in the datastore for which we cannot infer a timestamp range, we
|
91
|
+
# need to ignore it to avoid getting errors. Ignoring an index is safe when searching because our search logic uses a
|
92
|
+
# wildcard to match _all_ indices with the same prefix, and then excludes certain known indices that it can safely
|
93
|
+
# exclude based on their timestamp range. Ignored indices which exist will still be searched.
|
94
|
+
#
|
95
|
+
# In addition, any indices which exist, but which are not controlled by our current configuration, are ignored. Examples:
|
96
|
+
#
|
97
|
+
# - An index with a custom suffix (e.g. `__before_2019`) which has no corresponding configuration. We have no way to guess
|
98
|
+
# what the timestamp range is for such an index, and we want to completely ignore it.
|
99
|
+
# - An index for a different rollover frequency than our current configuration. For example, a `__2019-03` index,
|
100
|
+
# which must rollover monthly, would be ignored if our current rollover frequency is yearly or daily.
|
101
|
+
#
|
102
|
+
# These latter cases are quite rare but can happen when we are dealing with indices defined before an update to our
|
103
|
+
# configuration. Our searches will continue to search these indices so long as their name matches the pattern, and
|
104
|
+
# we otherwise want to ignore these indices (e.g. we don't want admin to attempt to configure them, or want our
|
105
|
+
# indexer to attempt to write to them).
|
106
|
+
def related_rollover_indices(datastore_client, only_if_exists: false)
  configured_by_name = rollover_indices_to_pre_create.to_h { |index| [index.name, index] }

  # Build an index object for each index found in the datastore, reusing the configured time set
  # when one exists for that name. Names for which no index object can be built map to `nil` and
  # are dropped by `compact`.
  existing_names = datastore_client.list_indices_matching(index_expression_for_search)
  existing_by_name = existing_names.to_h do |index_name|
    configured_time_set = configured_by_name[index_name]&.time_set
    [index_name, concrete_rollover_index_for(index_name, {}, configured_time_set)]
  end.compact

  if only_if_exists
    configured_by_name = configured_by_name.slice(*existing_by_name.keys)
  end

  # On a name collision the configured index wins over the one derived from the datastore listing.
  existing_by_name.merge(configured_by_name).values
end
|
118
|
+
|
119
|
+
# Gets a single related `RolloverIndex` for a given timestamp.
|
120
|
+
def related_rollover_index_for_timestamp(timestamp, setting_overrides = {})
  path_segments = timestamp_field_path.split(".")

  # Wrap the timestamp in nested hashes, innermost field first, producing a minimal record that
  # holds only the timestamp field (e.g. the path "a.b" yields `{"a" => {"b" => timestamp}}`).
  #
  # `__skip__` is required because the accumulator changes type between iterations (it starts as
  # the timestamp and then becomes a hash), which Steep has trouble with even though it works fine.
  # @type var record: ::Hash[::String, untyped]
  __skip__ = record = path_segments.reverse_each.reduce(timestamp) do |nested_value, field_name|
    {field_name => nested_value}
  end

  index_name = index_name_for_writes(record)
  concrete_rollover_index_for(index_name, setting_overrides)
end
|
131
|
+
|
132
|
+
private
|
133
|
+
|
134
|
+
# Validates the rollover configuration: a timestamp field path must be present and the
# frequency must be one of the keys of `ROLLOVER_SUFFIX_FORMATS_BY_FREQUENCY`.
def after_initialize
  return if timestamp_field_path && ROLLOVER_SUFFIX_FORMATS_BY_FREQUENCY.key?(frequency)

  raise SchemaError, "Rollover index config 'timestamp_field' or 'frequency' is invalid."
end
|
139
|
+
|
140
|
+
# Returns a list of indices that must be pre-created (rather than allowing them to be
|
141
|
+
# created lazily based on the template). This is done so that we can use different
|
142
|
+
# index settings for some indices. For example, you might want your template to be
|
143
|
+
# configured to use 5 shards, but for old months with a small data set you may only
|
144
|
+
# want to use 1 shard.
|
145
|
+
def rollover_indices_to_pre_create
  @rollover_indices_to_pre_create ||= begin
    # One index per configured timestamp override; entries for which no index can be built are dropped.
    from_timestamp_overrides = setting_overrides_by_timestamp.filter_map do |(timestamp, overrides)|
      related_rollover_index_for_timestamp(timestamp, overrides)
    end

    # One index per custom timestamp range, using the suffix and time set declared on the range.
    from_custom_ranges = custom_timestamp_ranges.filter_map do |custom_range|
      index_name = index_name_with_suffix(custom_range.index_name_suffix)
      concrete_rollover_index_for(index_name, custom_range.setting_overrides, custom_range.time_set)
    end

    [*from_timestamp_overrides, *from_custom_ranges]
  end
end
|
162
|
+
|
163
|
+
# Shortcut for the `setting_overrides_by_timestamp` value of the environment index config.
def setting_overrides_by_timestamp = env_index_config.setting_overrides_by_timestamp
|
166
|
+
|
167
|
+
# Shortcut for the `custom_timestamp_ranges` value of the environment index config.
def custom_timestamp_ranges = env_index_config.custom_timestamp_ranges
|
170
|
+
|
171
|
+
# Joins the template name, the rollover infix marker and the given suffix into an index name.
def index_name_with_suffix(suffix) = "#{name}#{ROLLOVER_INDEX_INFIX_MARKER}#{suffix}"
|
174
|
+
|
175
|
+
# `strftime` formats used to derive an index name suffix from a record's timestamp, keyed by
# rollover frequency. The keys double as the set of supported frequencies.
ROLLOVER_SUFFIX_FORMATS_BY_FREQUENCY = {hourly: "%Y-%m-%d-%H", daily: "%Y-%m-%d", monthly: "%Y-%m", yearly: "%Y"}.freeze

# Number of "-"-separated time elements in the suffix for each frequency (e.g. 2 for monthly).
ROLLOVER_TIME_ELEMENT_COUNTS_BY_FREQUENCY = ROLLOVER_SUFFIX_FORMATS_BY_FREQUENCY.transform_values { |suffix_format| suffix_format.split("-").size }.freeze

# Time unit passed to `Support::TimeUtil.advance_one_unit` for each frequency.
TIME_UNIT_BY_FREQUENCY = {hourly: :hour, daily: :day, monthly: :month, yearly: :year}.freeze
|
178
|
+
|
179
|
+
# Computes the index name suffix for a record based on its timestamp.
#
# - `record`: hash from which the timestamp is read via `Support::HashUtil.fetch_value_at_path`.
# - `timestamp_field_path`: path of the ISO 8601 timestamp value within `record`.
#
# Returns the suffix of the matching custom timestamp range when the environment index config
# has one for the timestamp; otherwise the timestamp formatted according to the rollover frequency.
# `DateTime.iso8601` raises when the value is not a valid ISO 8601 string.
def rollover_index_suffix_for_record(record, timestamp_field_path:)
  # The timestamp is converted to UTC before formatting because `infer_time_set_from_index_name`
  # interprets the suffix with `Time.utc`. Formatting in the timestamp's own offset could yield a
  # suffix (e.g. the next day) whose inferred time range does not contain the record's instant.
  timestamp_value = ::DateTime.iso8601(
    Support::HashUtil.fetch_value_at_path(record, timestamp_field_path)
  ).to_time.utc

  if (matching_custom_range = env_index_config.custom_timestamp_range_for(timestamp_value))
    return matching_custom_range.index_name_suffix
  end

  timestamp_value.strftime(ROLLOVER_SUFFIX_FORMATS_BY_FREQUENCY.fetch(frequency))
end
|
190
|
+
|
191
|
+
# Builds a `RolloverIndex` named `index_name`. When no `time_set` is given it is inferred from the
# index name; `nil` is returned if that inference yields nothing.
def concrete_rollover_index_for(index_name, setting_overrides, time_set = nil)
  resolved_time_set = time_set || infer_time_set_from_index_name(index_name)
  return nil unless resolved_time_set

  # The given overrides are layered on top of the setting overrides already present in the config.
  merged_setting_overrides = env_index_config.setting_overrides.merge(setting_overrides)
  index_env_config = env_index_config.without_env_overrides.with(setting_overrides: merged_setting_overrides)
  index = Index.new(**index_args.merge(name: index_name, env_index_config: index_env_config))

  RolloverIndex.new(index, resolved_time_set)
end
|
204
|
+
|
205
|
+
# Infers the time range covered by a rollover index from the time elements in its name suffix.
#
# Returns the result of `Support::TimeSet.of_range` spanning one rollover period that starts at
# the UTC time encoded in the suffix, or `nil` when no time set can be inferred from the name.
def infer_time_set_from_index_name(index_name)
  time_args = index_name.split(ROLLOVER_INDEX_INFIX_MARKER).last.to_s.split("-")

  # Verify that the index is for the same rollover frequency as we are currently configured to use.
  # If not, return `nil` because we can't accurately infer the time set without the frequency aligning
  # with the index itself.
  #
  # This can happen when we are migrating from one index frequency to another.
  return nil unless time_args.size == ROLLOVER_TIME_ELEMENT_COUNTS_BY_FREQUENCY.fetch(frequency)

  # Verify that the args are all numeric. If not, return `nil` because we have no idea what the
  # time set for the index is.
  #
  # This can happen when we are migrating from one index configuration to another while also using
  # custom timestamp ranges (e.g. to have a `__before_2020` index).
  return nil unless time_args.all? { |arg| arg.match?(/\A\d+\z/) }

  # Numeric elements can still be out of range (e.g. a `2020-13` suffix), in which case `Time.utc`
  # raises `ArgumentError`. Such an index has no inferable time set either, so it is treated like
  # the cases above instead of letting the error propagate.
  #
  # Steep can't type the dynamic nature of `*time_args` so we have to use `__skip__` here.
  # @type var lower_bound: ::Time?
  __skip__ = lower_bound = begin
    ::Time.utc(*time_args)
  rescue ::ArgumentError
    nil
  end
  return nil if lower_bound.nil?

  upper_bound = Support::TimeUtil.advance_one_unit(lower_bound, TIME_UNIT_BY_FREQUENCY.fetch(frequency))

  Support::TimeSet.of_range(gte: lower_bound, lt: upper_bound)
end
|
229
|
+
end
|
230
|
+
end
|
231
|
+
end
|
232
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/datastore_core/index_definition/index"
|
10
|
+
require "elastic_graph/datastore_core/index_definition/rollover_index_template"
|
11
|
+
require "elastic_graph/error"
|
12
|
+
|
13
|
+
module ElasticGraph
|
14
|
+
class DatastoreCore
|
15
|
+
# Represents the definition of a datastore index (or rollover template).
|
16
|
+
# Intended to be an entry point for working with datastore indices.
|
17
|
+
#
|
18
|
+
# This module contains common implementation logic for both the rollover and non-rollover
|
19
|
+
# case, as well as a `with` factory method.
|
20
|
+
module IndexDefinition
|
21
|
+
# Builds the index definition for `name`: a `RolloverIndexTemplate` when the runtime metadata
# declares rollover, and a plain `Index` otherwise. Raises `ConfigError` when `config` has no
# index definition entry for `name`.
def self.with(name:, runtime_metadata:, config:, datastore_clients_by_name:)
  env_index_config = config.index_definitions[name]

  if env_index_config.nil?
    raise ConfigError, "Configuration does not provide an index definition for `#{name}`, " \
      "but it is required so we can identify the datastore cluster(s) to query and index into."
  end

  # Arguments shared by both kinds of index definition.
  common_args = {
    name: name,
    route_with: runtime_metadata.route_with,
    default_sort_clauses: runtime_metadata.default_sort_fields.map(&:to_query_clause),
    current_sources: runtime_metadata.current_sources,
    fields_by_path: runtime_metadata.fields_by_path,
    env_index_config: env_index_config,
    defined_clusters: config.clusters.keys.to_set,
    datastore_clients_by_name: datastore_clients_by_name
  }

  rollover = runtime_metadata.rollover
  return Index.new(**common_args) unless rollover

  # The template also receives the shared arguments as `index_args` in addition to taking them itself.
  RolloverIndexTemplate.new(
    timestamp_field_path: rollover.timestamp_field_path,
    frequency: rollover.frequency,
    index_args: common_args,
    **common_args
  )
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/datastore_core/config"
|
10
|
+
require "elastic_graph/schema_artifacts/from_disk"
|
11
|
+
require "elastic_graph/support/logger"
|
12
|
+
|
13
|
+
module ElasticGraph
|
14
|
+
# The entry point into this library. Create an instance of this class to get access to
|
15
|
+
# the public interfaces provided by this library.
|
16
|
+
class DatastoreCore
|
17
|
+
# @dynamic config, schema_artifacts, logger, client_customization_block
|
18
|
+
attr_reader :config, :schema_artifacts, :logger, :client_customization_block
|
19
|
+
|
20
|
+
# Builds an instance from parsed YAML: the config, logger and schema artifacts are each
# created from `parsed_yaml`, and the given block is stored as the client customization block.
def self.from_parsed_yaml(parsed_yaml, for_context:, &client_customization_block)
  config = DatastoreCore::Config.from_parsed_yaml(parsed_yaml)
  logger = Support::Logger.from_parsed_yaml(parsed_yaml)
  schema_artifacts = SchemaArtifacts.from_parsed_yaml(parsed_yaml, for_context: for_context)

  new(
    config: config,
    logger: logger,
    schema_artifacts: schema_artifacts,
    client_customization_block: client_customization_block
  )
end
|
28
|
+
|
29
|
+
# Stores the given collaborators. `clients_by_name` and `client_customization_block` are optional;
# when `clients_by_name` is left `nil`, the `clients_by_name` reader builds the clients on first use.
def initialize(config:, logger:, schema_artifacts:, clients_by_name: nil, client_customization_block: nil)
  @config = config
  @schema_artifacts = schema_artifacts
  @logger = logger
  @client_customization_block = client_customization_block
  @clients_by_name = clients_by_name
end
|
42
|
+
|
43
|
+
# Exposes the datastore index definitions as a map, keyed by index definition name.
|
44
|
+
def index_definitions_by_name
  @index_definitions_by_name ||= begin
    # Loaded here rather than at the top of the file, so it is only required on first use.
    require "elastic_graph/datastore_core/index_definition"

    metadata_by_name = schema_artifacts.runtime_metadata.index_definitions_by_name
    metadata_by_name.to_h do |name, index_def_metadata|
      definition = IndexDefinition.with(
        name: name,
        runtime_metadata: index_def_metadata,
        config: config,
        datastore_clients_by_name: clients_by_name
      )

      [name, definition]
    end
  end
end
|
59
|
+
|
60
|
+
# Exposes the datastore index definitions as a map, keyed by GraphQL type.
|
61
|
+
# Note: the GraphQL type name is also used in non-GraphQL contexts (e.g. it is
|
62
|
+
# used in events processed by elasticgraph-indexer), so we expose this here instead
|
63
|
+
# of from elasticgraph-graphql.
|
64
|
+
def index_definitions_by_graphql_type
  @index_definitions_by_graphql_type ||= begin
    object_types_by_name = schema_artifacts.runtime_metadata.object_types_by_name

    # Replace each type's metadata with the list of index definitions it names. `fetch` raises
    # if a type names an index definition that is missing from `index_definitions_by_name`.
    object_types_by_name.transform_values do |type_metadata|
      type_metadata.index_definition_names.map { |name| index_definitions_by_name.fetch(name) }
    end
  end
end
|
74
|
+
|
75
|
+
# Exposes the datastore clients in a map, keyed by cluster name.
|
76
|
+
def clients_by_name
  @clients_by_name ||= begin
    faraday_adapter = config.client_faraday_adapter

    # Load the library backing the configured Faraday adapter, if one is named.
    adapter_lib = faraday_adapter&.require
    require adapter_lib if adapter_lib

    adapter_name = faraday_adapter&.name
    # Clients only receive a logger when traffic logging is enabled in the config.
    client_logger = logger if config.log_traffic

    config.clusters.to_h do |cluster_name, cluster_def|
      client_class = cluster_def.backend_client_class
      client = client_class.new(
        cluster_name,
        faraday_adapter: adapter_name,
        url: cluster_def.url,
        logger: client_logger,
        retry_on_failure: config.max_client_retries,
        &@client_customization_block
      )

      [cluster_name, client]
    end
  end
end
|
99
|
+
end
|
100
|
+
end
|