elasticgraph-datastore_core 0.18.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +3 -0
- data/elasticgraph-datastore_core.gemspec +21 -0
- data/lib/elastic_graph/datastore_core/config.rb +58 -0
- data/lib/elastic_graph/datastore_core/configuration/client_faraday_adapter.rb +38 -0
- data/lib/elastic_graph/datastore_core/configuration/cluster_definition.rb +52 -0
- data/lib/elastic_graph/datastore_core/configuration/index_definition.rb +110 -0
- data/lib/elastic_graph/datastore_core/index_config_normalizer.rb +79 -0
- data/lib/elastic_graph/datastore_core/index_definition/base.rb +162 -0
- data/lib/elastic_graph/datastore_core/index_definition/index.rb +64 -0
- data/lib/elastic_graph/datastore_core/index_definition/rollover_index.rb +48 -0
- data/lib/elastic_graph/datastore_core/index_definition/rollover_index_template.rb +232 -0
- data/lib/elastic_graph/datastore_core/index_definition.rb +51 -0
- data/lib/elastic_graph/datastore_core.rb +100 -0
- metadata +404 -0
@@ -0,0 +1,232 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "date"
|
10
|
+
require "elastic_graph/datastore_core/index_config_normalizer"
|
11
|
+
require "elastic_graph/datastore_core/index_definition/base"
|
12
|
+
require "elastic_graph/datastore_core/index_definition/index"
|
13
|
+
require "elastic_graph/datastore_core/index_definition/rollover_index"
|
14
|
+
require "elastic_graph/error"
|
15
|
+
require "elastic_graph/support/memoizable_data"
|
16
|
+
require "elastic_graph/support/time_set"
|
17
|
+
require "elastic_graph/support/time_util"
|
18
|
+
require "time"
|
19
|
+
|
20
|
+
module ElasticGraph
|
21
|
+
class DatastoreCore
|
22
|
+
module IndexDefinition
|
23
|
+
class RolloverIndexTemplate < Support::MemoizableData.define(
|
24
|
+
:name, :route_with, :default_sort_clauses, :current_sources, :fields_by_path, :env_index_config,
|
25
|
+
:index_args, :defined_clusters, :datastore_clients_by_name, :timestamp_field_path, :frequency
|
26
|
+
)
|
27
|
+
# `Data.define` provides all these methods:
|
28
|
+
# @dynamic name, route_with, default_sort_clauses, current_sources, fields_by_path, env_index_config,
|
29
|
+
# @dynamic index_args, defined_clusters, datastore_clients_by_name, timestamp_field_path, frequency, initialize
|
30
|
+
|
31
|
+
# `include IndexDefinition::Base` provides all these methods. Steep should be able to detect it
|
32
|
+
# but can't for some reason so we have to declare them with `@dynamic`.
|
33
|
+
# @dynamic flattened_env_setting_overrides, routing_value_for_prepared_record, has_custom_routing?, cluster_to_query, use_updates_for_indexing?
|
34
|
+
# @dynamic clusters_to_index_into, all_accessible_cluster_names, ignored_values_for_routing, searches_could_hit_incomplete_docs?
|
35
|
+
# @dynamic accessible_cluster_names_to_index_into, accessible_from_queries?, known_related_query_rollover_indices, list_counts_field_paths_for_source
|
36
|
+
include IndexDefinition::Base
|
37
|
+
|
38
|
+
# Reads this template from the datastore and returns its mappings after
# passing them through `IndexConfigNormalizer.normalize_mappings`. When the
# template response has no `template.mappings` entry, an empty hash is normalized instead.
def mappings_in_datastore(datastore_client)
  template_response = datastore_client.get_index_template(name)
  raw_mappings = template_response.dig("template", "mappings") || {}

  IndexConfigNormalizer.normalize_mappings(raw_mappings)
end
|
43
|
+
|
44
|
+
# We need to delete both the template and the actual indices for rollover indices
|
45
|
+
def delete_from_datastore(datastore_client)
  # The template goes first, then every concrete index matched by the wildcard search expression.
  datastore_client.delete_index_template(name)

  concrete_indices_expression = index_expression_for_search
  datastore_client.delete_indices(concrete_indices_expression)
end
|
49
|
+
|
50
|
+
# Indicates if this is a rollover index definition.
|
51
|
+
#
|
52
|
+
# Use of this is considered a mild code smell. When feasible, it's generally better to
|
53
|
+
# implement a new polymorphic API on the IndexDefinition interface, rather
|
54
|
+
# than branching on the value of this predicate.
|
55
|
+
# Always `true` for this class.
def rollover_index_template? = true
|
58
|
+
|
59
|
+
# Two underscores used to avoid collisions
|
60
|
+
# with other types (e.g. payments_2020 and payments_xyz_2020), though regardless shouldn't
|
61
|
+
# happen if types follow naming conventions.
|
62
|
+
def index_expression_for_search
  # A `*` suffix turns the template's index name prefix into a wildcard expression.
  wildcard_suffix = "*"
  index_name_with_suffix(wildcard_suffix)
end
|
65
|
+
|
66
|
+
# Returns an index name to use for write operations. The index_definition selection is a function of
|
67
|
+
# the index_definition's rollover configuration and the record's timestamp.
|
68
|
+
def index_name_for_writes(record, timestamp_field_path: nil)
  # Fall back to the template's own timestamp path when the caller does not pass one.
  effective_path = timestamp_field_path || self.timestamp_field_path
  suffix = rollover_index_suffix_for_record(record, timestamp_field_path: effective_path)

  index_name_with_suffix(suffix)
end
|
74
|
+
|
75
|
+
# Returns a list of indices related to this template. This includes both indices that are
|
76
|
+
# specified in our configuration settings (e.g. via `setting_overrides_by_timestamp` and
|
77
|
+
# `custom_time_sets`) and also indices that have been auto-created from the template.
|
78
|
+
#
|
79
|
+
# Note that there can be discrepancies between the configuration settings and the indices in
|
80
|
+
# the database. Sometimes this is planned/expected (e.g. such as when invoking `elasticgraph-admin`
|
81
|
+
# to configure an index newly defined in configuration) and in other cases it's not.
|
82
|
+
#
|
83
|
+
# The `only_if_exists` argument controls how a discrepancy is treated.
|
84
|
+
#
|
85
|
+
# - When `false` (the default), indices that are defined in config but do not exist in the datastore are still returned.
|
86
|
+
# This is generally what we want for indexing and cluster administration.
|
87
|
+
# - When `true`, any indices in our configuration that do not exist are ignored, and not included in the returned list.
|
88
|
+
# This is appropriate for searching the datastore: if we attempt to exclude an index which is defined in config but does
|
89
|
+
# not exist (e.g. via `-[index_name]` in the search index expression), the datastore will return an error, but we can
|
90
|
+
# safely ignore the index. Likewise, if we have an index in the datastore which we cannot infer a timestamp range, we
|
91
|
+
# need to ignore it to avoid getting errors. Ignoring an index is safe when searching because our search logic uses a
|
92
|
+
# wildcard to match _all_ indices with the same prefix, and then excludes certain known indices that it can safely
|
93
|
+
# exclude based on their timestamp range. Ignored indices which exist will still be searched.
|
94
|
+
#
|
95
|
+
# In addition, any indices which exist, but which are not controlled by our current configuration, are ignored. Examples:
|
96
|
+
#
|
97
|
+
# - An index with a custom suffix (e.g. `__before_2019`) which has no corresponding configuration. We have no way to guess
|
98
|
+
# what the timestamp range is for such an index, and we want to completely ignore it.
|
99
|
+
# - An index for a different rollover frequency than our current configuration. For example, a `__2019-03` index,
|
100
|
+
# which must rollover monthly, would be ignored if our current rollover frequency is yearly or daily.
|
101
|
+
#
|
102
|
+
# These latter cases are quite rare but can happen when we are dealing with indices defined before an update to our
|
103
|
+
# configuration. Our searches will continue to search these indices so long as their name matches the pattern, and
|
104
|
+
# we otherwise want to ignore these indices (e.g. we don't want admin to attempt to configure them, or want our
|
105
|
+
# indexer to attempt to write to them).
|
106
|
+
def related_rollover_indices(datastore_client, only_if_exists: false)
  configured_by_name = rollover_indices_to_pre_create.to_h { |index| [index.name, index] }

  # Build an index object for each index that exists in the datastore, skipping any
  # for which `concrete_rollover_index_for` returns nil (no time set could be determined).
  existing_names = datastore_client.list_indices_matching(index_expression_for_search)
  existing_by_name = existing_names.each_with_object({}) do |index_name, by_name|
    configured_time_set = configured_by_name[index_name]&.time_set
    rollover_index = concrete_rollover_index_for(index_name, {}, configured_time_set)
    by_name[index_name] = rollover_index if rollover_index
  end

  # When only existing indices are wanted, drop configured indices that are absent from the datastore.
  configured_by_name = configured_by_name.slice(*existing_by_name.keys) if only_if_exists

  # On a name collision the configured index wins over the one built from the datastore listing.
  existing_by_name.merge(configured_by_name).values
end
|
118
|
+
|
119
|
+
# Gets a single related `RolloverIndex` for a given timestamp.
|
120
|
+
def related_rollover_index_for_timestamp(timestamp, setting_overrides = {})
  # Build a minimal record that holds `timestamp` at `timestamp_field_path`, nesting one
  # hash per dotted path segment (innermost segment first), so that the regular
  # write-path index name lookup can be reused.
  path_segments = timestamp_field_path.split(".")

  # @type var record: ::Hash[::String, untyped]
  # `__skip__` is needed because the accumulator starts as a string and then becomes a
  # hash, which Steep cannot type.
  __skip__ = record = path_segments.reverse_each.inject(timestamp) do |nested_value, segment|
    {segment => nested_value}
  end

  write_index_name = index_name_for_writes(record)
  concrete_rollover_index_for(write_index_name, setting_overrides)
end
|
131
|
+
|
132
|
+
private
|
133
|
+
|
134
|
+
# Validates the rollover settings: a timestamp field path must be present and the
# frequency must be one of the keys of `ROLLOVER_SUFFIX_FORMATS_BY_FREQUENCY`.
# Raises `SchemaError` otherwise.
def after_initialize
  return if timestamp_field_path && ROLLOVER_SUFFIX_FORMATS_BY_FREQUENCY.key?(frequency)

  raise SchemaError, "Rollover index config 'timestamp_field' or 'frequency' is invalid."
end
|
139
|
+
|
140
|
+
# Returns a list of indices that must be pre-created (rather than allowing them to be
|
141
|
+
# created lazily based on the template). This is done so that we can use different
|
142
|
+
# index settings for some indices. For example, you might want your template to be
|
143
|
+
# configured to use 5 shards, but for old months with a small data set you may only
|
144
|
+
# want to use 1 shard.
|
145
|
+
def rollover_indices_to_pre_create
  @rollover_indices_to_pre_create ||= begin
    # One index per configured timestamp override.
    from_timestamp_overrides = setting_overrides_by_timestamp.filter_map do |(timestamp, overrides)|
      related_rollover_index_for_timestamp(timestamp, overrides)
    end

    # One index per custom timestamp range, named with the range's own suffix.
    from_custom_ranges = custom_timestamp_ranges.filter_map do |range|
      ranged_index_name = index_name_with_suffix(range.index_name_suffix)
      concrete_rollover_index_for(ranged_index_name, range.setting_overrides, range.time_set)
    end

    [*from_timestamp_overrides, *from_custom_ranges]
  end
end
|
162
|
+
|
163
|
+
# Shortcut for the `setting_overrides_by_timestamp` of `env_index_config`.
def setting_overrides_by_timestamp = env_index_config.setting_overrides_by_timestamp
|
166
|
+
|
167
|
+
# Shortcut for the `custom_timestamp_ranges` of `env_index_config`.
def custom_timestamp_ranges = env_index_config.custom_timestamp_ranges
|
170
|
+
|
171
|
+
# Joins the template name, the rollover infix marker, and `suffix` into one index name.
def index_name_with_suffix(suffix)
  [name, ROLLOVER_INDEX_INFIX_MARKER, suffix].join
end
|
174
|
+
|
175
|
+
# `strftime` pattern used to build an index name suffix, keyed by rollover frequency.
# Frozen so these shared lookup tables cannot be mutated at runtime.
ROLLOVER_SUFFIX_FORMATS_BY_FREQUENCY = {hourly: "%Y-%m-%d-%H", daily: "%Y-%m-%d", monthly: "%Y-%m", yearly: "%Y"}.freeze

# Number of dash-separated time elements in a suffix for each frequency (derived from the patterns above).
ROLLOVER_TIME_ELEMENT_COUNTS_BY_FREQUENCY = ROLLOVER_SUFFIX_FORMATS_BY_FREQUENCY.transform_values { |format| format.split("-").size }.freeze

# Time unit covered by a single index for each frequency.
TIME_UNIT_BY_FREQUENCY = {hourly: :hour, daily: :day, monthly: :month, yearly: :year}.freeze
|
178
|
+
|
179
|
+
# Computes the index name suffix for `record` from the ISO 8601 timestamp found at
# `timestamp_field_path`. A matching custom timestamp range takes precedence over the
# frequency-based `strftime` suffix.
#
# NOTE(review): the parsed time is formatted as-is, with no UTC conversion in this method,
# while `infer_time_set_from_index_name` builds its bounds with `Time.utc`. This presumably
# relies on timestamps already being in UTC - confirm.
def rollover_index_suffix_for_record(record, timestamp_field_path:)
  raw_timestamp = Support::HashUtil.fetch_value_at_path(record, timestamp_field_path)
  record_time = ::DateTime.iso8601(raw_timestamp).to_time

  custom_range = env_index_config.custom_timestamp_range_for(record_time)
  return custom_range.index_name_suffix if custom_range

  record_time.strftime(ROLLOVER_SUFFIX_FORMATS_BY_FREQUENCY[frequency])
end
|
190
|
+
|
191
|
+
# Builds the `RolloverIndex` for `index_name`, layering `setting_overrides` on top of the
# template's own setting overrides. Returns nil when no time set is given and none can be
# inferred from the index name.
def concrete_rollover_index_for(index_name, setting_overrides, time_set = nil)
  resolved_time_set = time_set || infer_time_set_from_index_name(index_name)
  return nil if resolved_time_set.nil?

  base_config = env_index_config.without_env_overrides
  merged_overrides = env_index_config.setting_overrides.merge(setting_overrides)
  concrete_config = base_config.with(setting_overrides: merged_overrides)

  # Explicit keywords after the splat take precedence over same-named entries in `index_args`.
  concrete_index = Index.new(**index_args, name: index_name, env_index_config: concrete_config)

  RolloverIndex.new(concrete_index, resolved_time_set)
end
|
204
|
+
|
205
|
+
# Infers the time set covered by a rollover index from the time elements in its name suffix
# (the part after `ROLLOVER_INDEX_INFIX_MARKER`, split on `-`).
#
# Returns nil when the name cannot be interpreted under the current configuration:
# - the number of time elements does not match the current rollover frequency,
# - any element is not purely numeric (e.g. a custom suffix such as `before_2020`), or
# - the elements are numeric but are not a valid time (e.g. a `2019-13` suffix).
def infer_time_set_from_index_name(index_name)
  time_args = index_name.split(ROLLOVER_INDEX_INFIX_MARKER).last.to_s.split("-")

  # Verify that the index is for the same rollover frequency as we are currently configured to use.
  # If not, return `nil` because we can't accurately infer the time set without the frequency aligning
  # with the index itself.
  #
  # This can happen when we are migrating from one index frequency to another.
  return nil unless time_args.size == ROLLOVER_TIME_ELEMENT_COUNTS_BY_FREQUENCY.fetch(frequency)

  # Verify that the args are all numeric. If not, return `nil` because we have no idea what the
  # time set for the index is.
  #
  # This can happen when we are migrating from one index configuration to another while also using
  # custom timestamp ranges (e.g. to have a `__before_2020` index).
  return nil unless time_args.all? { |arg| arg.match?(/\A\d+\z/) }

  # Numeric elements can still be out of range (month `13`, hour `25`, ...), in which case
  # `Time.utc` raises `ArgumentError`. Treat such a name like any other we cannot interpret
  # rather than letting one stray index name abort the caller.
  #
  # Steep can't type the dynamic nature of `*time_args` so we have to use `__skip__` here.
  # @type var lower_bound: ::Time
  __skip__ = lower_bound =
    begin
      ::Time.utc(*time_args)
    rescue ::ArgumentError
      return nil
    end
  upper_bound = Support::TimeUtil.advance_one_unit(lower_bound, TIME_UNIT_BY_FREQUENCY.fetch(frequency))

  Support::TimeSet.of_range(gte: lower_bound, lt: upper_bound)
end
|
229
|
+
end
|
230
|
+
end
|
231
|
+
end
|
232
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/datastore_core/index_definition/index"
|
10
|
+
require "elastic_graph/datastore_core/index_definition/rollover_index_template"
|
11
|
+
require "elastic_graph/error"
|
12
|
+
|
13
|
+
module ElasticGraph
|
14
|
+
class DatastoreCore
|
15
|
+
# Represents the definition of a datastore index (or rollover template).
|
16
|
+
# Intended to be an entry point for working with datastore indices.
|
17
|
+
#
|
18
|
+
# This module contains common implementation logic for both the rollover and non-rollover
|
19
|
+
# case, as well as a `with` factory method.
|
20
|
+
module IndexDefinition
|
21
|
+
# Builds the index definition for `name`: a `RolloverIndexTemplate` when the runtime
# metadata has a rollover config, otherwise a plain `Index`.
#
# Raises `ConfigError` when `config.index_definitions` has no entry for `name`.
def self.with(name:, runtime_metadata:, config:, datastore_clients_by_name:)
  env_index_config = config.index_definitions[name]

  if env_index_config.nil?
    raise ConfigError, "Configuration does not provide an index definition for `#{name}`, " \
      "but it is required so we can identify the datastore cluster(s) to query and index into."
  end

  sort_clauses = runtime_metadata.default_sort_fields.map(&:to_query_clause)

  common_args = {
    name: name,
    route_with: runtime_metadata.route_with,
    default_sort_clauses: sort_clauses,
    current_sources: runtime_metadata.current_sources,
    fields_by_path: runtime_metadata.fields_by_path,
    env_index_config: env_index_config,
    defined_clusters: config.clusters.keys.to_set,
    datastore_clients_by_name: datastore_clients_by_name
  }

  rollover = runtime_metadata.rollover
  return Index.new(**common_args) unless rollover

  # The template also keeps `common_args` as `index_args`, which it uses when building concrete indices.
  RolloverIndexTemplate.new(
    timestamp_field_path: rollover.timestamp_field_path,
    frequency: rollover.frequency,
    index_args: common_args,
    **common_args
  )
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/datastore_core/config"
|
10
|
+
require "elastic_graph/schema_artifacts/from_disk"
|
11
|
+
require "elastic_graph/support/logger"
|
12
|
+
|
13
|
+
module ElasticGraph
|
14
|
+
# The entry point into this library. Create an instance of this class to get access to
|
15
|
+
# the public interfaces provided by this library.
|
16
|
+
class DatastoreCore
|
17
|
+
# @dynamic config, schema_artifacts, logger, client_customization_block
|
18
|
+
attr_reader :config, :schema_artifacts, :logger, :client_customization_block
|
19
|
+
|
20
|
+
# Builds a `DatastoreCore` from parsed YAML settings. The config, logger, and schema
# artifacts are each built from the same `parsed_yaml`; an optional block is stored as
# the client customization block.
def self.from_parsed_yaml(parsed_yaml, for_context:, &client_customization_block)
  config = DatastoreCore::Config.from_parsed_yaml(parsed_yaml)
  logger = Support::Logger.from_parsed_yaml(parsed_yaml)
  schema_artifacts = SchemaArtifacts.from_parsed_yaml(parsed_yaml, for_context: for_context)

  new(
    config: config,
    logger: logger,
    schema_artifacts: schema_artifacts,
    client_customization_block: client_customization_block
  )
end
|
28
|
+
|
29
|
+
# Stores the given collaborators. `clients_by_name` and `client_customization_block` are
# optional; when `clients_by_name` is omitted the `clients_by_name` reader builds the
# clients on first use.
def initialize(config:, logger:, schema_artifacts:, clients_by_name: nil, client_customization_block: nil)
  # Optional collaborators.
  @clients_by_name = clients_by_name
  @client_customization_block = client_customization_block

  # Required collaborators.
  @schema_artifacts = schema_artifacts
  @logger = logger
  @config = config
end
|
42
|
+
|
43
|
+
# Exposes the datastore index definitions as a map, keyed by index definition name.
|
44
|
+
def index_definitions_by_name
  @index_definitions_by_name ||= begin
    # Loaded on first use rather than at file load time.
    require "elastic_graph/datastore_core/index_definition"

    metadata_by_name = schema_artifacts.runtime_metadata.index_definitions_by_name

    metadata_by_name.map do |definition_name, definition_metadata|
      definition = IndexDefinition.with(
        name: definition_name,
        runtime_metadata: definition_metadata,
        config: config,
        datastore_clients_by_name: clients_by_name
      )

      [definition_name, definition]
    end.to_h
  end
end
|
59
|
+
|
60
|
+
# Exposes the datastore index definitions as a map, keyed by GraphQL type.
|
61
|
+
# Note: the GraphQL type name is also used in non-GraphQL contexts (e.g. it is
|
62
|
+
# used in events processed by elasticgraph-indexer), so we expose this here instead
|
63
|
+
# of from elasticgraph-graphql.
|
64
|
+
def index_definitions_by_graphql_type
  @index_definitions_by_graphql_type ||= begin
    object_types = schema_artifacts.runtime_metadata.object_types_by_name

    # Replace each type's metadata with the list of index definitions it names.
    # `fetch` raises if a type names an index definition that is not known.
    object_types.transform_values do |type_metadata|
      type_metadata.index_definition_names.map do |definition_name|
        index_definitions_by_name.fetch(definition_name)
      end
    end
  end
end
|
74
|
+
|
75
|
+
# Exposes the datastore clients in a map, keyed by cluster name.
|
76
|
+
def clients_by_name
  @clients_by_name ||= begin
    faraday_adapter = config.client_faraday_adapter

    # Load the adapter's library first, if the adapter config names one.
    adapter_lib = faraday_adapter&.require
    require adapter_lib if adapter_lib

    adapter_name = faraday_adapter&.name
    # Clients only get a logger when traffic logging is enabled.
    client_logger = logger if config.log_traffic

    config.clusters.map do |cluster_name, cluster_def|
      client = cluster_def.backend_client_class.new(
        cluster_name,
        faraday_adapter: adapter_name,
        url: cluster_def.url,
        logger: client_logger,
        retry_on_failure: config.max_client_retries,
        &@client_customization_block
      )

      [cluster_name, client]
    end.to_h
  end
end
|
99
|
+
end
|
100
|
+
end
|