elasticgraph-admin 0.18.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,247 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/admin/cluster_configurator/action_reporter"
10
+ require "elastic_graph/admin/index_definition_configurator/for_index"
11
+ require "elastic_graph/datastore_core/index_config_normalizer"
12
+ require "elastic_graph/indexer/hash_differ"
13
+ require "elastic_graph/support/hash_util"
14
+
15
+ module ElasticGraph
16
+ class Admin
17
+ module IndexDefinitionConfigurator
18
+ # Responsible for managing an index template's configuration, including both mappings and settings.
19
+ class ForIndexTemplate
20
+ # @dynamic index_template
21
+
22
+ attr_reader :index_template
23
+
24
# Captures the collaborators needed to configure a single index template.
#
# `env_agnostic_index_config_parent` must contain a "template" key (`fetch` raises `KeyError`
# otherwise); that nested config is kept separately because it is what gets merged with the
# template's overrides and handed to the related concrete index configurators.
def initialize(datastore_client, index_template, env_agnostic_index_config_parent, output, clock)
  @datastore_client, @index_template = datastore_client, index_template
  @output, @clock = output, clock

  @env_agnostic_index_config_parent = env_agnostic_index_config_parent
  @env_agnostic_index_config = @env_agnostic_index_config_parent.fetch("template")
  @reporter = ClusterConfigurator::ActionReporter.new(output)
end
33
+
34
+ # Attempts to idempotently update the index configuration to the desired configuration
35
+ # exposed by the `IndexDefinition` object. Based on the configuration of the passed index
36
+ # and the state of the index in the datastore, does one of the following:
37
+ #
38
+ # - If the index did not already exist: creates the index with the desired mappings and settings.
39
+ # - If the desired mapping has fewer fields than what is in the index template: keeps the extra
40
+ # fields in the template (they are merged back in before the template is written) and notes in
41
+ # the reported action that mapping removals are unsupported.
42
+ # - If the settings have desired changes: updates the settings, restoring any setting that
43
+ # no longer has a desired value to its default.
44
+ # - If the mapping has desired changes: updates the mappings.
45
+ #
46
+ # Note that any of the writes to the index may fail. There are many things that cannot
47
+ # be changed on an existing index (such as static settings, field mapping types, etc). We do not attempt
48
+ # to validate those things ahead of time and instead rely on the datastore to fail if an invalid operation
49
+ # is attempted.
50
def configure!
  related_index_configurators.each { |configurator| configurator.configure! }

  # The same datastore API both creates and updates an index template, and it has no partial
  # update, so any mapping or settings difference means writing the whole template again.
  return unless has_mapping_updates? || settings_updates.any?

  put_index_template
end
56
+
57
# Collects validation errors from every related index configurator and, when the index template
# already exists, adds an error if any mapped field would change its type.
#
# Returns an array of error messages (empty when nothing is wrong).
def validate
  problems = related_index_configurators.flat_map { |configurator| configurator.validate }

  if index_template_exists? && mapping_type_changes.any?
    problems << cannot_modify_mapping_field_type_error
  end

  problems
end
66
+
67
+ private
68
+
69
# Writes the desired template configuration to the datastore and reports the change.
#
# The mappings that are sent are the desired mappings merged with the current ones via
# `merge_properties`, so fields present on the current template are kept.
def put_index_template
  template_payload = Support::HashUtil.deep_merge(
    desired_config_parent,
    {"template" => {"mappings" => merge_properties(desired_mapping, current_mapping)}}
  )

  summary = "Updated index template: `#{@index_template.name}`:\n#{config_diff}"

  # The diff lists removed fields even though they are retained, so say so explicitly.
  if mapping_removals.any?
    summary = "#{summary}\n\nNote: the extra fields listed here will not actually get removed. " \
      "Mapping removals are unsupported (but ElasticGraph will leave them alone and they'll cause no problems)."
  end

  @datastore_client.put_index_template(name: @index_template.name, body: template_payload)
  report_action summary
end
85
+
86
# Builds the validation error message listing the fields whose mapping type would change.
def cannot_modify_mapping_field_type_error
  changed_fields = mapping_type_changes.inspect
  template_name = @index_template.name

  "The datastore does not support modifying the type of a field from an existing index definition. " \
    "You are attempting to update type of fields (#{changed_fields}) from the #{template_name} index definition."
end
90
+
91
# True when the datastore returned a non-empty config for this index template.
def index_template_exists?
  return false if current_config_parent.empty?

  true
end
94
+
95
# Field paths that are present in the current mapping but absent from the desired one (memoized).
def mapping_removals
  @mapping_removals ||= begin
    current_fields = mapping_fields_from(current_mapping)
    desired_fields = mapping_fields_from(desired_mapping)
    current_fields.difference(desired_fields)
  end
end
98
+
99
# Flattened mapping keys ending in ".type" that exist in both the current and desired mapping
# but with different values (memoized).
def mapping_type_changes
  @mapping_type_changes ||= begin
    current_by_path = Support::HashUtil.flatten_and_stringify_keys(current_mapping)
    desired_by_path = Support::HashUtil.flatten_and_stringify_keys(desired_mapping)

    changed = current_by_path.select do |path, current_value|
      path.end_with?(".type") && desired_by_path.key?(path) && desired_by_path[path] != current_value
    end

    changed.keys
  end
end
109
+
110
# True when the current mapping differs from the desired mapping in any way.
def has_mapping_updates?
  return false if current_mapping == desired_mapping

  true
end
113
+
114
# Settings that need to change (memoized): every desired setting whose current value differs,
# plus a `nil` entry for each current setting that is no longer desired.
def settings_updates
  @settings_updates ||= begin
    no_longer_desired = current_settings.keys - desired_settings.keys
    changed = desired_settings.reject { |name, value| current_settings[name] == value }

    # Updating a setting to null makes the datastore restore that setting's default value.
    no_longer_desired.each_with_object(changed) { |name, updates| updates[name] = nil }
  end
end
121
+
122
# Lists every field path found under "properties" in `mapping_hash`, depth first.
#
# Nested fields are reported as dot-separated paths, and an object field is listed itself
# before its sub-fields. `prefix` is prepended to each path and is used for the recursion.
def mapping_fields_from(mapping_hash, prefix = "")
  properties = mapping_hash["properties"]
  return [] unless properties

  properties.flat_map do |name, definition|
    path = prefix + name
    nested_paths = definition.key?("properties") ? mapping_fields_from(definition, "#{path}.") : []
    [path, *nested_paths]
  end
end
132
+
133
# The "mappings" section of the desired template config (raises `KeyError` if it is missing).
def desired_mapping
  desired_template = desired_config_parent.fetch("template")
  desired_template.fetch("mappings")
end
136
+
137
# The "settings" section of the desired template config (memoized; raises `KeyError` if missing).
def desired_settings
  return @desired_settings if @desired_settings

  desired_template = desired_config_parent.fetch("template")
  @desired_settings = desired_template.fetch("settings")
end
140
+
141
# The full desired template config (memoized): the environment-agnostic config with the recorded
# sources and the template's flattened setting overrides merged in, then normalized.
def desired_config_parent
  @desired_config_parent ||= begin
    # `_meta` is a place where we can record state on the index mapping in the datastore.
    # `_meta.ElasticGraph.sources` is maintained as an append-only set of all sources that have
    # ever been configured to flow into an index, so that we can remember whether an index which
    # currently has no `sourced_from` fields ever did. This is necessary for our automatic
    # filtering of multi-source indexes.
    recorded_sources = current_mapping.dig("_meta", "ElasticGraph", "sources") || []
    all_sources = recorded_sources.union(@index_template.current_sources.to_a).sort

    overrides = {
      "mappings" => {"_meta" => {"ElasticGraph" => {"sources" => all_sources}}},
      "settings" => @index_template.flattened_env_setting_overrides
    }
    merged_template = Support::HashUtil.deep_merge(@env_agnostic_index_config, overrides)
    normalized_template = DatastoreCore::IndexConfigNormalizer.normalize(merged_template)

    @env_agnostic_index_config_parent.merge({"template" => normalized_template})
  end
end
160
+
161
# The "mappings" section of the template as it currently exists in the datastore, or an empty
# hash when the current config has none.
def current_mapping
  existing_mappings = current_config_parent.dig("template", "mappings")
  existing_mappings || {}
end
164
+
165
# The "settings" section of the template as it currently exists in the datastore (memoized).
#
# Falls back to an empty hash when the current config has no `template.settings` entry (for
# example when `current_config_parent` is empty), mirroring `current_mapping`. Without the
# fallback, `settings_updates` would call `keys`/`[]` on `nil` and raise `NoMethodError`.
def current_settings
  @current_settings ||= current_config_parent.dig("template", "settings") || {}
end
168
+
169
# The template config fetched from the datastore (memoized), with its "template" section run
# through `DatastoreCore::IndexConfigNormalizer` when that section is present.
def current_config_parent
  @current_config_parent ||= begin
    fetched_config = @datastore_client.get_index_template(@index_template.name)
    fetched_template = fetched_config.dig("template")

    if fetched_template
      normalized_template = DatastoreCore::IndexConfigNormalizer.normalize(fetched_template)
      fetched_config.merge({"template" => normalized_template})
    else
      fetched_config
    end
  end
end
179
+
180
# A description of the difference between the current and desired template config (memoized),
# or "(no diff)" when the differ returns nothing.
def config_diff
  @config_diff ||= begin
    difference = Indexer::HashDiffer.diff(current_config_parent, desired_config_parent)
    difference || "(no diff)"
  end
end
183
+
184
# Hands a description of a performed action to the action reporter built in `initialize`.
def report_action(message)
  reporter = @reporter
  reporter.report_action(message)
end
187
+
188
+ # Helper method used to merge properties between a _desired_ configuration and a _current_ configuration.
189
+ # This is used when we are figuring out how to update an index template. We do not want to delete existing
190
+ # fields from a template--while the datastore would allow it, our schema evolution strategy depends upon
191
+ # us not dropping old unused fields. The datastore doesn't allow it on indices, anyway (though it does allow
192
+ # it on index templates). We've run into trouble (a near SEV) when allowing the logic here to delete an unused
193
+ # field from an index template. The indexer "mapping completeness" check started failing because an old version
194
+ # of the code (from back when the field in question was still used) noticed the expected field was missing and
195
+ # started failing on every event.
196
+ #
197
+ # This helps us avoid that problem by retaining any currently existing fields.
198
+ #
199
+ # Long term, if we want to support fully "garbage collecting" these old fields on templates, we will need
200
+ # to have them get dropped in a follow up step. We could have our `update_datastore_config` script notice that
201
+ # the deployed prod indexers are at a version that will tolerate the fields being dropped, or support it
202
+ # via an opt-in flag or something.
203
def merge_properties(desired_object, current_object)
  # The `_ = {}` form is kept from the original; it evaluates to the empty hash.
  desired_properties = desired_object.fetch("properties") { _ = {} }
  current_properties = current_object.fetch("properties") { _ = {} }

  # Fields that exist only in the current config are carried over untouched. For fields present
  # on both sides the desired definition wins, except that two object fields are merged
  # recursively so that nested current-only sub-fields are retained as well.
  merged_properties = desired_properties.merge(current_properties) do |_name, desired_field, current_field|
    both_have_subfields =
      current_field.is_a?(::Hash) && current_field.key?("properties") && desired_field.key?("properties")

    both_have_subfields ? merge_properties(desired_field, current_field) : desired_field
  end

  desired_object.merge("properties" => merged_properties)
end
217
+
218
def related_index_configurators
  # Fan out and build a configurator for each related index. These are generally concrete
  # indices that are based on the template, either via being specified in our config YAML, or
  # via auto creation at indexing time.
  #
  # Note that it should not matter whether the related indices are configured before or after
  # their rollover template; our use of index maintenance mode prevents new indices from being
  # auto-created while this configuration process runs.
  @related_index_configurators ||= begin
    concrete_indices = @index_template.related_rollover_indices(@datastore_client)

    # When we have a rollover index, it's important that we make at least one concrete index.
    # Otherwise, if any queries come in before the first event is indexed, there are no concrete
    # indices to search, and the datastore returns a response that differs from normal in that
    # case. That particularly creates trouble for aggregation queries, since the response format
    # they expect is quite complex.
    #
    # So when there are no concrete indices yet, use one for the current timestamp.
    if concrete_indices.empty?
      current_timestamp = @clock.now.getutc.iso8601
      concrete_indices = [@index_template.related_rollover_index_for_timestamp(current_timestamp)].compact
    end

    concrete_indices.map do |concrete_index|
      IndexDefinitionConfigurator::ForIndex.new(@datastore_client, concrete_index, @env_agnostic_index_config, @output)
    end
  end
end
244
+ end
245
+ end
246
+ end
247
+ end
@@ -0,0 +1,24 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/admin/index_definition_configurator/for_index"
10
+ require "elastic_graph/admin/index_definition_configurator/for_index_template"
11
+
12
+ module ElasticGraph
13
+ class Admin
14
+ module IndexDefinitionConfigurator
15
# Factory: builds a `ForIndexTemplate` when `index_def` reports itself as a rollover index
# template, and a `ForIndex` otherwise. `clock` is only passed on to `ForIndexTemplate`.
def self.new(datastore_client, index_def, env_agnostic_index_config, output, clock)
  unless index_def.rollover_index_template?
    return ForIndex.new(datastore_client, _ = index_def, env_agnostic_index_config, output)
  end

  ForIndexTemplate.new(datastore_client, _ = index_def, env_agnostic_index_config, output, clock)
end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,129 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/admin"
10
+ require "elastic_graph/support/from_yaml_file"
11
+ require "rake/tasklib"
12
+
13
+ module ElasticGraph
14
+ class Admin
15
+ class RakeTasks < ::Rake::TaskLib
16
+ extend Support::FromYamlFile::ForRakeTasks.new(ElasticGraph::Admin)
17
+
18
+ attr_reader :output, :prototype_index_names
19
+
20
# Stores the options and immediately defines the rake tasks.
#
# `prototype_index_names` is converted to a set, `output` is where messages are written, and the
# block is kept so that the `Admin` instance is only loaded when a task first calls `admin`.
def initialize(prototype_index_names: [], output: $stdout, &load_admin)
  @load_admin = load_admin
  @output = output
  @prototype_index_names = prototype_index_names.to_set

  define_tasks
end
27
+
28
+ private
29
+
30
# Registers the rake tasks provided by this task library:
#
# - `clusters:configure:perform` and `clusters:configure:dry_run`
# - `indices:drop_prototypes` and `indices:drop[index_def_name,cluster_name]`
def define_tasks
  separator = "=" * 80

  namespace :clusters do
    namespace :configure do
      desc "Performs the configuration of datastore clusters, including indices, settings, and scripts"
      task :perform do
        print_in_color "#{separator}\nNOTE: Performing datastore cluster updates for real!\n#{separator}", RED_COLOR_CODE

        configured_index_defs = update_clusters_for(admin)
        output.puts "Finished updating datastore clusters. Validating index consistency..."
        router = admin.datastore_indexing_router
        router.validate_mapping_completeness_of!(:all_accessible_cluster_names, *configured_index_defs)
        output.puts "Done."
      end

      desc "Dry-runs the configuration of datastore clusters, including indices, settings, and scripts"
      task :dry_run do
        print_in_color "#{separator}\nNOTE: In dry-run mode. The updates reported below will actually be no-ops.\n#{separator}", GREEN_COLOR_CODE
        update_clusters_for(admin.with_dry_run_datastore_clients)
        print_in_color "#{separator}\nNOTE: This was dry-run mode. The updates reported above were actually no-ops.\n#{separator}", GREEN_COLOR_CODE
      end
    end
  end

  namespace :indices do
    desc "Drops all prototype index definitions on all datastore clusters"
    task :drop_prototypes do
      require "elastic_graph/support/threading"

      # Only prototype index definitions that are accessible from at least one cluster.
      all_index_defs = admin.datastore_core.index_definitions_by_name.values
      prototype_indices = all_index_defs.select do |index|
        prototype_index_names.include?(index.name) && !index.all_accessible_cluster_names.empty?
      end

      output.puts "Disabling rollover index auto creation for all clusters"
      admin.cluster_settings_manager.start_index_maintenance_mode!(:all_clusters)
      output.puts "Disabled rollover index auto creation for all clusters"

      output.puts "Dropping the following prototype index definitions: #{prototype_indices.map(&:name).join(",")}"
      Support::Threading.parallel_map(prototype_indices) do |prototype_index_def|
        delete_index_def_in_all_accessible_clusters(prototype_index_def)
      end

      output.puts "Finished dropping all prototype index definitions"
    end

    desc "Drops the specified index definition on the specified datastore cluster"
    task :drop, :index_def_name, :cluster_name do |_, args|
      index_def_name = args.fetch(:index_def_name)
      cluster_name = args.fetch(:cluster_name)

      datastore_client = admin.datastore_core.clients_by_name.fetch(cluster_name) do |key|
        raise IndexOperationError, "Cluster named `#{key}` does not exist. Valid clusters: #{admin.datastore_core.clients_by_name.keys}."
      end

      index_def = admin.datastore_core.index_definitions_by_name.fetch(index_def_name)

      # Refuse to drop anything that has not been explicitly listed as a prototype.
      is_prototype = prototype_index_names.include?(index_def.name)
      if !is_prototype
        raise IndexOperationError, "Unable to drop live index #{index_def_name}. Deleting a live index is extremely dangerous. " \
          "Please ensure this is indeed intended, add the index name to the `prototype_index_names` list and retry."
      end

      output.puts "Disabling rollover index auto creation for this cluster"
      admin.cluster_settings_manager.in_index_maintenance_mode(cluster_name) do
        output.puts "Disabled rollover index auto creation for this cluster"
        output.puts "Dropping index #{index_def}"
        index_def.delete_from_datastore(datastore_client)
        output.puts "Dropped index #{index_def}"
      end
      output.puts "Re-enabled rollover index auto creation for this cluster"
    end
  end
end
100
+
101
+ # See https://en.wikipedia.org/wiki/ANSI_escape_code#Colors for full list.
102
+ RED_COLOR_CODE = 31
103
+ GREEN_COLOR_CODE = 32
104
+
105
# Announces which index definitions will be configured, configures the cluster using the given
# admin's cluster configurator, and returns those index definitions.
#
# The `admin` parameter intentionally takes precedence over the `admin` method here, so callers
# can pass an alternate instance (the `dry_run` task passes a dry-run one).
def update_clusters_for(admin)
  configurator = admin.cluster_configurator
  index_defs = configurator.accessible_index_definitions

  index_def_names = index_defs.map { |index_def| index_def.name }.join("\n")
  output.puts "The following index definitions will be configured:\n#{index_def_names}"
  configurator.configure_cluster(@output)

  index_defs
end
113
+
114
# Writes `message` to the output wrapped in the ANSI escape sequence for `color_code`, followed
# by the reset sequence.
def print_in_color(message, color_code)
  start_color = "\033[#{color_code}m"
  reset_color = "\033[0m"

  @output.puts "#{start_color}#{message}#{reset_color}"
end
117
+
118
# Deletes `index_def` from every cluster it lists as accessible, looking up each cluster's
# datastore client by name (`fetch` raises `KeyError` for an unknown cluster name).
def delete_index_def_in_all_accessible_clusters(index_def)
  index_def.all_accessible_cluster_names.each do |cluster_name|
    # Looked up inside the loop so `admin` is not touched when there are no accessible clusters.
    cluster_client = admin.datastore_core.clients_by_name.fetch(cluster_name)
    index_def.delete_from_datastore(cluster_client)
  end
end
123
+
124
# The `Admin` instance, loaded by calling the block given to `initialize` the first time it is
# needed and reused afterwards.
def admin
  return @admin if @admin

  @admin = @load_admin.call
end
127
+ end
128
+ end
129
+ end
@@ -0,0 +1,97 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/datastore_core"
10
+ require "elastic_graph/support/from_yaml_file"
11
+ require "time"
12
+
13
+ module ElasticGraph
14
+ # The entry point into this library. Create an instance of this class to get access to
15
+ # the public interfaces provided by this library.
16
+ class Admin
17
+ extend Support::FromYamlFile
18
+
19
+ # @dynamic datastore_core, schema_artifacts
20
+ attr_reader :datastore_core, :schema_artifacts
21
+
22
+ # A factory method that builds an Admin instance from the given parsed YAML config.
23
+ # `from_yaml_file(file_name, &block)` is also available (via `Support::FromYamlFile`).
24
def self.from_parsed_yaml(parsed_yaml, &datastore_client_customization_block)
  # The datastore core is built for the `:admin` context, with the customization block passed on.
  core = DatastoreCore.from_parsed_yaml(parsed_yaml, for_context: :admin, &datastore_client_customization_block)
  new(datastore_core: core)
end
27
+
28
# Stores the datastore core and clocks, and exposes the core's schema artifacts directly.
#
# `monotonic_clock` may be left `nil`, in which case the `monotonic_clock` method builds one on
# first use. `clock` defaults to `::Time`.
def initialize(datastore_core:, monotonic_clock: nil, clock: ::Time)
  @datastore_core = datastore_core
  @schema_artifacts = datastore_core.schema_artifacts
  @monotonic_clock = monotonic_clock
  @clock = clock
end
34
+
35
# The cluster configurator, built on first use (its file is only required at that point) and
# memoized afterwards.
def cluster_configurator
  return @cluster_configurator if @cluster_configurator

  require "elastic_graph/admin/cluster_configurator"

  @cluster_configurator = ClusterConfigurator.new(
    datastore_clients_by_name: @datastore_core.clients_by_name,
    index_defs: @datastore_core.index_definitions_by_name.values,
    index_configurations_by_name: schema_artifacts.indices,
    index_template_configurations_by_name: schema_artifacts.index_templates,
    scripts: schema_artifacts.datastore_scripts,
    cluster_settings_manager: cluster_settings_manager,
    clock: @clock
  )
end
49
+
50
# The cluster settings manager, built on first use (its file is only required at that point)
# and memoized afterwards.
def cluster_settings_manager
  return @cluster_settings_manager if @cluster_settings_manager

  require "elastic_graph/admin/cluster_configurator/cluster_settings_manager"

  @cluster_settings_manager = ClusterConfigurator::ClusterSettingsManager.new(
    datastore_clients_by_name: @datastore_core.clients_by_name,
    datastore_config: @datastore_core.config,
    logger: @datastore_core.logger
  )
end
60
+
61
# The indexer's datastore indexing router, built on first use (its file is only required at
# that point) and memoized afterwards.
def datastore_indexing_router
  return @datastore_indexing_router if @datastore_indexing_router

  require "elastic_graph/indexer/datastore_indexing_router"

  @datastore_indexing_router = Indexer::DatastoreIndexingRouter.new(
    datastore_clients_by_name: datastore_core.clients_by_name,
    mappings_by_index_def_name: schema_artifacts.index_mappings_by_index_def_name,
    monotonic_clock: monotonic_clock,
    logger: datastore_core.logger
  )
end
72
+
73
# The monotonic clock: the one passed to `initialize` if any, otherwise a
# `Support::MonotonicClock` built on first use (its file is only required at that point).
def monotonic_clock
  return @monotonic_clock if @monotonic_clock

  require "elastic_graph/support/monotonic_clock"
  @monotonic_clock = Support::MonotonicClock.new
end
79
+
80
+ # Returns an alternate `Admin` instance with the datastore clients replaced with
81
+ # alternate implementations that turn all write operations into no-ops.
82
def with_dry_run_datastore_clients
  require "elastic_graph/admin/datastore_client_dry_run_decorator"

  # Wrap every datastore client in the dry-run decorator, keeping the same cluster names.
  dry_run_clients_by_name = @datastore_core.clients_by_name.transform_values do |client|
    DatastoreClientDryRunDecorator.new(client)
  end

  # Everything except the clients is carried over from the existing datastore core.
  dry_run_datastore_core = DatastoreCore.new(
    config: datastore_core.config,
    logger: datastore_core.logger,
    schema_artifacts: datastore_core.schema_artifacts,
    clients_by_name: dry_run_clients_by_name,
    client_customization_block: datastore_core.client_customization_block
  )

  # Also carry over the clocks this instance was built with. `clock` is handed to the cluster
  # configurator, so without this the dry-run instance would silently fall back to the defaults
  # from `initialize` and could see a different notion of time than the real run.
  Admin.new(datastore_core: dry_run_datastore_core, monotonic_clock: @monotonic_clock, clock: @clock)
end
96
+ end
97
+ end