elasticgraph-admin 0.18.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,247 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/admin/cluster_configurator/action_reporter"
10
+ require "elastic_graph/admin/index_definition_configurator/for_index"
11
+ require "elastic_graph/datastore_core/index_config_normalizer"
12
+ require "elastic_graph/indexer/hash_differ"
13
+ require "elastic_graph/support/hash_util"
14
+
15
+ module ElasticGraph
16
+ class Admin
17
+ module IndexDefinitionConfigurator
18
+ # Responsible for managing an index template's configuration, including both mappings and settings.
19
+ class ForIndexTemplate
20
+ # @dynamic index_template
21
+
22
+ attr_reader :index_template
23
+
24
# Builds a configurator for one index template.
#
# @param datastore_client client used to read and write the index template
# @param index_template the index template definition to configure
# @param env_agnostic_index_config_parent [Hash] template config; must have a "template" key
# @param output destination handed to the action reporter and to related index configurators
# @param clock object whose `now` is used when a concrete index has to be derived from the current time
# @raise [KeyError] if `env_agnostic_index_config_parent` has no "template" entry
def initialize(datastore_client, index_template, env_agnostic_index_config_parent, output, clock)
  @datastore_client = datastore_client
  @index_template = index_template
  @output = output
  @clock = clock

  # Keep both the full config and the nested "template" section: the former is what
  # gets put to the datastore, the latter is what the related concrete indices use.
  @env_agnostic_index_config_parent = env_agnostic_index_config_parent
  @env_agnostic_index_config = env_agnostic_index_config_parent.fetch("template")

  @reporter = ClusterConfigurator::ActionReporter.new(output)
end
33
+
34
+ # Attempts to idempotently update the index template configuration to the desired configuration
35
+ # exposed by the index template definition. Each related concrete index is configured first; then,
36
+ # based on the state of the index template in the datastore, the following applies:
37
+ #
38
+ # - If the mappings and settings already match what is desired: leaves the template untouched.
39
+ # - If the desired mapping has fewer fields than what is in the template: does NOT raise. The
40
+ # existing fields are kept in the payload that gets put, and the reported action notes that the
41
+ # extra fields will not actually get removed.
42
+ # - If the settings have desired changes: puts the whole template, since the datastore has no
43
+ # partial update for index templates (the same API both creates and updates them).
44
+ # - If the mapping has desired changes: updates the mappings.
45
+ #
46
+ # Note that any of the writes to the index may fail. There are many things that cannot
47
+ # be changed on an existing index (such as static settings, field mapping types, etc). We do not attempt
48
+ # to validate those things ahead of time and instead rely on the datastore to fail if an invalid operation
49
+ # is attempted.
50
def configure!
  related_index_configurators.each { |configurator| configurator.configure! }

  # Index templates have no partial-update API: a single call both creates and replaces
  # the template, so any mapping or settings difference results in one full put.
  return unless has_mapping_updates? || settings_updates.any?

  put_index_template
end
56
+
57
# Collects validation errors from every related index configurator and, when the template
# already exists in the datastore, adds an error if any field's mapping type would change.
#
# @return [Array] the accumulated validation errors (empty when everything is valid)
def validate
  errors = related_index_configurators.flat_map { |configurator| configurator.validate }

  # A template that does not exist yet has nothing a type change could conflict with.
  if index_template_exists? && mapping_type_changes.any?
    errors << cannot_modify_mapping_field_type_error
  end

  errors
end
66
+
67
+ private
68
+
69
# Puts the desired template config to the datastore and reports the action.
#
# The mappings in the payload are the desired mappings merged with the current ones, so
# fields that exist on the template today are kept even if they are no longer desired.
def put_index_template
  payload = Support::HashUtil.deep_merge(
    desired_config_parent,
    {"template" => {"mappings" => merge_properties(desired_mapping, current_mapping)}}
  )

  description = "Updated index template: `#{@index_template.name}`:\n#{config_diff}"

  # The diff lists fields that are absent from the desired mapping; say explicitly that
  # they are being kept so the report is not misread as a removal.
  if mapping_removals.any?
    removal_note = "\n\nNote: the extra fields listed here will not actually get removed. " \
      "Mapping removals are unsupported (but ElasticGraph will leave them alone and they'll cause no problems)."
    description = "#{description}#{removal_note}"
  end

  @datastore_client.put_index_template(name: @index_template.name, body: payload)
  report_action description
end
85
+
86
# Builds the validation error message used when the desired mapping changes the type of
# one or more fields that already exist on the index template.
#
# @return [String]
def cannot_modify_mapping_field_type_error
  changed_fields = mapping_type_changes.inspect
  template_name = @index_template.name

  "The datastore does not support modifying the type of a field from an existing index definition. " \
    "You are attempting to update type of fields (#{changed_fields}) from the #{template_name} index definition."
end
90
+
91
# Whether the datastore returned any config for this index template.
#
# @return [Boolean]
def index_template_exists?
  return false if current_config_parent.empty?

  true
end
94
+
95
# Field paths present in the current mapping but absent from the desired mapping (memoized).
#
# @return [Array<String>]
def mapping_removals
  @mapping_removals ||= begin
    existing_fields = mapping_fields_from(current_mapping)
    wanted_fields = mapping_fields_from(desired_mapping)
    existing_fields - wanted_fields
  end
end
98
+
99
# Flattened mapping keys ending in ".type" that exist in both the current and the desired
# mapping but hold different values (memoized).
#
# @return [Array<String>]
def mapping_type_changes
  @mapping_type_changes ||= begin
    current_flat = Support::HashUtil.flatten_and_stringify_keys(current_mapping)
    desired_flat = Support::HashUtil.flatten_and_stringify_keys(desired_mapping)

    changed = current_flat.select do |path, current_value|
      # Only `type` entries matter here, and only for fields that are still desired.
      next false unless path.end_with?(".type")
      next false unless desired_flat.key?(path)

      desired_flat[path] != current_value
    end

    changed.keys
  end
end
109
+
110
# Whether the current template mapping differs in any way from the desired mapping.
#
# @return [Boolean]
def has_mapping_updates?
  !(current_mapping == desired_mapping)
end
113
+
114
# Settings that differ between the current template and the desired config (memoized).
#
# @return [Hash] desired settings whose value differs from the current one, plus every
#   setting that is currently set but no longer desired, mapped to nil
def settings_updates
  @settings_updates ||= begin
    # Updating a setting to null will cause the datastore to restore the default value of the setting.
    no_longer_desired = current_settings.keys - desired_settings.keys
    changed = desired_settings.reject { |name, value| current_settings[name] == value }

    no_longer_desired.each_with_object(changed) { |name, updates| updates[name] = nil }
  end
end
121
+
122
# Lists the dot-separated path of every field in a mapping, parents before their subfields.
#
# @param mapping_hash [Hash] a mapping (or sub-mapping) that may contain a "properties" key
# @param prefix [String] path prefix prepended to each field name at this level
# @return [Array<String>]
def mapping_fields_from(mapping_hash, prefix = "")
  properties = mapping_hash["properties"] || []

  properties.each_with_object([]) do |(name, params), fields|
    path = prefix + name
    fields << path
    # Object fields carry their own "properties"; descend with the path as the new prefix.
    fields.concat(mapping_fields_from(params, "#{path}.")) if params.key?("properties")
  end
end
132
+
133
# The "mappings" section of the desired template config.
#
# @raise [KeyError] if the desired config lacks "template" or "mappings"
def desired_mapping
  template = desired_config_parent.fetch("template")
  template.fetch("mappings")
end
136
+
137
# The "settings" section of the desired template config (memoized).
#
# @raise [KeyError] if the desired config lacks "template" or "settings"
def desired_settings
  @desired_settings ||= begin
    template = desired_config_parent.fetch("template")
    template.fetch("settings")
  end
end
140
+
141
# The full desired template config (memoized): the env-agnostic config with the recorded
# sources and the env-specific setting overrides merged into its "template" section.
#
# @return [Hash]
def desired_config_parent
  @desired_config_parent ||= begin
    # `_meta` is a place where we can record state on the index mapping in the datastore.
    # We want to maintain `_meta.ElasticGraph.sources` as an append-only set of all sources that
    # have ever been configured to flow into an index, so that we can remember whether or not an
    # index which currently has no `sourced_from` fields ever did. This is necessary for our
    # automatic filtering of multi-source indexes.
    recorded_sources = current_mapping.dig("_meta", "ElasticGraph", "sources") || []
    all_sources = (recorded_sources | @index_template.current_sources.to_a).sort

    overrides = {
      "mappings" => {"_meta" => {"ElasticGraph" => {"sources" => all_sources}}},
      "settings" => @index_template.flattened_env_setting_overrides
    }

    merged_config = Support::HashUtil.deep_merge(@env_agnostic_index_config, overrides)
    normalized_config = DatastoreCore::IndexConfigNormalizer.normalize(merged_config)

    @env_agnostic_index_config_parent.merge({"template" => normalized_config})
  end
end
160
+
161
# The "mappings" section of the template currently in the datastore, or an empty hash
# when there is none.
#
# @return [Hash]
def current_mapping
  mappings = current_config_parent.dig("template", "mappings")
  mappings || {}
end
164
+
165
# The "settings" section of the template currently in the datastore (memoized).
#
# Falls back to an empty hash when the template does not exist or has no settings, the
# same way `current_mapping` falls back for mappings. Without the fallback a nil would
# reach `settings_updates`, which calls `keys` and `[]` on this value.
#
# @return [Hash]
def current_settings
  @current_settings ||= current_config_parent.dig("template", "settings") || {}
end
168
+
169
# The template config as currently stored in the datastore (memoized), with its
# "template" section, when present, passed through the index config normalizer.
#
# @return [Hash]
def current_config_parent
  @current_config_parent ||= begin
    fetched = @datastore_client.get_index_template(@index_template.name)
    template = fetched.dig("template")

    if template
      fetched.merge({"template" => DatastoreCore::IndexConfigNormalizer.normalize(template)})
    else
      # Nothing to normalize; hand back whatever the client returned.
      fetched
    end
  end
end
179
+
180
# A diff between the current and the desired template config (memoized), or the
# placeholder "(no diff)" when the differ returns nothing.
def config_diff
  @config_diff ||= begin
    diff = Indexer::HashDiffer.diff(current_config_parent, desired_config_parent)
    diff || "(no diff)"
  end
end
183
+
184
# Forwards the description of a performed action to the action reporter.
#
# @param message [String] description of the action
def report_action(message)
  reporter = @reporter
  reporter.report_action(message)
end
187
+
188
+ # Helper method used to merge properties between a _desired_ configuration and a _current_ configuration.
189
+ # This is used when we are figuring out how to update an index template. We do not want to delete existing
190
+ # fields from a template--while the datastore would allow it, our schema evolution strategy depends upon
191
+ # us not dropping old unused fields. The datastore doesn't allow it on indices, anyway (though it does allow
192
+ # it on index templates). We've ran into trouble (a near SEV) when allowing the logic here to delete an unused
193
+ # field from an index template. The indexer "mapping completeness" check started failing because an old version
194
+ # of the code (from back when the field in question was still used) noticed the expected field was missing and
195
+ # started failing on every event.
196
+ #
197
+ # This helps us avoid that problem by retaining any currently existing fields.
198
+ #
199
+ # Long term, if we want to support fully "garbage collecting" these old fields on templates, we will need
200
+ # to have them get dropped in a follow up step. We could have our `update_datastore_config` script notice that
201
+ # the deployed prod indexers are at a version that will tolerate the fields being dropped, or support it
202
+ # via an opt-in flag or something.
203
def merge_properties(desired_object, current_object)
  desired_properties = desired_object.fetch("properties") { _ = {} }
  current_properties = current_object.fetch("properties") { _ = {} }

  # Fields that exist only in the current config are carried over as-is. For fields on
  # both sides the desired definition wins, except that object fields are merged
  # recursively so that their existing subfields are retained too.
  merged_properties = desired_properties.merge(current_properties) do |_name, desired, current|
    both_have_subfields = current.is_a?(::Hash) && current.key?("properties") && desired.key?("properties")
    both_have_subfields ? merge_properties(desired, current) : desired
  end

  desired_object.merge("properties" => merged_properties)
end
217
+
218
# One `ForIndex` configurator per concrete index related to this template (memoized).
def related_index_configurators
  # Here we fan out and get a configurator for each related index. These are generally concrete
  # indices that are based on a template, either via being specified in our config YAML, or via
  # auto creation at indexing time.
  #
  # Note that it should not matter whether the related indices are configured before or after
  # their rollover template; our use of index maintenance mode prevents new indices from
  # being auto-created while this configuration process runs.
  @related_index_configurators ||= begin
    concrete_indices = @index_template.related_rollover_indices(@datastore_client)

    # When we have a rollover index, it's important that we make at least one concrete index.
    # Otherwise, if any queries come in before the first event is indexed, we won't have any
    # concrete indices to search, and the datastore returns a response that differs from normal
    # in that case. It particularly creates trouble for aggregation queries since the response
    # format it expects is quite complex.
    #
    # So if there are no concrete indices yet, use the one for the current timestamp.
    if concrete_indices.empty?
      now = @clock.now.getutc.iso8601
      concrete_indices = [@index_template.related_rollover_index_for_timestamp(now)].compact
    end

    concrete_indices.map do |concrete_index|
      IndexDefinitionConfigurator::ForIndex.new(@datastore_client, concrete_index, @env_agnostic_index_config, @output)
    end
  end
end
244
+ end
245
+ end
246
+ end
247
+ end
@@ -0,0 +1,24 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/admin/index_definition_configurator/for_index"
10
+ require "elastic_graph/admin/index_definition_configurator/for_index_template"
11
+
12
+ module ElasticGraph
13
+ class Admin
14
+ module IndexDefinitionConfigurator
15
# Builds the configurator matching the kind of index definition given: `ForIndexTemplate`
# for rollover index templates, `ForIndex` for everything else. The clock is only handed
# to the template configurator.
def self.new(datastore_client, index_def, env_agnostic_index_config, output, clock)
  unless index_def.rollover_index_template?
    return ForIndex.new(datastore_client, _ = index_def, env_agnostic_index_config, output)
  end

  ForIndexTemplate.new(datastore_client, _ = index_def, env_agnostic_index_config, output, clock)
end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,129 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/admin"
10
+ require "elastic_graph/support/from_yaml_file"
11
+ require "rake/tasklib"
12
+
13
+ module ElasticGraph
14
+ class Admin
15
+ class RakeTasks < ::Rake::TaskLib
16
+ extend Support::FromYamlFile::ForRakeTasks.new(ElasticGraph::Admin)
17
+
18
+ attr_reader :output, :prototype_index_names
19
+
20
# Defines the rake tasks.
#
# @param prototype_index_names [Enumerable<String>] names of the index definitions the drop tasks may delete
# @param output where task progress messages are written
# @param load_admin block that returns the admin object; it is only called once a task needs it
def initialize(prototype_index_names: [], output: $stdout, &load_admin)
  @load_admin = load_admin
  @prototype_index_names = prototype_index_names.to_set
  @output = output

  define_tasks
end
27
+
28
+ private
29
+
30
# Defines all rake tasks provided by this task library.
def define_tasks
  define_cluster_configuration_tasks
  define_index_tasks
end

# Defines `clusters:configure:perform` and `clusters:configure:dry_run`.
def define_cluster_configuration_tasks
  rule = "=" * 80

  namespace :clusters do
    namespace :configure do
      desc "Performs the configuration of datastore clusters, including indices, settings, and scripts"
      task :perform do
        print_in_color "#{rule}\nNOTE: Performing datastore cluster updates for real!\n#{rule}", RED_COLOR_CODE

        configured_index_defs = update_clusters_for(admin)
        output.puts "Finished updating datastore clusters. Validating index consistency..."
        admin.datastore_indexing_router.validate_mapping_completeness_of!(:all_accessible_cluster_names, *configured_index_defs)
        output.puts "Done."
      end

      desc "Dry-runs the configuration of datastore clusters, including indices, settings, and scripts"
      task :dry_run do
        print_in_color "#{rule}\nNOTE: In dry-run mode. The updates reported below will actually be no-ops.\n#{rule}", GREEN_COLOR_CODE
        update_clusters_for(admin.with_dry_run_datastore_clients)
        print_in_color "#{rule}\nNOTE: This was dry-run mode. The updates reported above were actually no-ops.\n#{rule}", GREEN_COLOR_CODE
      end
    end
  end
end

# Defines `indices:drop_prototypes` and `indices:drop`.
def define_index_tasks
  namespace :indices do
    desc "Drops all prototype index definitions on all datastore clusters"
    task :drop_prototypes do
      require "elastic_graph/support/threading"

      # Only prototype index definitions that are accessible on at least one cluster.
      droppable_index_defs = admin.datastore_core.index_definitions_by_name.values.select do |index|
        prototype_index_names.include?(index.name) && !index.all_accessible_cluster_names.empty?
      end

      output.puts "Disabling rollover index auto creation for all clusters"
      admin.cluster_settings_manager.start_index_maintenance_mode!(:all_clusters)
      output.puts "Disabled rollover index auto creation for all clusters"

      output.puts "Dropping the following prototype index definitions: #{droppable_index_defs.map(&:name).join(",")}"
      Support::Threading.parallel_map(droppable_index_defs) do |prototype_index_def|
        delete_index_def_in_all_accessible_clusters(prototype_index_def)
      end

      output.puts "Finished dropping all prototype index definitions"
    end

    desc "Drops the specified index definition on the specified datastore cluster"
    task :drop, :index_def_name, :cluster_name do |_, args|
      index_def_name = args.fetch(:index_def_name)
      cluster_name = args.fetch(:cluster_name)

      clients_by_name = admin.datastore_core.clients_by_name
      datastore_client = clients_by_name.fetch(cluster_name) do |key|
        raise IndexOperationError, "Cluster named `#{key}` does not exist. Valid clusters: #{admin.datastore_core.clients_by_name.keys}."
      end

      index_def = admin.datastore_core.index_definitions_by_name.fetch(index_def_name)

      # Refuse to drop anything that was not explicitly declared a prototype.
      unless prototype_index_names.include?(index_def.name)
        raise IndexOperationError, "Unable to drop live index #{index_def_name}. Deleting a live index is extremely dangerous. " \
          "Please ensure this is indeed intended, add the index name to the `prototype_index_names` list and retry."
      end

      output.puts "Disabling rollover index auto creation for this cluster"
      admin.cluster_settings_manager.in_index_maintenance_mode(cluster_name) do
        output.puts "Disabled rollover index auto creation for this cluster"
        output.puts "Dropping index #{index_def}"
        index_def.delete_from_datastore(datastore_client)
        output.puts "Dropped index #{index_def}"
      end
      output.puts "Re-enabled rollover index auto creation for this cluster"
    end
  end
end
100
+
101
+ # See https://en.wikipedia.org/wiki/ANSI_escape_code#Colors for full list.
102
+ RED_COLOR_CODE = 31
103
+ GREEN_COLOR_CODE = 32
104
+
105
# Announces which index definitions are about to be configured, then configures the
# cluster through the given admin's cluster configurator.
#
# @param admin the admin (real or dry-run) whose cluster configurator is used
# @return the accessible index definitions reported by the configurator
def update_clusters_for(admin)
  configurator = admin.cluster_configurator
  index_defs = configurator.accessible_index_definitions

  names = index_defs.map(&:name).join("\n")
  output.puts "The following index definitions will be configured:\n#{names}"
  configurator.configure_cluster(@output)

  index_defs
end
113
+
114
# Writes the message to the output wrapped in ANSI escape sequences: the given color
# code before it and a reset (code 0) after it.
#
# @param message [String] text to print
# @param color_code [Integer] ANSI color code, e.g. `RED_COLOR_CODE`
def print_in_color(message, color_code)
  colorized = "\033[#{color_code}m#{message}\033[0m"
  @output.puts colorized
end
117
+
118
# Deletes the index definition from each cluster it reports as accessible.
#
# @param index_def the index definition to delete
# @raise [KeyError] if an accessible cluster name has no matching datastore client
def delete_index_def_in_all_accessible_clusters(index_def)
  index_def.all_accessible_cluster_names.each do |cluster_name|
    client = admin.datastore_core.clients_by_name.fetch(cluster_name)
    index_def.delete_from_datastore(client)
  end
end
123
+
124
# The admin object, obtained by calling the block given to `initialize` the first time it
# is needed and then reused.
def admin
  return @admin if @admin

  @admin = @load_admin.call
end
127
+ end
128
+ end
129
+ end
@@ -0,0 +1,97 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/datastore_core"
10
+ require "elastic_graph/support/from_yaml_file"
11
+ require "time"
12
+
13
+ module ElasticGraph
14
+ # The entry point into this library. Create an instance of this class to get access to
15
+ # the public interfaces provided by this library.
16
+ class Admin
17
+ extend Support::FromYamlFile
18
+
19
+ # @dynamic datastore_core, schema_artifacts
20
+ attr_reader :datastore_core, :schema_artifacts
21
+
22
+ # A factory method that builds an Admin instance from the given parsed YAML config.
23
+ # `from_yaml_file(file_name, &block)` is also available (via `Support::FromYamlFile`).
24
def self.from_parsed_yaml(parsed_yaml, &datastore_client_customization_block)
  # The datastore core is built for the `:admin` context; the optional block is passed
  # straight through to it.
  datastore_core = DatastoreCore.from_parsed_yaml(
    parsed_yaml,
    for_context: :admin,
    &datastore_client_customization_block
  )

  new(datastore_core: datastore_core)
end
27
+
28
# @param datastore_core the datastore core supplying clients, config, logger and schema artifacts
# @param monotonic_clock optional monotonic clock; when nil one is created on first use
# @param clock object responding to `now`; defaults to `::Time`
def initialize(datastore_core:, monotonic_clock: nil, clock: ::Time)
  @clock = clock
  @monotonic_clock = monotonic_clock
  @datastore_core = datastore_core
  @schema_artifacts = datastore_core.schema_artifacts
end
34
+
35
# The cluster configurator, built on first use from the datastore core and the schema
# artifacts, then reused.
def cluster_configurator
  return @cluster_configurator if @cluster_configurator

  # Loaded lazily, only when a configurator is actually requested.
  require "elastic_graph/admin/cluster_configurator"

  options = {
    datastore_clients_by_name: @datastore_core.clients_by_name,
    index_defs: @datastore_core.index_definitions_by_name.values,
    index_configurations_by_name: schema_artifacts.indices,
    index_template_configurations_by_name: schema_artifacts.index_templates,
    scripts: schema_artifacts.datastore_scripts,
    cluster_settings_manager: cluster_settings_manager,
    clock: @clock
  }

  @cluster_configurator = ClusterConfigurator.new(**options)
end
49
+
50
# The cluster settings manager, built on first use from the datastore core's clients,
# config and logger, then reused.
def cluster_settings_manager
  return @cluster_settings_manager if @cluster_settings_manager

  # Loaded lazily, only when a settings manager is actually requested.
  require "elastic_graph/admin/cluster_configurator/cluster_settings_manager"

  options = {
    datastore_clients_by_name: @datastore_core.clients_by_name,
    datastore_config: @datastore_core.config,
    logger: @datastore_core.logger
  }

  @cluster_settings_manager = ClusterConfigurator::ClusterSettingsManager.new(**options)
end
60
+
61
# The datastore indexing router, built on first use and then reused.
def datastore_indexing_router
  return @datastore_indexing_router if @datastore_indexing_router

  # Loaded lazily, only when a router is actually requested.
  require "elastic_graph/indexer/datastore_indexing_router"

  options = {
    datastore_clients_by_name: datastore_core.clients_by_name,
    mappings_by_index_def_name: schema_artifacts.index_mappings_by_index_def_name,
    monotonic_clock: monotonic_clock,
    logger: datastore_core.logger
  }

  @datastore_indexing_router = Indexer::DatastoreIndexingRouter.new(**options)
end
72
+
73
# The monotonic clock: the one passed to `initialize` if any, otherwise a
# `Support::MonotonicClock` created on first use and then reused.
def monotonic_clock
  return @monotonic_clock if @monotonic_clock

  # Only load the default implementation when no clock was injected.
  require "elastic_graph/support/monotonic_clock"
  @monotonic_clock = Support::MonotonicClock.new
end
79
+
80
+ # Returns an alternate `Admin` instance with the datastore clients replaced with
81
+ # alternate implementations that turn all write operations into no-ops.
82
def with_dry_run_datastore_clients
  require "elastic_graph/admin/datastore_client_dry_run_decorator"

  # Wrap every client, keeping the cluster names they are registered under.
  dry_run_clients_by_name = @datastore_core.clients_by_name.transform_values do |client|
    DatastoreClientDryRunDecorator.new(client)
  end

  # Same config, logger and schema artifacts as this instance; only the clients differ.
  dry_run_datastore_core = DatastoreCore.new(
    config: datastore_core.config,
    logger: datastore_core.logger,
    schema_artifacts: datastore_core.schema_artifacts,
    clients_by_name: dry_run_clients_by_name,
    client_customization_block: datastore_core.client_customization_block
  )

  # Carry over the clocks this instance was built with. Otherwise an injected clock would
  # silently be replaced by the defaults and the dry run could behave differently from
  # the real run. `@monotonic_clock` may still be nil here, which matches the default.
  Admin.new(
    datastore_core: dry_run_datastore_core,
    monotonic_clock: @monotonic_clock,
    clock: @clock
  )
end
96
+ end
97
+ end