schema-tools 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,79 @@
1
+ require 'json'
2
+
3
module SchemaTools
  # Compares a local mappings document against the mappings reported by the
  # cluster and produces only the parts of the local document that differ.
  #
  # Any hash that carries a "type" key is treated as a field definition and is
  # always emitted whole; hashes without a "type" key are diffed recursively.
  # Keys that exist only on the remote side are never reported.
  class ApiAwareMappingsDiff
    # @param local_mappings [Hash, Object] desired mappings (string keys)
    # @param remote_mappings [Hash, nil, Object] mappings currently on the cluster
    def initialize(local_mappings, remote_mappings)
      @local_mappings = local_mappings
      @remote_mappings = remote_mappings
    end

    # Builds the minimal update document.
    #
    # @return [Hash] empty when the local mappings are not a Hash with a
    #   "properties" key or when nothing differs; otherwise a Hash with a
    #   "properties" entry (changed fields) and/or a "dynamic" entry (the local
    #   value, when the local document sets "dynamic" and it differs remotely)
    def generate_minimal_changes
      return {} unless @local_mappings.is_a?(Hash) && @local_mappings.key?("properties")

      # A non-Hash remote document is handled as if it declared nothing.
      remote = @remote_mappings.is_a?(Hash) ? @remote_mappings : {}
      property_changes = find_api_aware_changes(
        remote.fetch("properties", {}),
        @local_mappings["properties"]
      )

      # "dynamic" is only compared when the local document sets it explicitly.
      dynamic_differs = @local_mappings.key?("dynamic") &&
                        @local_mappings["dynamic"] != remote.fetch("dynamic", nil)

      update = {}
      update["properties"] = property_changes unless property_changes.empty?
      update["dynamic"] = @local_mappings["dynamic"] if dynamic_differs
      update
    end

    private

    # Recursively collects the entries of +local+ that are missing from, or
    # different in, +remote+.
    #
    # @return [Hash] empty when either argument is not a Hash
    def find_api_aware_changes(remote, local)
      return {} unless local.is_a?(Hash) && remote.is_a?(Hash)

      local.each_with_object({}) do |(name, wanted), diff|
        known = remote.key?(name)
        next if known && wanted == remote[name]

        current = remote[name]
        diffable = known && wanted.is_a?(Hash) && current.is_a?(Hash) && !field_definition?(wanted)

        diff[name] =
          if diffable
            # Container hash: descend, but fall back to the whole local value
            # when the difference cannot be expressed as nested changes.
            nested = find_api_aware_changes(current, wanted)
            nested.empty? ? wanted : nested
          else
            # New key, scalar change, or a field definition that must be sent
            # complete for OpenSearch/Elasticsearch API compatibility.
            wanted
          end
      end
    end

    # A field definition is any Hash that has a "type" key.
    def field_definition?(candidate)
      candidate.is_a?(Hash) && candidate.key?("type")
    end
  end
end
@@ -0,0 +1,23 @@
1
module SchemaTools
  # Starts a reindex from the index named by 'from_index_name' in the index
  # configuration into +index_name+, using the reindex script that SchemaFiles
  # returns for +index_name+.
  #
  # NOTE: the method announces itself as not implemented yet ("TODO IMPLEMENT
  # ME") but still issues a plain reindex; the intended reindex-by-query
  # behaviour is not present.
  #
  # @param index_name [String] destination index
  # @param client [#index_exists?, #reindex] cluster client
  # @return [Object] whatever client.reindex returns
  # @raise [RuntimeError] when index_name is nil/false, the index configuration
  #   or its 'from_index_name' entry is missing, or the source index does not
  #   exist
  def self.catchup(index_name:, client:)
    raise "index_name parameter is required" unless index_name

    config = SchemaFiles.get_index_config(index_name) ||
             raise("Index configuration not found for #{index_name}")
    source_index = config['from_index_name'] ||
                   raise("from_index_name not specified in index configuration")

    unless client.index_exists?(source_index)
      raise "Source index #{source_index} does not exist. Cannot perform catchup reindex to #{index_name}."
    end

    script = SchemaFiles.get_reindex_script(index_name)

    puts "Starting catchup reindex from #{source_index} to #{index_name}"
    # TODO NOT IMPLEMENTED YET
    # Do a reindex by query
    puts "TODO IMPLEMENT ME"
    client.reindex(source_index, index_name, script)
  end
end
@@ -0,0 +1,472 @@
1
+ require 'net/http'
2
+ require 'json'
3
+ require 'uri'
4
+ require_relative 'settings_filter'
5
+
6
# Minimal logger that writes every message to one output stream.
# All severities behave the same: the message is printed with #puts and no
# level prefix or timestamp is added.
class SimpleLogger
  # @param output [#puts] destination stream (STDOUT when omitted)
  def initialize(output = STDOUT)
    @output = output
  end

  # Defines #info, #warn and #error, each taking a single message argument.
  %i[info warn error].each do |severity|
    define_method(severity) { |message| @output.puts message }
  end
end
23
+
24
module SchemaTools
  # HTTP client for an OpenSearch/Elasticsearch cluster, built on Net::HTTP.
  #
  # #get (and every read helper layered on it) always contacts the cluster.
  # The mutating entry points (#put, #post, #delete, #delete_script and
  # #bulk_index) log the operation first and, when the client was created with
  # +dryrun: true+, return a canned response instead of sending anything.
  # When the INTERACTIVE environment variable equals "true", every logged
  # mutating operation waits for Enter on STDIN before it continues.
  #
  # Non-success HTTP statuses are reported as RuntimeError with the message
  # "HTTP <code>: <body>".
  class Client
    attr_reader :url

    # @param url [String] base URL; request paths are appended to it verbatim
    # @param dryrun [Boolean] when true, mutating operations are only simulated
    # @param logger [#info, #warn] receives operation logs
    # @param username [String, nil] HTTP basic auth user (used only together with password)
    # @param password [String, nil] HTTP basic auth password
    def initialize(url, dryrun: false, logger: SimpleLogger.new, username: nil, password: nil)
      @url = url
      @dryrun = dryrun
      @logger = logger
      @username = username
      @password = password
      @logger.info "Client is running in DRYRUN mode. No mutating operations will be performed." if dryrun
    end

    # Issues a GET request.
    #
    # @return [Object, nil] parsed JSON body on 200, nil on 404
    # @raise [RuntimeError] on any other status
    def get(path)
      response = perform(Net::HTTP::Get, path)

      case response.code.to_i
      when 200 then JSON.parse(response.body)
      when 404 then nil
      else raise "HTTP #{response.code}: #{response.body}"
      end
    end

    # Issues a PUT with +body+ serialized as JSON.
    #
    # @return [Object, nil] parsed JSON body on 200/201 (nil for an empty body);
    #   { 'acknowledged' => true } in dry-run mode
    # @raise [RuntimeError] on any other status
    def put(path, body, suppress_logging: false)
      announce('PUT', path, body) unless suppress_logging
      return dryrun_ack if @dryrun

      response = perform(Net::HTTP::Put, path, body: body.to_json)
      expect_status(response, 200, 201)
      parse_optional_json(response)
    end

    # Issues a POST with +body+ serialized as JSON.
    #
    # @return [Object, nil] parsed JSON body on 200/201 (nil for an empty body).
    #   In dry-run mode a fixed fake task id is returned for paths starting with
    #   "/_reindex" and { 'acknowledged' => true } for everything else.
    # @raise [RuntimeError] on any other status
    def post(path, body, suppress_logging: false)
      announce('POST', path, body) unless suppress_logging
      if @dryrun
        return { "task" => "FEl-TdjcTpmIvnE5_1fv4Q:164963" } if path.start_with?('/_reindex')

        return dryrun_ack
      end

      response = perform(Net::HTTP::Post, path, body: body.to_json)
      expect_status(response, 200, 201)
      parse_optional_json(response)
    end

    # Issues a DELETE. A 404 is accepted as success.
    #
    # @return [Object, nil] parsed JSON body on 200/404 (nil for an empty body);
    #   { 'acknowledged' => true } in dry-run mode
    # @raise [RuntimeError] on any other status
    def delete(path, suppress_logging: false)
      announce('DELETE', path) unless suppress_logging
      return dryrun_ack if @dryrun

      response = perform(Net::HTTP::Delete, path)
      expect_status(response, 200, 404)
      parse_optional_json(response)
    end

    # @return [Boolean] true when GET /<index_name> does not answer 404
    def index_exists?(index_name)
      !get("/#{index_name}").nil?
    end

    # @return [Hash, nil] the 'settings' section for +index_name+; nil when the
    #   index is missing or the response has no entry under that exact name
    def get_index_settings(index_name)
      get("/#{index_name}")&.dig(index_name, 'settings')
    end

    # @return [Hash, nil] the 'mappings' section for +index_name+; nil when the
    #   index is missing or the response has no entry under that exact name
    def get_index_mappings(index_name)
      get("/#{index_name}")&.dig(index_name, 'mappings')
    end

    # @return [Integer] the 'count' reported by /<index_name>/_count, or 0 when
    #   the request answers 404 or the response has no count
    def get_index_doc_count(index_name)
      response = get("/#{index_name}/_count")
      return 0 unless response

      response['count'] || 0
    end

    # Creates an index with the given settings and mappings.
    def create_index(index_name, settings, mappings)
      put("/#{index_name}", { settings: settings, mappings: mappings })
    end

    # Starts an asynchronous reindex (wait_for_completion=false) that proceeds
    # on version conflicts.
    #
    # @param script [String, nil] optional script source applied to each document
    # @return [Hash] the POST response (expected to carry the task id)
    def reindex(source_index, dest_index, script = nil)
      body = {
        source: { index: source_index },
        dest: { index: dest_index },
        conflicts: "proceed"
      }
      body[:script] = { source: script } if script

      post("/_reindex?wait_for_completion=false&refresh=false", body)
    end

    # @return [Hash, nil] the task document, or nil when the lookup answers 404
    def get_task_status(task_id)
      get("/_tasks/#{task_id}")
    end

    # Stores +script_content+ as a painless script under +script_name+.
    def put_script(script_name, script_content)
      put("/_scripts/#{script_name}", { script: { lang: "painless", source: script_content } })
    end

    # Deletes a stored script. Unlike #delete, a 404 is an error here.
    #
    # @raise [RuntimeError] "HTTP 404: Script '<name>' not found" on 404, or the
    #   generic HTTP error on any other non-200 status
    def delete_script(script_name, suppress_logging: false)
      path = "/_scripts/#{script_name}"
      announce('DELETE', path) unless suppress_logging
      return dryrun_ack if @dryrun

      response = perform(Net::HTTP::Delete, path)
      raise "HTTP 404: Script '#{script_name}' not found" if response.code.to_i == 404

      expect_status(response, 200)
      parse_optional_json(response)
    end

    # Lists stored scripts as { script_id => source }.
    #
    # GET /_scripts is tried first; if that raises for any reason the cluster
    # state metadata is queried instead. When both fail a warning is logged and
    # an empty Hash is returned.
    def get_stored_scripts
      scripts_from_scripts_endpoint
    rescue => e
      begin
        scripts_from_cluster_state
      rescue => fallback_error
        @logger.warn("Could not retrieve stored scripts: #{e.message}. Fallback error: #{fallback_error}") if @logger
        {}
      end
    end

    def delete_index(index_name)
      delete("/#{index_name}")
    end

    # @return [Array<String>] sorted index names, excluding names that start
    #   with "." or "top_queries-"; empty when the response is not an Array
    def list_indices
      response = get("/_cat/indices?format=json")
      return [] unless response && response.is_a?(Array)

      response.map { |index| index['index'] }
              .reject { |name| name.start_with?('.', 'top_queries-') } # Exclude system indices
              .sort
    end

    # @return [Hash{String => Array<String>}] alias name => names of the indices
    #   that carry it
    def list_aliases
      response = get("/_aliases")
      return {} unless response

      response.each_with_object({}) do |(index_name, index_data), aliases|
        index_aliases = index_data['aliases']
        next unless index_aliases && !index_aliases.empty?

        index_aliases.each do |alias_name, _alias_data|
          (aliases[alias_name] ||= []) << index_name
        end
      end
    end

    # @return [Array<String>] keys of the /_alias/<alias_name> response, or []
    #   when the lookup answers 404
    def get_alias_indices(alias_name)
      response = get("/_alias/#{alias_name}")
      return [] unless response

      response.keys
    end

    # Adds +alias_name+ to +index_name+.
    def create_alias(alias_name, index_name)
      post("/_aliases", { actions: [{ add: { index: index_name, alias: alias_name } }] })
    end

    # @return [Boolean] true when /_alias/<alias_name> answers with a non-empty body
    def alias_exists?(alias_name)
      response = get("/_alias/#{alias_name}")
      !(response.nil? || response.empty?)
    end

    # Removes +alias_name+ from +indices+, or from every index that currently
    # carries it when +indices+ is nil.
    def delete_alias(alias_name, indices = nil)
      indices ||= get_alias_indices(alias_name)

      actions = indices.map do |index_name|
        { remove: { index: index_name, alias: alias_name } }
      end

      post("/_aliases", { actions: actions })
    end

    # Updates index settings after SettingsFilter has removed internal settings
    # that cannot be updated. Only the 'index' section is sent.
    def update_index_settings(index_name, settings)
      filtered_settings = SettingsFilter.filter_internal_settings(settings)

      put("/#{index_name}/_settings", { index: filtered_settings['index'] || {} })
    end

    # Sends the 'properties' section of +mappings+ to /<index_name>/_mapping.
    def update_index_mappings(index_name, mappings)
      put("/#{index_name}/_mapping", { properties: mappings['properties'] || {} })
    end

    # Checks connectivity by requesting /_cluster/health.
    #
    # @return [Boolean] false when the request raises, true otherwise
    def test_connection
      path = "/_cluster/health"
      puts "Testing connection to #{@url}#{path}"
      get(path)
      true
    rescue => e
      puts e
      false
    end

    def close_index(index_name)
      post("/#{index_name}/_close", {})
    end

    # Posts to /_update_by_query with a source/dest body.
    #
    # NOTE(review): this request has the shape of a _reindex call; the
    # update-by-query API normally addresses its target index in the URL path.
    # Confirm the intended request before relying on this method.
    def update_by_query(source_index, dest_index, script = nil)
      body = {
        source: { index: source_index },
        dest: { index: dest_index }
      }
      body[:script] = { source: script } if script

      post("/_update_by_query?wait_for_completion=false", body)
    end

    def update_aliases(actions)
      post("/_aliases", { actions: actions })
    end

    # Polls a task until it reports 'completed'.
    #
    # @param task_id [String]
    # @param timeout [Numeric] seconds to wait before giving up
    # @param poll_interval [Numeric] seconds to sleep between polls
    # @return [Hash] the task document once 'completed' is truthy
    # @raise [RuntimeError] when the task cannot be found or the timeout elapses
    def wait_for_task(task_id, timeout = 3600, poll_interval: 5)
      # Monotonic clock: wall-clock adjustments must not stretch or cut the timeout.
      started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

      loop do
        task_status = get_task_status(task_id)
        # get returns nil on 404 (e.g. for the fake dry-run task id); fail with a
        # clear message instead of NoMethodError on nil.
        raise "Task #{task_id} not found" if task_status.nil?
        return task_status if task_status['completed']

        if Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at > timeout
          raise "Task #{task_id} timed out after #{timeout} seconds"
        end

        sleep poll_interval
      end
    end

    # Indexes +documents+ into +index_name+ with a single /_bulk request.
    #
    # @return [Hash] parsed bulk response; in dry-run mode a synthetic response
    #   with one 201 'index' item per document
    # @raise [RuntimeError] on any non-200 status
    def bulk_index(documents, index_name, suppress_logging: false)
      announce('POST', '/_bulk') unless suppress_logging # including documents would be too noisy
      return { 'items' => documents.map { { 'index' => { 'status' => 201 } } } } if @dryrun

      # One action line followed by one source line per document.
      lines = documents.flat_map { |doc| [{ index: { _index: index_name } }, doc] }
      ndjson = lines.map(&:to_json).join("\n") + "\n"

      response = perform(Net::HTTP::Post, '/_bulk', body: ndjson, content_type: 'application/x-ndjson')
      expect_status(response, 200)
      JSON.parse(response.body)
    end

    # @return [Boolean] true when the index settings carry
    #   index.verified_before_close == 'true'; false when the index or that
    #   setting is missing
    def index_closed?(index_name)
      response = get("/#{index_name}")
      return false unless response

      index_info = response[index_name]
      return false unless index_info

      settings = index_info['settings']
      return false unless settings

      settings.dig('index', 'verified_before_close') == 'true'
    end

    private

    # Logs a mutating operation and, in interactive mode, waits for the user.
    def announce(method, path, body = nil)
      @logger.info("DRYRUN=true, simulation only") if @dryrun
      log_operation(method, path, body)
      await_user_input if interactive_mode?
    end

    # Canned response for simulated mutating operations (fresh Hash per call).
    def dryrun_ack
      { 'acknowledged' => true }
    end

    # Builds and sends one request; +body+ must already be serialized.
    def perform(request_class, path, body: nil, content_type: 'application/json')
      uri = URI("#{@url}#{path}")
      request = request_class.new(uri)
      unless body.nil?
        request['Content-Type'] = content_type
        request.body = body
      end
      add_auth_header(request)

      make_http_request(uri) { |http| http.request(request) }
    end

    # Raises the generic HTTP error unless the status is one of +accepted+.
    def expect_status(response, *accepted)
      return if accepted.include?(response.code.to_i)

      raise "HTTP #{response.code}: #{response.body}"
    end

    # Parses the response body as JSON; nil when the body is absent or empty.
    def parse_optional_json(response)
      raw = response.body
      JSON.parse(raw) if raw && !raw.empty?
    end

    # Reads stored scripts from GET /_scripts (script source under script.source).
    def scripts_from_scripts_endpoint
      response = get("/_scripts")
      return {} unless response

      response.each_with_object({}) do |(script_id, script_data), scripts|
        scripts[script_id] = script_data.dig('script', 'source')
      end
    end

    # Reads stored scripts from the cluster state metadata.
    def scripts_from_cluster_state
      response = get("/_cluster/state/metadata?filter_path=metadata.stored_scripts")
      return {} unless response

      stored_scripts_data = response.dig('metadata', 'stored_scripts')
      return {} unless stored_scripts_data

      stored_scripts_data.each_with_object({}) do |(script_id, script_data), scripts|
        scripts[script_id] = script_data['source']
      end
    end

    # Opens a connection for +uri+ (TLS when the scheme is https) and yields it.
    def make_http_request(uri)
      use_ssl = uri.scheme == 'https'
      port = uri.port || (use_ssl ? 443 : 80)

      Net::HTTP.start(uri.hostname, port, use_ssl: use_ssl) do |http|
        yield(http)
      end
    end

    # Adds HTTP basic auth only when both username and password are set.
    def add_auth_header(request)
      request.basic_auth(@username, @password) if @username && @password
    end

    # Logs "<METHOD> <path>" in bold white, followed by the body (Strings
    # verbatim, everything else pretty-printed as JSON) when one is given.
    def log_operation(method, path, body = nil)
      message = "\e[1m\e[37m#{method} #{path}\e[0m" # Bold White color
      if body
        message += "\n#{body.is_a?(String) ? body : JSON.pretty_generate(body)}"
      end
      @logger.info message
    end

    def interactive_mode?
      ENV['INTERACTIVE'] == 'true'
    end

    def await_user_input
      print "\nPress Enter to continue... "
      STDIN.gets
    end
  end
end
@@ -0,0 +1,28 @@
1
module SchemaTools
  # Closes an index, or every index behind an alias.
  #
  # +name+ is looked up as an alias first. For an alias, each index returned by
  # the client is closed if it exists; indices that do not exist are reported
  # and skipped. Progress is printed to standard output.
  #
  # @param name [String] alias or index name
  # @param client [#alias_exists?, #get_alias_indices, #index_exists?, #close_index]
  # @return [nil]
  # @raise [RuntimeError] when name is nil/false or matches neither an alias
  #   nor an index
  def self.close(name:, client:)
    raise "name parameter is required" unless name

    unless client.alias_exists?(name)
      raise "Neither alias nor index exists: #{name}" unless client.index_exists?(name)

      puts "Closing index #{name}"
      client.close_index(name)
      puts "✓ Index #{name} closed"
      return
    end

    targets = client.get_alias_indices(name)
    puts "Closing alias '#{name}' (points to: #{targets.join(', ')})"
    puts "This will close all underlying index(es)."

    targets.each do |index_name|
      unless client.index_exists?(index_name)
        puts "⚠ Index #{index_name} does not exist"
        next
      end

      client.close_index(index_name)
      puts "✓ Index #{index_name} closed"
    end
    puts "✓ All index(es) in alias '#{name}' closed"
  end
end
@@ -0,0 +1,46 @@
1
module SchemaTools
  # Process-wide configuration, read from environment variables once when this
  # file is loaded. Each constant has a reader method of the same (lower-case)
  # name on the module.
  module Config
    # Cluster base URL, e.g. http://localhost:9200 (OPENSEARCH_URL wins over ELASTICSEARCH_URL)
    CONNECTION_URL = ENV.fetch('OPENSEARCH_URL') { ENV['ELASTICSEARCH_URL'] }

    # Optional username for HTTP basic authentication
    CONNECTION_USERNAME = ENV.fetch('OPENSEARCH_USERNAME') { ENV['ELASTICSEARCH_USERNAME'] }

    # Optional password for HTTP basic authentication
    CONNECTION_PASSWORD = ENV.fetch('OPENSEARCH_PASSWORD') { ENV['ELASTICSEARCH_PASSWORD'] }

    # Folder on disk where all schema definitions are stored
    SCHEMAS_PATH = ENV.fetch('SCHEMAS_PATH', 'schemas')

    # Folder on disk where painless scripts are stored
    PAINLESS_SCRIPTS_PATH = ENV.fetch('PAINLESS_SCRIPTS_PATH', 'painless_scripts')

    # Descriptive name for operations (kept for backward compatibility)
    SCHEMA_TOOLS_USER = ENV.fetch('SCHEMA_TOOLS_USER', 'rake task')

    class << self
      def schema_tools_user
        SCHEMA_TOOLS_USER
      end

      def connection_url
        CONNECTION_URL
      end

      def connection_username
        CONNECTION_USERNAME
      end

      def connection_password
        CONNECTION_PASSWORD
      end

      def schemas_path
        SCHEMAS_PATH
      end

      def painless_scripts_path
        PAINLESS_SCRIPTS_PATH
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module SchemaTools
  # Deletes an alias or hard-deletes an index.
  #
  # +name+ is looked up as an alias first; deleting an alias leaves the indices
  # behind it untouched. An index is only deleted when the client reports it as
  # closed. Progress is printed to standard output.
  #
  # @param name [String] alias or index name
  # @param client [#alias_exists?, #get_alias_indices, #delete_alias,
  #   #index_exists?, #index_closed?, #delete_index]
  # @return [nil]
  # @raise [RuntimeError] when name is nil/false, when the index is not closed,
  #   or when name matches neither an alias nor an index
  def self.delete(name:, client:)
    raise "name parameter is required" unless name

    if client.alias_exists?(name)
      index_list = client.get_alias_indices(name).join(', ')
      puts "Deleting alias '#{name}' (points to: #{index_list})"
      puts "The underlying index(es) will remain intact."

      client.delete_alias(name)
      puts "✓ Alias '#{name}' deleted"
      puts "Index(es) #{index_list} remain(s) intact"
      return
    end

    raise "Neither alias nor index exists: #{name}" unless client.index_exists?(name)

    puts "Checking that index #{name} is closed before proceeding"
    unless client.index_closed?(name)
      raise "Hard delete only allowed on closed indexes. Please run rake 'schema:close[#{name}]' first."
    end
    puts "Index #{name} is closed"

    puts "Hard deleting index #{name}"
    client.delete_index(name)
    puts "✓ Index #{name} hard deleted"
  end
end