schema-tools 1.0.9 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2b738fb047f313fd9e18d008f02e3bfcbb49ba675fd59f927242340212e2d098
4
- data.tar.gz: fd89c552170b2b080bb0327963ac47fe42da36a2aaa859aaf28a150f65e61367
3
+ metadata.gz: ace88e188ea3e99453282c5b8a4f3160aabd50a1791b44adbbbbaa96ec2d9801
4
+ data.tar.gz: c4cb7b090362308b92c9e8f857110d7d104eef7907649825669fc46cc34f567b
5
5
  SHA512:
6
- metadata.gz: 9553ce52859e437d88a4680af4234822b96f3c97a5518c7ba6ffea37b75b81c9336a4167fa1add2e824bf2aaf452aec382970d9c134139d56b3aee1288f4347a
7
- data.tar.gz: b11e364c73bb62b6e7f9d49d44740fdeda4fe11483d6310b196595124c2835f716a0c1639346a56d60dc89c5bdaa7a651c490931fb6cf01b613a6757e0dd65ab
6
+ metadata.gz: 9d69414e92b731a19f47c98cb97a0cb8c6f2994be31f61930d7fcab7cc60272bd4c5af2790cbdd5eae7061a737eef744f7ab643d58c0647dfb3b166ad1f8d2b3
7
+ data.tar.gz: 2441ceb2f6d920aaf4e33b1116978f014e093bd433d4dc2ee3206b79d06c5dd43353de449f3fb7cc35a941a104eacdd8497e4601f246e4d2db5eaae79bb4a03b
data/README.md CHANGED
@@ -134,10 +134,55 @@ schemas/users
134
134
 
135
135
  Each schema folder name matches the name of an alias.
136
136
 
137
- ## Other settings and tasks
137
+ ## Seed sample data
138
138
 
139
139
  Use `rake schema:seed` to seed an index with sample documents that conform to your schema.
140
140
 
141
+ The seeder can generate sample docs for an index in three ways:
142
+
143
+ 1. (Default) Mappings-based seeder
144
+
145
+ The seeder generates random data that conforms to the index's mappings.
146
+
147
+ 2. Sample-based seeder
148
+
149
+ Add a `sample_docs.json` file in the schema folder with example docs to randomly select from when seeding:
150
+
151
+ ```json
152
+ {
153
+ "hits": [
154
+ {
155
+ "_source": {
156
+ "title": "Foo",
157
+ "desc": "Bar"
158
+ }
159
+ },
160
+ ...
161
+ ]
162
+ }
163
+ ```
164
+
165
+ 3. Custom document seeder
166
+
167
+ Add a `doc_seeder.rb` file in the schema folder that defines a `DocSeeder` class:
168
+
169
+ ```ruby
170
+ # schema:seed invokes this class when seeding test data for this index
171
+ class DocSeeder
172
+ def initialize(index_or_alias_name) end
173
+ def generate_document
174
+ return {
175
+ 'title' => 'Foo',
176
+ 'desc' => 'Bar'
177
+ }
178
+ end
179
+ end
180
+ ```
181
+
182
+ The seeder first looks for a custom document seeder. If none is found, it falls back to the sample-based seeder. If no sample documents are found, it falls back to the mappings-based seeder.
183
+
184
+ ## Other settings and tasks
185
+
141
186
  Use `DRYRUN` to simulate but not apply any POST/PUT/DELETE operations to your index:
142
187
 
143
188
  ```
@@ -23,6 +23,21 @@ module SchemaTools
23
23
  File.exist?(script_path) ? File.read(script_path) : nil
24
24
  end
25
25
 
26
+ def self.get_sample_docs(alias_name)
27
+ sample_docs_path = File.join(Config.schemas_path, alias_name, 'sample_docs.json')
28
+ return nil unless File.exist?(sample_docs_path)
29
+
30
+ JSON.parse(File.read(sample_docs_path))
31
+ end
32
+
33
# Loads the custom document seeder for a schema, if the schema provides one.
#
# Looks for `doc_seeder.rb` in the schema folder named after +alias_name+
# (under Config.schemas_path), requires it, and returns the DocSeeder class
# that file is expected to define.
#
# @param alias_name [String] schema folder name
# @return [Class, nil] the DocSeeder class, or nil when there is no doc_seeder.rb
# @raise [NameError] when doc_seeder.rb exists but does not define DocSeeder
def self.get_doc_seeder_class(alias_name)
  seeder_path = File.join(Config.schemas_path, alias_name, 'doc_seeder.rb')
  return nil unless File.exist?(seeder_path)

  require(File.expand_path(seeder_path))

  # Fail with an actionable message rather than a bare "uninitialized constant".
  unless defined?(DocSeeder)
    raise NameError, "#{seeder_path} must define a class named DocSeeder"
  end

  DocSeeder
end
40
+
26
41
  def self.discover_all_schemas
27
42
  return [] unless Dir.exist?(Config.schemas_path)
28
43
 
@@ -1,21 +1,43 @@
1
1
  module SchemaTools
2
2
  def self.seed(client:)
3
- # List available indices (connection already validated during client initialization)
3
+ # List available indices and aliases (connection already validated during client initialization)
4
4
  puts "Connecting to #{Config.connection_url}..."
5
+ aliases = client.list_aliases
5
6
  indices = client.list_indices
6
7
 
7
- if indices.empty?
8
- puts "No indices found in the cluster."
8
+ single_aliases = aliases.select { |alias_name, indices| indices.length == 1 && !alias_name.start_with?('.') }
9
+ unaliased_indices = indices.reject { |index| aliases.values.flatten.include?(index) || index.start_with?('.') || client.index_closed?(index) }
10
+
11
+ # Create a combined list with sequential numbering
12
+ options = []
13
+
14
+ if single_aliases.empty? && unaliased_indices.empty?
15
+ puts "No indices or aliases found in the cluster."
9
16
  puts "Please create an index first."
10
17
  exit 0
11
18
  end
12
19
 
13
- puts "Available indices:"
14
- indices.each_with_index do |index_name, index|
15
- puts "#{index + 1}. #{index_name}"
20
+ puts "Available indices and aliases:"
21
+
22
+ # Show aliases first
23
+ if single_aliases.any?
24
+ single_aliases.each do |alias_name, indices|
25
+ option_number = options.length + 1
26
+ options << { type: :alias, name: alias_name, index: indices.first }
27
+ puts "#{option_number}. #{alias_name} -> #{indices.first}"
28
+ end
29
+ end
30
+
31
+ # Show unaliased indices
32
+ if unaliased_indices.any?
33
+ unaliased_indices.each do |index_name|
34
+ option_number = options.length + 1
35
+ options << { type: :index, name: index_name, index: index_name }
36
+ puts "#{option_number}. #{index_name}"
37
+ end
16
38
  end
17
39
 
18
- puts "\nPlease select an index by number (1-#{indices.length}):"
40
+ puts "\nPlease select an index or alias by number (1-#{options.length}):"
19
41
  selection_input = STDIN.gets&.chomp
20
42
  if selection_input.nil?
21
43
  puts "No input provided. Exiting."
@@ -23,24 +45,13 @@ module SchemaTools
23
45
  end
24
46
  selection = selection_input.to_i
25
47
 
26
- if selection < 1 || selection > indices.length
48
+ if selection < 1 || selection > options.length
27
49
  puts "Invalid selection. Please run the task again and select a valid number."
28
50
  exit 1
29
51
  end
30
52
 
31
- selected_index = indices[selection - 1]
32
- puts "Selected index: #{selected_index}"
33
-
34
- # Fetch the mappings for the selected index
35
- puts "Fetching mappings for #{selected_index}..."
36
- mappings = client.get_index_mappings(selected_index)
37
-
38
- if mappings.nil?
39
- puts "Failed to fetch mappings for #{selected_index}"
40
- exit 1
41
- end
42
-
43
- puts "Mappings fetched successfully."
53
+ selected_option = options[selection - 1]
54
+ puts "Selected #{selected_option[:type]}: #{selected_option[:name]}"
44
55
 
45
56
  # Prompt user for number of documents to seed
46
57
  puts "\nHow many documents would you like to seed?"
@@ -56,9 +67,7 @@ module SchemaTools
56
67
  exit 1
57
68
  end
58
69
 
59
- puts "Seeding #{num_docs} documents from #{selected_index}..."
60
-
61
- # Call the seeding function
62
- Seed.seed_data(num_docs, mappings, client, selected_index)
70
+ seeder = Seeder::Seeder.new(index_or_alias_name: selected_option[:name], client: client)
71
+ seeder.seed(num_docs: num_docs, batch_size: 5)
63
72
  end
64
73
  end
@@ -0,0 +1,7 @@
1
+ module SchemaTools::Seeder
2
+ class BaseDocSeeder
3
+ def generate_document
4
+ raise NotImplementedError, "Subclasses must implement #generate_document"
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,15 @@
1
+ module SchemaTools::Seeder
2
+ # To add a custom document seeder for a schema, add a file called
3
+ # schemas/{alias_name}/doc_seeder.rb with a class DocSeeder that extends from CustomDocSeeder
4
+ class CustomDocSeeder < BaseDocSeeder
5
+ attr_reader :index_or_alias_name
6
+
7
+ def initialize(index_or_alias_name)
8
+ @index_or_alias_name = index_or_alias_name
9
+ end
10
+
11
+ def generate_document
12
+ raise NotImplementedError, "Subclasses must implement #generate_document"
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,451 @@
1
+ require 'securerandom'
2
+
3
+ module SchemaTools::Seeder
4
+ # Create a seed document by generating random values of correct types for an index mappings
5
+ class MappingsDocSeeder < BaseDocSeeder
6
+
7
+ # mappings: OpenSearch/Elasticsearch index mappings
8
+ def initialize(mappings)
9
+ @mappings = mappings
10
+ end
11
+
12
+ def generate_document()
13
+ document = {}
14
+
15
+ (@mappings.dig('properties') || {}).each do |field_name, field_config|
16
+ value = self.class.generate_field_value(field_config)
17
+ # Skip fields that return nil (like alias fields)
18
+ document[field_name] = value unless value.nil?
19
+ end
20
+
21
+ document
22
+ end
23
+
24
# Generates a random value for a single mapped field.
#
# The field type is read from field_config['type']. When the type is absent,
# a field that carries 'properties' is treated as an object (object fields
# may be declared with only their sub-properties), and anything else falls
# back to keyword.
#
# @param field_config [Hash] one field's mapping, using string keys
#   ('type', 'properties', 'format')
# @return [Object, nil] a generated value; nil for alias fields, which the
#   caller is expected to skip
def self.generate_field_value(field_config)
  field_type = field_config['type'] || (field_config['properties'] ? 'object' : 'keyword')

  case field_type
  when 'text' then generate_text_value
  when 'keyword' then generate_keyword_value
  when 'long', 'integer' then generate_integer_value
  when 'short' then generate_short_value
  when 'float', 'double' then generate_float_value
  when 'boolean' then generate_boolean_value
  when 'date' then generate_date_value(field_config['format'])
  when 'object' then generate_object_value(field_config['properties'])
  when 'nested' then generate_nested_value(field_config['properties'])
  when 'rank_features' then generate_rank_features_value
  when 'completion' then generate_completion_value
  when 'search_as_you_type' then generate_search_as_you_type_value
  when 'token_count' then generate_token_count_value
  when 'alias' then nil # Skip alias fields - they point to other fields
  when 'byte' then generate_byte_value
  when 'half_float' then generate_half_float_value
  when 'scaled_float' then generate_scaled_float_value
  when 'unsigned_long' then generate_unsigned_long_value
  when 'date_nanos' then generate_date_nanos_value
  when 'wildcard' then generate_wildcard_value
  when 'constant_keyword' then generate_constant_keyword_value
  when 'geo_shape' then generate_geo_shape_value
  when 'date_range' then generate_date_range_value
  when 'integer_range' then generate_integer_range_value
  when 'float_range' then generate_float_range_value
  when 'long_range' then generate_long_range_value
  when 'double_range' then generate_double_range_value
  when 'ip_range' then generate_ip_range_value
  when 'geo_point' then generate_geo_point_value
  when 'ip' then generate_ip_value
  when 'binary' then generate_binary_value
  else
    # Default to keyword for unknown types
    generate_keyword_value
  end
end
96
+
97
+ def self.generate_text_value
98
+ # Generate a paragraph of 10-50 words
99
+ word_count = rand(10..50)
100
+ word_count.times.map { dictionary_words.sample }.join(' ')
101
+ end
102
+
103
+ def self.generate_keyword_value
104
+ # Generate a short phrase or single word
105
+ case rand(1..4)
106
+ when 1
107
+ dictionary_words.sample
108
+ when 2
109
+ "#{dictionary_words.sample}_#{rand(1000..9999)}"
110
+ when 3
111
+ "#{dictionary_words.sample} #{dictionary_words.sample}"
112
+ when 4
113
+ "#{dictionary_words.sample}-#{dictionary_words.sample}"
114
+ end
115
+ end
116
+
117
+ def self.generate_integer_value
118
+ # Generate reasonable integer values based on common use cases
119
+ case rand(1..5)
120
+ when 1
121
+ rand(1..1000) # Small positive numbers
122
+ when 2
123
+ rand(1_000_000..999_999_999) # Large IDs
124
+ when 3
125
+ rand(-100..100) # Small range including negatives
126
+ when 4
127
+ rand(1..100) # Percentages/scores
128
+ when 5
129
+ rand(1..365) # Days/periods
130
+ end
131
+ end
132
+
133
+ def self.generate_short_value
134
+ # Generate short values within Java short range (-32,768 to 32,767)
135
+ case rand(1..3)
136
+ when 1
137
+ rand(1..100) # Small positive numbers (common for ratings, counts)
138
+ when 2
139
+ rand(-100..100) # Small range including negatives
140
+ when 3
141
+ rand(1..10) # Very small numbers (ratings, flags)
142
+ end
143
+ end
144
+
145
+ def self.generate_float_value
146
+ # Generate decimal numbers
147
+ case rand(1..3)
148
+ when 1
149
+ (rand * 100).round(2) # 0-100 with 2 decimal places
150
+ when 2
151
+ (rand * 1000).round(4) # 0-1000 with 4 decimal places
152
+ when 3
153
+ (rand * 10 - 5).round(3) # -5 to 5 with 3 decimal places
154
+ end
155
+ end
156
+
157
+ def self.generate_boolean_value
158
+ [true, false].sample
159
+ end
160
+
161
+ def self.generate_date_value(format = nil)
162
+ # Generate a random date within the last year
163
+ start_time = Time.now - (365 * 24 * 60 * 60) # one year ago
164
+ random_time = Time.at(start_time.to_i + rand(Time.now.to_i - start_time.to_i))
165
+
166
+ case format
167
+ when 'epoch_millis'
168
+ (random_time.to_f * 1000).to_i
169
+ when 'epoch_second'
170
+ random_time.to_i
171
+ when 'yyyy-MM-dd'
172
+ random_time.strftime('%Y-%m-%d')
173
+ when 'yyyy-MM-dd HH:mm:ss'
174
+ random_time.strftime('%Y-%m-%d %H:%M:%S')
175
+ when 'MM/dd/yyyy'
176
+ random_time.strftime('%m/%d/%Y')
177
+ when 'dd-MM-yyyy'
178
+ random_time.strftime('%d-%m-%Y')
179
+ else
180
+ # Default to ISO 8601 format
181
+ random_time.iso8601
182
+ end
183
+ end
184
+
185
# Generates a Hash of random values for an object field's sub-properties.
#
# @param properties [Hash, nil] sub-field name => sub-field mapping (string keys)
# @return [Hash] sub-field name => generated value; empty when properties is nil.
#   Sub-fields whose generated value is nil (such as alias fields) are left
#   out, matching how generate_document treats top-level fields.
def self.generate_object_value(properties)
  return {} unless properties

  properties.each_with_object({}) do |(nested_field_name, nested_field_config), object|
    # If a field has properties but no explicit type, it's an object
    field_type = nested_field_config['type'] || (nested_field_config['properties'] ? 'object' : 'keyword')

    value = generate_field_value({
      'type' => field_type,
      'properties' => nested_field_config['properties'],
      'format' => nested_field_config['format']
    })

    # Skip fields that return nil (like alias fields)
    object[nested_field_name] = value unless value.nil?
  end
end
202
+
203
# Generates 1-3 random objects for a nested field.
#
# Each sub-field config is handed to generate_field_value with STRING keys
# ('type', 'properties', 'format'), because that is what generate_field_value
# reads. Symbol keys would leave the type unset and every sub-field would
# come out as a keyword.
#
# @param properties [Hash, nil] sub-field name => sub-field mapping (string keys)
# @return [Array<Hash>] generated nested objects; empty when properties is nil.
#   Sub-fields whose generated value is nil (such as alias fields) are left
#   out, matching how generate_document treats top-level fields.
def self.generate_nested_value(properties)
  return [] unless properties

  # Generate 1-3 nested objects
  Array.new(rand(1..3)) do
    properties.each_with_object({}) do |(nested_field_name, nested_field_config), object|
      # If a field has properties but no explicit type, it's an object
      field_type = nested_field_config['type'] || (nested_field_config['properties'] ? 'object' : 'keyword')

      value = generate_field_value({
        'type' => field_type,
        'properties' => nested_field_config['properties'],
        'format' => nested_field_config['format']
      })

      # Skip fields that return nil (like alias fields)
      object[nested_field_name] = value unless value.nil?
    end
  end
end
224
+
225
+ def self.generate_rank_features_value
226
+ # Generate a rank_features object with random feature names and scores
227
+ # OpenSearch requires positive normal floats with minimum value of 1.17549435E-38
228
+ feature_count = rand(3..8)
229
+ features = {}
230
+
231
+ feature_count.times do
232
+ feature_name = "#{dictionary_words.sample}_#{rand(100..999)}"
233
+ # Generate values between 1.0e-30 and 1.0 to ensure positive normal floats
234
+ # Use a higher minimum to avoid floating-point precision issues
235
+ min_value = 1.0e-30 # Much higher than the OpenSearch minimum
236
+ value = rand(min_value..1.0).round(4)
237
+ # Ensure we never get exactly 0.0 due to floating-point precision
238
+ value = [value, 1.0e-30].max
239
+ features[feature_name] = value
240
+ end
241
+
242
+ features
243
+ end
244
+
245
+ def self.generate_geo_point_value
246
+ # Generate random latitude/longitude coordinates
247
+ {
248
+ lat: (rand * 180 - 90).round(6), # -90 to 90
249
+ lon: (rand * 360 - 180).round(6) # -180 to 180
250
+ }
251
+ end
252
+
253
+ def self.generate_ip_value
254
+ # Generate random IP addresses
255
+ case rand(1..2)
256
+ when 1
257
+ # IPv4
258
+ "#{rand(1..254)}.#{rand(0..255)}.#{rand(0..255)}.#{rand(1..254)}"
259
+ when 2
260
+ # IPv6 (simplified)
261
+ "2001:db8::#{rand(1000..9999)}:#{rand(1000..9999)}:#{rand(1000..9999)}:#{rand(1000..9999)}"
262
+ end
263
+ end
264
+
265
# Generates Base64-encoded random bytes for a binary field.
#
# Encoding uses the core Array#pack 'm0' directive (Base64 without line
# feeds), so the method does not need the separate base64 library and never
# emits embedded newlines, whatever the byte count.
#
# @param byte_count [Integer] number of random bytes to encode (default 32)
# @return [String] single-line Base64 string
def self.generate_binary_value(byte_count = 32)
  random_bytes = Array.new(byte_count) { rand(256) }.pack('C*')
  [random_bytes].pack('m0')
end
271
+
272
+ def self.generate_completion_value
273
+ # Generate completion suggestions
274
+ {
275
+ 'input' => [dictionary_words.sample, "#{dictionary_words.sample} #{dictionary_words.sample}"],
276
+ 'weight' => rand(1..100)
277
+ }
278
+ end
279
+
280
+ def self.generate_search_as_you_type_value
281
+ # Generate search-as-you-type text
282
+ "#{dictionary_words.sample} #{dictionary_words.sample} #{dictionary_words.sample}"
283
+ end
284
+
285
+ def self.generate_token_count_value
286
+ # Generate token count (integer representing number of tokens)
287
+ rand(1..50)
288
+ end
289
+
290
+ def self.generate_byte_value
291
+ # Generate byte values (-128 to 127)
292
+ rand(-128..127)
293
+ end
294
+
295
+ def self.generate_half_float_value
296
+ # Generate half-float values (smaller range than regular float)
297
+ (rand * 100 - 50).round(2)
298
+ end
299
+
300
+ def self.generate_scaled_float_value
301
+ # Generate scaled float values (multiplied by scaling factor)
302
+ (rand * 100).round(2)
303
+ end
304
+
305
+ def self.generate_unsigned_long_value
306
+ # Generate unsigned long values (0 to 2^64-1, but keep reasonable)
307
+ rand(0..999_999_999)
308
+ end
309
+
310
+ def self.generate_date_nanos_value
311
+ # Generate date with nanosecond precision
312
+ start_time = Time.now - (365 * 24 * 60 * 60)
313
+ random_time = Time.at(start_time.to_i + rand(Time.now.to_i - start_time.to_i))
314
+ random_time.iso8601(9) # Include nanoseconds
315
+ end
316
+
317
+ def self.generate_wildcard_value
318
+ # Generate wildcard text (similar to keyword but optimized for wildcard queries)
319
+ "#{dictionary_words.sample}_#{rand(1000..9999)}"
320
+ end
321
+
322
+ def self.generate_constant_keyword_value
323
+ # Generate constant keyword (always the same value)
324
+ "constant_value"
325
+ end
326
+
327
+ def self.generate_geo_shape_value
328
+ # Generate simple geo shapes (point)
329
+ {
330
+ 'type' => "point",
331
+ 'coordinates' => [rand(-180.0..180.0).round(6), rand(-90.0..90.0).round(6)]
332
+ }
333
+ end
334
+
335
+ def self.generate_date_range_value
336
+ # Generate date range
337
+ start_date = Time.now - (365 * 24 * 60 * 60)
338
+ end_date = Time.now
339
+ {
340
+ 'gte' => start_date.iso8601,
341
+ 'lte' => end_date.iso8601
342
+ }
343
+ end
344
+
345
+ def self.generate_integer_range_value
346
+ # Generate integer range
347
+ start_val = rand(-1000..1000)
348
+ end_val = start_val + rand(1..1000)
349
+ {
350
+ 'gte' => start_val,
351
+ 'lte' => end_val
352
+ }
353
+ end
354
+
355
+ def self.generate_float_range_value
356
+ # Generate float range
357
+ start_val = (rand * 100 - 50).round(2)
358
+ end_val = start_val + (rand * 100).round(2)
359
+ {
360
+ 'gte' => start_val,
361
+ 'lte' => end_val
362
+ }
363
+ end
364
+
365
+ def self.generate_long_range_value
366
+ # Generate long range
367
+ start_val = rand(-1_000_000..1_000_000)
368
+ end_val = start_val + rand(1..1_000_000)
369
+ {
370
+ 'gte' => start_val,
371
+ 'lte' => end_val
372
+ }
373
+ end
374
+
375
+ def self.generate_double_range_value
376
+ # Generate double range
377
+ start_val = (rand * 1000 - 500).round(4)
378
+ end_val = start_val + (rand * 1000).round(4)
379
+ {
380
+ 'gte' => start_val,
381
+ 'lte' => end_val
382
+ }
383
+ end
384
+
385
+ def self.generate_ip_range_value
386
+ # Generate IP range with proper ordering
387
+ # Generate a base IP and add a small range to it
388
+ base_ip = "#{rand(1..254)}.#{rand(0..255)}.#{rand(0..255)}.#{rand(1..254)}"
389
+
390
+ # Parse the last octet and create a small range
391
+ parts = base_ip.split('.')
392
+ last_octet = parts[3].to_i
393
+ start_last = [last_octet, 254].min
394
+ end_last = [start_last + rand(1..10), 254].min
395
+
396
+ start_ip = "#{parts[0]}.#{parts[1]}.#{parts[2]}.#{start_last}"
397
+ end_ip = "#{parts[0]}.#{parts[1]}.#{parts[2]}.#{end_last}"
398
+
399
+ {
400
+ 'gte' => start_ip,
401
+ 'lte' => end_ip
402
+ }
403
+ end
404
+
405
+ def self.dictionary_words
406
+ @dictionary_words ||= begin
407
+ File.readlines('/usr/share/dict/words')
408
+ .map(&:chomp)
409
+ .select { |w| w.length.between?(3, 10) }
410
+ rescue Errno::ENOENT
411
+ %w[lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor
412
+ incididunt ut labore et dolore magna aliqua enim ad minim veniam quis nostrud
413
+ exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat duis aute
414
+ irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat
415
+ nulla pariatur excepteur sint occaecat cupidatat non proident sunt in culpa
416
+ qui officia deserunt mollit anim id est laborum search engine data ruby
417
+ document index mapping schema elasticsearch opensearch cluster node shard
418
+ replica primary secondary analysis tokenizer filter analyzer query filter
419
+ aggregation pipeline script painless groovy mustache template kibana
420
+ logstash beats metricbeat filebeat packetbeat heartbeat auditbeat
421
+ functionbeat winlogbeat journalbeat apm agent apm server fleet agent
422
+ policy enrollment token integration package endpoint security detection
423
+ rule machine learning anomaly detection forecasting classification
424
+ regression clustering outlier detection natural language processing
425
+ vector search semantic search neural search transformer embedding
426
+ vector database similarity search recommendation system personalization
427
+ real-time streaming batch processing event sourcing cqrs microservices
428
+ distributed system scalability performance optimization monitoring
429
+ observability logging metrics tracing alerting notification dashboard
430
+ visualization reporting analytics business intelligence data science
431
+ machine learning artificial intelligence deep learning neural network
432
+ algorithm model training inference prediction classification regression
433
+ clustering dimensionality reduction feature engineering data preprocessing
434
+ validation testing deployment production staging development environment
435
+ configuration management version control continuous integration continuous
436
+ deployment devops infrastructure as code containerization orchestration
437
+ kubernetes docker swarm mesos nomad consul etcd zookeeper redis memcached
438
+ rabbitmq kafka pulsar nats jetstream grpc rest api graphql websocket
439
+ http https tls ssl certificate authentication authorization oauth jwt
440
+ saml ldap active directory kerberos rbac abac policy enforcement
441
+ compliance governance security audit vulnerability assessment penetration
442
+ testing threat modeling risk management incident response disaster recovery
443
+ backup restore high availability fault tolerance load balancing auto-scaling
444
+ horizontal scaling vertical scaling sharding partitioning replication
445
+ consistency eventual consistency strong consistency cap theorem acid
446
+ base distributed consensus raft paxos byzantine fault tolerance
447
+ ]
448
+ end
449
+ end
450
+ end
451
+ end
@@ -0,0 +1,20 @@
1
+ require 'securerandom'
2
+ require 'active_support/all'
3
+
4
module SchemaTools::Seeder
  # Generate a document by choosing a document at random from an array of sample documents
  #
  # The seeder looks for sample docs in schemas/{alias_name}/sample_docs.json
  # in the form: { "hits": [ { "_source": { "title": "Foo", "desc": "Bar" } }, ... ] }
  class SampleDocSeeder < BaseDocSeeder
    # @param sample_docs [Hash] parsed sample_docs.json content. 'hits' is
    #   normally an Array of hits; a Hash that itself holds a 'hits' Array is
    #   also accepted.
    #   NOTE(review): the nested form is meant to allow pasting a raw search
    #   response unchanged - confirm this is desirable.
    # @raise [ArgumentError] when no hit with a '_source' can be found
    def initialize(sample_docs)
      hits = sample_docs.is_a?(Hash) ? sample_docs['hits'] : nil
      hits = hits['hits'] if hits.is_a?(Hash)

      unless hits.is_a?(Array)
        raise ArgumentError, 'sample docs must be a Hash with a "hits" Array'
      end

      # Core map/compact instead of ActiveSupport's pluck; hits without a
      # '_source' are dropped so generate_document never returns nil.
      @sample_docs = hits.map { |hit| hit['_source'] if hit.is_a?(Hash) }.compact

      raise ArgumentError, 'sample docs contain no hits with a "_source"' if @sample_docs.empty?
    end

    # @return [Hash] one of the sample documents, chosen at random
    def generate_document
      @sample_docs.sample
    end
  end
end
data/lib/seeder/seeder.rb CHANGED
@@ -1,539 +1,117 @@
1
1
  require 'json'
2
2
  require 'time'
3
3
 
4
- module Seed
5
- # Word list for generating realistic text content
6
- WORD_LIST = %w[
7
- lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor
8
- incididunt ut labore et dolore magna aliqua enim ad minim veniam quis nostrud
9
- exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat duis aute
10
- irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat
11
- nulla pariatur excepteur sint occaecat cupidatat non proident sunt in culpa
12
- qui officia deserunt mollit anim id est laborum search engine data ruby
13
- document index mapping schema elasticsearch opensearch cluster node shard
14
- replica primary secondary analysis tokenizer filter analyzer query filter
15
- aggregation pipeline script painless groovy mustache template kibana
16
- logstash beats metricbeat filebeat packetbeat heartbeat auditbeat
17
- functionbeat winlogbeat journalbeat apm agent apm server fleet agent
18
- policy enrollment token integration package endpoint security detection
19
- rule machine learning anomaly detection forecasting classification
20
- regression clustering outlier detection natural language processing
21
- vector search semantic search neural search transformer embedding
22
- vector database similarity search recommendation system personalization
23
- real-time streaming batch processing event sourcing cqrs microservices
24
- distributed system scalability performance optimization monitoring
25
- observability logging metrics tracing alerting notification dashboard
26
- visualization reporting analytics business intelligence data science
27
- machine learning artificial intelligence deep learning neural network
28
- algorithm model training inference prediction classification regression
29
- clustering dimensionality reduction feature engineering data preprocessing
30
- validation testing deployment production staging development environment
31
- configuration management version control continuous integration continuous
32
- deployment devops infrastructure as code containerization orchestration
33
- kubernetes docker swarm mesos nomad consul etcd zookeeper redis memcached
34
- rabbitmq kafka pulsar nats jetstream grpc rest api graphql websocket
35
- http https tls ssl certificate authentication authorization oauth jwt
36
- saml ldap active directory kerberos rbac abac policy enforcement
37
- compliance governance security audit vulnerability assessment penetration
38
- testing threat modeling risk management incident response disaster recovery
39
- backup restore high availability fault tolerance load balancing auto-scaling
40
- horizontal scaling vertical scaling sharding partitioning replication
41
- consistency eventual consistency strong consistency cap theorem acid
42
- base distributed consensus raft paxos byzantine fault tolerance
43
- ].freeze
44
-
45
- def self.seed_data(num_docs, mappings_json, client, index_name)
46
- puts "Seeding #{num_docs} documents to index: #{index_name}"
47
-
48
- # Parse the mappings to understand the schema
49
- schema = parse_mappings(mappings_json)
50
- puts "Parsed schema with #{schema.keys.length} top-level fields"
51
-
52
- # Generate documents in batches for efficiency
53
- # Reduced batch size to avoid circuit breaker issues with large documents
54
- batch_size = 25 # Reduced from 100 to 25 for large documents
55
- total_batches = (num_docs.to_f / batch_size).ceil
56
-
57
- (1..total_batches).each do |batch_num|
58
- docs_in_batch = [batch_size, num_docs - (batch_num - 1) * batch_size].min
59
- puts "Generating batch #{batch_num}/#{total_batches} (#{docs_in_batch} documents)..."
60
-
61
- documents = generate_document_batch(docs_in_batch, schema)
62
-
63
- puts "Indexing batch #{batch_num}..."
64
- begin
65
- response = client.bulk_index(documents, index_name)
66
-
67
- # Check for errors in bulk response
68
- if response['errors']
69
- error_items = response['items'].select { |item| item.dig('index', 'status') >= 400 }
70
- error_count = error_items.length
71
- if error_count > 0
72
- puts "WARN: #{error_count} documents failed to index in batch #{batch_num}"
73
-
74
- # Print first few errors for debugging
75
- error_items.first(3).each_with_index do |item, index|
76
- error_info = item.dig('index', 'error')
77
- if error_info
78
- puts " Error #{index + 1}: #{error_info['type']} - #{error_info['reason']}"
79
- if error_info['caused_by']
80
- puts " Caused by: #{error_info['caused_by']['type']} - #{error_info['caused_by']['reason']}"
81
- end
82
- end
83
- end
84
-
85
- if error_count > 3
86
- puts " ... and #{error_count - 3} more errors"
87
- end
88
- end
89
- end
90
-
91
- puts "Successfully indexed batch #{batch_num}"
92
- rescue => e
93
- if e.message.include?('circuit_breaking_exception') || e.message.include?('HTTP 429')
94
- puts "ERROR: Circuit breaker triggered - OpenSearch cluster is out of memory"
95
- puts "Consider:"
96
- puts " 1. Reducing batch size further (currently #{batch_size})"
97
- puts " 2. Increasing OpenSearch heap size"
98
- puts " 3. Reducing document size/complexity"
99
- puts " 4. Adding delays between batches"
100
- puts ""
101
- puts "Batch #{batch_num} failed: #{e.message}"
102
- raise StandardError.new("Circuit breaker triggered - OpenSearch cluster is out of memory")
103
- else
104
- puts "Error indexing batch #{batch_num}: #{e.message}"
105
- raise e
106
- end
107
- end
108
-
109
- # Add a small delay between batches to help with memory pressure
110
- sleep(0.1) if batch_num < total_batches
4
+ module SchemaTools::Seeder
5
+ class Seeder
6
+ def initialize(index_or_alias_name:, client:)
7
+ @client = client
8
+ @index_or_alias_name = index_or_alias_name
9
+ @doc_seeder = initialize_doc_seeder
111
10
  end
112
-
113
- puts "Successfully seeded #{num_docs} documents to #{index_name}"
114
- end
115
11
 
116
- private
12
+ def initialize_doc_seeder
13
+ custom_doc_seeder_class = SchemaTools::SchemaFiles.get_doc_seeder_class(@index_or_alias_name)
14
+ return custom_doc_seeder_class.new(@index_or_alias_name) if custom_doc_seeder_class
117
15
 
118
- def self.parse_mappings(mappings_json)
119
- # Extract the properties from the mappings
120
- properties = mappings_json.dig('properties') || {}
121
- parse_properties(properties)
122
- end
16
+ sample_docs = SchemaTools::SchemaFiles.get_sample_docs(@index_or_alias_name)
17
+ return SampleDocSeeder.new(sample_docs) if sample_docs
123
18
 
124
- def self.parse_properties(properties)
125
- schema = {}
126
-
127
- properties.each do |field_name, field_config|
128
- # If a field has properties but no explicit type, it's an object
129
- field_type = field_config['type'] || (field_config['properties'] ? 'object' : 'keyword')
130
-
131
- schema[field_name] = {
132
- type: field_type,
133
- properties: field_config['properties'],
134
- format: field_config['format']
135
- }
136
- end
137
-
138
- schema
139
- end
19
+ # Resolve alias to actual index name if needed
20
+ actual_index_name = resolve_to_index_name(@index_or_alias_name)
21
+ mappings = @client.get_index_mappings(actual_index_name)
22
+ return MappingsDocSeeder.new(mappings) if mappings
140
23
 
141
- def self.generate_document_batch(count, schema)
142
- count.times.map do
143
- generate_document(schema)
24
+ raise "No custom document seeder, sample documents, or mappings found for #{@index_or_alias_name}"
144
25
  end
145
- end
146
26
 
147
- def self.generate_document(schema)
148
- document = {}
149
-
150
- schema.each do |field_name, field_config|
151
- value = generate_field_value(field_config)
152
- # Skip fields that return nil (like alias fields)
153
- document[field_name] = value unless value.nil?
154
- end
27
+ def seed(num_docs:, batch_size: 5)
28
+ puts "Seeding #{num_docs} in batches of #{batch_size} documents from #{@index_or_alias_name} using #{@doc_seeder.class.name}"
155
29
 
156
- document
157
- end
30
+ total_batches = (num_docs.to_f / batch_size).ceil
31
+ total_seeded_docs = 0
158
32
 
159
- def self.generate_field_value(field_config)
160
- field_type = field_config[:type]
161
-
162
- case field_type
163
- when 'text'
164
- generate_text_value
165
- when 'keyword'
166
- generate_keyword_value
167
- when 'long', 'integer'
168
- generate_integer_value
169
- when 'short'
170
- generate_short_value
171
- when 'float', 'double'
172
- generate_float_value
173
- when 'boolean'
174
- generate_boolean_value
175
- when 'date'
176
- generate_date_value(field_config[:format])
177
- when 'object'
178
- generate_object_value(field_config[:properties])
179
- when 'nested'
180
- generate_nested_value(field_config[:properties])
181
- when 'rank_features'
182
- generate_rank_features_value
183
- when 'completion'
184
- generate_completion_value
185
- when 'search_as_you_type'
186
- generate_search_as_you_type_value
187
- when 'token_count'
188
- generate_token_count_value
189
- when 'alias'
190
- # Skip alias fields - they point to other fields
191
- nil
192
- when 'byte'
193
- generate_byte_value
194
- when 'half_float'
195
- generate_half_float_value
196
- when 'scaled_float'
197
- generate_scaled_float_value
198
- when 'unsigned_long'
199
- generate_unsigned_long_value
200
- when 'date_nanos'
201
- generate_date_nanos_value
202
- when 'wildcard'
203
- generate_wildcard_value
204
- when 'constant_keyword'
205
- generate_constant_keyword_value
206
- when 'geo_shape'
207
- generate_geo_shape_value
208
- when 'date_range'
209
- generate_date_range_value
210
- when 'integer_range'
211
- generate_integer_range_value
212
- when 'float_range'
213
- generate_float_range_value
214
- when 'long_range'
215
- generate_long_range_value
216
- when 'double_range'
217
- generate_double_range_value
218
- when 'ip_range'
219
- generate_ip_range_value
220
- when 'geo_point'
221
- generate_geo_point_value
222
- when 'ip'
223
- generate_ip_value
224
- when 'binary'
225
- generate_binary_value
226
- else
227
- # Default to keyword for unknown types
228
- generate_keyword_value
229
- end
230
- end
33
+ num_docs.times.each_slice(batch_size).with_index(1) do |batch_range, batch_num|
34
+ docs_in_batch = batch_range.size
231
35
 
232
- def self.generate_text_value
233
- # Generate a paragraph of 10-50 words
234
- word_count = rand(10..50)
235
- word_count.times.map { WORD_LIST.sample }.join(' ')
236
- end
36
+ puts "Generating batch #{batch_num}/#{total_batches} (#{docs_in_batch} documents)..."
37
+ documents = Array.new(docs_in_batch) do
38
+ @doc_seeder.generate_document
39
+ end
237
40
 
238
- def self.generate_keyword_value
239
- # Generate a short phrase or single word
240
- case rand(1..4)
241
- when 1
242
- WORD_LIST.sample
243
- when 2
244
- "#{WORD_LIST.sample}_#{rand(1000..9999)}"
245
- when 3
246
- "#{WORD_LIST.sample} #{WORD_LIST.sample}"
247
- when 4
248
- "#{WORD_LIST.sample}-#{WORD_LIST.sample}"
41
+ puts "Indexing batch #{batch_num}..."
42
+ response = bulk_index(documents)
43
+ seeded_docs = documents.length - print_errors(response)
44
+ total_seeded_docs += seeded_docs
45
+ puts "Indexed #{seeded_docs} documents for batch #{batch_num}" if seeded_docs
46
+
47
+ sleep(0.1) if batch_num < total_batches # small delay to help with memory pressure
48
+ rescue StandardError => e
49
+ puts "Batch #{batch_num} failed: #{e.message}"
50
+ handle_circuit_breaker_exception(e, batch_size)
51
+ raise e
52
+ end
53
+ puts "Seeded #{total_seeded_docs} documents to #{@index_or_alias_name}"
249
54
  end
250
- end
251
55
 
252
- def self.generate_integer_value
253
- # Generate reasonable integer values based on common use cases
254
- case rand(1..5)
255
- when 1
256
- rand(1..1000) # Small positive numbers
257
- when 2
258
- rand(1_000_000..999_999_999) # Large IDs
259
- when 3
260
- rand(-100..100) # Small range including negatives
261
- when 4
262
- rand(1..100) # Percentages/scores
263
- when 5
264
- rand(1..365) # Days/periods
56
+ def bulk_index(documents)
57
+ @client.bulk_index(documents, @index_or_alias_name)
265
58
  end
266
- end
267
59
 
268
- def self.generate_short_value
269
- # Generate short values within Java short range (-32,768 to 32,767)
270
- case rand(1..3)
271
- when 1
272
- rand(1..100) # Small positive numbers (common for ratings, counts)
273
- when 2
274
- rand(-100..100) # Small range including negatives
275
- when 3
276
- rand(1..10) # Very small numbers (ratings, flags)
60
+ def handle_circuit_breaker_exception(error, batch_size)
61
+ return unless error&.message&.match?(/circuit_breaking_exception|HTTP 429/)
62
+
63
+ puts 'ERROR: Circuit breaker triggered - OpenSearch cluster is out of memory'
64
+ puts 'Consider:'
65
+ puts " 1. Reducing batch size further (currently #{batch_size})"
66
+ puts ' 2. Increasing OpenSearch heap size'
67
+ puts ' 3. Reducing document size/complexity'
68
+ puts ' 4. Adding delays between batches'
69
+ puts ''
70
+ raise StandardError, 'Circuit breaker triggered - OpenSearch cluster is out of memory'
277
71
  end
278
- end
279
72
 
280
- def self.generate_float_value
281
- # Generate decimal numbers
282
- case rand(1..3)
283
- when 1
284
- (rand * 100).round(2) # 0-100 with 2 decimal places
285
- when 2
286
- (rand * 1000).round(4) # 0-1000 with 4 decimal places
287
- when 3
288
- (rand * 10 - 5).round(3) # -5 to 5 with 3 decimal places
289
- end
290
- end
73
+ def print_errors(response)
74
+ return 0 unless response['errors']
291
75
 
292
- def self.generate_boolean_value
293
- [true, false].sample
294
- end
76
+ error_items = response['items'].select { |item| item.dig('index', 'status') >= 400 }
77
+ error_count = error_items.length
78
+ return 0 unless error_count.positive?
295
79
 
296
- def self.generate_date_value(format = nil)
297
- # Generate a random date within the last year
298
- start_time = Time.now - (365 * 24 * 60 * 60) # one year ago
299
- random_time = Time.at(start_time.to_i + rand(Time.now.to_i - start_time.to_i))
300
-
301
- case format
302
- when 'epoch_millis'
303
- (random_time.to_f * 1000).to_i
304
- when 'epoch_second'
305
- random_time.to_i
306
- when 'yyyy-MM-dd'
307
- random_time.strftime('%Y-%m-%d')
308
- when 'yyyy-MM-dd HH:mm:ss'
309
- random_time.strftime('%Y-%m-%d %H:%M:%S')
310
- when 'MM/dd/yyyy'
311
- random_time.strftime('%m/%d/%Y')
312
- when 'dd-MM-yyyy'
313
- random_time.strftime('%d-%m-%Y')
314
- else
315
- # Default to ISO 8601 format
316
- random_time.iso8601
317
- end
318
- end
80
+ puts "WARN: #{error_count} documents failed to index"
319
81
 
320
- def self.generate_object_value(properties)
321
- return {} unless properties
322
-
323
- object = {}
324
- properties.each do |nested_field_name, nested_field_config|
325
- # If a field has properties but no explicit type, it's an object
326
- field_type = nested_field_config['type'] || (nested_field_config['properties'] ? 'object' : 'keyword')
327
-
328
- parsed_config = {
329
- type: field_type,
330
- properties: nested_field_config['properties'],
331
- format: nested_field_config['format']
332
- }
333
- object[nested_field_name] = generate_field_value(parsed_config)
334
- end
335
- object
336
- end
82
+ # Print first few errors for debugging
83
+ error_items.first(3).each_with_index do |item, index|
84
+ error_info = item.dig('index', 'error')
85
+ next unless error_info
337
86
 
338
- def self.generate_nested_value(properties)
339
- return [] unless properties
340
-
341
- # Generate 1-3 nested objects
342
- count = rand(1..3)
343
- count.times.map do
344
- object = {}
345
- properties.each do |nested_field_name, nested_field_config|
346
- # If a field has properties but no explicit type, it's an object
347
- field_type = nested_field_config['type'] || (nested_field_config['properties'] ? 'object' : 'keyword')
348
-
349
- parsed_config = {
350
- type: field_type,
351
- properties: nested_field_config['properties'],
352
- format: nested_field_config['format']
353
- }
354
- object[nested_field_name] = generate_field_value(parsed_config)
87
+ print_error_item(error_info, index)
355
88
  end
356
- object
357
- end
358
- end
359
89
 
360
- def self.generate_rank_features_value
361
- # Generate a rank_features object with random feature names and scores
362
- # OpenSearch requires positive normal floats with minimum value of 1.17549435E-38
363
- feature_count = rand(3..8)
364
- features = {}
365
-
366
- feature_count.times do
367
- feature_name = "#{WORD_LIST.sample}_#{rand(100..999)}"
368
- # Generate values between 1.0e-30 and 1.0 to ensure positive normal floats
369
- # Use a higher minimum to avoid floating-point precision issues
370
- min_value = 1.0e-30 # Much higher than the OpenSearch minimum
371
- value = rand(min_value..1.0).round(4)
372
- # Ensure we never get exactly 0.0 due to floating-point precision
373
- value = [value, 1.0e-30].max
374
- features[feature_name] = value
90
+ puts " ... and #{error_count - 3} more errors" if error_count > 3
91
+ error_count
375
92
  end
376
-
377
- features
378
- end
379
93
 
380
- def self.generate_geo_point_value
381
- # Generate random latitude/longitude coordinates
382
- {
383
- lat: (rand * 180 - 90).round(6), # -90 to 90
384
- lon: (rand * 360 - 180).round(6) # -180 to 180
385
- }
386
- end
94
+ def print_error_item(error_info, index)
95
+ puts " Error #{index + 1}: #{error_info['type']} - #{error_info['reason']}"
96
+ return unless error_info['caused_by']
387
97
 
388
- def self.generate_ip_value
389
- # Generate random IP addresses
390
- case rand(1..2)
391
- when 1
392
- # IPv4
393
- "#{rand(1..254)}.#{rand(0..255)}.#{rand(0..255)}.#{rand(1..254)}"
394
- when 2
395
- # IPv6 (simplified)
396
- "2001:db8::#{rand(1000..9999)}:#{rand(1000..9999)}:#{rand(1000..9999)}:#{rand(1000..9999)}"
98
+ puts " Caused by: #{error_info['caused_by']['type']} - #{error_info['caused_by']['reason']}"
397
99
  end
398
- end
399
-
400
- def self.generate_binary_value
401
- # Generate base64 encoded random data
402
- require 'base64'
403
- random_bytes = (0...32).map { rand(256) }.pack('C*')
404
- Base64.encode64(random_bytes).strip
405
- end
406
-
407
- def self.generate_completion_value
408
- # Generate completion suggestions
409
- {
410
- 'input' => [WORD_LIST.sample, "#{WORD_LIST.sample} #{WORD_LIST.sample}"],
411
- 'weight' => rand(1..100)
412
- }
413
- end
414
-
415
- def self.generate_search_as_you_type_value
416
- # Generate search-as-you-type text
417
- "#{WORD_LIST.sample} #{WORD_LIST.sample} #{WORD_LIST.sample}"
418
- end
419
-
420
- def self.generate_token_count_value
421
- # Generate token count (integer representing number of tokens)
422
- rand(1..50)
423
- end
424
-
425
- def self.generate_byte_value
426
- # Generate byte values (-128 to 127)
427
- rand(-128..127)
428
- end
429
-
430
- def self.generate_half_float_value
431
- # Generate half-float values (smaller range than regular float)
432
- (rand * 100 - 50).round(2)
433
- end
434
-
435
- def self.generate_scaled_float_value
436
- # Generate scaled float values (multiplied by scaling factor)
437
- (rand * 100).round(2)
438
- end
439
-
440
- def self.generate_unsigned_long_value
441
- # Generate unsigned long values (0 to 2^64-1, but keep reasonable)
442
- rand(0..999_999_999)
443
- end
444
-
445
- def self.generate_date_nanos_value
446
- # Generate date with nanosecond precision
447
- start_time = Time.now - (365 * 24 * 60 * 60)
448
- random_time = Time.at(start_time.to_i + rand(Time.now.to_i - start_time.to_i))
449
- random_time.iso8601(9) # Include nanoseconds
450
- end
451
100
 
452
- def self.generate_wildcard_value
453
- # Generate wildcard text (similar to keyword but optimized for wildcard queries)
454
- "#{WORD_LIST.sample}_#{rand(1000..9999)}"
455
- end
456
-
457
- def self.generate_constant_keyword_value
458
- # Generate constant keyword (always the same value)
459
- "constant_value"
460
- end
461
-
462
- def self.generate_geo_shape_value
463
- # Generate simple geo shapes (point)
464
- {
465
- 'type' => "point",
466
- 'coordinates' => [rand(-180.0..180.0).round(6), rand(-90.0..90.0).round(6)]
467
- }
468
- end
469
-
470
- def self.generate_date_range_value
471
- # Generate date range
472
- start_date = Time.now - (365 * 24 * 60 * 60)
473
- end_date = Time.now
474
- {
475
- 'gte' => start_date.iso8601,
476
- 'lte' => end_date.iso8601
477
- }
478
- end
479
-
480
- def self.generate_integer_range_value
481
- # Generate integer range
482
- start_val = rand(-1000..1000)
483
- end_val = start_val + rand(1..1000)
484
- {
485
- 'gte' => start_val,
486
- 'lte' => end_val
487
- }
488
- end
489
-
490
- def self.generate_float_range_value
491
- # Generate float range
492
- start_val = (rand * 100 - 50).round(2)
493
- end_val = start_val + (rand * 100).round(2)
494
- {
495
- 'gte' => start_val,
496
- 'lte' => end_val
497
- }
498
- end
101
+ private
499
102
 
500
- def self.generate_long_range_value
501
- # Generate long range
502
- start_val = rand(-1_000_000..1_000_000)
503
- end_val = start_val + rand(1..1_000_000)
504
- {
505
- 'gte' => start_val,
506
- 'lte' => end_val
507
- }
508
- end
509
-
510
- def self.generate_double_range_value
511
- # Generate double range
512
- start_val = (rand * 1000 - 500).round(4)
513
- end_val = start_val + (rand * 1000).round(4)
514
- {
515
- 'gte' => start_val,
516
- 'lte' => end_val
517
- }
518
- end
519
-
520
- def self.generate_ip_range_value
521
- # Generate IP range with proper ordering
522
- # Generate a base IP and add a small range to it
523
- base_ip = "#{rand(1..254)}.#{rand(0..255)}.#{rand(0..255)}.#{rand(1..254)}"
524
-
525
- # Parse the last octet and create a small range
526
- parts = base_ip.split('.')
527
- last_octet = parts[3].to_i
528
- start_last = [last_octet, 254].min
529
- end_last = [start_last + rand(1..10), 254].min
530
-
531
- start_ip = "#{parts[0]}.#{parts[1]}.#{parts[2]}.#{start_last}"
532
- end_ip = "#{parts[0]}.#{parts[1]}.#{parts[2]}.#{end_last}"
533
-
534
- {
535
- 'gte' => start_ip,
536
- 'lte' => end_ip
537
- }
103
+ def resolve_to_index_name(name)
104
+ # If it's an alias, get the actual index name it points to
105
+ if @client.alias_exists?(name)
106
+ indices = @client.get_alias_indices(name)
107
+ if indices.length != 1
108
+ raise "Alias '#{name}' points to multiple indices: #{indices.join(', ')}. Cannot determine which index to use for seeding."
109
+ end
110
+ return indices.first
111
+ end
112
+
113
+ # If it's already an index name, return it as-is
114
+ name
115
+ end
538
116
  end
539
- end
117
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: schema-tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.9
4
+ version: 1.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rich Kuzsma
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-10-17 00:00:00.000000000 Z
11
+ date: 2025-10-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -92,6 +92,10 @@ files:
92
92
  - lib/schema_tools/seed.rb
93
93
  - lib/schema_tools/settings_diff.rb
94
94
  - lib/schema_tools/settings_filter.rb
95
+ - lib/seeder/base_doc_seeder.rb
96
+ - lib/seeder/custom_doc_seeder.rb
97
+ - lib/seeder/mappings_doc_seeder.rb
98
+ - lib/seeder/sample_doc_seeder.rb
95
99
  - lib/seeder/seeder.rb
96
100
  - lib/tasks/schema.rake
97
101
  - lib/tasks/test.rake