search-engine-for-typesense 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +148 -0
- data/app/search_engine/search_engine/app_info.rb +11 -0
- data/app/search_engine/search_engine/index_partition_job.rb +170 -0
- data/lib/generators/search_engine/install/install_generator.rb +20 -0
- data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
- data/lib/generators/search_engine/model/model_generator.rb +86 -0
- data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
- data/lib/search-engine-for-typesense.rb +12 -0
- data/lib/search_engine/active_record_syncable.rb +247 -0
- data/lib/search_engine/admin/stopwords.rb +125 -0
- data/lib/search_engine/admin/synonyms.rb +125 -0
- data/lib/search_engine/admin.rb +12 -0
- data/lib/search_engine/ast/and.rb +52 -0
- data/lib/search_engine/ast/binary_op.rb +75 -0
- data/lib/search_engine/ast/eq.rb +19 -0
- data/lib/search_engine/ast/group.rb +18 -0
- data/lib/search_engine/ast/gt.rb +12 -0
- data/lib/search_engine/ast/gte.rb +12 -0
- data/lib/search_engine/ast/in.rb +28 -0
- data/lib/search_engine/ast/lt.rb +12 -0
- data/lib/search_engine/ast/lte.rb +12 -0
- data/lib/search_engine/ast/matches.rb +55 -0
- data/lib/search_engine/ast/node.rb +176 -0
- data/lib/search_engine/ast/not_eq.rb +13 -0
- data/lib/search_engine/ast/not_in.rb +24 -0
- data/lib/search_engine/ast/or.rb +52 -0
- data/lib/search_engine/ast/prefix.rb +51 -0
- data/lib/search_engine/ast/raw.rb +41 -0
- data/lib/search_engine/ast/unary_op.rb +43 -0
- data/lib/search_engine/ast.rb +101 -0
- data/lib/search_engine/base/creation.rb +727 -0
- data/lib/search_engine/base/deletion.rb +80 -0
- data/lib/search_engine/base/display_coercions.rb +36 -0
- data/lib/search_engine/base/hydration.rb +312 -0
- data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
- data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
- data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
- data/lib/search_engine/base/index_maintenance.rb +459 -0
- data/lib/search_engine/base/indexing_dsl.rb +255 -0
- data/lib/search_engine/base/joins.rb +479 -0
- data/lib/search_engine/base/model_dsl.rb +472 -0
- data/lib/search_engine/base/presets.rb +43 -0
- data/lib/search_engine/base/pretty_printer.rb +315 -0
- data/lib/search_engine/base/relation_delegation.rb +42 -0
- data/lib/search_engine/base/scopes.rb +113 -0
- data/lib/search_engine/base/updating.rb +92 -0
- data/lib/search_engine/base.rb +38 -0
- data/lib/search_engine/bulk.rb +284 -0
- data/lib/search_engine/cache.rb +33 -0
- data/lib/search_engine/cascade.rb +531 -0
- data/lib/search_engine/cli/doctor.rb +631 -0
- data/lib/search_engine/cli/support.rb +217 -0
- data/lib/search_engine/cli.rb +222 -0
- data/lib/search_engine/client/http_adapter.rb +63 -0
- data/lib/search_engine/client/request_builder.rb +92 -0
- data/lib/search_engine/client/services/base.rb +74 -0
- data/lib/search_engine/client/services/collections.rb +161 -0
- data/lib/search_engine/client/services/documents.rb +214 -0
- data/lib/search_engine/client/services/operations.rb +152 -0
- data/lib/search_engine/client/services/search.rb +190 -0
- data/lib/search_engine/client/services.rb +29 -0
- data/lib/search_engine/client.rb +765 -0
- data/lib/search_engine/client_options.rb +20 -0
- data/lib/search_engine/collection_resolver.rb +191 -0
- data/lib/search_engine/collections_graph.rb +330 -0
- data/lib/search_engine/compiled_params.rb +143 -0
- data/lib/search_engine/compiler.rb +383 -0
- data/lib/search_engine/config/observability.rb +27 -0
- data/lib/search_engine/config/presets.rb +92 -0
- data/lib/search_engine/config/selection.rb +16 -0
- data/lib/search_engine/config/typesense.rb +48 -0
- data/lib/search_engine/config/validators.rb +97 -0
- data/lib/search_engine/config.rb +917 -0
- data/lib/search_engine/console_helpers.rb +130 -0
- data/lib/search_engine/deletion.rb +103 -0
- data/lib/search_engine/dispatcher.rb +125 -0
- data/lib/search_engine/dsl/parser.rb +582 -0
- data/lib/search_engine/engine.rb +167 -0
- data/lib/search_engine/errors.rb +290 -0
- data/lib/search_engine/filters/sanitizer.rb +189 -0
- data/lib/search_engine/hydration/materializers.rb +808 -0
- data/lib/search_engine/hydration/selection_context.rb +96 -0
- data/lib/search_engine/indexer/batch_planner.rb +76 -0
- data/lib/search_engine/indexer/bulk_import.rb +626 -0
- data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
- data/lib/search_engine/indexer/retry_policy.rb +103 -0
- data/lib/search_engine/indexer.rb +747 -0
- data/lib/search_engine/instrumentation.rb +308 -0
- data/lib/search_engine/joins/guard.rb +202 -0
- data/lib/search_engine/joins/resolver.rb +95 -0
- data/lib/search_engine/logging/color.rb +78 -0
- data/lib/search_engine/logging/format_helpers.rb +92 -0
- data/lib/search_engine/logging/partition_progress.rb +53 -0
- data/lib/search_engine/logging_subscriber.rb +388 -0
- data/lib/search_engine/mapper.rb +785 -0
- data/lib/search_engine/multi.rb +286 -0
- data/lib/search_engine/multi_result.rb +186 -0
- data/lib/search_engine/notifications/compact_logger.rb +675 -0
- data/lib/search_engine/observability.rb +162 -0
- data/lib/search_engine/operations.rb +58 -0
- data/lib/search_engine/otel.rb +227 -0
- data/lib/search_engine/partitioner.rb +128 -0
- data/lib/search_engine/ranking_plan.rb +118 -0
- data/lib/search_engine/registry.rb +158 -0
- data/lib/search_engine/relation/compiler.rb +711 -0
- data/lib/search_engine/relation/deletion.rb +37 -0
- data/lib/search_engine/relation/dsl/filters.rb +624 -0
- data/lib/search_engine/relation/dsl/selection.rb +240 -0
- data/lib/search_engine/relation/dsl.rb +903 -0
- data/lib/search_engine/relation/dx/dry_run.rb +59 -0
- data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
- data/lib/search_engine/relation/dx.rb +231 -0
- data/lib/search_engine/relation/materializers.rb +118 -0
- data/lib/search_engine/relation/options.rb +138 -0
- data/lib/search_engine/relation/state.rb +274 -0
- data/lib/search_engine/relation/updating.rb +44 -0
- data/lib/search_engine/relation.rb +623 -0
- data/lib/search_engine/result.rb +664 -0
- data/lib/search_engine/schema.rb +1083 -0
- data/lib/search_engine/sources/active_record_source.rb +185 -0
- data/lib/search_engine/sources/base.rb +62 -0
- data/lib/search_engine/sources/lambda_source.rb +55 -0
- data/lib/search_engine/sources/sql_source.rb +196 -0
- data/lib/search_engine/sources.rb +71 -0
- data/lib/search_engine/stale_rules.rb +160 -0
- data/lib/search_engine/test/minitest_assertions.rb +57 -0
- data/lib/search_engine/test/offline_client.rb +134 -0
- data/lib/search_engine/test/rspec_matchers.rb +77 -0
- data/lib/search_engine/test/stub_client.rb +201 -0
- data/lib/search_engine/test.rb +66 -0
- data/lib/search_engine/test_autoload.rb +8 -0
- data/lib/search_engine/update.rb +35 -0
- data/lib/search_engine/version.rb +7 -0
- data/lib/search_engine.rb +332 -0
- data/lib/tasks/search_engine.rake +501 -0
- data/lib/tasks/search_engine_doctor.rake +16 -0
- metadata +225 -0
|
@@ -0,0 +1,459 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'active_support/concern'
|
|
4
|
+
require 'search_engine/base/index_maintenance/cleanup'
|
|
5
|
+
require 'search_engine/base/index_maintenance/lifecycle'
|
|
6
|
+
require 'search_engine/base/index_maintenance/schema'
|
|
7
|
+
require 'search_engine/logging/color'
|
|
8
|
+
|
|
9
|
+
module SearchEngine
|
|
10
|
+
class Base
|
|
11
|
+
# Index lifecycle helpers: applying schema, indexing, retention cleanup.
|
|
12
|
+
module IndexMaintenance
|
|
13
|
+
extend ActiveSupport::Concern
|
|
14
|
+
|
|
15
|
+
include IndexMaintenance::Cleanup
|
|
16
|
+
include IndexMaintenance::Lifecycle
|
|
17
|
+
include IndexMaintenance::Schema
|
|
18
|
+
|
|
19
|
+
class_methods do
|
|
20
|
+
# ---------------------- Preflight dependencies ----------------------
# Walks the belongs_to join dependencies of the current collection and decides,
# from each dependency's schema diff, whether it must be indexed before the
# current collection. A dependency that is about to be indexed is preflighted
# recursively first, so transitive dependencies are handled depth-first.
# Progress is written to stdout.
#
# @param mode [Symbol] :ensure (index only missing dependencies) or :index
#   (index missing and drifted dependencies); a falsy mode makes this a no-op
# @param client [SearchEngine::Client] forwarded to the diff and index helpers
# @param visited [Set<String>, nil] collection names already handled during
#   this walk; a fresh Set is created when nil
# @param depth [Integer] recursion depth, used only to indent log output
# @return [void]
def __se_preflight_dependencies!(mode:, client:, visited: nil, depth: 0)
  return unless mode

  visited ||= Set.new
  current = __se_current_collection_name
  # Nothing to do for a nameless collection or one already seen in this walk.
  return if current.to_s.strip.empty? || visited.include?(current)

  visited.add(current)

  dependencies = __se_belongs_to_dependencies(__se_fetch_joins_config)
  return if dependencies.empty?

  pad = ' ' * depth
  puts if depth.zero?
  puts(%(#{pad}>>>>>> Preflight Dependencies (mode: #{mode}, collection: "#{current}")))

  dependencies.each do |dep_cfg|
    dep_name = (dep_cfg[:collection] || dep_cfg['collection']).to_s
    next if __se_skip_dep?(dep_name, visited)

    dep_class = __se_resolve_dep_class(dep_name)

    if dep_class.nil?
      puts(%(#{pad} "#{dep_name}" → skipped (unregistered)))
    else
      missing, drift = __se_dependency_status(__se_diff_for(dep_class, client), dep_class)
      wanted = { 'ensure' => missing, 'index' => missing || drift }.fetch(mode.to_s, false)

      # Only descend into the dependency's own dependencies when it will be indexed.
      __se_preflight_recurse(dep_class, mode, client, visited, depth + 1) if wanted
      __se_handle_preflight_action(mode, dep_name, missing, drift, dep_class, client, indent: "#{pad} ")
    end

    # Mark the dependency as handled whether it was indexed, skipped or unregistered.
    visited.add(dep_name)
  end

  puts(%(#{pad}>>>>>> Preflight Done (collection: "#{current}")))
end
|
|
77
|
+
|
|
78
|
+
# Resolves the logical collection name of the receiver.
# Uses `collection` when the receiver responds to it (nil becomes an empty
# string) and `name` otherwise. If the lookup raises, `name` is used instead.
#
# @return [String] current collection logical name; empty string when unavailable
def __se_current_collection_name
  return name.to_s unless respond_to?(:collection)

  (collection || '').to_s
rescue StandardError
  name.to_s
end
|
|
84
|
+
|
|
85
|
+
# Reads the receiver's `joins_config`, treating nil and any StandardError
# raised by the lookup as "no joins configured".
#
# @return [Hash] raw joins configuration or empty hash on errors
def __se_fetch_joins_config
  raw = joins_config
  raw || {}
rescue StandardError
  {}
end
|
|
91
|
+
|
|
92
|
+
# Filters a joins configuration down to its belongs_to entries.
#
# Entries are matched on their :kind / 'kind' value. Values that are not
# Hashes (for example a nil left behind by a malformed config) are ignored
# instead of raising, in line with the rescue around `configs.values`.
#
# @param configs [Hash] joins configuration keyed by association name
# @return [Array<Hash>] only belongs_to-type dependency configs
def __se_belongs_to_dependencies(configs)
  entries =
    begin
      configs.values
    rescue StandardError
      # configs is not Hash-like (e.g. nil): treat as "no dependencies".
      []
    end

  entries.select do |entry|
    entry.is_a?(Hash) && (entry[:kind] || entry['kind']).to_s == 'belongs_to'
  end
end
|
|
102
|
+
|
|
103
|
+
# Tells whether a dependency should be skipped during preflight: either its
# collection name is blank or it is already present in `visited`.
#
# @param dep_coll [String] dependency collection name
# @param visited [Set<String>] names already handled in this walk
# @return [Boolean]
def __se_skip_dep?(dep_coll, visited)
  return true if dep_coll.to_s.strip.empty?

  visited.include?(dep_coll)
end
|
|
109
|
+
|
|
110
|
+
# Looks up the model class for a collection name through
# SearchEngine.collection_for. Any StandardError raised by the lookup is
# turned into nil.
#
# @param dep_coll [String] dependency collection name
# @return [Class, nil] the resolved class, or nil when the lookup fails
def __se_resolve_dep_class(dep_coll)
  begin
    SearchEngine.collection_for(dep_coll)
  rescue StandardError
    nil
  end
end
|
|
117
|
+
|
|
118
|
+
# Runs the dependency preflight on a dependency class, best-effort.
#
# A StandardError raised by the nested preflight never propagates, so the
# caller's flow is not blocked; the error is reported on stderr rather than
# being dropped silently.
#
# @param dep_klass [Class] dependency class responding to __se_preflight_dependencies!
# @param mode [Symbol] preflight mode, forwarded unchanged
# @param client [SearchEngine::Client] forwarded unchanged
# @param visited [Set<String>] shared set of already handled collection names
# @param depth [Integer] recursion depth passed to the nested preflight
# @return [void]
def __se_preflight_recurse(dep_klass, mode, client, visited, depth)
  dep_klass.__se_preflight_dependencies!(mode: mode, client: client, visited: visited, depth: depth)
rescue StandardError => error
  # Deliberately non-fatal: surface the failure, then let the main flow continue.
  warn(%(preflight dependency #{dep_klass.inspect} → error=#{error.class}: #{error.message.to_s[0, 200]}))
end
|
|
129
|
+
|
|
130
|
+
# Fetches the schema diff for a dependency class via SearchEngine::Schema.diff
# and returns its :diff entry. A missing entry or any StandardError yields an
# empty hash.
#
# @param dep_klass [Class] dependency class to diff
# @param client [SearchEngine::Client] forwarded to Schema.diff
# @return [Hash]
def __se_diff_for(dep_klass, client)
  report = SearchEngine::Schema.diff(dep_klass, client: client)
  report[:diff] || {}
rescue StandardError
  {}
end
|
|
138
|
+
|
|
139
|
+
# Asks the dependency class whether the diff means "missing" and/or "drift".
# Each probe is independent; a probe that raises counts as false.
#
# @param diff [Hash] schema diff for the dependency
# @param dep_klass [Class] responds to __se_schema_missing? / __se_schema_drift?
# @return [Array(Boolean, Boolean)] [missing, drift]
def __se_dependency_status(diff, dep_klass)
  probe = lambda do |predicate|
    begin
      dep_klass.public_send(predicate, diff)
    rescue StandardError
      false
    end
  end

  [probe.call(:__se_schema_missing?), probe.call(:__se_schema_drift?)]
end
|
|
155
|
+
|
|
156
|
+
# Logs the decision for one dependency and indexes it when the mode calls for it.
#
# In 'ensure' mode the dependency is indexed only when missing; in 'index' mode
# when missing or drifted. Any other mode only logs a "skipped" line.
#
# @param mode [Symbol] :ensure or :index (compared via to_s)
# @param dep_coll [String] dependency collection name, used in log output
# @param missing [Boolean] whether the dependency collection is missing
# @param drift [Boolean] whether the dependency schema has drifted
# @param dep_klass [Class] dependency class responding to index_collection
# @param client [SearchEngine::Client] forwarded to index_collection
# @param indent [String] prefix for the log line
# @return [void]
def __se_handle_preflight_action(mode, dep_coll, missing, drift, dep_klass, client, indent: ' ')
  label = %(#{indent}"#{dep_coll}")

  case mode.to_s
  when 'ensure'
    return puts(%(#{label} → present (skip))) unless missing

    puts(%(#{label} → ensure (missing) → index_collection))
  when 'index'
    return puts(%(#{label} → in_sync (skip))) unless missing || drift

    reason = missing ? 'missing' : 'drift'
    puts(%(#{label} → index (#{reason}) → index_collection))
  else
    return puts(%(#{label} → skipped (unknown mode: #{mode})))
  end

  # Avoid nested preflight to prevent redundant recursion cycles.
  # NOTE(review): presumably bulk_suppress_cascade is what index_collection
  # checks to skip its own preflight/cascade — confirm against the indexer.
  SearchEngine::Instrumentation.with_context(bulk_suppress_cascade: true) do
    dep_klass.index_collection(client: client)
  end
end
|
|
191
|
+
|
|
192
|
+
# Prints one stdout line per batch with its status, document, success and
# failure counts, batch number, duration and (when present) a sample error.
# Does nothing unless `batches` is an Array.
#
# @param batches [Array<Hash>, Object] per-batch stats keyed by Symbol or String
# @return [void]
def __se_log_batches_from_summary(batches)
  return unless batches.is_a?(Array)

  color = SearchEngine::Logging::Color

  batches.each.with_index(1) do |stats, number|
    status = __se_batch_status_from_stats(stats)
    status_color = color.for_status(status)
    # Stats may be keyed by Symbol or String; fall back to the given default.
    read = ->(key, fallback) { stats[key] || stats[key.to_s] || fallback }

    status_text = color.apply("status=#{status}", status_color)
    docs_text = "docs=#{read.call(:docs_count, 0)}"

    succeeded = read.call(:success_count, 0).to_i
    success_text = "success=#{succeeded}"
    success_text = color.bold(success_text) if succeeded.positive?

    failed = read.call(:failure_count, 0).to_i
    failed_text = "failed=#{failed}"
    failed_text = color.apply(failed_text, :red) if failed.positive?

    duration_text = "duration_ms=#{read.call(:duration_ms, 0.0)}"

    fields = [status_text, docs_text, success_text, failed_text, "batch=#{number}", duration_text]
    output = "#{number == 1 ? ' single → ' : ' '}#{fields.join(' ')}"

    # Append the first non-blank error sample of this batch, if any.
    sample = __se_extract_batch_sample_error(stats)
    output << " sample_error=#{sample.inspect}" if sample

    puts(output)
  end
end
|
|
226
|
+
|
|
227
|
+
# Derives a batch status from its success and failure counters.
#
# @param stats [Hash] batch stats with :success_count / :failure_count
#   (Symbol or String keys; missing values count as 0)
# @return [Symbol] :partial when both counters are positive, :failed when only
#   failures are, :ok otherwise
def __se_batch_status_from_stats(stats)
  succeeded, failed = %i[success_count failure_count].map do |key|
    (stats[key] || stats[key.to_s] || 0).to_i
  end

  return :ok unless failed.positive?

  succeeded.positive? ? :partial : :failed
end
|
|
239
|
+
|
|
240
|
+
# Returns the first non-blank error message from a batch's :errors_sample
# (Symbol or String key).
#
# @param stats [Hash] batch stats
# @return [String, nil] the message, or nil when the sample is not an Array,
#   has no truthy element, or contains only blank messages
def __se_extract_batch_sample_error(stats)
  samples = stats[:errors_sample] || stats['errors_sample']
  return nil unless samples.is_a?(Array) && samples.any?

  # Lazy so elements after the first hit are never converted.
  samples.lazy.map(&:to_s).find { |text| !text.strip.empty? }
end
|
|
250
|
+
|
|
251
|
+
# Internal preflight and batch-logging helpers are kept out of the public
# class-level API.
private(
  *%i[
    __se_current_collection_name
    __se_fetch_joins_config
    __se_belongs_to_dependencies
    __se_skip_dep?
    __se_resolve_dep_class
    __se_preflight_recurse
    __se_diff_for
    __se_dependency_status
    __se_handle_preflight_action
    __se_log_batches_from_summary
    __se_batch_status_from_stats
    __se_extract_batch_sample_error
  ]
)
|
|
263
|
+
end
|
|
264
|
+
|
|
265
|
+
class_methods do
|
|
266
|
+
# Tells whether a schema diff reports the live collection as missing, i.e.
# diff[:collection_options] is a Hash whose :live value is :missing.
#
# @param diff [Hash] schema diff
# @return [Boolean]
def __se_schema_missing?(diff)
  options = diff[:collection_options]
  return false unless options.is_a?(Hash)

  options[:live] == :missing
end
|
|
270
|
+
|
|
271
|
+
# Tells whether a schema diff contains any difference: added, removed or
# changed fields, collection option changes, or stale references.
#
# @param diff [Hash] schema diff
# @return [Boolean]
def __se_schema_drift?(diff)
  # Normalize every section up front so a malformed section raises as before,
  # regardless of which section would have decided the result.
  added, removed, stale = %i[added_fields removed_fields stale_references].map { |key| Array(diff[key]) }
  changed, options = %i[changed_fields collection_options].map { |key| (diff[key] || {}).to_h }

  [added, removed, stale].any?(&:any?) || [changed, options].any? { |section| !section.empty? }
end
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
class_methods do
|
|
282
|
+
# Picks the first non-blank error message from an indexing summary's batches.
#
# Returns nil when the summary reports no failures (failed_total <= 0, absent
# or unreadable), when it exposes no Array of batches, or when no batch
# carries a non-blank :errors_sample entry.
#
# @param summary [Object] may respond to failed_total and batches
# @return [String, nil]
def __se_extract_sample_error(summary)
  failure_total =
    begin
      summary.respond_to?(:failed_total) ? summary.failed_total.to_i : 0
    rescue StandardError
      0
    end
  return nil unless failure_total.positive?

  batch_list =
    begin
      summary.respond_to?(:batches) ? summary.batches : nil
    rescue StandardError
      nil
    end
  return nil unless batch_list.is_a?(Array)

  batch_list.each do |batch|
    next unless batch.is_a?(Hash)

    # Array(nil) is empty, so batches without samples fall through.
    samples = Array(batch[:errors_sample] || batch['errors_sample'])
    message = samples.lazy.map(&:to_s).find { |text| !text.strip.empty? }
    return message if message
  end

  nil
end
|
|
310
|
+
end
|
|
311
|
+
|
|
312
|
+
class_methods do
|
|
313
|
+
# Indexes the receiver into the given physical collection.
#
# Without a compiled partitioner the whole collection is rebuilt in a single
# pass. With one, the partitions are processed sequentially when at most one
# worker or one partition is configured, and in parallel otherwise.
#
# @param into [String] target collection name passed through to the indexer
# @return [Symbol, Object] the single-pass summary status, or the aggregate
#   status produced by the sequential/parallel helper
def __se_index_partitions!(into:)
  partitioner = SearchEngine::Partitioner.for(self)
  unless partitioner
    return SearchEngine::Indexer.rebuild_partition!(self, partition: nil, into: into).status
  end

  partitions = Array(partitioner.partitions)
  workers = partitioner.max_parallel.to_i

  if workers <= 1 || partitions.size <= 1
    __se_index_partitions_seq!(partitions, into)
  else
    __se_index_partitions_parallel!(partitions, into, workers)
  end
end
|
|
326
|
+
end
|
|
327
|
+
|
|
328
|
+
class_methods do
|
|
329
|
+
# Rebuilds the given partitions one after another and folds their statuses
# into a single aggregate. A progress line is printed per partition, plus
# per-batch lines when a partition reports more than one batch.
#
# @param parts [Array] partition keys, processed in order
# @param into [String] target collection name passed through to the indexer
# @return [Symbol] :failed if any partition failed (or its status could not be
#   read), else :partial if any was partial, else :ok
def __se_index_partitions_seq!(parts, into)
  parts.reduce(:ok) do |overall, part|
    summary = SearchEngine::Indexer.rebuild_partition!(self, partition: part, into: into)
    puts(SearchEngine::Logging::PartitionProgress.line(part, summary))
    # Log batches individually if there are multiple batches
    __se_log_batches_from_summary(summary.batches) if summary.batches_total.to_i > 1

    begin
      status = summary.status
      if status == :failed
        :failed
      elsif status == :partial && overall == :ok
        :partial
      else
        # :failed is sticky; :partial is only replaced by :failed.
        overall
      end
    rescue StandardError
      :failed
    end
  end
end
|
|
350
|
+
end
|
|
351
|
+
|
|
352
|
+
class_methods do
|
|
353
|
+
# Parallel processing via bounded thread pool.
#
# Each partition is rebuilt on a Concurrent::FixedThreadPool of `max_p`
# threads. Progress output and status aggregation happen under a mutex so
# lines from different workers do not interleave. An error raised while
# processing a partition is reported on stderr and marks the run as :failed
# without stopping the other partitions.
#
# If the pool does not drain within one hour it is killed; partitions that
# were still queued or running never report a status, so the aggregate is
# forced to :failed in that case instead of reporting a stale :ok/:partial.
#
# @param parts [Array] partition keys
# @param into [String] target collection name passed through to the indexer
# @param max_p [Integer] number of worker threads
# @return [Symbol] :failed, :partial or :ok
def __se_index_partitions_parallel!(parts, into, max_p)
  require 'concurrent-ruby' # loaded lazily: only parallel runs need it
  pool = Concurrent::FixedThreadPool.new(max_p)
  # Captured on the calling thread and re-applied inside each worker.
  ctx = SearchEngine::Instrumentation.context
  mtx = Mutex.new
  agg = :ok
  begin
    parts.each do |part|
      pool.post do
        SearchEngine::Instrumentation.with_context(ctx) do
          summary = SearchEngine::Indexer.rebuild_partition!(self, partition: part, into: into)
          mtx.synchronize do
            puts(SearchEngine::Logging::PartitionProgress.line(part, summary))
            # Log batches individually if there are multiple batches
            __se_log_batches_from_summary(summary.batches) if summary.batches_total.to_i > 1
            begin
              st = summary.status
              if st == :failed
                agg = :failed
              elsif st == :partial && agg == :ok
                agg = :partial
              end
            rescue StandardError
              agg = :failed
            end
          end
        end
      rescue StandardError => error
        mtx.synchronize do
          warn(" partition=#{part.inspect} → error=#{error.class}: #{error.message.to_s[0, 200]}")
          agg = :failed
        end
      end
    end
  ensure
    pool.shutdown
    # Wait up to 1 hour, then force-kill and wait a bit more to ensure cleanup
    timed_out = !pool.wait_for_termination(3600)
    pool.kill if timed_out
    pool.wait_for_termination(60)
    if timed_out
      mtx.synchronize do
        warn(' partitions → error=timeout: worker pool killed after 3600s; unfinished partitions were not indexed')
        agg = :failed
      end
    end
  end
  agg
end
|
|
396
|
+
end
|
|
397
|
+
|
|
398
|
+
class_methods do
|
|
399
|
+
# Single non-partitioned pass helper: rebuilds the whole collection in one
# go by asking the indexer for the nil partition.
#
# @param into [String] target collection name passed through to the indexer
# @return [Object] whatever SearchEngine::Indexer.rebuild_partition! returns
def __se_index_single!(into)
  indexer = SearchEngine::Indexer
  indexer.rebuild_partition!(self, partition: nil, into: into)
end
|
|
403
|
+
end
|
|
404
|
+
|
|
405
|
+
class_methods do
|
|
406
|
+
# Deletes old physical collections of a logical collection, keeping the
# newest `keep_last` ones in addition to the current alias target.
#
# `keep_last` comes from the receiver's `schema_retention` when present,
# otherwise from SearchEngine.config.schema.retention.keep_last. The value is
# coerced to a non-negative Integer so a String or nil setting cannot break
# the drop below.
#
# Physical collections are recognised by the exact name pattern
# "<logical>_<8 digits>_<6 digits>_<3 digits>" and ordered newest first by
# timestamp, then by sequence.
#
# @param logical [String] logical collection (alias) name
# @param client [SearchEngine::Client] used to resolve the alias, list and
#   delete collections
# @return [Array<String>] names of the physical collections that were deleted
def __se_retention_cleanup!(logical:, client:)
  keep =
    begin
      local = respond_to?(:schema_retention) ? (schema_retention || {}) : {}
      lk = local[:keep_last]
      lk.nil? ? SearchEngine.config.schema.retention.keep_last : Integer(lk)
    rescue StandardError
      # Unusable local override: fall back to the global setting.
      SearchEngine.config.schema.retention.keep_last
    end
  # nil.to_i is 0; negative values are clamped so nothing extra is kept or dropped.
  keep = [keep.to_i, 0].max

  # Metadata calls get at least 10 seconds, whatever the configured timeout.
  meta_timeout =
    begin
      [SearchEngine.config.timeout_ms.to_i, 10_000].max
    rescue StandardError
      10_000
    end

  alias_target = client.resolve_alias(logical, timeout_ms: meta_timeout)
  names = Array(client.list_collections(timeout_ms: meta_timeout)).map { |c| (c[:name] || c['name']).to_s }
  # \A / \z anchor the whole name; ^ / $ would also match a single line of a multi-line name.
  re = /\A#{Regexp.escape(logical)}_\d{8}_\d{6}_\d{3}\z/
  physicals = names.select { |n| re.match?(n) }

  # Newest first: descending timestamp, then descending sequence.
  ordered = physicals.sort_by do |n|
    ts = __se_extract_timestamp(logical, n)
    seq = __se_extract_sequence(n)
    [-ts, -seq]
  end

  # The collection currently behind the alias is never a deletion candidate.
  candidates = ordered.reject { |n| n == alias_target }
  to_drop = candidates.drop(keep)
  to_drop.each { |n| client.delete_collection(n, timeout_ms: 60_000) }
  to_drop
end

private :__se_retention_cleanup!
|
|
442
|
+
end
|
|
443
|
+
|
|
444
|
+
class_methods do
|
|
445
|
+
# Extracts the numeric timestamp from a physical collection name of the form
# "<logical>_<date>_<time>_<sequence>" by joining the date and time segments.
#
# @param logical [String] logical collection name used as the prefix
# @param name [String] physical collection name
# @return [Integer] the combined date/time digits, or 0 when the remainder
#   after the prefix does not split into exactly three segments
def __se_extract_timestamp(logical, name)
  segments = name.to_s.delete_prefix("#{logical}_").split('_')
  segments.size == 3 ? segments.first(2).join.to_i : 0
end
|
|
452
|
+
|
|
453
|
+
# Extracts the trailing sequence number from a physical collection name,
# i.e. the last underscore-separated segment converted with to_i.
#
# @param name [String] physical collection name
# @return [Integer] the sequence, or 0 when there is no numeric last segment
def __se_extract_sequence(name)
  tail = name.to_s.split('_')[-1]
  tail.to_i
end
|
|
456
|
+
end
|
|
457
|
+
end
|
|
458
|
+
end
|
|
459
|
+
end
|