search-engine-for-typesense 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +148 -0
- data/app/search_engine/search_engine/app_info.rb +11 -0
- data/app/search_engine/search_engine/index_partition_job.rb +170 -0
- data/lib/generators/search_engine/install/install_generator.rb +20 -0
- data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
- data/lib/generators/search_engine/model/model_generator.rb +86 -0
- data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
- data/lib/search-engine-for-typesense.rb +12 -0
- data/lib/search_engine/active_record_syncable.rb +247 -0
- data/lib/search_engine/admin/stopwords.rb +125 -0
- data/lib/search_engine/admin/synonyms.rb +125 -0
- data/lib/search_engine/admin.rb +12 -0
- data/lib/search_engine/ast/and.rb +52 -0
- data/lib/search_engine/ast/binary_op.rb +75 -0
- data/lib/search_engine/ast/eq.rb +19 -0
- data/lib/search_engine/ast/group.rb +18 -0
- data/lib/search_engine/ast/gt.rb +12 -0
- data/lib/search_engine/ast/gte.rb +12 -0
- data/lib/search_engine/ast/in.rb +28 -0
- data/lib/search_engine/ast/lt.rb +12 -0
- data/lib/search_engine/ast/lte.rb +12 -0
- data/lib/search_engine/ast/matches.rb +55 -0
- data/lib/search_engine/ast/node.rb +176 -0
- data/lib/search_engine/ast/not_eq.rb +13 -0
- data/lib/search_engine/ast/not_in.rb +24 -0
- data/lib/search_engine/ast/or.rb +52 -0
- data/lib/search_engine/ast/prefix.rb +51 -0
- data/lib/search_engine/ast/raw.rb +41 -0
- data/lib/search_engine/ast/unary_op.rb +43 -0
- data/lib/search_engine/ast.rb +101 -0
- data/lib/search_engine/base/creation.rb +727 -0
- data/lib/search_engine/base/deletion.rb +80 -0
- data/lib/search_engine/base/display_coercions.rb +36 -0
- data/lib/search_engine/base/hydration.rb +312 -0
- data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
- data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
- data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
- data/lib/search_engine/base/index_maintenance.rb +459 -0
- data/lib/search_engine/base/indexing_dsl.rb +255 -0
- data/lib/search_engine/base/joins.rb +479 -0
- data/lib/search_engine/base/model_dsl.rb +472 -0
- data/lib/search_engine/base/presets.rb +43 -0
- data/lib/search_engine/base/pretty_printer.rb +315 -0
- data/lib/search_engine/base/relation_delegation.rb +42 -0
- data/lib/search_engine/base/scopes.rb +113 -0
- data/lib/search_engine/base/updating.rb +92 -0
- data/lib/search_engine/base.rb +38 -0
- data/lib/search_engine/bulk.rb +284 -0
- data/lib/search_engine/cache.rb +33 -0
- data/lib/search_engine/cascade.rb +531 -0
- data/lib/search_engine/cli/doctor.rb +631 -0
- data/lib/search_engine/cli/support.rb +217 -0
- data/lib/search_engine/cli.rb +222 -0
- data/lib/search_engine/client/http_adapter.rb +63 -0
- data/lib/search_engine/client/request_builder.rb +92 -0
- data/lib/search_engine/client/services/base.rb +74 -0
- data/lib/search_engine/client/services/collections.rb +161 -0
- data/lib/search_engine/client/services/documents.rb +214 -0
- data/lib/search_engine/client/services/operations.rb +152 -0
- data/lib/search_engine/client/services/search.rb +190 -0
- data/lib/search_engine/client/services.rb +29 -0
- data/lib/search_engine/client.rb +765 -0
- data/lib/search_engine/client_options.rb +20 -0
- data/lib/search_engine/collection_resolver.rb +191 -0
- data/lib/search_engine/collections_graph.rb +330 -0
- data/lib/search_engine/compiled_params.rb +143 -0
- data/lib/search_engine/compiler.rb +383 -0
- data/lib/search_engine/config/observability.rb +27 -0
- data/lib/search_engine/config/presets.rb +92 -0
- data/lib/search_engine/config/selection.rb +16 -0
- data/lib/search_engine/config/typesense.rb +48 -0
- data/lib/search_engine/config/validators.rb +97 -0
- data/lib/search_engine/config.rb +917 -0
- data/lib/search_engine/console_helpers.rb +130 -0
- data/lib/search_engine/deletion.rb +103 -0
- data/lib/search_engine/dispatcher.rb +125 -0
- data/lib/search_engine/dsl/parser.rb +582 -0
- data/lib/search_engine/engine.rb +167 -0
- data/lib/search_engine/errors.rb +290 -0
- data/lib/search_engine/filters/sanitizer.rb +189 -0
- data/lib/search_engine/hydration/materializers.rb +808 -0
- data/lib/search_engine/hydration/selection_context.rb +96 -0
- data/lib/search_engine/indexer/batch_planner.rb +76 -0
- data/lib/search_engine/indexer/bulk_import.rb +626 -0
- data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
- data/lib/search_engine/indexer/retry_policy.rb +103 -0
- data/lib/search_engine/indexer.rb +747 -0
- data/lib/search_engine/instrumentation.rb +308 -0
- data/lib/search_engine/joins/guard.rb +202 -0
- data/lib/search_engine/joins/resolver.rb +95 -0
- data/lib/search_engine/logging/color.rb +78 -0
- data/lib/search_engine/logging/format_helpers.rb +92 -0
- data/lib/search_engine/logging/partition_progress.rb +53 -0
- data/lib/search_engine/logging_subscriber.rb +388 -0
- data/lib/search_engine/mapper.rb +785 -0
- data/lib/search_engine/multi.rb +286 -0
- data/lib/search_engine/multi_result.rb +186 -0
- data/lib/search_engine/notifications/compact_logger.rb +675 -0
- data/lib/search_engine/observability.rb +162 -0
- data/lib/search_engine/operations.rb +58 -0
- data/lib/search_engine/otel.rb +227 -0
- data/lib/search_engine/partitioner.rb +128 -0
- data/lib/search_engine/ranking_plan.rb +118 -0
- data/lib/search_engine/registry.rb +158 -0
- data/lib/search_engine/relation/compiler.rb +711 -0
- data/lib/search_engine/relation/deletion.rb +37 -0
- data/lib/search_engine/relation/dsl/filters.rb +624 -0
- data/lib/search_engine/relation/dsl/selection.rb +240 -0
- data/lib/search_engine/relation/dsl.rb +903 -0
- data/lib/search_engine/relation/dx/dry_run.rb +59 -0
- data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
- data/lib/search_engine/relation/dx.rb +231 -0
- data/lib/search_engine/relation/materializers.rb +118 -0
- data/lib/search_engine/relation/options.rb +138 -0
- data/lib/search_engine/relation/state.rb +274 -0
- data/lib/search_engine/relation/updating.rb +44 -0
- data/lib/search_engine/relation.rb +623 -0
- data/lib/search_engine/result.rb +664 -0
- data/lib/search_engine/schema.rb +1083 -0
- data/lib/search_engine/sources/active_record_source.rb +185 -0
- data/lib/search_engine/sources/base.rb +62 -0
- data/lib/search_engine/sources/lambda_source.rb +55 -0
- data/lib/search_engine/sources/sql_source.rb +196 -0
- data/lib/search_engine/sources.rb +71 -0
- data/lib/search_engine/stale_rules.rb +160 -0
- data/lib/search_engine/test/minitest_assertions.rb +57 -0
- data/lib/search_engine/test/offline_client.rb +134 -0
- data/lib/search_engine/test/rspec_matchers.rb +77 -0
- data/lib/search_engine/test/stub_client.rb +201 -0
- data/lib/search_engine/test.rb +66 -0
- data/lib/search_engine/test_autoload.rb +8 -0
- data/lib/search_engine/update.rb +35 -0
- data/lib/search_engine/version.rb +7 -0
- data/lib/search_engine.rb +332 -0
- data/lib/tasks/search_engine.rake +501 -0
- data/lib/tasks/search_engine_doctor.rake +16 -0
- metadata +225 -0
|
@@ -0,0 +1,531 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
|
|
4
|
+
# Cascade reindexing for collections that reference other collections via
|
|
5
|
+
# Typesense field-level references.
|
|
6
|
+
#
|
|
7
|
+
# Public API:
|
|
8
|
+
# - {.cascade_reindex!(source:, ids:, context: :update, client: nil)} => Hash summary
|
|
9
|
+
# - source: Class (SearchEngine::Base subclass) or String collection name
|
|
10
|
+
# - ids: Array<String, Integer>, the target key values to match in referencers
|
|
11
|
+
# - context: :update or :full (controls partial vs full behavior)
|
|
12
|
+
module Cascade
|
|
13
|
+
class << self
|
|
14
|
+
# Trigger cascade reindex on collections that reference +source+.
#
# Every referencer edge found in the reverse graph is handled on its own:
# - edges forming an immediate cycle with +source+ are skipped;
# - referencers without a resolvable model class are skipped;
# - in the :update context, referencers that support it get a partial
#   rebuild keyed by the referencing field; if that rebuild raises, a full
#   reindex is attempted and a :partial_failed diagnostic is recorded;
# - all other referencers get a full reindex, which is not repeated for a
#   collection once it has run successfully during this call.
#
# @param source [Class, String] model class or logical collection name
# @param ids [Array<#to_s>, nil] target key values to match in referencers
# @param context [Symbol, String] :update or :full
# @param client [SearchEngine::Client, nil] falls back to SearchEngine.client
# @return [Hash] summary counters plus the per-referencer +:outcomes+ list
# @raise [ArgumentError] when +context+ is not :update or :full
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockNesting
def cascade_reindex!(source:, ids:, context: :update, client: nil)
  # Values such as nil do not respond to #to_sym; treat them as an invalid
  # context instead of leaking a NoMethodError to the caller.
  run_context = context.respond_to?(:to_sym) ? context.to_sym : nil
  raise ArgumentError, 'context must be :update or :full' unless %i[update full].include?(run_context)

  src_collection = normalize_collection_name(source)
  source_klass = source.is_a?(Class) ? source : safe_collection_class(src_collection)
  ts_client = client || SearchEngine.client

  graph = build_reverse_graph(client: ts_client)
  referencers = Array(graph[src_collection])

  # Detect immediate cycles (A <-> B) and skip those pairs
  cycle_pairs = detect_immediate_cycles(graph)

  # Per-run cache for alias lookups to avoid repeated network calls
  alias_cache = {}

  ensure_source_reference_fields!(
    source_klass,
    src_collection,
    referencers,
    client: ts_client,
    alias_cache: alias_cache
  )

  target_ids = Array(ids)
  outcomes = []
  skipped_cycles = []
  seen_full = {}

  # Runs (or skips) the full reindex for one referencer and reports the mode.
  full_reindex_mode = lambda do |klass, collection|
    next :skipped_duplicate if seen_full[collection]

    executed = __se_full_reindex_for_referrer(klass, client: ts_client, alias_cache: alias_cache)
    next :skipped_no_partitions unless executed

    seen_full[collection] = true
    :full
  end

  referencers.each do |edge|
    referrer_coll = edge[:referrer]

    # Skip cycle pairs deterministically (avoid ping-pong)
    if cycle_pairs.include?([src_collection, referrer_coll])
      skipped_cycles << { pair: [src_collection, referrer_coll] }
      outcomes << { collection: referrer_coll, mode: :skipped_cycle }
      next
    end

    ref_klass = safe_collection_class(referrer_coll)
    unless ref_klass
      outcomes << { collection: referrer_coll, mode: :skipped_unregistered }
      next
    end

    if run_context == :update && can_partial_reindex?(ref_klass)
      begin
        SearchEngine::Indexer.rebuild_partition!(
          ref_klass,
          partition: { edge[:local_key].to_sym => target_ids },
          into: nil
        )
        mode = :partial
      rescue StandardError => error
        # Fallback to full when the partial path fails unexpectedly
        mode = full_reindex_mode.call(ref_klass, referrer_coll)
        # Record diagnostic on the outcome for visibility upstream
        outcomes << { collection: referrer_coll, mode: :partial_failed, error_class: error.class.name,
                      message: error.message.to_s[0, 200] }
      end
    else
      mode = full_reindex_mode.call(ref_klass, referrer_coll)
    end

    outcomes << { collection: referrer_coll, mode: mode }
  end

  payload = {
    source_collection: src_collection,
    ids_count: target_ids.size,
    context: run_context,
    targets_total: referencers.size,
    partial_count: outcomes.count { |outcome| outcome[:mode] == :partial },
    full_count: outcomes.count { |outcome| outcome[:mode] == :full },
    skipped_unregistered: outcomes.count { |outcome| outcome[:mode] == :skipped_unregistered },
    skipped_cycles: skipped_cycles
  }
  SearchEngine::Instrumentation.instrument('search_engine.cascade.run', payload.merge(outcomes: outcomes)) {}

  payload.merge(outcomes: outcomes)
end
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockNesting
|
|
128
|
+
|
|
129
|
+
# Build the reverse reference graph used to look up the referencers of a
# collection. The graph derived from live Typesense schemas wins whenever it
# has entries; an empty result falls through to the graph compiled from
# local model schemas.
#
# @param client [SearchEngine::Client]
# @return [Hash{String=>Array<Hash>}] mapping target_collection => [{ referrer, local_key, foreign_key }]
def build_reverse_graph(client:)
  live_graph = build_from_typesense(client)
  live_graph.empty? ? build_from_registry : live_graph
end
|
|
140
|
+
|
|
141
|
+
private
|
|
142
|
+
|
|
143
|
+
# Perform a full reindex for a referencer collection.
#
# Strategies are tried in order and the first success ends the method:
# 1. a forced schema rebuild (+reindex_referencer_with_fresh_schema!+);
# 2. a drop-and-reindex (+reindex_referencer_with_drop!+);
# 3. direct rebuilds through SearchEngine::Indexer — one rebuild per
#    partition when a partitioner is resolved (on a thread pool when
#    +max_parallel+ > 1 and there are several partitions), or a single
#    non-partitioned rebuild when no partitioner is resolved.
#
# @param ref_klass [Class] referencer model class
# @param client [SearchEngine::Client]
# @param alias_cache [Hash, nil] per-run cache handed to the alias lookup
# @return [Boolean] true when a reindex ran; false when the partitioner
#   yields no usable partitions
# rubocop:disable Metrics/PerceivedComplexity
def __se_full_reindex_for_referrer(ref_klass, client:, alias_cache:)
  logical = ref_klass.respond_to?(:collection) ? ref_klass.collection.to_s : ref_klass.name.to_s
  physical = resolve_physical_collection_name(logical, client: client, cache: alias_cache)

  # For cascade full reindex, force a schema rebuild (blue/green) to
  # refresh reference targets before importing documents.
  rebuilt = reindex_referencer_with_fresh_schema!(ref_klass, logical, physical, client: client, force_rebuild: true)
  return true if rebuilt

  # Fallback: force full destructive reindex when forced rebuild fails.
  return true if reindex_referencer_with_drop!(ref_klass, logical, physical)

  label = physical && physical != logical ? "#{logical} (physical: #{physical})" : logical

  partitioner =
    begin
      SearchEngine::Partitioner.for(ref_klass)
    rescue StandardError
      nil
    end

  unless partitioner
    puts(%( Referencer "#{label}" — single))
    SearchEngine::Indexer.rebuild_partition!(ref_klass, partition: nil, into: nil)
    return true
  end

  listed =
    begin
      Array(partitioner.partitions)
    rescue StandardError
      []
    end
  usable = listed.reject { |part| part.nil? || part.to_s.strip.empty? }

  if usable.empty?
    puts(%( Referencer "#{label}" — partitions=0 → skip))
    return false
  end

  puts(%( Referencer "#{label}" — partitions=#{usable.size} parallel=#{partitioner.max_parallel}))
  workers = partitioner.max_parallel.to_i
  return rebuild_partitions_sequential!(ref_klass, usable) unless workers > 1 && usable.size > 1

  require 'concurrent-ruby'
  pool = Concurrent::FixedThreadPool.new(workers)
  ctx = SearchEngine::Instrumentation.context
  mtx = Mutex.new
  begin
    post_partitions_to_pool!(pool, ctx, usable, ref_klass, mtx)
  ensure
    pool.shutdown
    # Wait up to 1 hour, then force-kill and wait a bit more to ensure cleanup
    pool.wait_for_termination(3600) || pool.kill
    pool.wait_for_termination(60)
  end
  # NOTE(review): success is reported without inspecting the posted jobs —
  # confirm whether a failing partition rebuild should surface here.
  true
end
# rubocop:enable Metrics/PerceivedComplexity
|
|
221
|
+
|
|
222
|
+
# Look up the physical collection behind a logical alias, optionally
# memoizing the answer for the duration of one run.
#
# A lookup that raises, or that yields a blank/falsy target, resolves to nil.
#
# @param logical [String]
# @param client [SearchEngine::Client]
# @param cache [Hash, nil] per-run cache; when given, both resolved names and nil results are stored
# @return [String, nil]
def resolve_physical_collection_name(logical, client:, cache: nil)
  alias_name = logical.to_s
  return cache[alias_name] if cache&.key?(alias_name)

  resolved =
    begin
      target = client.resolve_alias(alias_name)
      text = target ? target.to_s : ''
      text.strip.empty? ? nil : text
    rescue StandardError
      nil
    end

  cache[alias_name] = resolved if cache
  resolved
end
|
|
240
|
+
|
|
241
|
+
# Turn a cascade source into a collection name string.
#
# Non-class values are stringified as-is. Classes report their +collection+
# when they expose one, otherwise their class name.
#
# @param source [Class, #to_s]
# @return [String]
def normalize_collection_name(source)
  if source.is_a?(Class)
    (source.respond_to?(:collection) ? source.collection : source.name).to_s
  else
    source.to_s
  end
end
|
|
250
|
+
|
|
251
|
+
# Check if a collection's live schema has references pointing to physical
# collection names that are no longer current. This can happen after
# blue/green deployments when the referenced collection was reindexed but
# this referencer's schema still points to the old physical name.
#
# A reference counts as stale when its collection part ends with the
# physical timestamp suffix (_YYYYMMDD_HHMMSS_###) and either
# - the alias for the derived logical name resolves to a different name, or
# - the referenced physical collection's schema cannot be retrieved.
# Lookup errors are swallowed: an unreadable referencer schema yields false,
# an unreadable referenced schema is treated as missing.
#
# @param collection_name [String] physical or logical collection name
# @param client [SearchEngine::Client]
# @return [Boolean]
def referencer_has_stale_references?(collection_name, client:)
  schema =
    begin
      client.retrieve_collection_schema(collection_name)
    rescue StandardError
      nil
    end
  return false unless schema

  # Anchored with \z (end of string) so the suffix check agrees with
  # normalize_physical_to_logical; `$` would also match before a newline.
  physical_suffix = /_\d{8}_\d{6}_\d{3}\z/

  Array(schema[:fields] || schema['fields']).any? do |field|
    ref = field[:reference] || field['reference']
    next false if ref.nil? || ref.to_s.strip.empty?

    ref_coll = ref.to_s.split('.', 2).first
    next false if ref_coll.empty?

    # Only references that look like physical names can be stale
    next false unless ref_coll.match?(physical_suffix)

    logical = ref_coll.sub(physical_suffix, '')
    alias_target =
      begin
        client.resolve_alias(logical)
      rescue StandardError
        nil
      end
    next true if alias_target && !alias_target.to_s.strip.empty? && alias_target.to_s != ref_coll

    # Alias is absent or still points here: stale only if the physical
    # collection itself cannot be found.
    ref_schema =
      begin
        client.retrieve_collection_schema(ref_coll)
      rescue StandardError
        nil
      end
    ref_schema.nil?
  end
end
|
|
298
|
+
|
|
299
|
+
# Decide whether a referencer's schema has to be rebuilt.
#
# A rebuild is required when the live schema carries stale references, when
# the schema diff lists stale references, when the diff marks the live
# collection as :missing, or when the diff shows any added, removed or
# changed fields or any collection option differences. Any error raised
# while evaluating this yields false.
#
# @param ref_klass [Class]
# @param collection_name [String]
# @param client [SearchEngine::Client]
# @return [Boolean]
def referencer_requires_schema_rebuild?(ref_klass, collection_name, client:)
  return true if referencer_has_stale_references?(collection_name, client: client)

  diff = SearchEngine::Schema.diff(ref_klass, client: client)[:diff] || {}
  return true if Array(diff[:stale_references]).any?

  collection_options = (diff[:collection_options] || {}).to_h
  return true if collection_options[:live] == :missing

  changed_fields = (diff[:changed_fields] || {}).to_h
  drift_signals = [
    Array(diff[:added_fields]).any?,
    Array(diff[:removed_fields]).any?,
    !changed_fields.empty?,
    !collection_options.empty?
  ]
  drift_signals.any?
rescue StandardError
  false
end
|
|
325
|
+
|
|
326
|
+
# Force a full reindex of the referencer to rebuild its schema with valid
|
|
327
|
+
# alias references. Suppresses cascade to avoid infinite recursion.
|
|
328
|
+
#
|
|
329
|
+
# @param ref_klass [Class]
|
|
330
|
+
# @param logical [String]
|
|
331
|
+
# @param physical [String, nil]
|
|
332
|
+
# @return [Boolean]
|
|
333
|
+
def reindex_referencer_with_fresh_schema!(ref_klass, logical, physical, client:, force_rebuild: false)
|
|
334
|
+
coll_display = physical && physical != logical ? "#{logical} (physical: #{physical})" : logical
|
|
335
|
+
action = force_rebuild ? 'force_rebuild index_collection' : 'index_collection'
|
|
336
|
+
puts(%( Referencer "#{coll_display}" — schema rebuild required, running #{action}))
|
|
337
|
+
|
|
338
|
+
SearchEngine::Instrumentation.with_context(bulk_suppress_cascade: true) do
|
|
339
|
+
ref_klass.index_collection(client: client, pre: :ensure, force_rebuild: force_rebuild)
|
|
340
|
+
end
|
|
341
|
+
true
|
|
342
|
+
rescue StandardError => error
|
|
343
|
+
puts(%( Referencer "#{logical}" — schema rebuild failed: #{error.message}))
|
|
344
|
+
false
|
|
345
|
+
end
|
|
346
|
+
|
|
347
|
+
def reindex_referencer_with_drop!(ref_klass, logical, physical)
|
|
348
|
+
coll_display = physical && physical != logical ? "#{logical} (physical: #{physical})" : logical
|
|
349
|
+
puts(%( Referencer "#{coll_display}" — force reindex (drop+index)))
|
|
350
|
+
|
|
351
|
+
SearchEngine::Instrumentation.with_context(bulk_suppress_cascade: true) do
|
|
352
|
+
ref_klass.reindex_collection!
|
|
353
|
+
end
|
|
354
|
+
true
|
|
355
|
+
rescue StandardError => error
|
|
356
|
+
puts(%( Referencer "#{logical}" — force reindex failed: #{error.message}))
|
|
357
|
+
false
|
|
358
|
+
end
|
|
359
|
+
|
|
360
|
+
# Queue one rebuild job per partition on +pool+.
#
# Each job re-enters the given instrumentation context, rebuilds its
# partition through SearchEngine::Indexer and prints the progress line while
# holding +mtx+, so lines from different jobs are written one at a time.
#
# @param pool [#post] executor receiving the jobs
# @param ctx [Object] instrumentation context handed to +with_context+
# @param parts [Array] partitions to rebuild
# @param ref_klass [Class]
# @param mtx [Mutex] guards stdout writes
# @return [Array] +parts+
def post_partitions_to_pool!(pool, ctx, parts, ref_klass, mtx)
  parts.each do |partition|
    pool.post do
      SearchEngine::Instrumentation.with_context(ctx) do
        result = SearchEngine::Indexer.rebuild_partition!(ref_klass, partition: partition, into: nil)
        mtx.synchronize do
          puts(SearchEngine::Logging::PartitionProgress.line(partition, result))
        end
      end
    end
  end
end
|
|
370
|
+
|
|
371
|
+
# Rebuild the given partitions one after another, printing a progress line
# after each rebuild.
#
# @param ref_klass [Class]
# @param parts [Array] partitions to rebuild
# @return [Boolean] true when at least one partition was rebuilt, false for an empty list
def rebuild_partitions_sequential!(ref_klass, parts)
  parts.reduce(false) do |_done, partition|
    result = SearchEngine::Indexer.rebuild_partition!(ref_klass, partition: partition, into: nil)
    puts(SearchEngine::Logging::PartitionProgress.line(partition, result))
    true
  end
end
|
|
380
|
+
|
|
381
|
+
# Build the reverse reference graph from live Typesense schemas.
#
# Every listed collection's schema is fetched; each field carrying a
# non-blank +reference+ adds an edge under the referenced collection name.
# The referrer is recorded under its logical name (physical timestamp suffix
# removed). Collections whose schema cannot be fetched are skipped, and any
# other failure makes the whole method return an empty Hash.
#
# @param client [SearchEngine::Client]
# @return [Hash{String=>Array<Hash>}] target => [{ referrer, local_key, foreign_key }]
def build_from_typesense(client)
  reverse = Hash.new { |hash, key| hash[key] = [] }

  # Use a slightly longer timeout for metadata requests to avoid noisy
  # timeouts: the configured value, but never less than 10 seconds.
  meta_timeout =
    begin
      [SearchEngine.config.timeout_ms.to_i, 10_000].max
    rescue StandardError
      10_000
    end

  listed = Array(client.list_collections(timeout_ms: meta_timeout))
  collection_names = listed.map { |entry| (entry[:name] || entry['name']).to_s }.reject(&:empty?)

  collection_names.each do |collection_name|
    schema =
      begin
        client.retrieve_collection_schema(collection_name, timeout_ms: meta_timeout)
      rescue StandardError
        nil
      end
    next unless schema

    Array(schema[:fields] || schema['fields']).each do |field|
      reference = field[:reference] || field['reference']
      next if reference.nil? || reference.to_s.strip.empty?

      target, foreign_key = parse_reference(reference)
      next if target.nil? || target.empty?

      referrer = normalize_physical_to_logical((schema[:name] || schema['name']).to_s)
      reverse[target] << {
        referrer: referrer,
        local_key: (field[:name] || field['name']).to_s,
        foreign_key: foreign_key
      }
    end
  end

  reverse
rescue StandardError
  {}
end
|
|
419
|
+
|
|
420
|
+
# Build the reverse reference graph from locally compiled model schemas.
#
# Each model's compiled fields are scanned for a non-blank +reference+; the
# referenced collection becomes the key and the model's collection name the
# referrer. A model whose schema fails to compile is skipped.
#
# @return [Hash{String=>Array<Hash>}] target => [{ referrer, local_key, foreign_key }]
def build_from_registry
  reverse = Hash.new { |hash, key| hash[key] = [] }

  # Use models_map instead of Registry.mapping to ensure all models are discovered,
  # including those that may not be explicitly registered yet
  SearchEngine::CollectionResolver.models_map.each do |collection, model|
    Array(SearchEngine::Schema.compile(model)[:fields]).each do |field|
      reference = field[:reference] || field['reference']
      next if reference.nil? || reference.to_s.strip.empty?

      target, foreign_key = parse_reference(reference)
      next if target.nil? || target.empty?

      edge = { referrer: collection.to_s, local_key: (field[:name] || field['name']).to_s, foreign_key: foreign_key }
      reverse[target] << edge
    end
  rescue StandardError
    # ignore individual compile errors for robustness
  end

  reverse
end
|
|
446
|
+
|
|
447
|
+
# Split a reference value of the form "collection.field" at the first dot.
#
# @param ref_value [#to_s]
# @return [Array(String, String|nil)] collection name (empty String when the
#   value is blank) and the field part, nil when there is no dot
def parse_reference(ref_value)
  collection, foreign_key = ref_value.to_s.split('.', 2)
  [collection.to_s, foreign_key]
end
|
|
454
|
+
|
|
455
|
+
# Strip the physical timestamp suffix from a collection name, turning
#   logical_YYYYMMDD_HHMMSS_###
# into +logical+. Names that do not end with that suffix (or consist of the
# suffix alone) are returned unchanged.
# @param name [String]
# @return [String]
def normalize_physical_to_logical(name)
  text = name.to_s
  text[/\A(.+)_\d{8}_\d{6}_\d{3}\z/, 1] || text
end
|
|
468
|
+
|
|
469
|
+
# Find pairs of collections that reference each other directly (A <-> B).
#
# @param graph [Hash{String=>Array<Hash>}] reverse graph (target => edges)
# @return [Array<Array(String, String)>] unique [target, referrer] pairs for
#   which the referrer is itself referenced by the target
def detect_immediate_cycles(graph)
  pairs = graph.flat_map do |target, edges|
    mutual = edges.select do |edge|
      referrer = edge[:referrer]
      # key? first: graph[referrer] on a missing key could insert into a
      # Hash with a default block while it is being iterated.
      graph.key?(referrer) && Array(graph[referrer]).any? { |back| back[:referrer] == target }
    end
    mutual.map { |edge| [target, edge[:referrer]] }
  end
  pairs.uniq
end
|
|
484
|
+
|
|
485
|
+
# Look up the model class for a logical collection name through the
# collection resolver. The resolver's result is returned untouched; callers
# in this file treat a falsy result as "no model available".
#
# @param name [String] logical collection name
# @return [Class, nil]
def safe_collection_class(name)
  resolver = SearchEngine::CollectionResolver
  resolver.model_for_logical(name)
end
|
|
488
|
+
|
|
489
|
+
# Tell whether a referencer can be rebuilt partially (by key values).
#
# Partial rebuilds are refused when a partitioner is resolved for the class,
# and otherwise require the class's mapper DSL (a Hash stored in
# +@__mapper_dsl__+) to declare an +active_record+ source.
#
# @param klass [Class]
# @return [Boolean, nil] truthy only for an active_record source; nil when
#   the DSL has no source entry
def can_partial_reindex?(klass)
  # Disallow partial when a custom Partitioner is used
  return false if SearchEngine::Partitioner.for(klass)

  # Require ActiveRecord source adapter for partition Hash filtering support
  mapper_dsl =
    begin
      klass.instance_variable_get(:@__mapper_dsl__) if klass.instance_variable_defined?(:@__mapper_dsl__)
    rescue StandardError
      nil
    end
  return false unless mapper_dsl.is_a?(Hash)

  source = mapper_dsl[:source]
  source && source[:type].to_s == 'active_record'
end
|
|
504
|
+
|
|
505
|
+
# Make sure the source collection's live schema contains every field that
# referencers point at (their +foreign_key+), reindexing the source when
# some are missing.
#
# Nothing happens when there is no source class, when no referencer declares
# a foreign key, or when the live schema cannot be retrieved. The reindex
# runs through +index_collection(pre: :ensure)+ inside an instrumentation
# context that sets +bulk_suppress_cascade+, and uses the same client that
# was used to inspect the live schema.
#
# @param source_klass [Class, nil]
# @param logical_name [String] logical name of the source collection
# @param referencers [Array<Hash>] reverse-graph edges pointing at the source
# @param client [SearchEngine::Client]
# @param alias_cache [Hash, nil] per-run cache handed to the alias lookup
# @return [void]
def ensure_source_reference_fields!(source_klass, logical_name, referencers, client:, alias_cache:)
  return unless source_klass

  required_fields = referencers.map { |edge| edge[:foreign_key].to_s }.reject(&:empty?).uniq
  return if required_fields.empty?

  physical =
    resolve_physical_collection_name(logical_name, client: client, cache: alias_cache) || logical_name.to_s
  schema =
    begin
      client.retrieve_collection_schema(physical)
    rescue StandardError
      nil
    end
  return unless schema

  live_fields = Array(schema[:fields] || schema['fields']).map { |field| (field[:name] || field['name']).to_s }
  return if (required_fields - live_fields).empty?

  SearchEngine::Instrumentation.with_context(bulk_suppress_cascade: true) do
    # Reindex against the client the schema was read from, so a caller-supplied
    # client is honored for the write path as well.
    source_klass.index_collection(client: client, pre: :ensure)
  end
end
|
|
529
|
+
end
|
|
530
|
+
end
|
|
531
|
+
end
|