search-engine-for-typesense 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +148 -0
  4. data/app/search_engine/search_engine/app_info.rb +11 -0
  5. data/app/search_engine/search_engine/index_partition_job.rb +170 -0
  6. data/lib/generators/search_engine/install/install_generator.rb +20 -0
  7. data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
  8. data/lib/generators/search_engine/model/model_generator.rb +86 -0
  9. data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
  10. data/lib/search-engine-for-typesense.rb +12 -0
  11. data/lib/search_engine/active_record_syncable.rb +247 -0
  12. data/lib/search_engine/admin/stopwords.rb +125 -0
  13. data/lib/search_engine/admin/synonyms.rb +125 -0
  14. data/lib/search_engine/admin.rb +12 -0
  15. data/lib/search_engine/ast/and.rb +52 -0
  16. data/lib/search_engine/ast/binary_op.rb +75 -0
  17. data/lib/search_engine/ast/eq.rb +19 -0
  18. data/lib/search_engine/ast/group.rb +18 -0
  19. data/lib/search_engine/ast/gt.rb +12 -0
  20. data/lib/search_engine/ast/gte.rb +12 -0
  21. data/lib/search_engine/ast/in.rb +28 -0
  22. data/lib/search_engine/ast/lt.rb +12 -0
  23. data/lib/search_engine/ast/lte.rb +12 -0
  24. data/lib/search_engine/ast/matches.rb +55 -0
  25. data/lib/search_engine/ast/node.rb +176 -0
  26. data/lib/search_engine/ast/not_eq.rb +13 -0
  27. data/lib/search_engine/ast/not_in.rb +24 -0
  28. data/lib/search_engine/ast/or.rb +52 -0
  29. data/lib/search_engine/ast/prefix.rb +51 -0
  30. data/lib/search_engine/ast/raw.rb +41 -0
  31. data/lib/search_engine/ast/unary_op.rb +43 -0
  32. data/lib/search_engine/ast.rb +101 -0
  33. data/lib/search_engine/base/creation.rb +727 -0
  34. data/lib/search_engine/base/deletion.rb +80 -0
  35. data/lib/search_engine/base/display_coercions.rb +36 -0
  36. data/lib/search_engine/base/hydration.rb +312 -0
  37. data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
  38. data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
  39. data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
  40. data/lib/search_engine/base/index_maintenance.rb +459 -0
  41. data/lib/search_engine/base/indexing_dsl.rb +255 -0
  42. data/lib/search_engine/base/joins.rb +479 -0
  43. data/lib/search_engine/base/model_dsl.rb +472 -0
  44. data/lib/search_engine/base/presets.rb +43 -0
  45. data/lib/search_engine/base/pretty_printer.rb +315 -0
  46. data/lib/search_engine/base/relation_delegation.rb +42 -0
  47. data/lib/search_engine/base/scopes.rb +113 -0
  48. data/lib/search_engine/base/updating.rb +92 -0
  49. data/lib/search_engine/base.rb +38 -0
  50. data/lib/search_engine/bulk.rb +284 -0
  51. data/lib/search_engine/cache.rb +33 -0
  52. data/lib/search_engine/cascade.rb +531 -0
  53. data/lib/search_engine/cli/doctor.rb +631 -0
  54. data/lib/search_engine/cli/support.rb +217 -0
  55. data/lib/search_engine/cli.rb +222 -0
  56. data/lib/search_engine/client/http_adapter.rb +63 -0
  57. data/lib/search_engine/client/request_builder.rb +92 -0
  58. data/lib/search_engine/client/services/base.rb +74 -0
  59. data/lib/search_engine/client/services/collections.rb +161 -0
  60. data/lib/search_engine/client/services/documents.rb +214 -0
  61. data/lib/search_engine/client/services/operations.rb +152 -0
  62. data/lib/search_engine/client/services/search.rb +190 -0
  63. data/lib/search_engine/client/services.rb +29 -0
  64. data/lib/search_engine/client.rb +765 -0
  65. data/lib/search_engine/client_options.rb +20 -0
  66. data/lib/search_engine/collection_resolver.rb +191 -0
  67. data/lib/search_engine/collections_graph.rb +330 -0
  68. data/lib/search_engine/compiled_params.rb +143 -0
  69. data/lib/search_engine/compiler.rb +383 -0
  70. data/lib/search_engine/config/observability.rb +27 -0
  71. data/lib/search_engine/config/presets.rb +92 -0
  72. data/lib/search_engine/config/selection.rb +16 -0
  73. data/lib/search_engine/config/typesense.rb +48 -0
  74. data/lib/search_engine/config/validators.rb +97 -0
  75. data/lib/search_engine/config.rb +917 -0
  76. data/lib/search_engine/console_helpers.rb +130 -0
  77. data/lib/search_engine/deletion.rb +103 -0
  78. data/lib/search_engine/dispatcher.rb +125 -0
  79. data/lib/search_engine/dsl/parser.rb +582 -0
  80. data/lib/search_engine/engine.rb +167 -0
  81. data/lib/search_engine/errors.rb +290 -0
  82. data/lib/search_engine/filters/sanitizer.rb +189 -0
  83. data/lib/search_engine/hydration/materializers.rb +808 -0
  84. data/lib/search_engine/hydration/selection_context.rb +96 -0
  85. data/lib/search_engine/indexer/batch_planner.rb +76 -0
  86. data/lib/search_engine/indexer/bulk_import.rb +626 -0
  87. data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
  88. data/lib/search_engine/indexer/retry_policy.rb +103 -0
  89. data/lib/search_engine/indexer.rb +747 -0
  90. data/lib/search_engine/instrumentation.rb +308 -0
  91. data/lib/search_engine/joins/guard.rb +202 -0
  92. data/lib/search_engine/joins/resolver.rb +95 -0
  93. data/lib/search_engine/logging/color.rb +78 -0
  94. data/lib/search_engine/logging/format_helpers.rb +92 -0
  95. data/lib/search_engine/logging/partition_progress.rb +53 -0
  96. data/lib/search_engine/logging_subscriber.rb +388 -0
  97. data/lib/search_engine/mapper.rb +785 -0
  98. data/lib/search_engine/multi.rb +286 -0
  99. data/lib/search_engine/multi_result.rb +186 -0
  100. data/lib/search_engine/notifications/compact_logger.rb +675 -0
  101. data/lib/search_engine/observability.rb +162 -0
  102. data/lib/search_engine/operations.rb +58 -0
  103. data/lib/search_engine/otel.rb +227 -0
  104. data/lib/search_engine/partitioner.rb +128 -0
  105. data/lib/search_engine/ranking_plan.rb +118 -0
  106. data/lib/search_engine/registry.rb +158 -0
  107. data/lib/search_engine/relation/compiler.rb +711 -0
  108. data/lib/search_engine/relation/deletion.rb +37 -0
  109. data/lib/search_engine/relation/dsl/filters.rb +624 -0
  110. data/lib/search_engine/relation/dsl/selection.rb +240 -0
  111. data/lib/search_engine/relation/dsl.rb +903 -0
  112. data/lib/search_engine/relation/dx/dry_run.rb +59 -0
  113. data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
  114. data/lib/search_engine/relation/dx.rb +231 -0
  115. data/lib/search_engine/relation/materializers.rb +118 -0
  116. data/lib/search_engine/relation/options.rb +138 -0
  117. data/lib/search_engine/relation/state.rb +274 -0
  118. data/lib/search_engine/relation/updating.rb +44 -0
  119. data/lib/search_engine/relation.rb +623 -0
  120. data/lib/search_engine/result.rb +664 -0
  121. data/lib/search_engine/schema.rb +1083 -0
  122. data/lib/search_engine/sources/active_record_source.rb +185 -0
  123. data/lib/search_engine/sources/base.rb +62 -0
  124. data/lib/search_engine/sources/lambda_source.rb +55 -0
  125. data/lib/search_engine/sources/sql_source.rb +196 -0
  126. data/lib/search_engine/sources.rb +71 -0
  127. data/lib/search_engine/stale_rules.rb +160 -0
  128. data/lib/search_engine/test/minitest_assertions.rb +57 -0
  129. data/lib/search_engine/test/offline_client.rb +134 -0
  130. data/lib/search_engine/test/rspec_matchers.rb +77 -0
  131. data/lib/search_engine/test/stub_client.rb +201 -0
  132. data/lib/search_engine/test.rb +66 -0
  133. data/lib/search_engine/test_autoload.rb +8 -0
  134. data/lib/search_engine/update.rb +35 -0
  135. data/lib/search_engine/version.rb +7 -0
  136. data/lib/search_engine.rb +332 -0
  137. data/lib/tasks/search_engine.rake +501 -0
  138. data/lib/tasks/search_engine_doctor.rake +16 -0
  139. metadata +225 -0
@@ -0,0 +1,531 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SearchEngine
4
+ # Cascade reindexing for collections that reference other collections via
5
+ # Typesense field-level references.
6
+ #
7
+ # Public API:
8
+ # - {.cascade_reindex!(source:, ids:, context: :update, client: nil)} => Hash summary
9
+ # - source: Class (SearchEngine::Base subclass) or String collection name
10
+ # - ids: Array<String, Integer>, the target key values to match in referencers
11
+ # - context: :update or :full (controls partial vs full behavior)
12
+ module Cascade
13
+ class << self
14
# Trigger a cascade reindex on the collections that reference +source+.
#
# Every referencing edge found in the reverse graph is handled in one of
# these ways:
# - skipped when source and referrer form an immediate cycle;
# - skipped when no model class is resolved for the referrer;
# - partial: +Indexer.rebuild_partition!+ is called with
#   +{ local_key => ids }+ when +context+ is :update and the referrer
#   supports it; a failure of that call falls back to a full reindex and
#   adds an extra +:partial_failed+ diagnostic outcome;
# - full: +__se_full_reindex_for_referrer+ runs, at most once per referrer
#   collection per call.
#
# @param source [Class, String] referenced model class or collection name
# @param ids [Array<#to_s>, nil] key values forwarded to the partial rebuild
# @param context [Symbol, String] :update or :full
# @param client [SearchEngine::Client, nil] defaults to +SearchEngine.client+
# @return [Hash] summary with :source_collection, :ids_count, :context,
#   :targets_total, :partial_count, :full_count, :skipped_unregistered,
#   :skipped_cycles and :outcomes
# @raise [ArgumentError] when +context+ is not :update or :full (including
#   values that cannot be converted to a Symbol, such as nil)
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockNesting
def cascade_reindex!(source:, ids:, context: :update, client: nil)
  # Values without #to_sym (nil, Integer, ...) must fail with the documented
  # ArgumentError rather than a NoMethodError.
  run_context = context.respond_to?(:to_sym) ? context.to_sym : nil
  raise ArgumentError, 'context must be :update or :full' unless %i[update full].include?(run_context)

  src_collection = normalize_collection_name(source)
  source_klass = source.is_a?(Class) ? source : safe_collection_class(src_collection)
  ts_client = client || SearchEngine.client

  graph = build_reverse_graph(client: ts_client)
  referencers = Array(graph[src_collection])

  # Detect immediate cycles (A <-> B) and skip those pairs
  cycle_pairs = detect_immediate_cycles(graph)

  # Per-run cache for alias lookups to avoid repeated network calls
  alias_cache = {}

  ensure_source_reference_fields!(
    source_klass,
    src_collection,
    referencers,
    client: ts_client,
    alias_cache: alias_cache
  )

  outcomes = []
  partial_count = 0
  full_count = 0
  skipped_unregistered = 0
  skipped_cycles = []
  seen_full = {}

  # Runs the full reindex for a referrer unless it already ran during this
  # call, keeps the counters in sync and returns the resulting mode.
  run_full = lambda do |ref_klass, referrer_coll|
    next :skipped_duplicate if seen_full[referrer_coll]

    executed = __se_full_reindex_for_referrer(ref_klass, client: ts_client, alias_cache: alias_cache)
    next :skipped_no_partitions unless executed

    seen_full[referrer_coll] = true
    full_count += 1
    :full
  end

  referencers.each do |edge|
    referrer_coll = edge[:referrer]
    local_key = edge[:local_key]

    # Skip cycle pairs deterministically (avoid ping-pong)
    if cycle_pairs.include?([src_collection, referrer_coll])
      skipped_cycles << { pair: [src_collection, referrer_coll] }
      outcomes << { collection: referrer_coll, mode: :skipped_cycle }
      next
    end

    ref_klass = safe_collection_class(referrer_coll)
    unless ref_klass
      skipped_unregistered += 1
      outcomes << { collection: referrer_coll, mode: :skipped_unregistered }
      next
    end

    mode =
      if run_context == :update && can_partial_reindex?(ref_klass)
        begin
          SearchEngine::Indexer.rebuild_partition!(
            ref_klass,
            partition: { local_key.to_sym => Array(ids) },
            into: nil
          )
          partial_count += 1
          :partial
        rescue StandardError => error
          # Fallback to full when the partial path fails unexpectedly
          fallback_mode = run_full.call(ref_klass, referrer_coll)
          # Record a diagnostic outcome (in addition to the final one below)
          outcomes << { collection: referrer_coll, mode: :partial_failed, error_class: error.class.name,
                        message: error.message.to_s[0, 200] }
          fallback_mode
        end
      else
        run_full.call(ref_klass, referrer_coll)
      end

    outcomes << { collection: referrer_coll, mode: mode }
  end

  payload = {
    source_collection: src_collection,
    ids_count: Array(ids).size,
    context: run_context,
    targets_total: referencers.size,
    partial_count: partial_count,
    full_count: full_count,
    skipped_unregistered: skipped_unregistered,
    skipped_cycles: skipped_cycles
  }
  SearchEngine::Instrumentation.instrument('search_engine.cascade.run', payload.merge(outcomes: outcomes)) {}

  payload.merge(outcomes: outcomes)
end
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockNesting
128
+
129
# Build the reverse reference graph (target collection => referencing edges).
#
# The graph derived from live Typesense schemas wins whenever it contains at
# least one entry; otherwise the graph compiled from local models is used.
#
# @param client [SearchEngine::Client]
# @return [Hash{String=>Array<Hash>}] mapping target_collection => [{ referrer, local_key, foreign_key }]
def build_reverse_graph(client:)
  live_graph = build_from_typesense(client)
  live_graph.empty? ? build_from_registry : live_graph
end
140
+
141
+ private
142
+
143
# Run a full reindex for a referencer collection.
#
# Strategies are tried in order:
# 1. forced schema rebuild via +reindex_referencer_with_fresh_schema!+;
# 2. destructive drop + index via +reindex_referencer_with_drop!+;
# 3. partition rebuilds through +SearchEngine::Indexer.rebuild_partition!+:
#    a single non-partitioned rebuild when no partitioner is available, a
#    thread pool when +max_parallel+ and the partition count both exceed 1,
#    otherwise one partition after another.
#
# @param ref_klass [Class] referencer model class
# @param client [SearchEngine::Client]
# @param alias_cache [Hash] per-run cache used for the alias lookup
# @return [Boolean] true when a reindex was executed, false when the
#   partitioner yielded no usable partitions
# rubocop:disable Metrics/PerceivedComplexity
def __se_full_reindex_for_referrer(ref_klass, client:, alias_cache:)
  logical = ref_klass.respond_to?(:collection) ? ref_klass.collection.to_s : ref_klass.name.to_s
  physical = resolve_physical_collection_name(logical, client: client, cache: alias_cache)

  # For cascade full reindex, force a schema rebuild (blue/green) to
  # refresh reference targets before importing documents.
  rebuilt = reindex_referencer_with_fresh_schema!(
    ref_klass,
    logical,
    physical,
    client: client,
    force_rebuild: true
  )
  return true if rebuilt

  # Fallback: force full destructive reindex when forced rebuild fails.
  return true if reindex_referencer_with_drop!(ref_klass, logical, physical)

  label = physical && physical != logical ? "#{logical} (physical: #{physical})" : logical

  partitioner = begin
    SearchEngine::Partitioner.for(ref_klass)
  rescue StandardError
    nil
  end

  unless partitioner
    puts(%( Referencer "#{label}" — single))
    SearchEngine::Indexer.rebuild_partition!(ref_klass, partition: nil, into: nil)
    return true
  end

  parts = begin
    Array(partitioner.partitions)
  rescue StandardError
    []
  end
  parts = parts.reject { |part| part.nil? || part.to_s.strip.empty? }

  if parts.empty?
    puts(%( Referencer "#{label}" — partitions=0 → skip))
    return false
  end

  puts(%( Referencer "#{label}" — partitions=#{parts.size} parallel=#{partitioner.max_parallel}))
  workers = partitioner.max_parallel.to_i
  return rebuild_partitions_sequential!(ref_klass, parts) unless workers > 1 && parts.size > 1

  require 'concurrent-ruby'
  pool = Concurrent::FixedThreadPool.new(workers)
  ctx = SearchEngine::Instrumentation.context
  mtx = Mutex.new
  begin
    post_partitions_to_pool!(pool, ctx, parts, ref_klass, mtx)
  ensure
    pool.shutdown
    # Wait up to 1 hour, then force-kill and wait a bit more to ensure cleanup
    pool.wait_for_termination(3600) || pool.kill
    pool.wait_for_termination(60)
  end
  true
end
# rubocop:enable Metrics/PerceivedComplexity
221
+
222
# Look up the physical collection behind a logical alias.
#
# Lookup errors and blank answers both resolve to nil. When +cache+ is
# given, the answer (nil included) is stored under the stringified name
# and returned on later calls without asking the client again.
#
# @param logical [#to_s] logical (alias) name
# @param client [SearchEngine::Client] must respond to +resolve_alias+
# @param cache [Hash, nil] optional per-run memo
# @return [String, nil]
def resolve_physical_collection_name(logical, client:, cache: nil)
  alias_name = logical.to_s
  return cache[alias_name] if cache&.key?(alias_name)

  resolved =
    begin
      target = client.resolve_alias(alias_name)
      text = target ? target.to_s : ''
      text.strip.empty? ? nil : text
    rescue StandardError
      nil
    end

  cache[alias_name] = resolved if cache
  resolved
end
240
+
241
# Turn a cascade source into a collection name String.
#
# Classes answer with +collection+ when they respond to it and with their
# class name otherwise; any other value is stringified as-is.
#
# @param source [Class, #to_s]
# @return [String]
def normalize_collection_name(source)
  if !source.is_a?(Class)
    source.to_s
  elsif source.respond_to?(:collection)
    source.collection.to_s
  else
    source.name.to_s
  end
end
250
+
251
# Check whether a collection's live schema holds references that point to a
# timestamp-suffixed (physical) collection name which is no longer current.
#
# A reference counts as stale when its collection part ends with
# +_YYYYMMDD_HHMMSS_###+ and either
# - the alias of the logical base name resolves to a different collection, or
# - the referenced physical collection schema cannot be retrieved.
#
# Retrieval and alias lookup errors are treated as "not found".
#
# @param collection_name [String] physical or logical collection name
# @param client [SearchEngine::Client]
# @return [Boolean]
def referencer_has_stale_references?(collection_name, client:)
  schema = begin
    client.retrieve_collection_schema(collection_name)
  rescue StandardError
    nil
  end
  return false unless schema

  # Anchored with \z (end of string) rather than $ (end of line) so only a
  # name that truly ends with the timestamp suffix is treated as physical;
  # this matches the anchoring used by normalize_physical_to_logical.
  physical_suffix = /_\d{8}_\d{6}_\d{3}\z/

  Array(schema[:fields] || schema['fields']).any? do |field|
    ref = field[:reference] || field['reference']
    next false if ref.nil? || ref.to_s.strip.empty?

    ref_coll = ref.to_s.split('.', 2).first.to_s
    # Only physical-looking names can be stale; logical names are skipped.
    next false unless ref_coll.match?(physical_suffix)

    logical = ref_coll.sub(physical_suffix, '')
    alias_target = begin
      client.resolve_alias(logical)
    rescue StandardError
      nil
    end

    if alias_target && !alias_target.to_s.strip.empty? && alias_target.to_s != ref_coll
      true
    else
      # Verify the referenced physical collection doesn't exist
      ref_schema = begin
        client.retrieve_collection_schema(ref_coll)
      rescue StandardError
        nil
      end
      ref_schema.nil?
    end
  end
end
298
+
299
# Decide whether a referencer's schema has to be rebuilt.
#
# True when the live schema holds stale references, when the schema diff
# lists stale references, reports the live collection as missing, or shows
# any added/removed/changed fields or collection option differences.
# Any error raised along the way is answered with false.
#
# @param ref_klass [Class]
# @param collection_name [String]
# @param client [SearchEngine::Client]
# @return [Boolean]
def referencer_requires_schema_rebuild?(ref_klass, collection_name, client:)
  return true if referencer_has_stale_references?(collection_name, client: client)

  diff = SearchEngine::Schema.diff(ref_klass, client: client)[:diff] || {}
  return true if Array(diff[:stale_references]).any?

  options = (diff[:collection_options] || {}).to_h
  return true if options[:live] == :missing

  field_lists = [Array(diff[:added_fields]), Array(diff[:removed_fields])]
  changed = (diff[:changed_fields] || {}).to_h

  field_lists.any?(&:any?) || !(changed.empty? && options.empty?)
rescue StandardError
  false
end
325
+
326
# Reindex a referencer through +index_collection+ so its schema is rebuilt.
# The call runs with +bulk_suppress_cascade: true+ in the instrumentation
# context. Progress and failures are printed to stdout; errors never
# propagate to the caller.
#
# @param ref_klass [Class] must respond to +index_collection+
# @param logical [String] logical collection name (used in messages)
# @param physical [String, nil] physical name, shown when it differs from +logical+
# @param client [SearchEngine::Client] forwarded to +index_collection+
# @param force_rebuild [Boolean] forwarded to +index_collection+
# @return [Boolean] true on success, false when an error was raised
def reindex_referencer_with_fresh_schema!(ref_klass, logical, physical, client:, force_rebuild: false)
  label = logical
  label = "#{logical} (physical: #{physical})" if physical && physical != logical
  action = 'index_collection'
  action = 'force_rebuild index_collection' if force_rebuild
  puts(%( Referencer "#{label}" — schema rebuild required, running #{action}))

  SearchEngine::Instrumentation.with_context(bulk_suppress_cascade: true) do
    ref_klass.index_collection(client: client, pre: :ensure, force_rebuild: force_rebuild)
  end
  true
rescue StandardError => error
  puts(%( Referencer "#{logical}" — schema rebuild failed: #{error.message}))
  false
end
346
+
347
# Reindex a referencer through +reindex_collection!+ (announced as
# "drop+index"). The call runs with +bulk_suppress_cascade: true+ in the
# instrumentation context. Progress and failures are printed to stdout;
# errors never propagate to the caller.
#
# @param ref_klass [Class] must respond to +reindex_collection!+
# @param logical [String] logical collection name (used in messages)
# @param physical [String, nil] physical name, shown when it differs from +logical+
# @return [Boolean] true on success, false when an error was raised
def reindex_referencer_with_drop!(ref_klass, logical, physical)
  label = logical
  label = "#{logical} (physical: #{physical})" if physical && physical != logical
  puts(%( Referencer "#{label}" — force reindex (drop+index)))

  SearchEngine::Instrumentation.with_context(bulk_suppress_cascade: true) { ref_klass.reindex_collection! }
  true
rescue StandardError => error
  puts(%( Referencer "#{logical}" — force reindex failed: #{error.message}))
  false
end
359
+
360
# Post one rebuild task per partition to +pool+.
#
# Each task runs inside the given instrumentation context, rebuilds its
# partition and prints a progress line while holding +mtx+ so output from
# concurrent tasks does not interleave.
#
# A task that raises is rescued here and reported on stdout. Without this,
# an exception raised inside a posted block would be dropped by the
# executor and the failed partition would go unnoticed. The remaining
# partitions keep running.
#
# @param pool [#post] executor receiving the tasks
# @param ctx [Object] instrumentation context passed to +with_context+
# @param parts [Array] partition keys
# @param ref_klass [Class] referencer model class
# @param mtx [Mutex] guards console output
# @return [void]
def post_partitions_to_pool!(pool, ctx, parts, ref_klass, mtx)
  parts.each do |part|
    pool.post do
      SearchEngine::Instrumentation.with_context(ctx) do
        summary = SearchEngine::Indexer.rebuild_partition!(ref_klass, partition: part, into: nil)
        mtx.synchronize { puts(SearchEngine::Logging::PartitionProgress.line(part, summary)) }
      end
    rescue StandardError => error
      mtx.synchronize do
        puts(%( partition=#{part.inspect} — rebuild failed: #{error.class}: #{error.message}))
      end
    end
  end
end
370
+
371
# Rebuild the given partitions one after another, printing a progress line
# for each of them.
#
# @param ref_klass [Class] referencer model class
# @param parts [Array] partition keys
# @return [Boolean] true when at least one partition was rebuilt
def rebuild_partitions_sequential!(ref_klass, parts)
  parts.inject(false) do |_ran, part|
    summary = SearchEngine::Indexer.rebuild_partition!(ref_klass, partition: part, into: nil)
    puts(SearchEngine::Logging::PartitionProgress.line(part, summary))
    true
  end
end
380
+
381
# Build the reverse graph from the schemas Typesense currently serves.
#
# Every collection returned by +list_collections+ is inspected; each field
# carrying a +reference+ contributes one edge under the referenced
# collection name. Collections whose schema cannot be retrieved are
# skipped, and any other error yields an empty graph.
#
# @param client [SearchEngine::Client]
# @return [Hash{String=>Array<Hash>}] target => [{ referrer, local_key, foreign_key }]
def build_from_typesense(client)
  graph = Hash.new { |hash, key| hash[key] = [] }

  # Metadata requests get at least 10 seconds to avoid noisy timeouts
  meta_timeout = begin
    [SearchEngine.config.timeout_ms.to_i, 10_000].max
  rescue StandardError
    10_000
  end

  listed = Array(client.list_collections(timeout_ms: meta_timeout))
  names = listed.map { |entry| (entry[:name] || entry['name']).to_s }.reject(&:empty?)

  names.each do |name|
    schema = begin
      client.retrieve_collection_schema(name, timeout_ms: meta_timeout)
    rescue StandardError
      nil
    end
    next unless schema

    # Referrers are recorded under their logical name (timestamp suffix removed).
    referrer = normalize_physical_to_logical((schema[:name] || schema['name']).to_s)

    Array(schema[:fields] || schema['fields']).each do |field|
      ref = field[:reference] || field['reference']
      next if ref.nil? || ref.to_s.strip.empty?

      target, foreign_key = parse_reference(ref)
      next if target.nil? || target.empty?

      # NOTE(review): the target name is stored exactly as it appears in the
      # reference, while the referrer is normalized to its logical name —
      # confirm live references never carry a physical collection name.
      graph[target] << { referrer: referrer, local_key: (field[:name] || field['name']).to_s,
                         foreign_key: foreign_key }
    end
  end

  graph
rescue StandardError
  {}
end
419
+
420
# Build the reverse graph from locally compiled model schemas.
#
# Models come from +SearchEngine::CollectionResolver.models_map+ so that
# models not explicitly registered yet are still discovered. A model whose
# processing raises is skipped; edges already collected for it are kept.
#
# @return [Hash{String=>Array<Hash>}] target => [{ referrer, local_key, foreign_key }]
def build_from_registry
  graph = Hash.new { |hash, key| hash[key] = [] }

  SearchEngine::CollectionResolver.models_map.each do |coll_name, klass|
    begin
      Array(SearchEngine::Schema.compile(klass)[:fields]).each do |field|
        ref = field[:reference] || field['reference']
        next if ref.nil? || ref.to_s.strip.empty?

        target, foreign_key = parse_reference(ref)
        next if target.nil? || target.empty?

        graph[target] << {
          referrer: coll_name.to_s,
          local_key: (field[:name] || field['name']).to_s,
          foreign_key: foreign_key
        }
      end
    rescue StandardError
      # ignore individual compile errors for robustness
      next
    end
  end

  graph
end
446
+
447
# Split a reference value of the form "collection.field" at its first dot.
#
# @param ref_value [#to_s]
# @return [Array(String, String)] collection name and foreign key
# @return [Array(String, nil)] when the value contains no dot
def parse_reference(ref_value)
  collection, foreign_key = ref_value.to_s.split('.', 2)
  [collection.to_s, foreign_key]
end
454
+
455
# Strip the +_YYYYMMDD_HHMMSS_###+ suffix from a physical collection name
# to recover its logical base name. Names that do not end with such a
# suffix (or that consist of nothing but the suffix) are returned unchanged.
#
# @param name [#to_s]
# @return [String]
def normalize_physical_to_logical(name)
  physical = name.to_s
  logical = physical[/\A(.+)_\d{8}_\d{6}_\d{3}\z/, 1]
  logical.nil? || logical.empty? ? physical : logical
end
468
+
469
# Find pairs of collections that reference each other directly (A <-> B).
#
# The graph is only read for keys that already exist, so a Hash with a
# default block is never mutated while it is being iterated.
#
# @param graph [Hash{String=>Array<Hash>}] target => edges with :referrer
# @return [Array<Array(String, String)>] unique [target, referrer] pairs
def detect_immediate_cycles(graph)
  cycles = graph.flat_map do |target, edges|
    edges
      .map { |edge| edge[:referrer] }
      .select { |other| graph.key?(other) && Array(graph[other]).any? { |back| back[:referrer] == target } }
      .map { |other| [target, other] }
  end
  cycles.uniq
end
484
+
485
# Look up the model class for a logical collection name by delegating to
# +SearchEngine::CollectionResolver.model_for_logical+.
#
# @param name [String] logical collection name
# @return [Object] whatever the resolver returns (callers treat a falsy
#   result as "no model registered")
def safe_collection_class(name)
  resolver = SearchEngine::CollectionResolver
  resolver.model_for_logical(name)
end
488
+
489
# Whether +klass+ can be reindexed partially (by a Hash partition filter).
#
# Partial reindexing is refused when a partitioner is configured for the
# class, and otherwise requires the mapper DSL stored on the class to
# declare an +active_record+ source.
#
# A failing partitioner lookup is treated as "no partitioner", the same way
# +__se_full_reindex_for_referrer+ handles it, so one misbehaving lookup
# cannot abort the whole cascade run.
#
# @param klass [Class]
# @return [Boolean]
def can_partial_reindex?(klass)
  partitioner = begin
    SearchEngine::Partitioner.for(klass)
  rescue StandardError
    nil
  end
  # Disallow partial when a custom Partitioner is used
  return false if partitioner

  # Require ActiveRecord source adapter for partition Hash filtering support
  dsl = begin
    klass.instance_variable_defined?(:@__mapper_dsl__) ? klass.instance_variable_get(:@__mapper_dsl__) : nil
  rescue StandardError
    nil
  end
  return false unless dsl.is_a?(Hash)

  source = dsl[:source]
  # Always answer with a strict Boolean, even when no source is declared.
  source ? source[:type].to_s == 'active_record' : false
end
504
+
505
# Make sure the source collection's live schema contains every field that
# referencers point at (their +:foreign_key+ values).
#
# When at least one such field is missing from the live schema, the source
# is re-indexed through +index_collection(pre: :ensure)+ with
# +bulk_suppress_cascade: true+ in the instrumentation context. Nothing
# happens when there is no source class, no foreign keys, or the live
# schema cannot be retrieved.
#
# The injected +client+ is forwarded to +index_collection+ so the reindex
# talks to the same client that was used for the schema lookups.
#
# @param source_klass [Class, nil] model class of the referenced collection
# @param logical_name [String] logical name of the referenced collection
# @param referencers [Array<Hash>] edges with a :foreign_key entry
# @param client [SearchEngine::Client]
# @param alias_cache [Hash] per-run cache used for the alias lookup
# @return [void]
def ensure_source_reference_fields!(source_klass, logical_name, referencers, client:, alias_cache:)
  return unless source_klass

  required_fields = referencers.map { |edge| edge[:foreign_key].to_s }.reject(&:empty?).uniq
  return if required_fields.empty?

  # Fall back to the logical name when no alias target is known.
  physical =
    resolve_physical_collection_name(logical_name, client: client, cache: alias_cache) || logical_name.to_s
  schema = begin
    client.retrieve_collection_schema(physical)
  rescue StandardError
    nil
  end
  return unless schema

  live_fields =
    Array(schema[:fields] || schema['fields']).map { |f| (f[:name] || f['name']).to_s }
  missing = required_fields - live_fields
  return if missing.empty?

  SearchEngine::Instrumentation.with_context(bulk_suppress_cascade: true) do
    source_klass.index_collection(client: client, pre: :ensure)
  end
end
529
+ end
530
+ end
531
+ end