search-engine-for-typesense 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +148 -0
  4. data/app/search_engine/search_engine/app_info.rb +11 -0
  5. data/app/search_engine/search_engine/index_partition_job.rb +170 -0
  6. data/lib/generators/search_engine/install/install_generator.rb +20 -0
  7. data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
  8. data/lib/generators/search_engine/model/model_generator.rb +86 -0
  9. data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
  10. data/lib/search-engine-for-typesense.rb +12 -0
  11. data/lib/search_engine/active_record_syncable.rb +247 -0
  12. data/lib/search_engine/admin/stopwords.rb +125 -0
  13. data/lib/search_engine/admin/synonyms.rb +125 -0
  14. data/lib/search_engine/admin.rb +12 -0
  15. data/lib/search_engine/ast/and.rb +52 -0
  16. data/lib/search_engine/ast/binary_op.rb +75 -0
  17. data/lib/search_engine/ast/eq.rb +19 -0
  18. data/lib/search_engine/ast/group.rb +18 -0
  19. data/lib/search_engine/ast/gt.rb +12 -0
  20. data/lib/search_engine/ast/gte.rb +12 -0
  21. data/lib/search_engine/ast/in.rb +28 -0
  22. data/lib/search_engine/ast/lt.rb +12 -0
  23. data/lib/search_engine/ast/lte.rb +12 -0
  24. data/lib/search_engine/ast/matches.rb +55 -0
  25. data/lib/search_engine/ast/node.rb +176 -0
  26. data/lib/search_engine/ast/not_eq.rb +13 -0
  27. data/lib/search_engine/ast/not_in.rb +24 -0
  28. data/lib/search_engine/ast/or.rb +52 -0
  29. data/lib/search_engine/ast/prefix.rb +51 -0
  30. data/lib/search_engine/ast/raw.rb +41 -0
  31. data/lib/search_engine/ast/unary_op.rb +43 -0
  32. data/lib/search_engine/ast.rb +101 -0
  33. data/lib/search_engine/base/creation.rb +727 -0
  34. data/lib/search_engine/base/deletion.rb +80 -0
  35. data/lib/search_engine/base/display_coercions.rb +36 -0
  36. data/lib/search_engine/base/hydration.rb +312 -0
  37. data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
  38. data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
  39. data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
  40. data/lib/search_engine/base/index_maintenance.rb +459 -0
  41. data/lib/search_engine/base/indexing_dsl.rb +255 -0
  42. data/lib/search_engine/base/joins.rb +479 -0
  43. data/lib/search_engine/base/model_dsl.rb +472 -0
  44. data/lib/search_engine/base/presets.rb +43 -0
  45. data/lib/search_engine/base/pretty_printer.rb +315 -0
  46. data/lib/search_engine/base/relation_delegation.rb +42 -0
  47. data/lib/search_engine/base/scopes.rb +113 -0
  48. data/lib/search_engine/base/updating.rb +92 -0
  49. data/lib/search_engine/base.rb +38 -0
  50. data/lib/search_engine/bulk.rb +284 -0
  51. data/lib/search_engine/cache.rb +33 -0
  52. data/lib/search_engine/cascade.rb +531 -0
  53. data/lib/search_engine/cli/doctor.rb +631 -0
  54. data/lib/search_engine/cli/support.rb +217 -0
  55. data/lib/search_engine/cli.rb +222 -0
  56. data/lib/search_engine/client/http_adapter.rb +63 -0
  57. data/lib/search_engine/client/request_builder.rb +92 -0
  58. data/lib/search_engine/client/services/base.rb +74 -0
  59. data/lib/search_engine/client/services/collections.rb +161 -0
  60. data/lib/search_engine/client/services/documents.rb +214 -0
  61. data/lib/search_engine/client/services/operations.rb +152 -0
  62. data/lib/search_engine/client/services/search.rb +190 -0
  63. data/lib/search_engine/client/services.rb +29 -0
  64. data/lib/search_engine/client.rb +765 -0
  65. data/lib/search_engine/client_options.rb +20 -0
  66. data/lib/search_engine/collection_resolver.rb +191 -0
  67. data/lib/search_engine/collections_graph.rb +330 -0
  68. data/lib/search_engine/compiled_params.rb +143 -0
  69. data/lib/search_engine/compiler.rb +383 -0
  70. data/lib/search_engine/config/observability.rb +27 -0
  71. data/lib/search_engine/config/presets.rb +92 -0
  72. data/lib/search_engine/config/selection.rb +16 -0
  73. data/lib/search_engine/config/typesense.rb +48 -0
  74. data/lib/search_engine/config/validators.rb +97 -0
  75. data/lib/search_engine/config.rb +917 -0
  76. data/lib/search_engine/console_helpers.rb +130 -0
  77. data/lib/search_engine/deletion.rb +103 -0
  78. data/lib/search_engine/dispatcher.rb +125 -0
  79. data/lib/search_engine/dsl/parser.rb +582 -0
  80. data/lib/search_engine/engine.rb +167 -0
  81. data/lib/search_engine/errors.rb +290 -0
  82. data/lib/search_engine/filters/sanitizer.rb +189 -0
  83. data/lib/search_engine/hydration/materializers.rb +808 -0
  84. data/lib/search_engine/hydration/selection_context.rb +96 -0
  85. data/lib/search_engine/indexer/batch_planner.rb +76 -0
  86. data/lib/search_engine/indexer/bulk_import.rb +626 -0
  87. data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
  88. data/lib/search_engine/indexer/retry_policy.rb +103 -0
  89. data/lib/search_engine/indexer.rb +747 -0
  90. data/lib/search_engine/instrumentation.rb +308 -0
  91. data/lib/search_engine/joins/guard.rb +202 -0
  92. data/lib/search_engine/joins/resolver.rb +95 -0
  93. data/lib/search_engine/logging/color.rb +78 -0
  94. data/lib/search_engine/logging/format_helpers.rb +92 -0
  95. data/lib/search_engine/logging/partition_progress.rb +53 -0
  96. data/lib/search_engine/logging_subscriber.rb +388 -0
  97. data/lib/search_engine/mapper.rb +785 -0
  98. data/lib/search_engine/multi.rb +286 -0
  99. data/lib/search_engine/multi_result.rb +186 -0
  100. data/lib/search_engine/notifications/compact_logger.rb +675 -0
  101. data/lib/search_engine/observability.rb +162 -0
  102. data/lib/search_engine/operations.rb +58 -0
  103. data/lib/search_engine/otel.rb +227 -0
  104. data/lib/search_engine/partitioner.rb +128 -0
  105. data/lib/search_engine/ranking_plan.rb +118 -0
  106. data/lib/search_engine/registry.rb +158 -0
  107. data/lib/search_engine/relation/compiler.rb +711 -0
  108. data/lib/search_engine/relation/deletion.rb +37 -0
  109. data/lib/search_engine/relation/dsl/filters.rb +624 -0
  110. data/lib/search_engine/relation/dsl/selection.rb +240 -0
  111. data/lib/search_engine/relation/dsl.rb +903 -0
  112. data/lib/search_engine/relation/dx/dry_run.rb +59 -0
  113. data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
  114. data/lib/search_engine/relation/dx.rb +231 -0
  115. data/lib/search_engine/relation/materializers.rb +118 -0
  116. data/lib/search_engine/relation/options.rb +138 -0
  117. data/lib/search_engine/relation/state.rb +274 -0
  118. data/lib/search_engine/relation/updating.rb +44 -0
  119. data/lib/search_engine/relation.rb +623 -0
  120. data/lib/search_engine/result.rb +664 -0
  121. data/lib/search_engine/schema.rb +1083 -0
  122. data/lib/search_engine/sources/active_record_source.rb +185 -0
  123. data/lib/search_engine/sources/base.rb +62 -0
  124. data/lib/search_engine/sources/lambda_source.rb +55 -0
  125. data/lib/search_engine/sources/sql_source.rb +196 -0
  126. data/lib/search_engine/sources.rb +71 -0
  127. data/lib/search_engine/stale_rules.rb +160 -0
  128. data/lib/search_engine/test/minitest_assertions.rb +57 -0
  129. data/lib/search_engine/test/offline_client.rb +134 -0
  130. data/lib/search_engine/test/rspec_matchers.rb +77 -0
  131. data/lib/search_engine/test/stub_client.rb +201 -0
  132. data/lib/search_engine/test.rb +66 -0
  133. data/lib/search_engine/test_autoload.rb +8 -0
  134. data/lib/search_engine/update.rb +35 -0
  135. data/lib/search_engine/version.rb +7 -0
  136. data/lib/search_engine.rb +332 -0
  137. data/lib/tasks/search_engine.rake +501 -0
  138. data/lib/tasks/search_engine_doctor.rake +16 -0
  139. metadata +225 -0
@@ -0,0 +1,1083 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SearchEngine
4
+ # Schema utilities to compile model DSL into a Typesense-compatible schema
5
+ # hash and to diff it against a live collection.
6
+ #
7
+ # Public API:
8
+ # - {.compile(klass)} => Hash
9
+ # - {.diff(klass, client: SearchEngine.client)} => { diff: Hash, pretty: String }
10
+ module Schema
11
# Canonical translation table from DSL attribute types to Typesense field types.
#
# Mapping policy:
# - :integer                        -> "int64" (single width everywhere; wider range preferred)
# - :float / :decimal               -> "float"
# - :string                         -> "string"
# - :boolean                        -> "bool"
# - :time / :datetime               -> "int64" (epoch seconds)
# - :time_string / :datetime_string -> "string" (ISO8601 timestamps)
# - Single-element array descriptors (e.g. [:string]) are suffixed with "[]"
#   by the type resolver, yielding e.g. "string[]".
TYPE_MAPPING = {
  string: 'string',
  integer: 'int64',
  float: 'float',
  decimal: 'float',
  boolean: 'bool',
  time: 'int64',
  datetime: 'int64',
  time_string: 'string',
  datetime_string: 'string'
}.freeze

# Field-level keys listed for comparison between compiled and live fields.
FIELD_COMPARE_KEYS = [
  :type, :reference, :async_reference, :locale, :sort, :optional, :infix, :facet
].freeze

# Matches the "_YYYYMMDD_HHMMSS_###" suffix carried by physical collection names.
PHYSICAL_SUFFIX_RE = /_\d{8}_\d{6}_\d{3}\z/
35
+
36
+ class << self
37
# Compile the model DSL of +klass+ into a Typesense-compatible schema hash.
#
# Only keys that are supported and declared through the DSL are emitted.
# Attributes declared with `index: false` are left out of the compiled schema
# (per the DSL contract they may still be sent in documents and hydrated, but
# are not indexed).
#
# @param klass [Class] model class inheriting from {SearchEngine::Base}
# @param client [SearchEngine::Client, nil] forwarded to field compilation
# @return [Hash] frozen schema hash with symbol keys
# @raise [ArgumentError] if the class has no collection name defined
# @note `enable_nested_fields: true` is set at collection level when any
#   compiled attribute resolves to type "object" or "object[]".
def compile(klass, client: nil)
  name = collection_name_for!(klass)
  fields, nested = compile_fields_for(klass, client: client)

  # The implicit `id` is deliberately NOT added here: Typesense treats `id` as
  # a special string identifier that is not declared in the collection schema,
  # so including it would only produce confusing diffs against the live schema.
  coerce_doc_updated_at_type!(fields)

  deep_freeze(build_schema_hash(name, fields, nested))
end
61
+
62
# Legacy helper kept for callers that may still reference it; the compiled
# schema no longer includes an `id` field, so nothing in this section uses it.
#
# @param _klass [Class] ignored
# @return [String] always "string"
def infer_id_field_type(_klass)
  'string'
end
66
+
67
# Diff the compiled schema for +klass+ against the live physical collection
# in Typesense, resolving the alias when one exists. Returns both a structured
# diff Hash and a compact human-readable summary string.
#
# When the live collection cannot be retrieved, EVERY compiled field is
# reported under +:added_fields+ and +:collection_options+ is
# +{ live: :missing }+.
#
# A `search_engine.schema.diff` event is instrumented on both paths.
#
# @param klass [Class] model class inheriting from {SearchEngine::Base}
# @param client [SearchEngine::Client, nil] optional client wrapper (for tests);
#   defaults to +SearchEngine.client+
# @return [Hash] { diff: Hash, pretty: String }
# @see `https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/schema-indexer-e2e`
# @see `https://typesense.org/docs/latest/api/collections.html`
def diff(klass, client: nil)
  client ||= SearchEngine.client
  compiled = compile(klass, client: client)
  logical_name = compiled[:name]

  # Fall back to the logical name when no alias is registered for it.
  physical_name = client.resolve_alias(logical_name) || logical_name
  live_schema = client.retrieve_collection_schema(physical_name)
  stale_refs = live_schema ? detect_stale_references(live_schema, client: client) : []

  if live_schema.nil?
    diff_hash = {
      collection: { name: logical_name, physical: physical_name },
      # Nothing exists live, so the full compiled field list counts as added.
      # (Previously truncated to the first two fields, which under-reported
      # the diff and the instrumented added_count.)
      added_fields: compiled[:fields].dup,
      removed_fields: [],
      changed_fields: {},
      collection_options: { live: :missing },
      stale_references: stale_refs
    }
    payload = {
      collection: klass.name.to_s,
      logical: logical_name,
      physical_current: nil,
      fields_changed_count: 0,
      added_count: diff_hash[:added_fields].size,
      removed_count: 0,
      stale_references_count: stale_refs.size,
      in_sync: false
    }
    SearchEngine::Instrumentation.instrument('search_engine.schema.diff', payload) {}
    return { diff: diff_hash, pretty: pretty_print(diff_hash) }
  end

  # Bring both sides to the same shape before comparing.
  normalized_compiled = normalize_schema(compiled)
  normalized_live = normalize_schema(live_schema)

  added, removed, changed = diff_fields(normalized_compiled[:fields], normalized_live[:fields])
  collection_opts_changes = diff_collection_options(normalized_compiled, normalized_live)

  diff_hash = {
    collection: { name: logical_name, physical: physical_name },
    added_fields: added,
    removed_fields: removed,
    changed_fields: changed,
    collection_options: collection_opts_changes,
    stale_references: stale_refs
  }

  # Stale references count against being in sync even when no field differs.
  in_sync = added.empty? && removed.empty? && changed.empty? &&
            collection_opts_changes.empty? && stale_refs.empty?

  payload = {
    collection: klass.name.to_s,
    logical: logical_name,
    physical_current: physical_name,
    fields_changed_count: changed.size,
    added_count: added.size,
    removed_count: removed.size,
    stale_references_count: stale_refs.size,
    in_sync: in_sync
  }
  SearchEngine::Instrumentation.instrument('search_engine.schema.diff', payload) {}

  { diff: diff_hash, pretty: pretty_print(diff_hash) }
end
140
+
141
# Run the schema lifecycle for +klass+.
#
# Unless +force_rebuild+ is set, an in-place update is attempted first; when
# it reports success the method returns early with the update payload and no
# new physical collection is created.
#
# Otherwise a Blue/Green rebuild is performed: create a new physical
# collection, reindex into it, point the alias (logical name) at it, then
# enforce retention.
#
# The reindex step is supplied by the block (yielded the new physical name) or,
# without a block, by `klass.reindex_all_to(physical_name)` when the class
# responds to it. If neither is available an ArgumentError is raised and no
# alias swap occurs. Note that the physical collection has already been
# created at that point. If reindexing raises, the alias swap and retention
# cleanup below are never reached, so the new physical remains for inspection.
#
# @param klass [Class] model class inheriting from {SearchEngine::Base}
# @param client [SearchEngine::Client, nil] optional client wrapper (for tests)
# @param force_rebuild [Boolean] skip the in-place update attempt and always rebuild (default: false)
# @yieldparam physical_name [String] the newly created physical collection name
# @return [Hash] on rebuild: { logical:, new_physical:, previous_physical:,
#   alias_target:, dropped_physicals:, action: :rebuild }; on the in-place
#   path, whatever `update_result_payload` returns
# @raise [SearchEngine::Errors::Api, ArgumentError]
# @see `https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/schema#lifecycle`
# @see `https://typesense.org/docs/latest/api/collections.html`
def apply!(klass, client: nil, force_rebuild: false)
  client ||= SearchEngine.client

  # Fast path: a truthy update! means the live schema is already in sync or
  # was patched in place, so no rebuild is needed.
  if !force_rebuild && update!(klass, client: client)
    logical = compile(klass)[:name]
    # Report the physical currently behind the alias for a consistent result.
    physical = client.resolve_alias(logical) || logical

    return update_result_payload(logical, physical)
  end

  compiled = compile(klass, client: client)
  logical = compiled[:name]

  cleanup_logical_collection_conflict!(logical, client: client)

  started_at = monotonic_ms
  previous_physical = client.resolve_alias(logical)

  new_physical = generate_physical_name(logical, client: client)
  # Field hashes are duplicated because the compiled schema is frozen.
  create_schema = { name: new_physical, fields: compiled[:fields].map(&:dup) }
  create_schema[:enable_nested_fields] = true if compiled[:enable_nested_fields]

  # Referenced collections are validated before anything is created; the
  # error is re-raised with the logical collection name for context.
  begin
    validate_referenced_collections!(create_schema[:fields], client: client)
  rescue ArgumentError => error
    raise ArgumentError, "Schema validation failed for collection '#{logical}': #{error.message}"
  end

  client.create_collection(create_schema)

  if block_given?
    yield new_physical
  elsif klass.respond_to?(:reindex_all_to)
    klass.reindex_all_to(new_physical)
  else
    raise ArgumentError, 'reindex step is required: provide a block or implement klass.reindex_all_to(name)'
  end

  # Idempotent swap: skip the upsert when the alias already targets the new physical.
  swapped = client.resolve_alias(logical) != new_physical
  client.upsert_alias(logical, new_physical) if swapped

  # Retention cleanup only runs after the swap above.
  _, dropped = enforce_retention!(logical, new_physical, client: client, keep_last: effective_keep_last(klass))

  if defined?(ActiveSupport::Notifications)
    # Legacy payload keys are kept alongside the canonical ones expected by the subscriber.
    SearchEngine::Instrumentation.instrument(
      'search_engine.schema.apply',
      logical: logical,
      new_physical: new_physical,
      previous_physical: previous_physical,
      dropped_count: dropped.size,
      # canonical keys
      collection: klass.name.to_s,
      physical_new: new_physical,
      alias_swapped: swapped,
      retention_deleted_count: dropped.size,
      status: :ok,
      duration_ms: (monotonic_ms - started_at)
    ) {}
  end

  {
    logical: logical,
    new_physical: new_physical,
    previous_physical: previous_physical,
    alias_target: new_physical,
    dropped_physicals: dropped,
    action: :rebuild
  }
end
237
+
238
# Point the alias of +klass+ back at a previously retained physical collection.
#
# The target is the first entry, in descending timestamp/sequence order, whose
# name differs from the current alias target. When there is no such physical,
# an ArgumentError is raised (retention keep_last may be 0).
#
# @param klass [Class]
# @param client [SearchEngine::Client, nil] defaults to +SearchEngine.client+
# @return [Hash] { logical: String, new_target: String, previous_target: String }
# @raise [ArgumentError] when no previous physical exists
# @see `https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/schema#retention`
def rollback(klass, client: nil)
  client ||= SearchEngine.client
  logical = compile(klass)[:name]

  started_at = monotonic_ms
  current_target = client.resolve_alias(logical)

  newest_first = order_physicals_desc(logical, list_physicals(logical, client: client))
  previous = newest_first.find { |name| name != current_target }
  if previous.nil?
    raise ArgumentError,
          'No previous physical available for rollback; retention keep_last may be 0'
  end

  # Guarded swap: no upsert when the alias already points at the chosen target.
  client.upsert_alias(logical, previous) unless current_target == previous

  if defined?(ActiveSupport::Notifications)
    SearchEngine::Instrumentation.instrument(
      'search_engine.schema.rollback',
      logical: logical,
      new_target: previous,
      previous_target: current_target,
      duration_ms: (monotonic_ms - started_at)
    ) {}
  end

  { logical: logical, new_target: previous, previous_target: current_target }
end
279
+
280
# Attempt to bring the live collection schema in sync in-place (PATCH) when
# the pending changes are compatible.
#
# Compatible changes are:
# - Adding new fields (without `reference` / `async_reference`)
# - Removing fields (sent as `drop: true`)
#
# Incompatible changes (return false, caller should rebuild) are:
# - Modifying existing fields (type/facet/etc changes)
# - Changing collection-level options (this includes a missing live
#   collection, which the diff reports under `collection_options`)
# - Adding reference-bearing fields
#
# @param klass [Class]
# @param client [SearchEngine::Client, nil] defaults to +SearchEngine.client+
# @return [Boolean] true when the schema needed no field changes or was
#   patched in-place; false when a full rebuild is required
def update!(klass, client: nil)
  client ||= SearchEngine.client
  diff_hash = diff(klass, client: client)[:diff]

  # No physical collection implies missing; caller should create it.
  return false if diff_hash[:collection][:physical].nil?

  # Incompatible changes cannot be patched.
  return false if diff_hash[:changed_fields].any?
  return false if diff_hash[:collection_options].any?

  # Both lists are coerced so a nil entry is handled the same way everywhere.
  added_fields = Array(diff_hash[:added_fields])
  removed_fields = Array(diff_hash[:removed_fields])

  # Reference-bearing fields require full rebuild (Typesense limitation for PATCH).
  needs_rebuild = added_fields.any? do |field|
    ref = field[:reference] || field['reference']
    async = field[:async_reference] || field['async_reference']
    (ref && !ref.to_s.strip.empty?) || async
  end
  return false if needs_rebuild

  # Nothing to add or drop: already in sync.
  return true if added_fields.empty? && removed_fields.empty?

  # Drops are listed before additions in the PATCH payload.
  drops = removed_fields.map { |field| { name: field[:name], drop: true } }
  physical = diff_hash[:collection][:physical]
  client.update_collection(physical, { fields: drops + added_fields })
  true
end
330
+
331
+ private
332
+
333
# Build a fresh physical collection name from the current UTC timestamp plus
# a zero-padded 3-digit sequence, e.g. "products_20250131_235959_001".
#
# The sequence is the lowest number from 1 upward not already used by a
# collection sharing the same timestamp prefix; it is capped at 999.
#
# @param logical [String] logical (alias) collection name
# @param client [SearchEngine::Client]
# @return [String]
def generate_physical_name(logical, client:)
  stamp = Time.now.utc.strftime('%Y%m%d_%H%M%S')
  prefix = "#{logical}_#{stamp}_"

  taken = list_physicals_starting_with(prefix, client: client).map do |name|
    name.split('_').last.to_i
  end

  # First free slot below 999; 999 itself is the fallback even when taken.
  seq = (1...999).find { |candidate| !taken.include?(candidate) } || 999
  format('%<prefix>s%<seq>03d', prefix: prefix, seq: seq)
end
347
+
348
# Look up which physical collection the alias +logical+ currently targets.
#
# @param logical [String] logical (alias) name
# @param client [SearchEngine::Client]
# @return [String, nil] whatever the client resolves the alias to
def current_alias_target(logical, client:)
  client.resolve_alias(logical)
end
352
+
353
# Point the alias +logical+ at +physical+ through a single alias upsert.
#
# @param logical [String] logical (alias) name
# @param physical [String] physical collection name to target
# @param client [SearchEngine::Client]
# @return [Object] the client's upsert result
def swap_alias!(logical, physical, client:)
  client.upsert_alias(logical, physical)
end
357
+
358
# Enumerate every collection whose name is exactly
# "<logical>_YYYYMMDD_HHMMSS_###".
#
# The listing call uses the configured timeout with a floor of 10 seconds.
# Any failure (config lookup or listing) is swallowed and yields an empty
# list, so callers treat this as best-effort.
#
# @param logical [String] logical (alias) collection name
# @param client [SearchEngine::Client]
# @return [Array<String>] matching physical collection names
def list_physicals(logical, client:)
  meta_timeout = begin
    # Non-positive or small values all end up at the 10s floor.
    [SearchEngine.config.timeout_ms.to_i, 10_000].max
  rescue StandardError
    10_000
  end
  collections = Array(client.list_collections(timeout_ms: meta_timeout))
  # \A / \z anchor the whole string; ^ / $ would accept a match on any single
  # line of a multi-line name.
  pattern = /\A#{Regexp.escape(logical)}_\d{8}_\d{6}_\d{3}\z/
  names = collections.map { |c| (c[:name] || c['name']).to_s }
  names.select { |n| pattern.match?(n) }
rescue StandardError
  []
end
374
+
375
# Internal: names of all collections beginning with +prefix+ (used to pick
# the next free sequence number for a given timestamp).
#
# Uses the configured timeout with a 10s floor; any error yields [].
#
# @param prefix [String] "<logical>_<timestamp>_" prefix
# @param client [SearchEngine::Client]
# @return [Array<String>]
def list_physicals_starting_with(prefix, client:)
  meta_timeout = begin
    # Equivalent to the 5s default followed by the 10s floor.
    [SearchEngine.config.timeout_ms.to_i, 10_000].max
  rescue StandardError
    10_000
  end

  Array(client.list_collections(timeout_ms: meta_timeout))
    .map { |entry| (entry[:name] || entry['name']).to_s }
    .select { |candidate| candidate.start_with?(prefix) }
rescue StandardError
  []
end
390
+
391
# Delete physical collections beyond the retention window.
#
# Physicals are ordered newest-first, the new alias target is excluded, the
# first +keep_last+ of the remainder are kept and the rest are deleted.
# A nil or negative +keep_last+ is treated as 0.
#
# @param logical [String] logical (alias) collection name
# @param new_target [String] physical that must never be dropped
# @param client [SearchEngine::Client]
# @param keep_last [Integer, nil] number of older physicals to retain
# @return [Array(Array<String>, Array<String>)] [kept, dropped]
def enforce_retention!(logical, new_target, client:, keep_last:)
  keep = [Integer(keep_last || 0), 0].max

  newest_first = order_physicals_desc(logical, list_physicals(logical, client: client))
  candidates = newest_first.reject { |name| name == new_target }
  retained, expired = candidates.partition.with_index { |_name, index| index < keep }

  # NOTE(review): the original comment calls this a best-effort delete that
  # ignores 404, but nothing is rescued here - presumably the client handles
  # that itself; confirm.
  expired.each { |name| client.delete_collection(name) }

  [retained, expired]
end
408
+
409
# Sort physical collection names newest-first: by embedded timestamp, then by
# sequence number, both descending.
#
# @param logical [String] logical (alias) collection name
# @param names [Array<String>] physical collection names
# @return [Array<String>] a new, sorted array
def order_physicals_desc(logical, names)
  names.sort_by do |name|
    stamp = extract_timestamp(logical, name).to_i
    sequence = extract_sequence(logical, name)
    # Negated keys turn the ascending sort into a descending one.
    [-stamp, -sequence]
  end
end
412
+
413
# Extract the timestamp of a physical name shaped like
# "<logical>_YYYYMMDD_HHMMSS_###" as one integer (YYYYMMDDHHMMSS).
#
# @param logical [String] logical (alias) collection name
# @param name [String] physical collection name
# @return [Integer] the combined date/time digits, or 0 when the remainder
#   after the logical prefix does not split into exactly three parts
def extract_timestamp(logical, name)
  segments = name.delete_prefix("#{logical}_").split('_')
  return 0 if segments.size != 3

  date_part, time_part, _sequence = segments
  "#{date_part}#{time_part}".to_i
end
421
+
422
# Read the trailing sequence number of a physical collection name: the
# integer value of whatever follows the last underscore.
#
# @param _logical [String] unused
# @param name [String] physical collection name
# @return [Integer] 0 when the last segment is missing or non-numeric
def extract_sequence(_logical, name)
  *, tail = name.split('_')
  tail.to_i
end
425
+
426
# Resolve how many previous physicals to retain for +klass+.
#
# A per-class `schema_retention[:keep_last]` wins when it is present (any
# non-nil value, including 0); otherwise the global
# `SearchEngine.config.schema.retention.keep_last` is used.
#
# @param klass [Class]
# @return [Object] the configured keep_last value (expected to be an Integer)
def effective_keep_last(klass)
  retention = klass.respond_to?(:schema_retention) ? klass.schema_retention : nil
  override = retention ? retention[:keep_last] : nil
  return override unless override.nil?

  SearchEngine.config.schema.retention.keep_last
end
432
+
433
# Shorthand for the instrumentation clock used to compute durations here.
#
# @return [Numeric] whatever SearchEngine::Instrumentation.monotonic_ms returns
def monotonic_ms
  SearchEngine::Instrumentation.monotonic_ms
end
436
+
437
# Read the collection name from +klass+ and make sure it is usable.
#
# @param klass [Class] expected to respond to `collection`
# @return [String] the collection name as a String
# @raise [ArgumentError] when the class has no `collection` reader, or the
#   name is nil or blank
def collection_name_for!(klass)
  raw = klass.collection if klass.respond_to?(:collection)
  name = raw.to_s
  raise ArgumentError, 'klass must define a collection name' if raw.nil? || name.strip.empty?

  name
end
446
+
447
# Turn the attributes declared on +klass+ into Typesense field entries and
# report whether collection-level nested fields are required.
#
# For every declared attribute, in declaration order:
# 1. the type descriptor is validated;
# 2. the attribute is skipped when it, or the base of its dotted name
#    (the part before the first "."), is declared with `index: false`;
# 3. reference attributes must compile to "string" or "string[]";
# 4. the field entry is appended, followed by any hidden companion field.
#
# @param klass [Class] model class; `attributes` / `attribute_options` are optional readers
# @param client [SearchEngine::Client, nil] forwarded to reference resolution
# @return [Array(Array<Hash>, Boolean)] [fields, needs_nested_fields]
# @raise [SearchEngine::Errors::InvalidOption] for a reference field with a non-string type
def compile_fields_for(klass, client: nil)
  declared = klass.respond_to?(:attributes) ? klass.attributes : {}
  options_by_name = klass.respond_to?(:attribute_options) ? (klass.attribute_options || {}) : {}
  references = build_references_by_local_key(klass, client: client)
  async_references = build_async_reference_by_local_key(klass)

  fields = []
  nested = false

  declared.each do |attribute_name, type_descriptor|
    validate_attribute_type!(attribute_name, type_descriptor)

    key = attribute_name.to_sym
    opts = options_by_name[key] || {}

    # Nested attributes ("base.child") inherit `index: false` from their base.
    base, dot, = attribute_name.to_s.partition('.')
    parent_unindexed = !dot.empty? && (options_by_name[base.to_sym] || {})[:index] == false
    next if opts[:index] == false || parent_unindexed

    ts_type = typesense_type_for(type_descriptor)

    # Reference fields must be typed as string/string[] in the DSL.
    if references.key?(key) && !%w[string string[]].include?(ts_type.to_s)
      raise SearchEngine::Errors::InvalidOption.new(
        "Reference field :#{attribute_name} must be declared as :string or [:string] " \
        "(got #{type_descriptor.inspect}).",
        hint: "Declare attribute :#{attribute_name}, :string in #{klass.name} to match " \
              'Typesense reference requirements.',
        doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/joins#declaring-references',
        details: {
          field: attribute_name.to_s,
          declared_type: type_descriptor,
          compiled_type: ts_type,
          reference: references[key]
        }
      )
    end

    nested ||= nested_type?(ts_type)

    fields << build_field_entry(attribute_name, ts_type, references, async_references, opts)

    # Hidden flags:
    # - <name>_empty for array attributes with empty_filtering enabled
    # - <name>_blank for any attribute with optional enabled
    append_hidden_empty_field(fields, attribute_name, type_descriptor, opts)
  end

  [fields, nested]
end
507
+
508
# Reject type descriptors that are unsafe for the given attribute name.
#
# The only rule enforced here: type "auto" (case-insensitive) may only be
# declared on attributes whose name looks like a regex pattern.
#
# @param attribute_name [Symbol, String]
# @param type_descriptor [Object] DSL type descriptor
# @return [nil]
# @raise [SearchEngine::Errors::InvalidOption] for :auto on a plain attribute name
def validate_attribute_type!(attribute_name, type_descriptor)
  auto_typed = type_descriptor.to_s.downcase == 'auto'
  return if !auto_typed || regex_attribute_name?(attribute_name)

  raise SearchEngine::Errors::InvalidOption,
        "Attribute #{attribute_name.inspect} must use a regex-style name (e.g. /.*_facet/) to declare type :auto."
end
517
+
518
# Whether the attribute name contains any regex metacharacter
# (one of . * + ? [ ] ( ) | { }).
#
# @param attribute_name [Symbol, String]
# @return [Boolean]
def regex_attribute_name?(attribute_name)
  /[.*+?\[\]()|{}]/.match?(attribute_name.to_s)
end
522
+
523
# Whether a compiled Typesense type is "object" or "object[]", i.e. one that
# makes the collection need `enable_nested_fields`.
#
# @param ts_type [String] compiled Typesense type
# @return [Boolean]
def nested_type?(ts_type)
  case ts_type
  when 'object', 'object[]' then true
  else false
  end
end
526
+
527
# Assemble the schema entry for one attribute.
#
# `name` and `type` are always present. The optional keys (locale, sort,
# optional, infix, facet, reference, async_reference) are included only when
# their value is not nil, so an explicit `false` is kept.
#
# @param attribute_name [Symbol, String]
# @param ts_type [String] compiled Typesense type
# @param references_by_local_key [Hash{Symbol=>String}] reference targets keyed by attribute
# @param async_reference_by_local_key [Hash{Symbol=>Object}] async flags keyed by attribute
# @param opts [Hash] attribute options from the DSL
# @return [Hash] field entry with symbol keys
def build_field_entry(attribute_name, ts_type, references_by_local_key, async_reference_by_local_key, opts)
  key = attribute_name.to_sym

  extras = %i[locale sort optional infix facet].each_with_object({}) do |flag, acc|
    acc[flag] = opts[flag]
  end
  extras[:reference] = references_by_local_key[key]
  extras[:async_reference] = async_reference_by_local_key[key]

  { name: attribute_name.to_s, type: ts_type }.merge(extras.compact)
end
542
+
543
# Force the type of a developer-declared `doc_updated_at` field to "int64".
#
# Only the first field named "doc_updated_at" is touched. The type is written
# under the key style the entry already uses (symbol or string), defaulting
# to the symbol key when no type is present. Mutates +fields_array+ in place;
# the return value is not meaningful to callers.
#
# @param fields_array [Array<Hash>] compiled field entries
# @return [nil, Array<Hash>] nil when a field was coerced, otherwise the input array
def coerce_doc_updated_at_type!(fields_array)
  target = fields_array.find { |entry| (entry[:name] || entry['name']).to_s == 'doc_updated_at' }
  return fields_array if target.nil?

  # The string key is used only when it is the sole existing type key.
  type_key = !target.key?(:type) && target.key?('type') ? 'type' : :type
  target[type_key] = 'int64'
  nil
end
559
+
560
# Put together the top-level schema hash, adding the collection-level
# `enable_nested_fields` flag only when it is needed.
#
# @param collection_name [#to_s]
# @param fields_array [Array<Hash>] compiled field entries
# @param needs_nested_fields [Boolean]
# @return [Hash] { name:, fields: } plus `enable_nested_fields: true` when required
def build_schema_hash(collection_name, fields_array, needs_nested_fields)
  base = { name: collection_name.to_s, fields: fields_array }
  needs_nested_fields ? base.merge(enable_nested_fields: true) : base
end
566
+
567
# Resolve a DSL type descriptor to its Typesense type name.
#
# Descriptors are matched case-insensitively against TYPE_MAPPING; unknown
# descriptors pass through as their string form. A single-element Array
# (e.g. [:string]) resolves its inner type and appends "[]".
#
# @param type_descriptor [Symbol, String, Array]
# @return [String]
def typesense_type_for(type_descriptor)
  resolve = ->(descriptor) { TYPE_MAPPING[descriptor.to_s.downcase.to_sym] || descriptor.to_s }

  wraps_single_type = type_descriptor.is_a?(Array) && type_descriptor.size == 1
  return resolve.call(type_descriptor) unless wraps_single_type

  "#{resolve.call(type_descriptor.first)}[]"
end
577
+
578
# Bring a compiled or live schema into one comparable shape with symbol keys.
#
# Every lookup tries the symbol key first and falls back to the string key.
# Fields are re-keyed by name; each entry keeps its normalized type, a
# non-blank reference, and any non-nil attribute-level flags.
#
# NOTE(review): because the lookup is `sym || string`, a `false` stored under
# a symbol key falls through to the string key and is dropped when that key
# is absent - confirm this is what the field comparison expects.
#
# @param schema [Hash] compiled (symbol keys) or live (possibly string keys) schema
# @return [Hash] { name:, fields: { name => entry }, default_sorting_field:,
#   token_separators:, symbols_to_index:, enable_nested_fields: }
def normalize_schema(schema)
  read = ->(hash, key) { hash[key] || hash[key.to_s] }

  fields_by_name = Array(read.call(schema, :fields)).each_with_object({}) do |field, acc|
    fname = read.call(field, :name).to_s
    entry = { name: fname, type: normalize_type(read.call(field, :type).to_s) }

    reference = read.call(field, :reference)
    entry[:reference] = reference.to_s unless reference.nil? || reference.to_s.strip.empty?

    # Carry over attribute-level flags from either schema flavour.
    %i[locale sort optional infix facet async_reference].each do |flag|
      value = read.call(field, flag)
      entry[flag] = value unless value.nil?
    end

    acc[fname] = entry
  end

  {
    name: read.call(schema, :name).to_s,
    fields: fields_by_name,
    default_sorting_field: read.call(schema, :default_sorting_field),
    token_separators: read.call(schema, :token_separators),
    symbols_to_index: read.call(schema, :symbols_to_index),
    enable_nested_fields: read.call(schema, :enable_nested_fields)
  }
end
608
+
609
# Collect reference fields whose target collection name looks physical
# (per `physical_reference_name?`) rather than logical.
#
# Each entry has :field, :logical, :physical and :reason, where reason is
# 'alias_mismatch' (the logical alias resolves to a different, non-blank name),
# 'missing_physical' (the target schema could not be retrieved) or
# 'physical_reference' (the target exists). Client failures are treated as
# "no answer"; any other error makes the whole method return [].
def detect_stale_references(schema, client:)
  Array(schema[:fields] || schema['fields']).each_with_object([]) do |field, stale|
    reference = field[:reference] || field['reference']
    next if reference.nil? || reference.to_s.strip.empty?

    target = reference.to_s.split('.', 2).first.to_s
    next if target.empty? || !physical_reference_name?(target)

    field_name = (field[:name] || field['name']).to_s
    logical = strip_physical_suffix(target)

    current_alias = begin
      client.resolve_alias(logical)
    rescue StandardError
      nil
    end
    mismatch = current_alias && !current_alias.to_s.strip.empty? && current_alias.to_s != target

    reason =
      if mismatch
        'alias_mismatch'
      else
        target_schema = begin
          client.retrieve_collection_schema(target)
        rescue StandardError
          nil
        end
        target_schema.nil? ? 'missing_physical' : 'physical_reference'
      end

    stale << { field: field_name, logical: logical, physical: target, reason: reason }
  end
rescue StandardError
  []
end
654
+ private :detect_stale_references
655
+
656
# True when the stringified name matches PHYSICAL_SUFFIX_RE.
def physical_reference_name?(name)
  candidate = name.to_s
  candidate.match?(PHYSICAL_SUFFIX_RE)
end
659
+ private :physical_reference_name?
660
+
661
# Return the stringified name with the first PHYSICAL_SUFFIX_RE match removed.
def strip_physical_suffix(name)
  candidate = name.to_s
  candidate.sub(PHYSICAL_SUFFIX_RE, '')
end
664
+ private :strip_physical_suffix
665
+
666
# Canonicalize the spelling of a type string, ignoring case.
#
# Recognizes string[], int64, int32, float, bool (also spelled "boolean")
# and string; any other value is returned as its string form unchanged.
def normalize_type(type_string)
  raw = type_string.to_s

  canonical = %w[string[] int64 int32 float].find { |known| raw.casecmp(known).zero? }
  return canonical if canonical
  return 'bool' if %w[bool boolean].include?(raw.downcase)

  raw.casecmp('string').zero? ? 'string' : raw
end
678
+
679
# Compare two name-keyed field maps (compiled vs. live).
#
# @return [Array] `[added, removed, changed]`: fields only in compiled, fields
#   only in live, and a map of field name to `{ "attr" => [compiled, live] }`
#   for shared fields whose compared attributes differ.
def diff_fields(compiled_fields_by_name, live_fields_by_name)
  compiled_keys = compiled_fields_by_name.keys
  live_keys = live_fields_by_name.keys

  added = (compiled_keys - live_keys).map { |field_name| compiled_fields_by_name[field_name] }
  removed = (live_keys - compiled_keys).map { |field_name| live_fields_by_name[field_name] }

  changed = (compiled_keys & live_keys).each_with_object({}) do |field_name, acc|
    declared = compiled_fields_by_name[field_name]
    actual = live_fields_by_name[field_name]

    deltas = FIELD_COMPARE_KEYS.each_with_object({}) do |attr, memo|
      # type and reference are always compared; other flags only when the
      # compiled schema declares them.
      always_compared = attr == :type || attr == :reference
      next unless always_compared || declared.key?(attr)

      wanted = declared[attr]
      found = actual[attr]
      memo[attr.to_s] = [wanted, found] unless values_equal?(wanted, found)
    end

    acc[field_name] = deltas unless deltas.empty?
  end

  [added, removed, changed]
end
712
+
713
# Diff collection-level options, returning `{ option => [compiled, live] }`.
#
# Options that are nil in the compiled schema are skipped so that settings the
# DSL never declared do not show up as differences.
def diff_collection_options(compiled, live)
  tracked = %i[default_sorting_field token_separators symbols_to_index enable_nested_fields]

  tracked.each_with_object({}) do |option, delta|
    declared = compiled[option]
    next if declared.nil?

    actual = live[option]
    delta[option] = [declared, actual] unless values_equal?(declared, actual)
  end
end
729
+
730
# Loose equality used by the schema diff: two Arrays are compared with `==`,
# everything else is compared by string form (so nil equals '' and true
# equals 'true').
def values_equal?(a, b)
  both_arrays = a.is_a?(Array) && b.is_a?(Array)
  both_arrays ? a == b : a.to_s == b.to_s
end
737
+
738
# Recursively freeze a structure and return it.
#
# Hash values and Array elements are frozen depth-first (Hash keys are not
# visited); any other object is frozen as-is.
def deep_freeze(object)
  if object.is_a?(Hash)
    object.each_value { |child| deep_freeze(child) }
  elsif object.is_a?(Array)
    object.each { |child| deep_freeze(child) }
  end

  object.freeze
end
747
+
748
# Render a schema diff hash as newline-joined, human-readable text.
#
# The header line always comes first. When every section is empty the text is
# just the header plus "No changes"; otherwise each non-empty section is
# appended in order: added, removed, changed, stale references, options.
def pretty_print(diff)
  report = [format_header(diff[:collection] || {})]

  sections = [
    [:format_added_fields, diff[:added_fields] || []],
    [:format_removed_fields, diff[:removed_fields] || []],
    [:format_changed_fields, diff[:changed_fields] || {}],
    [:format_stale_references, diff[:stale_references] || []],
    [:format_collection_options, diff[:collection_options] || {}]
  ]
  pending = sections.reject { |_formatter, payload| payload.empty? }

  if pending.empty?
    report << 'No changes'
  else
    pending.each { |formatter, payload| report.concat(send(formatter, payload)) }
  end

  report.join("\n")
end
772
+
773
# Header line for the report: the logical name, plus "-> physical" when a
# physical name is present and differs from the logical one.
def format_header(collection)
  logical = collection[:name]
  physical = collection[:physical]
  label = "Collection: #{logical}"
  return label if !physical || physical == logical

  "#{label} -> #{physical}"
end
782
+ private :format_header
783
+
784
# Section lines for added fields: a heading followed by one "name:type" line per field.
def format_added_fields(list)
  entries = []
  list.each { |field| entries << " - #{field[:name]}:#{field[:type]}" }
  ['+ Added fields:'] + entries
end
791
+ private :format_added_fields
792
+
793
# Section lines for removed fields: a heading followed by one "name:type" line per field.
def format_removed_fields(list)
  entries = []
  list.each { |field| entries << " - #{field[:name]}:#{field[:type]}" }
  ['- Removed fields:'] + entries
end
800
+ private :format_removed_fields
801
+
802
# Section lines for changed fields, one "field.attr: compiled -> live" line
# per changed attribute, with field names in sorted order.
def format_changed_fields(map)
  out = ['~ Changed fields:']
  map.keys.sort.each do |fname|
    map[fname].each do |attr, values|
      compiled_value, live_value = values
      out << " - #{fname}.#{attr}: #{compiled_value} -> #{live_value}"
    end
  end
  out
end
812
+ private :format_changed_fields
813
+
814
# Section lines for stale references. Entries may use symbol or string keys;
# the reason is only printed when present.
def format_stale_references(stale_refs)
  entries = stale_refs.map do |ref|
    field = ref[:field] || ref['field']
    logical = ref[:logical] || ref['logical']
    physical = ref[:physical] || ref['physical']
    reason = ref[:reason] || ref['reason']

    details = ["logical=#{logical}", "physical=#{physical}"]
    details << "reason=#{reason}" if reason
    " - #{field} (#{details.join(', ')})"
  end

  ['~ Stale references:', *entries]
end
829
+ private :format_stale_references
830
+
831
# Section lines for collection-level option differences.
#
# The :live key is special: it is skipped when both values are nil, and
# printed as a single value when either side is :missing.
def format_collection_options(opts)
  opts.each_with_object(['~ Collection options:']) do |(key, (cval, lval)), out|
    live_entry = key == :live
    next if live_entry && cval.nil? && lval.nil?

    out << if live_entry && (cval == :missing || lval == :missing)
             " - live: #{cval || lval}"
           else
             " - #{key}: #{cval} -> #{lval}"
           end
  end
end
844
+ private :format_collection_options
845
+
846
# Build the result hash for an :update action. The same physical name is
# reported as new, previous and alias target, and nothing is listed as dropped.
def update_result_payload(logical, physical)
  payload = { logical: logical }
  %i[new_physical previous_physical alias_target].each { |slot| payload[slot] = physical }
  payload.merge(dropped_physicals: [], action: :update)
end
856
+
857
+ # Build a mapping of local attribute names to referenced collection names based on join declarations.
858
+ # @param klass [Class]
859
+ # @return [Hash{Symbol=>String}]
860
def build_references_by_local_key(klass, client: nil)
  return {} unless klass.respond_to?(:joins_config)

  (klass.joins_config || {}).each_value.with_object({}) do |join, mapping|
    # has_one/has_many joins are skipped; a missing kind is treated as belongs_to.
    kind = (join[:kind] || :belongs_to).to_sym
    next if kind == :has_one || kind == :has_many

    local_key = join[:local_key]
    next if local_key.nil?

    target = resolve_reference_collection_name(join[:collection].to_s, client)
    foreign = join[:foreign_key].to_s
    next if target.strip.empty? || foreign.strip.empty?

    # The first join declared for a local key wins.
    slot = local_key.to_sym
    mapping[slot] = "#{target}.#{foreign}" unless mapping[slot]
  end
end
884
+
885
# Return the reference target as given, in string form.
# The client argument is accepted but not used.
def resolve_reference_collection_name(logical, _client)
  # References deliberately point at the logical name (alias) rather than a
  # physical collection, so they stay valid across blue/green swaps.
  alias_name = logical.to_s
  alias_name
end
890
+
891
+ # Build a mapping of local attribute names to async_reference flag based on belongs_to declarations.
892
+ # @param klass [Class]
893
+ # @return [Hash{Symbol=>Boolean}]
894
def build_async_reference_by_local_key(klass)
  return {} unless klass.respond_to?(:joins_config)

  (klass.joins_config || {}).each_value.with_object({}) do |join, flags|
    # has_one/has_many joins are skipped; a missing kind is treated as belongs_to.
    kind = (join[:kind] || :belongs_to).to_sym
    next if kind == :has_one || kind == :has_many

    local_key = join[:local_key]
    next if local_key.nil? || !join[:async_ref]

    flags[local_key.to_sym] = true
  end
end
910
+
911
+ # Append hidden flags based on attribute options:
912
+ # - <name>_empty for array attributes with empty_filtering enabled
913
+ # - <name>_blank for any attribute with optional enabled
914
def append_hidden_empty_field(fields_array, attribute_name, type_descriptor, opts)
  # "<name>_empty" applies only to one-element Array descriptors with empty_filtering set.
  array_type = type_descriptor.is_a?(Array) && type_descriptor.size == 1
  wants_empty = opts[:empty_filtering] && array_type
  wants_blank = opts[:optional]
  return if !wants_empty && !wants_blank

  fields_array.push({ name: "#{attribute_name}_empty", type: 'bool' }) if wants_empty
  fields_array.push({ name: "#{attribute_name}_blank", type: 'bool' }) if wants_blank
end
923
+
924
+ # Validate that referenced collections exist and have the required fields before creating a schema.
925
+ # Typesense references use logical collection names (aliases), which must point to valid physical
926
+ # collections. The referenced fields must exist in the referenced collection's schema.
927
+ # @param fields [Array<Hash>] field definitions with potential reference values
928
+ # @param client [SearchEngine::Client] client to resolve aliases and retrieve schemas
929
+ # @raise [ArgumentError] if a referenced collection or field is missing, not indexed, or cannot be validated
930
def validate_referenced_collections!(fields, client:)
  fields.each do |field|
    reference = field[:reference] || field['reference']
    next if reference.nil?

    text = reference.to_s
    next if text.strip.empty?

    # Expected shape is "collection.field"; anything else is skipped, not rejected.
    collection, attribute = text.split('.', 2)
    next if collection.to_s.empty? || attribute.nil? || attribute.empty?

    validate_single_reference!(collection.to_s, attribute, reference, client)
  end
end
945
+
946
+ # Validate a single reference field.
947
+ # @param logical_coll [String] logical collection name
948
+ # @param field_name [String] field name to validate
949
+ # @param ref [String] original reference string for error messages
950
+ # @param client [SearchEngine::Client] client to resolve aliases and retrieve schemas
951
+ # @raise [ArgumentError] if validation fails
952
def validate_single_reference!(logical_coll, field_name, ref, client)
  physical_coll = resolve_referenced_collection(logical_coll, client)
  referenced_schema = retrieve_referenced_schema(logical_coll, physical_coll, client)
  schema_fields = Array(referenced_schema[:fields] || referenced_schema['fields'])
  target_field = schema_fields.find { |f| (f[:name] || f['name']).to_s == field_name }

  if target_field.nil?
    raise ArgumentError,
          build_field_not_found_error(logical_coll, field_name, schema_fields, physical_coll: physical_coll)
  end

  # Typesense requires referenced fields to be indexed (for JOIN filtering).
  # Fall back to the string key only when the symbol key is nil: a plain
  # `field[:index] || field['index']` turns a symbol-keyed `index: false`
  # into nil, which silently skipped this check.
  index_val = target_field[:index]
  index_val = target_field['index'] if index_val.nil?
  return unless index_val == false

  raise ArgumentError,
        build_field_not_indexed_error(logical_coll, field_name, physical_coll: physical_coll)
rescue ArgumentError
  # Validation failures raised above already carry a descriptive message.
  raise
rescue StandardError => error
  # Any other failure is reported as ArgumentError, naming the original reference.
  raise ArgumentError, "Failed to validate reference '#{ref}': #{error.class}: #{error.message}"
end
974
+
975
+ # Resolve a logical collection name to its physical collection name.
976
+ # @param logical_coll [String] logical collection name
977
+ # @param client [SearchEngine::Client] client to resolve aliases
978
+ # @return [String] physical collection name
979
+ # @raise [ArgumentError] if alias/physical cannot be resolved
980
def resolve_referenced_collection(logical_coll, client)
  resolved = client.resolve_alias(logical_coll)
  # With no usable alias target, the logical name itself is used as the collection name.
  unresolved = resolved.nil? || resolved.to_s.strip.empty?
  target = unresolved ? logical_coll : resolved
  return target unless target.to_s.strip.empty?

  raise ArgumentError,
        "Referenced collection alias '#{logical_coll}' does not exist or points to no physical collection"
end
991
+
992
+ # Drop a physical collection that shadows an alias with the same name.
993
+ # Typesense allows alias/collection name collisions; when both exist,
994
+ # references to the alias resolve to the physical collection, which
995
+ # breaks joins after blue/green swaps. If an alias exists and points to
996
+ # a different physical, remove the conflicting logical-named collection.
997
def cleanup_logical_collection_conflict!(logical, client:)
  name = logical.to_s
  target = client.resolve_alias(name)
  target_name = target.to_s
  # Nothing to clean up unless an alias exists and points somewhere else.
  return if target.nil? || target_name.strip.empty? || target_name == name

  # Only delete when a collection literally named like the alias exists.
  return unless client.retrieve_collection_schema(name)

  client.delete_collection(name, timeout_ms: 60_000)

  if defined?(ActiveSupport::Notifications)
    SearchEngine::Instrumentation.instrument(
      'search_engine.schema.cleanup_conflict',
      logical: name,
      alias_target: target_name,
      dropped: name
    ) {}
  end
end
1016
+
1017
+ # Retrieve the schema for a referenced collection.
1018
+ # @param logical_coll [String] logical collection name
1019
+ # @param physical_coll [String] physical collection name
1020
+ # @param client [SearchEngine::Client] client to retrieve schemas
1021
+ # @return [Hash] collection schema
1022
+ # @raise [ArgumentError] if schema cannot be retrieved
1023
def retrieve_referenced_schema(logical_coll, physical_coll, client)
  found = client.retrieve_collection_schema(physical_coll)
  return found unless found.nil?

  # A nil schema is reported with both names so the failing target is identifiable.
  raise ArgumentError,
        "Referenced collection '#{logical_coll}' (physical: '#{physical_coll}') " \
        'schema could not be retrieved'
end
1032
+
1033
+ # Build a detailed error message when a referenced field is not found.
1034
+ # @param logical_coll [String] logical collection name
1035
+ # @param field_name [String] field name that was not found
1036
+ # @param schema_fields [Array<Hash>] fields from the referenced collection schema
1037
+ # @param physical_coll [String, nil] physical collection name (optional)
1038
+ # @return [String] error message
1039
def build_field_not_found_error(logical_coll, field_name, schema_fields, physical_coll: nil)
  known_fields = schema_fields.map { |f| (f[:name] || f['name']).to_s }.sort

  label = logical_coll
  label = "#{logical_coll} (physical: #{physical_coll})" if physical_coll && physical_coll != logical_coll
  segments = ["Referenced field '#{field_name}' not found in collection '#{label}'. "]

  # Best effort: a failed model lookup just omits the model-specific hint.
  model = begin
    SearchEngine::CollectionResolver.model_for_logical(logical_coll)
  rescue StandardError
    nil
  end

  if model
    declared = Array(compile(model)[:fields]).map { |f| (f[:name] || f['name']).to_s }
    segments << if declared.include?(field_name)
                  "Field exists in model '#{model.name}' but is not indexed " \
                    '(possibly marked with `index: false`). '
                else
                  "Field is not declared in model '#{model.name}'. "
                end
  end

  segments << "Available fields in collection: #{known_fields.join(', ')}. "
  segments << 'Ensure the field is declared and indexed in the referenced collection.'
  segments.join
end
1066
+
1067
+ # Build a detailed error message when a referenced field has index: false.
1068
+ # @param logical_coll [String] logical collection name
1069
+ # @param field_name [String] field name that is not indexed
1070
+ # @param physical_coll [String, nil] physical collection name (optional)
1071
+ # @return [String] error message
1072
def build_field_not_indexed_error(logical_coll, field_name, physical_coll: nil)
  # The physical name is only shown when it is present and differs from the logical one.
  label = logical_coll
  label = "#{logical_coll} (physical: #{physical_coll})" if physical_coll && physical_coll != logical_coll

  [
    "Referenced field '#{field_name}' in collection '#{label}' has index: false. ",
    'Typesense requires referenced fields to be indexed for JOIN operations. ',
    "Reindex the '#{logical_coll}' collection to update its schema, ensuring the " \
      "'#{field_name}' field does not have `index: false` in the model declaration."
  ].join
end
1081
+ end
1082
+ end
1083
+ end