search-engine-for-typesense 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +148 -0
  4. data/app/search_engine/search_engine/app_info.rb +11 -0
  5. data/app/search_engine/search_engine/index_partition_job.rb +170 -0
  6. data/lib/generators/search_engine/install/install_generator.rb +20 -0
  7. data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
  8. data/lib/generators/search_engine/model/model_generator.rb +86 -0
  9. data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
  10. data/lib/search-engine-for-typesense.rb +12 -0
  11. data/lib/search_engine/active_record_syncable.rb +247 -0
  12. data/lib/search_engine/admin/stopwords.rb +125 -0
  13. data/lib/search_engine/admin/synonyms.rb +125 -0
  14. data/lib/search_engine/admin.rb +12 -0
  15. data/lib/search_engine/ast/and.rb +52 -0
  16. data/lib/search_engine/ast/binary_op.rb +75 -0
  17. data/lib/search_engine/ast/eq.rb +19 -0
  18. data/lib/search_engine/ast/group.rb +18 -0
  19. data/lib/search_engine/ast/gt.rb +12 -0
  20. data/lib/search_engine/ast/gte.rb +12 -0
  21. data/lib/search_engine/ast/in.rb +28 -0
  22. data/lib/search_engine/ast/lt.rb +12 -0
  23. data/lib/search_engine/ast/lte.rb +12 -0
  24. data/lib/search_engine/ast/matches.rb +55 -0
  25. data/lib/search_engine/ast/node.rb +176 -0
  26. data/lib/search_engine/ast/not_eq.rb +13 -0
  27. data/lib/search_engine/ast/not_in.rb +24 -0
  28. data/lib/search_engine/ast/or.rb +52 -0
  29. data/lib/search_engine/ast/prefix.rb +51 -0
  30. data/lib/search_engine/ast/raw.rb +41 -0
  31. data/lib/search_engine/ast/unary_op.rb +43 -0
  32. data/lib/search_engine/ast.rb +101 -0
  33. data/lib/search_engine/base/creation.rb +727 -0
  34. data/lib/search_engine/base/deletion.rb +80 -0
  35. data/lib/search_engine/base/display_coercions.rb +36 -0
  36. data/lib/search_engine/base/hydration.rb +312 -0
  37. data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
  38. data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
  39. data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
  40. data/lib/search_engine/base/index_maintenance.rb +459 -0
  41. data/lib/search_engine/base/indexing_dsl.rb +255 -0
  42. data/lib/search_engine/base/joins.rb +479 -0
  43. data/lib/search_engine/base/model_dsl.rb +472 -0
  44. data/lib/search_engine/base/presets.rb +43 -0
  45. data/lib/search_engine/base/pretty_printer.rb +315 -0
  46. data/lib/search_engine/base/relation_delegation.rb +42 -0
  47. data/lib/search_engine/base/scopes.rb +113 -0
  48. data/lib/search_engine/base/updating.rb +92 -0
  49. data/lib/search_engine/base.rb +38 -0
  50. data/lib/search_engine/bulk.rb +284 -0
  51. data/lib/search_engine/cache.rb +33 -0
  52. data/lib/search_engine/cascade.rb +531 -0
  53. data/lib/search_engine/cli/doctor.rb +631 -0
  54. data/lib/search_engine/cli/support.rb +217 -0
  55. data/lib/search_engine/cli.rb +222 -0
  56. data/lib/search_engine/client/http_adapter.rb +63 -0
  57. data/lib/search_engine/client/request_builder.rb +92 -0
  58. data/lib/search_engine/client/services/base.rb +74 -0
  59. data/lib/search_engine/client/services/collections.rb +161 -0
  60. data/lib/search_engine/client/services/documents.rb +214 -0
  61. data/lib/search_engine/client/services/operations.rb +152 -0
  62. data/lib/search_engine/client/services/search.rb +190 -0
  63. data/lib/search_engine/client/services.rb +29 -0
  64. data/lib/search_engine/client.rb +765 -0
  65. data/lib/search_engine/client_options.rb +20 -0
  66. data/lib/search_engine/collection_resolver.rb +191 -0
  67. data/lib/search_engine/collections_graph.rb +330 -0
  68. data/lib/search_engine/compiled_params.rb +143 -0
  69. data/lib/search_engine/compiler.rb +383 -0
  70. data/lib/search_engine/config/observability.rb +27 -0
  71. data/lib/search_engine/config/presets.rb +92 -0
  72. data/lib/search_engine/config/selection.rb +16 -0
  73. data/lib/search_engine/config/typesense.rb +48 -0
  74. data/lib/search_engine/config/validators.rb +97 -0
  75. data/lib/search_engine/config.rb +917 -0
  76. data/lib/search_engine/console_helpers.rb +130 -0
  77. data/lib/search_engine/deletion.rb +103 -0
  78. data/lib/search_engine/dispatcher.rb +125 -0
  79. data/lib/search_engine/dsl/parser.rb +582 -0
  80. data/lib/search_engine/engine.rb +167 -0
  81. data/lib/search_engine/errors.rb +290 -0
  82. data/lib/search_engine/filters/sanitizer.rb +189 -0
  83. data/lib/search_engine/hydration/materializers.rb +808 -0
  84. data/lib/search_engine/hydration/selection_context.rb +96 -0
  85. data/lib/search_engine/indexer/batch_planner.rb +76 -0
  86. data/lib/search_engine/indexer/bulk_import.rb +626 -0
  87. data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
  88. data/lib/search_engine/indexer/retry_policy.rb +103 -0
  89. data/lib/search_engine/indexer.rb +747 -0
  90. data/lib/search_engine/instrumentation.rb +308 -0
  91. data/lib/search_engine/joins/guard.rb +202 -0
  92. data/lib/search_engine/joins/resolver.rb +95 -0
  93. data/lib/search_engine/logging/color.rb +78 -0
  94. data/lib/search_engine/logging/format_helpers.rb +92 -0
  95. data/lib/search_engine/logging/partition_progress.rb +53 -0
  96. data/lib/search_engine/logging_subscriber.rb +388 -0
  97. data/lib/search_engine/mapper.rb +785 -0
  98. data/lib/search_engine/multi.rb +286 -0
  99. data/lib/search_engine/multi_result.rb +186 -0
  100. data/lib/search_engine/notifications/compact_logger.rb +675 -0
  101. data/lib/search_engine/observability.rb +162 -0
  102. data/lib/search_engine/operations.rb +58 -0
  103. data/lib/search_engine/otel.rb +227 -0
  104. data/lib/search_engine/partitioner.rb +128 -0
  105. data/lib/search_engine/ranking_plan.rb +118 -0
  106. data/lib/search_engine/registry.rb +158 -0
  107. data/lib/search_engine/relation/compiler.rb +711 -0
  108. data/lib/search_engine/relation/deletion.rb +37 -0
  109. data/lib/search_engine/relation/dsl/filters.rb +624 -0
  110. data/lib/search_engine/relation/dsl/selection.rb +240 -0
  111. data/lib/search_engine/relation/dsl.rb +903 -0
  112. data/lib/search_engine/relation/dx/dry_run.rb +59 -0
  113. data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
  114. data/lib/search_engine/relation/dx.rb +231 -0
  115. data/lib/search_engine/relation/materializers.rb +118 -0
  116. data/lib/search_engine/relation/options.rb +138 -0
  117. data/lib/search_engine/relation/state.rb +274 -0
  118. data/lib/search_engine/relation/updating.rb +44 -0
  119. data/lib/search_engine/relation.rb +623 -0
  120. data/lib/search_engine/result.rb +664 -0
  121. data/lib/search_engine/schema.rb +1083 -0
  122. data/lib/search_engine/sources/active_record_source.rb +185 -0
  123. data/lib/search_engine/sources/base.rb +62 -0
  124. data/lib/search_engine/sources/lambda_source.rb +55 -0
  125. data/lib/search_engine/sources/sql_source.rb +196 -0
  126. data/lib/search_engine/sources.rb +71 -0
  127. data/lib/search_engine/stale_rules.rb +160 -0
  128. data/lib/search_engine/test/minitest_assertions.rb +57 -0
  129. data/lib/search_engine/test/offline_client.rb +134 -0
  130. data/lib/search_engine/test/rspec_matchers.rb +77 -0
  131. data/lib/search_engine/test/stub_client.rb +201 -0
  132. data/lib/search_engine/test.rb +66 -0
  133. data/lib/search_engine/test_autoload.rb +8 -0
  134. data/lib/search_engine/update.rb +35 -0
  135. data/lib/search_engine/version.rb +7 -0
  136. data/lib/search_engine.rb +332 -0
  137. data/lib/tasks/search_engine.rake +501 -0
  138. data/lib/tasks/search_engine_doctor.rake +16 -0
  139. metadata +225 -0
@@ -0,0 +1,308 @@
1
+ # frozen_string_literal: true
2
+
3
module SearchEngine
  # Lightweight wrapper over ActiveSupport::Notifications that standardizes
  # event names, payload shaping, and timing helpers. Provides a per-execution
  # context (stored in Thread.current[]) to propagate dispatch metadata
  # (e.g., mode, job_id) from the dispatcher/job into lower layers such as
  # the indexer.
  #
  # Public API is intentionally small:
  # - {.instrument(event, payload = {}) { |ctx| }}: emit an event (yields mutable ctx)
  # - {.time(event, base_payload = {}) { |payload| }}: measure duration_ms
  # - {.with_context(hash) { }}: set per-thread context for nested calls
  # - {.context}: current shallow context Hash (dup)
  #
  # Payloads are shallow-copied, nil values are pruned, and keys are
  # symbolized. The same shaping applies to values merged in from the
  # current context.
  module Instrumentation
    # Thread.current key holding the shallow context Hash.
    THREAD_KEY = :__search_engine_context__
    # Thread.current key holding the current correlation id.
    THREAD_CORRELATION_KEY = :__search_engine_correlation_id__

    # Catalog of event names with their required and optional payload keys.
    #
    # Known events
    #
    # - "search_engine.joins.compile": emitted once per relation compile summarizing JOIN usage.
    #   Payload keys (nil/empty omitted):
    #   - :collection [String]
    #   - :join_count [Integer]
    #   - :assocs [Array<String>]
    #   - :used_in [Hash{Symbol=>Array<String>}] include/filter/sort association usage
    #   - :include_len [Integer]
    #   - :filter_len [Integer]
    #   - :sort_len [Integer]
    #   - :duration_ms [Float]
    #   - :has_joins [Boolean]
    # - "search_engine.selection.compile": emitted once per relation compile summarizing field selection counts.
    #   Payload keys:
    #   - :include_count [Integer] total effective include fields (root + nested after precedence)
    #   - :exclude_count [Integer] total excluded fields (root + nested)
    #   - :nested_assoc_count [Integer] associations with any selection state (include or exclude)
    # - "search_engine.preset.apply": emitted once per relation compile when a preset is present.
    #   Payload keys (keys-only; values redacted elsewhere):
    #   - :preset_name [String] effective namespaced preset
    #   - :mode [Symbol] one of :merge, :only, :lock
    #   - :locked_domains [Array<Symbol>] configured locked domains for :lock mode
    #   - :pruned_keys [Array<Symbol>] keys removed by the chosen mode
    #   NOTE(review): the catalog entry below lists preset_-prefixed keys
    #   (:preset_mode, :preset_pruned_keys, ...) rather than :mode/:pruned_keys;
    #   confirm which names the emitter actually uses.
    # - "search_engine.curation.compile": emitted once per relation compile when curation state is present.
    #   Payload keys:
    #   - :pinned_count [Integer]
    #   - :hidden_count [Integer]
    #   - :has_override_tags [Boolean]
    #   - :filter_curated_hits [true,false,nil]
    # - "search_engine.curation.conflict": emitted when overlaps or limits are detected; at most once per compile.
    #   Payload keys:
    #   - :type [Symbol] one of :overlap, :limit_exceeded
    #   - :count [Integer]
    #   - :limit [Integer, optional] present when type==:limit_exceeded
    #
    # See also: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/presets#observability
    CATALOG = {
      'search_engine.search' => {
        required: %i[collection],
        optional: %i[
          labels status duration_ms correlation_id params_preview http_status url_opts
          error_class error_message selection_include_count selection_exclude_count
          selection_nested_assoc_count preset_name preset_mode preset_pruned_keys
          preset_pruned_keys_count preset_locked_domains_count curation_pinned_count
          curation_hidden_count curation_has_override_tags curation_filter_flag
          curation_conflict_type curation_conflict_count
        ]
      },
      'search_engine.multi_search' => {
        required: %i[searches_count],
        optional: %i[labels status duration_ms correlation_id url_opts http_status]
      },
      'search_engine.compile' => {
        required: %i[collection klass node_count source],
        optional: %i[duration_ms]
      },
      'search_engine.grouping.compile' => {
        required: %i[field],
        optional: %i[collection limit missing_values duration_ms]
      },
      'search_engine.joins.compile' => {
        required: %i[collection],
        optional: %i[join_count assocs used_in include_len filter_len sort_len duration_ms has_joins]
      },
      'search_engine.selection.compile' => {
        required: %i[],
        optional: %i[include_count exclude_count nested_assoc_count]
      },
      'search_engine.preset.apply' => {
        required: %i[preset_name],
        optional: %i[preset_mode preset_pruned_keys preset_pruned_keys_count preset_locked_domains_count]
      },
      'search_engine.preset.conflict' => {
        required: %i[type count],
        optional: %i[limit]
      },
      'search_engine.curation.compile' => {
        required: %i[],
        optional: %i[pinned_count hidden_count has_override_tags filter_curated_hits]
      },
      'search_engine.curation.conflict' => {
        required: %i[type count],
        optional: %i[limit]
      },
      'search_engine.schema.diff' => {
        required: %i[collection],
        optional: %i[fields_changed_count added_count removed_count in_sync duration_ms]
      },
      'search_engine.schema.apply' => {
        required: %i[collection],
        optional: %i[physical_new new_physical alias_swapped dropped_count retention_deleted_count status duration_ms]
      },
      'search_engine.indexer.partition_start' => {
        required: %i[collection into partition],
        optional: %i[dispatch_mode job_id timestamp]
      },
      'search_engine.indexer.partition_finish' => {
        required: %i[collection into partition batches_total docs_total success_total failed_total status],
        optional: %i[duration_ms]
      },
      'search_engine.indexer.batch_import' => {
        required: %i[collection into batch_index docs_count],
        optional: %i[
          success_count failure_count attempts http_status bytes_sent duration_ms
          transient_retry error_sample
        ]
      },
      'search_engine.indexer.delete_stale' => {
        required: %i[collection into partition filter_hash status],
        optional: %i[deleted_count duration_ms reason]
      },
      'search_engine.result.grouped_parsed' => { required: %i[collection groups_count], optional: %i[] },
      'search_engine.joins.declared' => { required: %i[model name collection], optional: %i[] },
      'search_engine.relation.group_by_updated' => {
        required: %i[collection field],
        optional: %i[limit missing_values]
      },
      'search_engine.facet.compile' => {
        required: %i[],
        optional: %i[collection fields_count queries_count max_facet_values sort_flags conflicts duration_ms]
      },
      'search_engine.highlight.compile' => {
        required: %i[],
        optional: %i[collection fields_count full_fields_count affix_tokens snippet_threshold tag_kind duration_ms]
      },
      'search_engine.synonyms.apply' => {
        required: %i[],
        optional: %i[collection use_synonyms use_stopwords source duration_ms]
      },
      'search_engine.geo.compile' => {
        required: %i[],
        optional: %i[collection filters_count shapes sort_mode radius_bucket duration_ms]
      },
      'search_engine.vector.compile' => {
        required: %i[],
        optional: %i[collection query_vector_present dims hybrid_weight ann_params_present duration_ms]
      },
      'search_engine.hits.limit' => {
        required: %i[],
        optional: %i[collection early_limit validate_max applied_strategy triggered total_hits duration_ms]
      }
    }.freeze

    # @return [Hash] the frozen event catalog
    def self.catalog
      CATALOG
    end

    # Emit an event with a shaped payload and yield a mutable context.
    # Stamps correlation_id, status, error_class/error_message, and duration_ms.
    # When ActiveSupport::Notifications is not loaded, the block still runs
    # and the context is still stamped, but nothing is published.
    #
    # @param event [String]
    # @param payload [Hash, nil]
    # @yieldparam ctx [Hash]
    # @return [Object] block result when provided
    def self.instrument(event, payload = {}, &block)
      started = monotonic_ms
      ctx = shape_payload(payload)
      # Reuse the current correlation id, creating (and remembering) one on first use.
      ctx[:correlation_id] ||= (Thread.current[THREAD_CORRELATION_KEY] ||= generate_correlation_id)

      # Fallback path when AS::N is unavailable
      return run_instrumented(ctx, started, &block) unless defined?(ActiveSupport::Notifications)

      result = nil
      ActiveSupport::Notifications.instrument(event, ctx) do
        result = run_instrumented(ctx, started, &block)
      end
      result
    end

    # Internal: run the caller's block against +ctx+, stamping :status,
    # error details (on StandardError, which is re-raised) and :duration_ms.
    def self.run_instrumented(ctx, started)
      result = yield(ctx) if block_given?
      ctx[:status] = :ok unless ctx.key?(:status)
      result
    rescue StandardError => error
      fill_error_context!(ctx, error)
      raise
    ensure
      ctx[:duration_ms] = (monotonic_ms - started).round(1)
    end
    private_class_method :run_instrumented

    # Measure a block and emit +event+ with :started_at_ms added to the payload.
    #
    # NOTE: ActiveSupport::Notifications already attaches duration; callers
    # should prefer event.duration on the subscriber side. We still provide
    # started_at_ms in the payload for completeness.
    #
    # @param event [String]
    # @param base_payload [Hash, nil]
    # @yieldparam payload [Hash, nil] the +base_payload+ argument as given
    # @yieldreturn [Object] result of the block
    # @return [Object]
    def self.time(event, base_payload = {})
      started = monotonic_ms
      # Tolerate a nil payload here, exactly as .instrument does.
      instrument(event, (base_payload || {}).merge(started_at_ms: started)) do
        yield(base_payload)
      end
    end

    # Set a shallow context for nested calls; the previous context is
    # restored when the block exits, even on error.
    # @param ctx [Hash, nil]
    # @yield block executed with context applied
    # @return [Object] result of the block
    def self.with_context(ctx)
      prev = Thread.current[THREAD_KEY]
      Thread.current[THREAD_KEY] = (prev || {}).merge(ctx || {})
      yield
    ensure
      Thread.current[THREAD_KEY] = prev
    end

    # Current shallow context hash.
    # @return [Hash] a dup; mutating it does not affect the stored context
    def self.context
      (Thread.current[THREAD_KEY] || {}).dup
    end

    # Apply/propagate correlation id for the current execution (thread/fiber-local)
    # and restore the previous value afterwards. When +id+ is nil the current
    # id is kept, or a fresh one is generated if none is set.
    # @param id [String, nil]
    # @yieldreturn [Object]
    # @return [Object] result of the block
    def self.with_correlation_id(id = nil)
      previous = Thread.current[THREAD_CORRELATION_KEY]
      Thread.current[THREAD_CORRELATION_KEY] = id || previous || generate_correlation_id
      yield
    ensure
      Thread.current[THREAD_CORRELATION_KEY] = previous
    end

    # @return [String, nil]
    def self.current_correlation_id
      Thread.current[THREAD_CORRELATION_KEY]
    end

    # Redact a payload-like structure (delegates to Observability)
    def self.redact(value)
      SearchEngine::Observability.redact(value)
    end

    # Monotonic clock in milliseconds; falls back to wall-clock time when the
    # monotonic clock call raises.
    # @return [Float]
    def self.monotonic_ms
      Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond)
    rescue StandardError
      (Time.now.to_f * 1000.0)
    end

    # Internal: normalize and prune a payload hash, then fill in values from
    # the current context. Context entries get the same shaping as explicit
    # ones (symbolized keys, nil values dropped) and never override a key the
    # caller passed explicitly.
    def self.shape_payload(payload)
      shaped = {}
      (payload || {}).each do |key, value|
        shaped[key.to_sym] = value unless value.nil?
      end

      context.each do |key, value|
        next if value.nil?

        name = key.respond_to?(:to_sym) ? key.to_sym : key
        shaped[name] = value unless shaped.key?(name)
      end
      shaped
    end
    private_class_method :shape_payload

    # Internal: short random URL-safe identifier.
    def self.generate_correlation_id
      require 'securerandom'
      SecureRandom.urlsafe_base64(8)
    end
    private_class_method :generate_correlation_id

    # Stamp error details onto +ctx+ without overriding values the caller
    # already set. The message is truncated to the configured
    # observability max_message_length, defaulting to 200.
    # @param ctx [Hash]
    # @param error [Exception]
    def self.fill_error_context!(ctx, error)
      ctx[:status] = :error unless ctx.key?(:status)
      ctx[:error_class] ||= error.class.name
      ctx[:error_message] ||= SearchEngine::Observability.truncate_message(
        error.message,
        SearchEngine.config.observability&.max_message_length || 200
      )
    end
  end
end
@@ -0,0 +1,202 @@
1
+ # frozen_string_literal: true
2
+
3
module SearchEngine
  module Joins
    # Stateless guard for validating join usage across Relation and Parser.
    #
    # Public API is module functions that raise {SearchEngine::Errors::*}
    # on invalid inputs; successful validations return nil.
    module Guard
      module_function

      # Ensure the association exists on +klass+.
      #
      # @param klass [Class] model class
      # @param assoc [Symbol, String]
      # @raise [SearchEngine::Errors::InvalidJoin]
      # @return [void]
      def ensure_assoc_exists!(klass, assoc)
        key = assoc.to_sym
        declared = safe_joins_config(klass)
        return if declared[key]

        known = declared.keys
        suggestions = suggest(key, known)
        msg = "association :#{key} is not declared on #{safe_class_name(klass)} "
        msg += "(declare with `join :#{key}, ...`)"
        msg += suggestion_suffix(suggestions)
        raise SearchEngine::Errors::InvalidJoin.new(
          msg,
          hint: (suggestions.any? ? "Did you mean #{suggestions.map { |s| ":#{s}" }.join(', ')}?" : nil),
          doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/joins#troubleshooting',
          details: { assoc: key, known: known }
        )
      end

      # Ensure the association config is complete (local_key and foreign_key present and non-blank).
      #
      # @param klass [Class]
      # @param assoc [Symbol, String]
      # @raise [SearchEngine::Errors::InvalidJoin] when the association is not declared at all
      # @raise [SearchEngine::Errors::InvalidJoinConfig] when a key is missing or blank
      # @return [void]
      def ensure_config_complete!(klass, assoc)
        key = assoc.to_sym
        cfg = safe_joins_config(klass)[key]
        # If missing entirely, surface presence error via ensure_assoc_exists!
        unless cfg
          ensure_assoc_exists!(klass, key)
          return
        end

        missing = %i[local_key foreign_key].select { |name| blank?(cfg[name]) }
        return if missing.empty?

        msg = "join :#{key} on #{safe_class_name(klass)} is missing "
        msg += missing.map { |m| ":#{m}" }.join(' and ')
        msg += ' (declare with `join '
        msg += ":#{key}, collection: ..., local_key: ..., foreign_key: ...`)"
        raise SearchEngine::Errors::InvalidJoinConfig.new(
          msg,
          hint: 'Declare local_key and foreign_key in join config.',
          doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/joins#troubleshooting',
          details: { assoc: key, missing: missing }
        )
      end

      # Ensure the relation has applied the association via .joins(:assoc) before use.
      #
      # @param joins [Array<Symbol>, nil]
      # @param assoc [Symbol, String]
      # @param context [String] optional human-friendly action (e.g., "filtering/sorting")
      # @raise [SearchEngine::Errors::JoinNotApplied]
      # @return [void]
      def ensure_join_applied!(joins, assoc, context: 'filtering/sorting')
        key = assoc.to_sym
        return if Array(joins).include?(key)

        raise SearchEngine::Errors::JoinNotApplied.new(
          "Call .joins(:#{key}) before #{context} on #{key} fields",
          doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/joins#troubleshooting',
          details: { assoc: key, context: context }
        )
      end

      # Validate that a joined field exists on the target collection when the
      # target model is registered and exposes attributes.
      #
      # Best-effort: when the target cannot be resolved, does not expose
      # attributes, or declares none, no exception is raised (prefer
      # low-noise behavior on missing metadata).
      #
      # @param assoc_cfg [Hash, nil] normalized association config
      # @param field [Symbol, String]
      # @param source_klass [Class, nil] optional source model class for error messages
      # @raise [SearchEngine::Errors::UnknownJoinField]
      # @return [void]
      def validate_joined_field!(assoc_cfg, field, source_klass: nil)
        return if assoc_cfg.nil?

        collection = assoc_cfg[:collection]
        return if blank?(collection)

        target_klass = begin
          SearchEngine.collection_for(collection)
        rescue StandardError
          nil
        end
        return unless target_klass.respond_to?(:attributes)

        declared = Array(target_klass.attributes).map { |name, _| name.to_s }
        # The emptiness check must run before the implicit id is added;
        # otherwise it can never trigger and metadata-less targets reject
        # every field.
        return if declared.empty?

        known = declared | ['id'] # allow id implicitly on joined collections
        fname = field.to_s
        return if known.include?(fname)

        suggestions = suggest(fname, known)
        # Prefer the source model name for message clarity when provided
        model_name = safe_class_name(source_klass || target_klass)
        # best effort: derive the association name from the collection
        assoc_name = assoc_cfg[:name] || collection.to_s
        msg = "UnknownJoinField: :#{fname} is not declared on association :#{assoc_name} for #{model_name}"
        msg += suggestion_suffix(suggestions)
        raise SearchEngine::Errors::UnknownJoinField.new(
          msg,
          details: { assoc: assoc_name, field: fname }
        )
      end

      # Reject multi-hop paths like $authors.publisher.name
      #
      # @param path [String]
      # @raise [SearchEngine::Errors::UnsupportedJoinNesting]
      # @return [void]
      def ensure_single_hop_path!(path)
        return if path.to_s.count('.') <= 1

        raise SearchEngine::Errors::UnsupportedJoinNesting.new(
          'Only one join hop is supported: `$assoc.field`. Use a separate pipeline step to denormalize deeper paths.',
          doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/joins#troubleshooting',
          details: { path: path }
        )
      end

      # --- internals -------------------------------------------------------

      # Up to three closest names (Levenshtein distance <= 2) from +known+,
      # as Symbols in alphabetical order; [] when nothing is close enough or
      # did_you_mean cannot be loaded.
      def suggest(input, known)
        return [] if known.nil? || known.empty?

        begin
          require 'did_you_mean'
          require 'did_you_mean/levenshtein'
        rescue LoadError, StandardError
          # LoadError is a ScriptError, not a StandardError, so it has to be
          # named explicitly for a missing library to degrade gracefully.
          return []
        end

        str = input.to_s
        distances = Array(known).map(&:to_s).each_with_object({}) do |cand, acc|
          acc[cand] = DidYouMean::Levenshtein.distance(str, cand)
        end
        min = distances.values.min
        return [] if min.nil? || min > 2

        distances.select { |_, d| d == min }.keys.sort.first(3).map(&:to_sym)
      end
      private_class_method :suggest

      # Message tail of the form " (did you mean :a, :b?)", or '' when empty.
      def suggestion_suffix(suggestions)
        return '' if suggestions.nil? || suggestions.empty?

        tail = suggestions.map { |s| ":#{s}" }.join(', ')
        " (did you mean #{tail}?)"
      end
      private_class_method :suggestion_suffix

      # joins_config of +klass+, or {} when it is not exposed or is nil.
      def safe_joins_config(klass)
        return {} unless klass.respond_to?(:joins_config)

        klass.joins_config || {}
      end
      private_class_method :safe_joins_config

      # Class name for messages; falls back to #to_s for anonymous classes.
      def safe_class_name(klass)
        klass.respond_to?(:name) && klass.name ? klass.name : klass.to_s
      end
      private_class_method :safe_class_name

      # True for nil and for values whose string form is empty or whitespace.
      def blank?(value)
        value.nil? || value.to_s.strip.empty?
      end
      private_class_method :blank?
    end
  end
end
@@ -0,0 +1,95 @@
1
+ # frozen_string_literal: true
2
+
3
module SearchEngine
  module Joins
    # Key resolver for join associations.
    #
    # Determines { local_key, foreign_key } to use for an association, honoring
    # explicitly provided keys and auto-inferring when absent. Auto-inference
    # prefers a single shared attribute name between the base and target models
    # that ends with `_id`. When ambiguity is detected, an error is raised with
    # short suggestions.
    module Resolver
      module_function

      # Resolve join keys for an association.
      #
      # When both keys are given they are validated against the declared
      # attributes and returned as Symbols. Otherwise a single attribute name
      # shared by both models is used for both sides, preferring names ending
      # in `_id`.
      #
      # @param base_klass [Class] the model on which the join is declared
      # @param assoc_cfg [Hash] normalized join config (from joins_config)
      # @return [Hash] { local_key: Symbol, foreign_key: Symbol }
      # @raise [SearchEngine::Errors::InvalidJoin] when an explicit key is not a declared attribute
      # @raise [SearchEngine::Errors::InvalidJoinConfig] when inference does not yield exactly one key
      def resolve_keys(base_klass, assoc_cfg)
        local_key = assoc_cfg[:local_key]
        foreign_key = assoc_cfg[:foreign_key]

        if present?(local_key) && present?(foreign_key)
          validate_key_known!(base_klass, local_key, side: :local)
          validate_foreign_key_known!(assoc_cfg, foreign_key)
          return { local_key: local_key.to_sym, foreign_key: foreign_key.to_sym }
        end

        # Infer when any of the keys is missing
        base_attrs = safe_attributes(base_klass).keys.map(&:to_s)
        target_attrs = safe_attributes(safe_target_klass(assoc_cfg[:collection])).keys.map(&:to_s)

        shared = base_attrs & target_attrs
        id_like = shared.select { |name| name.end_with?('_id') }
        candidates = id_like.empty? ? shared : id_like

        raise_ambiguous_keys!(base_klass, assoc_cfg, candidates) if candidates.length != 1

        key = candidates.first.to_sym
        { local_key: key, foreign_key: key }
      end

      # --- internals --------------------------------------------------------

      # Raise unless +key+ is :id or a declared attribute of +klass+.
      #
      # @param klass [Class]
      # @param key [Symbol, String]
      # @param side [Symbol] :local or :foreign, used in the message
      # @raise [SearchEngine::Errors::InvalidJoin]
      # @return [void]
      def validate_key_known!(klass, key, side:)
        attrs = safe_attributes(klass)
        return if key.to_sym == :id || attrs.key?(key.to_sym)

        model_name = klass.respond_to?(:name) && klass.name ? klass.name : klass.to_s
        raise SearchEngine::Errors::InvalidJoin.new(
          "Unknown #{side} key :#{key} for #{model_name}. Declare it via `attribute :#{key}, ...`.",
          doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/joins#troubleshooting',
          details: { side: side, key: key, model: model_name }
        )
      end

      # Validate the foreign key against the target collection's model.
      #
      # Best-effort: when the target collection cannot be resolved there is
      # nothing to validate against, so the check is skipped rather than
      # raising an "Unknown foreign key" error with a blank model name.
      #
      # @param assoc_cfg [Hash]
      # @param key [Symbol, String]
      # @raise [SearchEngine::Errors::InvalidJoin]
      # @return [void]
      def validate_foreign_key_known!(assoc_cfg, key)
        tklass = safe_target_klass(assoc_cfg[:collection])
        return if tklass.nil?

        validate_key_known!(tklass, key, side: :foreign)
      end

      # Model registered for +collection_name+, or nil when lookup raises.
      def safe_target_klass(collection_name)
        SearchEngine.collection_for(collection_name)
      rescue StandardError
        nil
      end

      # attributes of +klass+, or {} when it is not exposed or is nil.
      def safe_attributes(klass)
        return {} unless klass.respond_to?(:attributes)

        klass.attributes || {}
      end

      # Raise InvalidJoinConfig listing the candidate keys (possibly none).
      #
      # @raise [SearchEngine::Errors::InvalidJoinConfig] always
      def raise_ambiguous_keys!(base_klass, assoc_cfg, candidates)
        base_name = base_klass.respond_to?(:name) && base_klass.name ? base_klass.name : base_klass.to_s
        assoc_name = assoc_cfg[:name] || assoc_cfg[:collection] || :unknown
        sugg = candidates.map { |n| ":#{n}" }.join(', ')
        msg = "Ambiguous join keys for :#{assoc_name} on #{base_name}. " \
              "Could not infer a unique shared key. Candidates: #{sugg}"
        raise SearchEngine::Errors::InvalidJoinConfig.new(
          msg,
          doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/joins#client-side-fallback',
          details: { assoc: assoc_name, candidates: candidates }
        )
      end

      # True unless +v+ is nil or its string form is empty/whitespace.
      def present?(v)
        !(v.nil? || v.to_s.strip.empty?)
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
module SearchEngine
  module Logging
    # ANSI color helpers for CLI output.
    #
    # Applies colors only when $stdout is a TTY and NO_COLOR is not set to a
    # non-empty value. Intended for short substring coloring inside existing
    # log lines.
    #
    # @since M8
    module Color
      # SGR foreground codes for the supported color names.
      ANSI_CODES = { 'green' => 32, 'yellow' => 33, 'red' => 31 }.freeze

      module_function

      # Wrap +str+ in the ANSI sequence for +color+. Returns +str+ unchanged
      # when coloring is disabled or the color is nil/unsupported.
      #
      # @param str [String]
      # @param color [Symbol, String, nil] one of :green, :yellow, :red
      # @return [String]
      def apply(str, color)
        return str unless enabled?

        # to_s keeps a nil color from raising; it simply finds no code.
        code = ANSI_CODES[color.to_s]
        return str if code.nil?

        "\e[#{code}m#{str}\e[0m"
      end

      # Apply bold styling to a string.
      # @param str [String]
      # @return [String]
      def bold(str)
        return str unless enabled?

        "\e[1m#{str}\e[0m"
      end

      # Map indexation status to a color.
      # @param status [#to_s]
      # @return [Symbol] :green for 'ok', :red for 'failed', :yellow otherwise
      #   (including 'partial')
      def for_status(status)
        case status.to_s
        when 'ok' then :green
        when 'failed' then :red
        else :yellow
        end
      end

      # Determine color for partition/status based on success/failure counts.
      # @param failed_total [Integer] number of failed documents
      # @param success_total [Integer] number of successful documents
      # @return [Symbol] color name (:green, :yellow, or :red)
      def for_partition_status(failed_total, success_total)
        return :green if failed_total.to_i.zero?

        # Some failures: yellow for partial success, red when nothing succeeded.
        success_total.to_i.positive? ? :yellow : :red
      end

      # An empty NO_COLOR is treated as unset, following the NO_COLOR
      # convention ("present and not an empty string").
      #
      # @return [Boolean] whether coloring is active
      def enabled?
        no_color = ENV['NO_COLOR']
        return false unless no_color.nil? || no_color.empty?

        $stdout.isatty
      rescue StandardError
        false
      end
    end
  end
end