search-engine-for-typesense 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +148 -0
  4. data/app/search_engine/search_engine/app_info.rb +11 -0
  5. data/app/search_engine/search_engine/index_partition_job.rb +170 -0
  6. data/lib/generators/search_engine/install/install_generator.rb +20 -0
  7. data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
  8. data/lib/generators/search_engine/model/model_generator.rb +86 -0
  9. data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
  10. data/lib/search-engine-for-typesense.rb +12 -0
  11. data/lib/search_engine/active_record_syncable.rb +247 -0
  12. data/lib/search_engine/admin/stopwords.rb +125 -0
  13. data/lib/search_engine/admin/synonyms.rb +125 -0
  14. data/lib/search_engine/admin.rb +12 -0
  15. data/lib/search_engine/ast/and.rb +52 -0
  16. data/lib/search_engine/ast/binary_op.rb +75 -0
  17. data/lib/search_engine/ast/eq.rb +19 -0
  18. data/lib/search_engine/ast/group.rb +18 -0
  19. data/lib/search_engine/ast/gt.rb +12 -0
  20. data/lib/search_engine/ast/gte.rb +12 -0
  21. data/lib/search_engine/ast/in.rb +28 -0
  22. data/lib/search_engine/ast/lt.rb +12 -0
  23. data/lib/search_engine/ast/lte.rb +12 -0
  24. data/lib/search_engine/ast/matches.rb +55 -0
  25. data/lib/search_engine/ast/node.rb +176 -0
  26. data/lib/search_engine/ast/not_eq.rb +13 -0
  27. data/lib/search_engine/ast/not_in.rb +24 -0
  28. data/lib/search_engine/ast/or.rb +52 -0
  29. data/lib/search_engine/ast/prefix.rb +51 -0
  30. data/lib/search_engine/ast/raw.rb +41 -0
  31. data/lib/search_engine/ast/unary_op.rb +43 -0
  32. data/lib/search_engine/ast.rb +101 -0
  33. data/lib/search_engine/base/creation.rb +727 -0
  34. data/lib/search_engine/base/deletion.rb +80 -0
  35. data/lib/search_engine/base/display_coercions.rb +36 -0
  36. data/lib/search_engine/base/hydration.rb +312 -0
  37. data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
  38. data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
  39. data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
  40. data/lib/search_engine/base/index_maintenance.rb +459 -0
  41. data/lib/search_engine/base/indexing_dsl.rb +255 -0
  42. data/lib/search_engine/base/joins.rb +479 -0
  43. data/lib/search_engine/base/model_dsl.rb +472 -0
  44. data/lib/search_engine/base/presets.rb +43 -0
  45. data/lib/search_engine/base/pretty_printer.rb +315 -0
  46. data/lib/search_engine/base/relation_delegation.rb +42 -0
  47. data/lib/search_engine/base/scopes.rb +113 -0
  48. data/lib/search_engine/base/updating.rb +92 -0
  49. data/lib/search_engine/base.rb +38 -0
  50. data/lib/search_engine/bulk.rb +284 -0
  51. data/lib/search_engine/cache.rb +33 -0
  52. data/lib/search_engine/cascade.rb +531 -0
  53. data/lib/search_engine/cli/doctor.rb +631 -0
  54. data/lib/search_engine/cli/support.rb +217 -0
  55. data/lib/search_engine/cli.rb +222 -0
  56. data/lib/search_engine/client/http_adapter.rb +63 -0
  57. data/lib/search_engine/client/request_builder.rb +92 -0
  58. data/lib/search_engine/client/services/base.rb +74 -0
  59. data/lib/search_engine/client/services/collections.rb +161 -0
  60. data/lib/search_engine/client/services/documents.rb +214 -0
  61. data/lib/search_engine/client/services/operations.rb +152 -0
  62. data/lib/search_engine/client/services/search.rb +190 -0
  63. data/lib/search_engine/client/services.rb +29 -0
  64. data/lib/search_engine/client.rb +765 -0
  65. data/lib/search_engine/client_options.rb +20 -0
  66. data/lib/search_engine/collection_resolver.rb +191 -0
  67. data/lib/search_engine/collections_graph.rb +330 -0
  68. data/lib/search_engine/compiled_params.rb +143 -0
  69. data/lib/search_engine/compiler.rb +383 -0
  70. data/lib/search_engine/config/observability.rb +27 -0
  71. data/lib/search_engine/config/presets.rb +92 -0
  72. data/lib/search_engine/config/selection.rb +16 -0
  73. data/lib/search_engine/config/typesense.rb +48 -0
  74. data/lib/search_engine/config/validators.rb +97 -0
  75. data/lib/search_engine/config.rb +917 -0
  76. data/lib/search_engine/console_helpers.rb +130 -0
  77. data/lib/search_engine/deletion.rb +103 -0
  78. data/lib/search_engine/dispatcher.rb +125 -0
  79. data/lib/search_engine/dsl/parser.rb +582 -0
  80. data/lib/search_engine/engine.rb +167 -0
  81. data/lib/search_engine/errors.rb +290 -0
  82. data/lib/search_engine/filters/sanitizer.rb +189 -0
  83. data/lib/search_engine/hydration/materializers.rb +808 -0
  84. data/lib/search_engine/hydration/selection_context.rb +96 -0
  85. data/lib/search_engine/indexer/batch_planner.rb +76 -0
  86. data/lib/search_engine/indexer/bulk_import.rb +626 -0
  87. data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
  88. data/lib/search_engine/indexer/retry_policy.rb +103 -0
  89. data/lib/search_engine/indexer.rb +747 -0
  90. data/lib/search_engine/instrumentation.rb +308 -0
  91. data/lib/search_engine/joins/guard.rb +202 -0
  92. data/lib/search_engine/joins/resolver.rb +95 -0
  93. data/lib/search_engine/logging/color.rb +78 -0
  94. data/lib/search_engine/logging/format_helpers.rb +92 -0
  95. data/lib/search_engine/logging/partition_progress.rb +53 -0
  96. data/lib/search_engine/logging_subscriber.rb +388 -0
  97. data/lib/search_engine/mapper.rb +785 -0
  98. data/lib/search_engine/multi.rb +286 -0
  99. data/lib/search_engine/multi_result.rb +186 -0
  100. data/lib/search_engine/notifications/compact_logger.rb +675 -0
  101. data/lib/search_engine/observability.rb +162 -0
  102. data/lib/search_engine/operations.rb +58 -0
  103. data/lib/search_engine/otel.rb +227 -0
  104. data/lib/search_engine/partitioner.rb +128 -0
  105. data/lib/search_engine/ranking_plan.rb +118 -0
  106. data/lib/search_engine/registry.rb +158 -0
  107. data/lib/search_engine/relation/compiler.rb +711 -0
  108. data/lib/search_engine/relation/deletion.rb +37 -0
  109. data/lib/search_engine/relation/dsl/filters.rb +624 -0
  110. data/lib/search_engine/relation/dsl/selection.rb +240 -0
  111. data/lib/search_engine/relation/dsl.rb +903 -0
  112. data/lib/search_engine/relation/dx/dry_run.rb +59 -0
  113. data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
  114. data/lib/search_engine/relation/dx.rb +231 -0
  115. data/lib/search_engine/relation/materializers.rb +118 -0
  116. data/lib/search_engine/relation/options.rb +138 -0
  117. data/lib/search_engine/relation/state.rb +274 -0
  118. data/lib/search_engine/relation/updating.rb +44 -0
  119. data/lib/search_engine/relation.rb +623 -0
  120. data/lib/search_engine/result.rb +664 -0
  121. data/lib/search_engine/schema.rb +1083 -0
  122. data/lib/search_engine/sources/active_record_source.rb +185 -0
  123. data/lib/search_engine/sources/base.rb +62 -0
  124. data/lib/search_engine/sources/lambda_source.rb +55 -0
  125. data/lib/search_engine/sources/sql_source.rb +196 -0
  126. data/lib/search_engine/sources.rb +71 -0
  127. data/lib/search_engine/stale_rules.rb +160 -0
  128. data/lib/search_engine/test/minitest_assertions.rb +57 -0
  129. data/lib/search_engine/test/offline_client.rb +134 -0
  130. data/lib/search_engine/test/rspec_matchers.rb +77 -0
  131. data/lib/search_engine/test/stub_client.rb +201 -0
  132. data/lib/search_engine/test.rb +66 -0
  133. data/lib/search_engine/test_autoload.rb +8 -0
  134. data/lib/search_engine/update.rb +35 -0
  135. data/lib/search_engine/version.rb +7 -0
  136. data/lib/search_engine.rb +332 -0
  137. data/lib/tasks/search_engine.rake +501 -0
  138. data/lib/tasks/search_engine_doctor.rake +16 -0
  139. metadata +225 -0
@@ -0,0 +1,903 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'search_engine/relation/dsl/filters'
4
+ require 'search_engine/relation/dsl/selection'
5
+
6
+ module SearchEngine
7
+ class Relation
8
+ # User-facing chainers and input normalizers.
9
+ # Chainers MUST be copy-on-write and return new Relation instances.
10
+ module DSL
11
+ include SearchEngine::Relation::DSL::Filters
12
+ include SearchEngine::Relation::DSL::Selection
13
+
14
# Add sort expressions to the relation.
#
# The input is normalized by +normalize_order+ into "field:dir" strings,
# appended to the orders already held in the state, and then deduplicated
# by field so that the most recent direction for a field wins.
#
# The +text_match+ alias maps to the Typesense relevance token:
# - order(text_match: :asc|:desc) → "_text_match:asc|desc"
# - order(:text_match) → "_text_match:asc"
# The raw string form "_text_match:desc" is passed through unchanged.
#
# String input may contain raw Typesense tokens such as
# "_eval(locked:true):desc"; commas inside parentheses do not split the
# token and the direction is taken from the trailing ":asc"/":desc".
#
# @param value [Hash, String, Array, Symbol]
# @return [SearchEngine::Relation]
def order(value)
  incoming = normalize_order(value)
  spawn do |state|
    combined = Array(state[:orders]) + incoming
    state[:orders] = dedupe_orders_last_wins(combined)
  end
end
35
+
36
# Select a server-side preset and the strategy used to combine it with
# the relation's own parameters.
#
# The stored name is prefixed with "<namespace>_" when presets are enabled
# in SearchEngine.config and a namespace is configured; otherwise the
# stripped name is stored as given.
#
# @param name [#to_s, #to_sym]
# @param mode [Symbol] one of :merge, :only, :lock
# @return [SearchEngine::Relation]
# @raise [ArgumentError] when the name is nil/blank or the mode is unknown
def preset(name, mode: :merge)
  raise ArgumentError, 'preset requires a name' if name.nil?

  token = name.to_s.strip
  raise ArgumentError, 'preset name must be non-empty' if token.empty?

  strategy = mode.to_sym
  allowed = %i[merge only lock]
  unless allowed.include?(strategy)
    raise ArgumentError, "preset mode must be one of :merge, :only, :lock (got #{mode.inspect})"
  end

  presets_cfg = SearchEngine.config.presets
  namespaced = presets_cfg.enabled && presets_cfg.namespace
  # Unary plus / dup keep the stored name mutable and detached from +token+.
  resolved = namespaced ? +"#{presets_cfg.namespace}_#{token}" : token.dup

  spawn do |state|
    state[:preset_name] = resolved
    state[:preset_mode] = strategy
  end
end
63
+
64
# Tune ranking and typo tolerance for this relation.
#
# Scalar knobs overwrite the previously stored value only when the
# normalized value is non-nil. +query_by_weights+ is merged key-by-key
# into the weights already stored.
#
# @return [SearchEngine::Relation]
def ranking(num_typos: nil, drop_tokens_threshold: nil, prioritize_exact_match: nil, query_by_weights: nil)
  tuned = normalize_ranking_input(
    num_typos: num_typos,
    drop_tokens_threshold: drop_tokens_threshold,
    prioritize_exact_match: prioritize_exact_match,
    query_by_weights: query_by_weights
  )
  scalar_keys = %i[num_typos drop_tokens_threshold prioritize_exact_match]

  spawn do |state|
    previous = state[:ranking] || {}
    overrides = scalar_keys.each_with_object({}) do |key, acc|
      acc[key] = tuned[key] unless tuned[key].nil?
    end
    updated = previous.merge(overrides)
    if tuned.key?(:query_by_weights)
      prior_weights = previous[:query_by_weights] || {}
      updated[:query_by_weights] = prior_weights.merge(tuned[:query_by_weights])
    end
    state[:ranking] = updated
  end
end
87
+
88
# Choose the matching mode for this relation from a small enum.
#
# The mode is matched case-insensitively after stripping whitespace and
# the mapped token is stored under options[:infix]:
# :disabled → "off", :fallback → "fallback", :always → "always".
#
# @param mode [#to_s]
# @return [SearchEngine::Relation]
# @raise [SearchEngine::Errors::InvalidOption] for any other mode
def prefix(mode)
  tokens_by_mode = {
    disabled: 'off',
    fallback: 'fallback',
    always: 'always'
  }
  key = mode.to_s.strip.downcase.to_sym

  infix_token = tokens_by_mode.fetch(key) do
    raise SearchEngine::Errors::InvalidOption.new(
      "InvalidOption: unknown prefix mode #{mode.inspect}",
      hint: 'Use :disabled, :fallback, or :always',
      doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/ranking#prefix',
      details: { provided: mode, allowed: tokens_by_mode.keys }
    )
  end

  spawn do |state|
    state[:options] = (state[:options] || {}).merge(infix: infix_token)
  end
end
113
+
114
# Pin hits to the top of results by ID.
#
# New IDs are appended to the already-pinned list; duplicates are dropped
# while keeping first-seen order.
#
# The curation hash is duplicated before it is edited so the hash object
# found in the yielded state is never mutated in place, matching how the
# other chainers in this module treat :options and :hit_limits.
#
# @param ids [Array<#to_s>]
# @return [SearchEngine::Relation] self when no IDs remain after normalization
def pin(*ids)
  additions = normalize_curation_ids(ids)
  return self if additions.empty?

  spawn do |s|
    cur = (s[:curation] || { pinned: [], hidden: [], override_tags: [], filter_curated_hits: nil }).dup
    # NOTE(review): uniq assumes normalized IDs are all of one type (presumably Strings).
    cur[:pinned] = (Array(cur[:pinned]) + additions).uniq
    s[:curation] = cur
  end
end
129
+
130
# Hide hits by ID.
#
# New IDs are appended to the already-hidden list; duplicates are dropped
# while keeping first-seen order.
#
# The curation hash is duplicated before it is edited so the hash object
# found in the yielded state is never mutated in place, matching how the
# other chainers in this module treat :options and :hit_limits.
#
# @param ids [Array<#to_s>]
# @return [SearchEngine::Relation] self when no IDs remain after normalization
def hide(*ids)
  additions = normalize_curation_ids(ids)
  return self if additions.empty?

  spawn do |s|
    cur = (s[:curation] || { pinned: [], hidden: [], override_tags: [], filter_curated_hits: nil }).dup
    # NOTE(review): uniq assumes normalized IDs are all of one type (presumably Strings).
    cur[:hidden] = (Array(cur[:hidden]) + additions).uniq
    s[:curation] = cur
  end
end
145
+
146
# Set multiple curation knobs in one call.
#
# Each keyword that is given replaces the corresponding stored value
# (unlike #pin / #hide, which append). +filter_curated_hits+ uses the
# :__unset__ sentinel so that an explicit nil can clear the stored flag.
#
# The curation hash is duplicated before it is edited so the hash object
# found in the yielded state is never mutated in place.
#
# @param pin [Array<#to_s>, nil] replacement list of pinned IDs
# @param hide [Array<#to_s>, nil] replacement list of hidden IDs
# @param override_tags [Object, nil] passed to normalize_curation_tags
# @param filter_curated_hits [Boolean, nil] coerced strictly; nil clears
# @return [SearchEngine::Relation]
def curate(pin: nil, hide: nil, override_tags: nil, filter_curated_hits: :__unset__)
  spawn do |s|
    cur = (s[:curation] || { pinned: [], hidden: [], override_tags: [], filter_curated_hits: nil }).dup

    # NOTE(review): uniq assumes normalized IDs are all of one type (presumably Strings).
    cur[:pinned] = normalize_curation_ids(pin).uniq unless pin.nil?
    cur[:hidden] = normalize_curation_ids(hide).uniq unless hide.nil?
    cur[:override_tags] = normalize_curation_tags(override_tags) unless override_tags.nil?
    if filter_curated_hits != :__unset__
      cur[:filter_curated_hits] =
        filter_curated_hits.nil? ? nil : coerce_boolean_strict(filter_curated_hits, :filter_curated_hits)
    end

    s[:curation] = cur
  end
end
169
+
170
# Drop every curation setting by resetting the :curation state entry to nil.
# @return [SearchEngine::Relation]
def clear_curation
  spawn { |state| state[:curation] = nil }
end
177
+
178
# Group results by one field, with an optional per-group limit and a
# policy for documents missing the field.
#
# After building the new relation a 'search_engine.relation.group_by_updated'
# event is emitted; any StandardError raised while emitting it is swallowed
# so instrumentation can never break the chain.
#
# @return [SearchEngine::Relation]
def group_by(field, limit: nil, missing_values: false)
  grouping = normalize_grouping(field: field, limit: limit, missing_values: missing_values)
  relation = spawn { |state| state[:grouping] = grouping }

  begin
    SearchEngine::Instrumentation.instrument(
      'search_engine.relation.group_by_updated',
      {
        collection: klass_name_for_inspect,
        field: grouping[:field].to_s,
        limit: grouping[:limit],
        missing_values: grouping[:missing_values]
      }
    ) {}
  rescue StandardError
    nil
  end

  relation
end
201
+
202
# Reset selected parts of the relation state (AR-style unscope).
#
# Each supported part maps to one or more state keys, which are reset to
# a fresh empty Array/Hash or to nil.
#
# @param parts [Array<#to_sym>] any of :where, :order, :select, :limit,
#   :offset, :page, :per (nested arrays and nils are tolerated)
# @return [SearchEngine::Relation]
# @raise [ArgumentError] when an unknown part is requested
def unscope(*parts)
  # part => { state key => class instantiated for the blank value (nil = reset to nil) }
  blanks_by_part = {
    where: { ast: Array, filters: Array },
    order: { orders: Array },
    select: {
      select: Array, select_nested: Hash, select_nested_order: Array,
      exclude: Array, exclude_nested: Hash, exclude_nested_order: Array
    },
    limit: { limit: nil },
    offset: { offset: nil },
    page: { page: nil },
    per: { per_page: nil }
  }

  requested = Array(parts).flatten.compact.map(&:to_sym)
  supported = blanks_by_part.keys
  unknown = requested - supported
  unless unknown.empty?
    raise ArgumentError,
          "unscope: unknown part #{unknown.first.inspect} (supported: #{supported.map(&:inspect).join(', ')})"
  end

  spawn do |state|
    requested.each do |part|
      blanks_by_part.fetch(part).each { |key, blank| state[key] = blank&.new }
    end
  end
end
240
+
241
# Cap the number of results; the value is coerced via coerce_integer_min
# with a minimum of 1.
# @return [SearchEngine::Relation]
def limit(n)
  max_results = coerce_integer_min(n, :limit, 1)
  spawn do |state|
    state[:limit] = max_results
  end
end
247
+
248
# Skip a number of results; the value is coerced via coerce_integer_min
# with a minimum of 0.
# @return [SearchEngine::Relation]
def offset(n)
  skipped = coerce_integer_min(n, :offset, 0)
  spawn do |state|
    state[:offset] = skipped
  end
end
254
+
255
# Select the page number; the value is coerced via coerce_integer_min
# with a minimum of 1.
# @return [SearchEngine::Relation]
def page(n)
  page_number = coerce_integer_min(n, :page, 1)
  spawn do |state|
    state[:page] = page_number
  end
end
261
+
262
# Select the page size; the value is coerced via coerce_integer_min
# (reported under the :per name) with a minimum of 1.
# @return [SearchEngine::Relation]
def per_page(n)
  page_size = coerce_integer_min(n, :per, 1)
  spawn do |state|
    state[:per_page] = page_size
  end
end
268
+
269
# Shorthand for #per_page; forwards the argument unchanged.
# @return [SearchEngine::Relation]
def per(n)
  per_page(n)
end
274
+
275
# Configure an early hard cap on hits to fetch/consider.
#
# The cap is normalized through normalize_hit_limits_input (as
# +early_limit+) and merged into the :hit_limits state entry.
#
# @param n [Integer]
# @return [SearchEngine::Relation] self when normalization yields nothing
def limit_hits(n)
  cap = normalize_hit_limits_input({ early_limit: n })
  return self if cap.empty?

  spawn do |state|
    # Hash#merge returns a new hash, so the stored one is left untouched.
    state[:hit_limits] = (state[:hit_limits] || {}).merge(cap)
  end
end
288
+
289
# Configure a post-query validation that asserts total/applicable hits ≤ max.
#
# The bound is normalized through normalize_hit_limits_input (as +max+)
# and merged into the :hit_limits state entry.
#
# @param max [Integer]
# @return [SearchEngine::Relation] self when normalization yields nothing
def validate_hits!(max:)
  bound = normalize_hit_limits_input({ max: max })
  return self if bound.empty?

  spawn do |state|
    # Hash#merge returns a new hash, so the stored one is left untouched.
    state[:hit_limits] = (state[:hit_limits] || {}).merge(bound)
  end
end
302
+
303
# Store the query string (Typesense `q`) under options[:q].
# @param query [Object] coerced via #to_s
# @return [SearchEngine::Relation]
def search(query)
  spawn do |state|
    state[:options] = (state[:options] || {}).merge(q: query.to_s)
  end
end
313
+
314
# Shallow-merge the given options over those already stored; on key
# collisions the new value wins.
# @param opts [Hash]
# @return [SearchEngine::Relation]
# @raise [ArgumentError] when opts is not a Hash
def options(opts = {})
  raise ArgumentError, 'options must be a Hash' unless opts.is_a?(Hash)

  spawn do |state|
    stored = state[:options] || {}
    state[:options] = stored.merge(opts)
  end
end
324
+
325
# Toggle per-call cache usage by writing options[:use_cache].
#
# Non-nil values go through coerce_boolean_strict; nil is stored as nil,
# which unsets the option.
#
# @param value [Boolean, String, Integer, nil]
# @return [SearchEngine::Relation]
def cache(value)
  flag = coerce_boolean_strict(value, :use_cache) unless value.nil?
  spawn do |state|
    state[:options] = (state[:options] || {}).merge(use_cache: flag)
  end
end
337
+
338
# Register association names for server-side join compilation.
#
# Names are normalized first; each is then checked with
# Joins::Guard.ensure_config_complete! before being appended (duplicates
# are kept) to the :joins state entry.
#
# @param assocs [Array<#to_sym,#to_s>]
# @return [SearchEngine::Relation] self when no names are given
def joins(*assocs)
  requested = normalize_joins(assocs)
  return self if requested.empty?

  requested.each do |assoc|
    SearchEngine::Joins::Guard.ensure_config_complete!(@klass, assoc)
  end

  spawn do |state|
    state[:joins] = Array(state[:joins]) + requested
  end
end
352
+
353
# Toggle synonym usage at query time. Non-nil values go through
# coerce_boolean_strict; nil is stored as nil.
# @return [SearchEngine::Relation]
def use_synonyms(value)
  flag = coerce_boolean_strict(value, :use_synonyms) unless value.nil?
  spawn { |state| state[:use_synonyms] = flag }
end
361
+
362
# Toggle stopword usage at query time. Non-nil values go through
# coerce_boolean_strict; nil is stored as nil.
# @return [SearchEngine::Relation]
def use_stopwords(value)
  flag = coerce_boolean_strict(value, :use_stopwords) unless value.nil?
  spawn { |state| state[:use_stopwords] = flag }
end
370
+
371
+ # Faceting DSL
372
+ # ---------------
373
# Request facet counts for a field.
#
# The field, sort and stats arguments are validated; only the field name
# and the parsed max_values cap are written to state here. A field is
# listed once in :facet_fields, while every non-nil cap is appended to
# :facet_max_values.
#
# @param field [#to_s]
# @return [SearchEngine::Relation]
# @raise [SearchEngine::Errors::InvalidParams] when the field name is blank
def facet_by(field, max_values: nil, sort: nil, stats: nil)
  facet_name = field.to_s.strip
  raise SearchEngine::Errors::InvalidParams, 'facet_by: field name must be non-empty' if facet_name.empty?

  validate_facet_field!(facet_name)
  validate_facet_sort!(sort)
  validate_facet_stats!(stats)

  limit_cap = parse_facet_cap!(max_values)

  spawn do |state|
    known = Array(state[:facet_fields])
    state[:facet_fields] = known.include?(facet_name) ? known : known + [facet_name]

    stored_caps = Array(state[:facet_max_values])
    state[:facet_max_values] = limit_cap.nil? ? stored_caps : stored_caps + [limit_cap]
  end
end
391
+
392
# Add a facet query (field + expression, optionally labelled).
#
# The field and expression are validated, then the record is appended to
# :facet_queries unless an entry with the same field, expression and
# label is already present.
#
# @param field [#to_s]
# @param expression [#to_s]
# @param label [#to_s, nil] blank labels are ignored
# @return [SearchEngine::Relation]
# @raise [SearchEngine::Errors::InvalidParams] on a blank field or expression
def facet_query(field, expression, label: nil)
  facet_name = field.to_s.strip
  raise SearchEngine::Errors::InvalidParams, 'facet_query: field name must be non-empty' if facet_name.empty?

  validate_facet_field!(facet_name, context: 'facet_query')
  ensure_known_field!(facet_name)

  expr = expression.to_s.strip
  raise SearchEngine::Errors::InvalidParams, 'facet_query: expression must be a non-empty String' if expr.empty?

  validate_range_brackets!(expr)

  caption = label&.to_s&.strip
  identity = %i[field expr label]

  spawn do |state|
    stored = Array(state[:facet_queries])
    record = { field: facet_name, expr: expr }
    record[:label] = caption unless caption.nil? || caption.empty?
    duplicate = stored.any? { |q| q.values_at(*identity) == record.values_at(*identity) }
    state[:facet_queries] = duplicate ? stored : stored + [record]
  end
end
414
+
415
+ # --- Normalizers (private) ---
416
+ private
417
+
418
# Parse and normalize order input into an array of "field:dir" strings.
#
# Dispatch uses +case/when+ (Module#===), so subclasses of Hash, String,
# Array and Symbol are routed to the same normalizer as their parent
# class instead of being rejected as unsupported.
#
# @param value [Hash, String, Array, Symbol, nil]
# @return [Array<String>] empty for nil
# @raise [ArgumentError] for any other input type
def normalize_order(value)
  case value
  when nil    then []
  when Hash   then normalize_order_hash(value)
  when String then normalize_order_string(value)
  when Array  then normalize_order_array(value)
  when Symbol then normalize_order_symbol(value)
  else
    raise ArgumentError, "order: unsupported input #{value.class}"
  end
end
433
+
434
# Break an order string into tokens at commas that sit outside any
# parentheses. Tokens are whitespace-trimmed and blank tokens are dropped.
#
# A ")" with no matching "(" is kept in the token but does not push the
# nesting depth below zero.
#
# @param value [#to_s]
# @return [Array<String>]
def split_order_tokens(value)
  text = value.to_s
  return [] if text.strip.empty?

  nesting = 0
  pieces = [+'']
  text.each_char do |char|
    if char == ',' && nesting.zero?
      # Top-level separator: start collecting the next token.
      pieces << +''
      next
    end

    nesting += 1 if char == '('
    nesting -= 1 if char == ')' && nesting.positive?
    pieces.last << char
  end

  pieces.map(&:strip).reject(&:empty?)
end
464
+
465
# Turn a Hash of order specs into "field:dir" strings.
#
# Two shapes are handled per entry:
# - { field => direction } → "field:dir" ("text_match" → "_text_match")
# - { assoc => { field => direction } } → "$assoc.field:dir" for joined
#   fields; the association must resolve via @klass.join_for and pass
#   Joins::Guard.ensure_join_applied!.
#
# @param value [Hash]
# @return [Array<String>]
# @raise [ArgumentError] on a blank field name or a direction other than asc/desc
def normalize_order_hash(value)
  parse_direction = lambda do |raw_direction, raw_field|
    normalized = raw_direction.to_s.strip.downcase
    next normalized if %w[asc desc].include?(normalized)

    raise ArgumentError,
          "order: direction must be :asc or :desc (got #{raw_direction.inspect} for field #{raw_field.inspect})"
  end

  value.flat_map do |key, spec|
    unless spec.is_a?(Hash)
      column = key.to_s.strip
      raise ArgumentError, 'order: field name must be non-empty' if column.empty?

      sort_dir = parse_direction.call(spec, key)
      # Map DSL alias to Typesense special token for text relevance
      sort_key = column == 'text_match' ? '_text_match' : column
      next "#{sort_key}:#{sort_dir}"
    end

    assoc = key.to_sym
    @klass.join_for(assoc)
    SearchEngine::Joins::Guard.ensure_join_applied!(joins_list, assoc, context: 'sorting')

    spec.map do |raw_field, raw_dir|
      joined_field = raw_field.to_s.strip
      raise ArgumentError, 'order: field name must be non-empty' if joined_field.empty?

      # Joined-field validation is best-effort: any StandardError is ignored.
      begin
        join_cfg = @klass.join_for(assoc)
        SearchEngine::Joins::Guard.validate_joined_field!(join_cfg, joined_field)
      rescue StandardError
        nil
      end

      "$#{assoc}.#{joined_field}:#{parse_direction.call(raw_dir, raw_field)}"
    end
  end
end
507
+
508
# Turn a comma-separated order string into "field:dir" strings.
#
# Each top-level token must end in ":asc" or ":desc" (case-insensitive,
# whitespace tolerant). Everything before that final separator is kept as
# the field expression, so tokens like "_eval(locked:true):desc" survive.
# The bare name "text_match" maps to "_text_match".
#
# The regexp only captures asc/desc, so no separate direction check is
# needed after a successful match.
#
# @param value [String]
# @return [Array<String>]
# @raise [ArgumentError] when a token is not in 'field:direction' form
def normalize_order_string(value)
  split_order_tokens(value).map do |chunk|
    # Extract trailing direction anchored at the end; whitespace tolerant
    match = chunk.match(/\A\s*(.+?):\s*(asc|desc)\s*\z/i)
    raise ArgumentError, "order: expected 'field:direction' (got #{chunk.inspect})" unless match

    name = match[1].strip
    direction = match[2].downcase

    # Preserve alias for text relevance
    field = name == 'text_match' ? '_text_match' : name
    "#{field}:#{direction}"
  end
end
526
+
527
# Normalize each element with normalize_order and concatenate the
# resulting arrays in input order.
# @param value [Array]
# @return [Array<String>]
def normalize_order_array(value)
  value.map { |item| normalize_order(item) }.flatten(1)
end
530
+
531
# A bare field name sorts ascending; :text_match maps to "_text_match".
# @param value [Symbol]
# @return [Array<String>] a single "field:asc" entry
# @raise [ArgumentError] when the name is blank
def normalize_order_symbol(value)
  name = value.to_s.strip
  raise ArgumentError, 'order: field name must be non-empty' if name.empty?

  name = '_text_match' if name == 'text_match'
  ["#{name}:asc"]
end
538
+
539
# Keep one entry per field: the last one given, at the position where it
# last appeared.
#
# Entries are split on their final colon, so field expressions that
# themselves contain colons stay intact.
#
# @param list [Array<String>, nil]
# @return [Array<String>]
def dedupe_orders_last_wins(list)
  return [] if list.nil? || list.empty?

  winners = {}
  list.each do |entry|
    field, _sep, dir = entry.rpartition(':')
    # Delete before re-inserting so the key moves to its latest position.
    winners.delete(field)
    winners[field] = "#{field}:#{dir}"
  end
  winners.values
end
551
+
552
# Validate and normalize grouping input.
#
# Blank input (nil or anything responding to #empty? with true) yields
# nil. Any other non-Hash input raises ArgumentError; the emptiness probe
# is guarded with respond_to? so inputs such as Integers reach that error
# instead of failing with NoMethodError.
#
# @param value [Hash, nil] expects :field, :limit and :missing_values keys
# @return [Hash, nil] { field: Symbol, limit: Integer|nil, missing_values: Boolean }
# @raise [ArgumentError] when value is neither blank nor a Hash
# @raise [SearchEngine::Errors::InvalidGroup] on a bad field type, unknown
#   field, non-positive/non-Integer limit, or non-boolean missing_values
# @raise [SearchEngine::Errors::UnsupportedGroupField] for joined ("$...") or
#   dotted field paths
def normalize_grouping(value)
  return nil if value.nil? || (value.respond_to?(:empty?) && value.empty?)
  raise ArgumentError, 'grouping: expected a Hash' unless value.is_a?(Hash)

  field = value[:field]
  limit = value[:limit]
  missing_values = value[:missing_values]

  unless field.is_a?(Symbol) || field.is_a?(String)
    raise SearchEngine::Errors::InvalidGroup,
          'InvalidGroup: field must be a Symbol or String'
  end

  field_str = field.to_s
  if field_str.start_with?('$') || field_str.include?('.')
    raise SearchEngine::Errors::UnsupportedGroupField.new(
      %(UnsupportedGroupField: grouping supports base fields only (got #{field_str.inspect})),
      doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/grouping#troubleshooting',
      details: { field: field_str }
    )
  end

  # The known-field check is skipped when no attribute map is available.
  attrs = safe_attributes_map
  unless attrs.nil? || attrs.empty?
    sym = field.to_sym
    unless attrs.key?(sym)
      msg = build_invalid_group_unknown_field_message(sym)
      raise SearchEngine::Errors::InvalidGroup.new(
        msg,
        doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/grouping#troubleshooting',
        details: { field: sym }
      )
    end
  end

  if !limit.nil? && !(limit.is_a?(Integer) && limit >= 1)
    raise SearchEngine::Errors::InvalidGroup.new(
      "InvalidGroup: limit must be a positive integer (got #{limit.inspect})",
      doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/grouping#troubleshooting',
      details: { limit: limit }
    )
  end

  unless [true, false].include?(missing_values)
    raise SearchEngine::Errors::InvalidGroup.new(
      "InvalidGroup: missing_values must be boolean (got #{missing_values.inspect})",
      doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/grouping#troubleshooting',
      details: { missing_values: missing_values }
    )
  end

  { field: field.to_sym, limit: limit, missing_values: missing_values }
end
606
+
607
# Convert join names to Symbols, keeping input order and duplicates.
#
# Nested arrays are flattened and nils dropped. Every resulting name is
# passed to @klass.join_for, whose return value is discarded (presumably
# it raises for unknown associations — confirm against its definition).
#
# @param values [Array<Symbol, String>]
# @return [Array<Symbol>]
# @raise [ArgumentError] when an element is neither a Symbol nor a String
def normalize_joins(values)
  raw = Array(values).flatten.compact
  return [] if raw.empty?

  symbols = raw.map do |entry|
    unless entry.is_a?(Symbol) || entry.is_a?(String)
      raise ArgumentError, "joins: expected symbols/strings (got #{entry.class})"
    end

    entry.to_sym
  end

  symbols.each { |assoc| @klass.join_for(assoc) }
end
624
+
625
# Compose the InvalidGroup message for a field that is not in the
# attribute map, appending "did you mean" candidates when suggest_fields
# returns any.
#
# @param field_sym [Symbol]
# @return [String]
def build_invalid_group_unknown_field_message(field_sym)
  owner = klass_name_for_inspect
  known_fields = safe_attributes_map.keys.map(&:to_sym)
  suggestions = suggest_fields(field_sym, known_fields)

  hint =
    case suggestions.length
    when 0
      ''
    when 1
      " (did you mean :#{suggestions.first}?)"
    else
      *head, tail = suggestions
      listed = head.map { |s| ":#{s}" }.join(', ')
      " (did you mean #{listed}, or :#{tail}?)"
    end

  "InvalidGroup: unknown field :#{field_sym} for grouping on #{owner}#{hint}"
end
642
+
643
# Suggest up to three known field names close to the given one.
#
# Candidates are ranked by DidYouMean::Levenshtein distance; of the three
# closest, only those within a distance of 2 are returned.
#
# did_you_mean is loaded lazily. LoadError is rescued explicitly because
# it descends from ScriptError, not StandardError, so a bare
# StandardError rescue would let a failed require escape.
#
# @param field_sym [#to_s]
# @param known_syms [Array<#to_s>, nil]
# @return [Array<Symbol>] empty when nothing is known or the library is unavailable
def suggest_fields(field_sym, known_syms)
  return [] if known_syms.nil? || known_syms.empty?

  begin
    require 'did_you_mean'
    require 'did_you_mean/levenshtein'
  rescue LoadError, StandardError
    return []
  end

  input = field_sym.to_s
  threshold = 2
  ranked = known_syms.map(&:to_s).each_with_object({}) do |cand, acc|
    acc[cand] = DidYouMean::Levenshtein.distance(input, cand)
  end
  closest = ranked.sort_by { |(_cand, dist)| dist }.take(3)
  closest.select { |(_cand, dist)| dist <= threshold }.map { |cand, _dist| cand.to_sym }
end
663
+
664
+ # Highlight/ranking/curation normalizers used by chainers and initial state
665
# Normalize highlight options into a fixed-shape Hash.
#
# Keys may be Symbols or Strings. Field lists are flattened, stringified,
# stripped and cleared of blanks; tags are stringified when present; the
# two numeric knobs are coerced via coerce_integer_min (minimum 0) when
# they are non-nil.
#
# @param value [Hash, nil]
# @return [Hash] with :fields, :full_fields, :start_tag, :end_tag,
#   :affix_tokens and :snippet_threshold
# @raise [SearchEngine::Errors::InvalidOption] when value is not a Hash
def normalize_highlight_input(value)
  opts = value || {}
  raise SearchEngine::Errors::InvalidOption, 'highlight must be a Hash of options' unless opts.is_a?(Hash)

  # Symbol key first, falling back to the String key when the value is falsy.
  either = ->(key) { opts[key] || opts[key.to_s] }
  # Symbol key wins whenever it is present, even if its value is nil/false.
  by_presence = ->(key) { opts.key?(key) ? opts[key] : opts[key.to_s] }
  name_list = ->(raw) { Array(raw).flatten.compact.map { |f| f.to_s.strip }.reject(&:empty?) }

  fields = name_list.call(either.call(:fields))
  full_fields = name_list.call(either.call(:full_fields))
  start_tag = either.call(:start_tag)
  end_tag = either.call(:end_tag)
  affix = by_presence.call(:affix_tokens)
  snippet = by_presence.call(:snippet_threshold)

  affix = coerce_integer_min(affix, :highlight_affix_num_tokens, 0) unless affix.nil?
  snippet = coerce_integer_min(snippet, :highlight_snippet_threshold, 0) unless snippet.nil?

  {
    fields: fields,
    full_fields: full_fields,
    start_tag: start_tag&.to_s,
    end_tag: end_tag&.to_s,
    affix_tokens: affix,
    snippet_threshold: snippet
  }
end
692
+
693
# Normalize ranking options into a Hash containing only recognized,
# validated keys (populated by apply_ranking_handlers!).
#
# @param value [Hash, nil] nil is treated as an empty Hash
# @return [Hash]
# @raise [SearchEngine::Errors::InvalidOption] when value is not a Hash
def normalize_ranking_input(value)
  h = value || {}
  unless h.is_a?(Hash)
    # The hint is split across two literals; the leading space on the second
    # keeps the example call readable once they are concatenated.
    raise SearchEngine::Errors::InvalidOption.new(
      'InvalidOption: ranking expects a Hash of options',
      hint: 'Use ranking(num_typos: 1, drop_tokens_threshold: 0.2,' \
            ' prioritize_exact_match: true, query_by_weights: { name: 2 })',
      doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/ranking#options'
    )
  end

  out = {}
  apply_ranking_handlers!(h, out)
  out
end
708
+
709
# Run every ranking option handler, in a fixed order, against the raw
# input +h+, letting each write its validated value into +out+.
# Returns whatever the last handler returns.
def apply_ranking_handlers!(h, out)
  handlers = %i[
    handle_num_typos_option!
    handle_drop_tokens_threshold_option!
    handle_prioritize_exact_match_option!
    handle_query_by_weights_option!
  ]
  handlers.map { |handler| send(handler, h, out) }.last
end
715
+
716
# Validate the num_typos option and copy it into +out+.
#
# Nothing is written when the key is absent (Symbol or String form) or
# its value is nil.
#
# The rescue is scoped to the Integer() conversion only, so the
# out-of-range error below can never be re-wrapped by it. RangeError is
# included so that NaN/Infinity (FloatDomainError) surface as
# InvalidOption as well.
#
# @param h [Hash] raw ranking options
# @param out [Hash] receives :num_typos as an Integer
# @raise [SearchEngine::Errors::InvalidOption] when the value is not
#   convertible to an Integer or is outside {0, 1, 2}
def handle_num_typos_option!(h, out)
  return unless h.key?(:num_typos) || h.key?('num_typos')

  raw = h[:num_typos] || h['num_typos']
  return if raw.nil?

  iv =
    begin
      Integer(raw)
    rescue ArgumentError, TypeError, RangeError
      raise SearchEngine::Errors::InvalidOption.new(
        "InvalidOption: num_typos must be an Integer in {0,1,2} (got #{raw.inspect})",
        doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/ranking#options'
      )
    end

  unless [0, 1, 2].include?(iv)
    raise SearchEngine::Errors::InvalidOption.new(
      "InvalidOption: num_typos must be 0, 1, or 2 (got #{raw.inspect})",
      doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/ranking#options'
    )
  end

  out[:num_typos] = iv
end
738
+
739
# Validate the +drop_tokens_threshold+ ranking option and copy it into +out+.
#
# The value is read from the Symbol key, falling back to the String key when
# the Symbol lookup is nil or false. Nothing is written when the option is
# absent or nil.
#
# @param h [Hash] raw ranking options
# @param out [Hash] receives +:drop_tokens_threshold+ as a Float on success
# @return [Float, nil] the stored value, or nil when the option is skipped
# @raise [SearchEngine::Errors::InvalidOption] when +Float()+ rejects the
#   value or the result is not a finite number within 0.0..1.0
def handle_drop_tokens_threshold_option!(h, out)
  provided = h.key?(:drop_tokens_threshold) || h.key?('drop_tokens_threshold')
  return unless provided

  candidate = h[:drop_tokens_threshold] || h['drop_tokens_threshold']
  return if candidate.nil?

  # Both failure modes report the same message, so build the error in one place.
  invalid = lambda do
    SearchEngine::Errors::InvalidOption.new(
      "InvalidOption: drop_tokens_threshold must be a float between 0.0 and 1.0 (got #{candidate.inspect})",
      doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/ranking#options'
    )
  end

  begin
    threshold = Float(candidate)
    within_unit_interval = threshold >= 0.0 && threshold <= 1.0 && threshold.finite?
    raise invalid.call unless within_unit_interval

    out[:drop_tokens_threshold] = threshold
  rescue ArgumentError, TypeError
    raise invalid.call
  end
end
761
+
762
# Validate the +prioritize_exact_match+ ranking option and copy it into +out+.
#
# When the key is present (as Symbol or String), +out[:prioritize_exact_match]+
# is always written: nil for a nil value, otherwise the result of
# +coerce_boolean_strict+ (defined elsewhere; presumably it raises on
# non-boolean input — confirm against its definition).
#
# @param h [Hash] raw ranking options
# @param out [Hash] receives +:prioritize_exact_match+
# @return [Object, nil] the stored value, or nil when the option is absent
def handle_prioritize_exact_match_option!(h, out)
  return unless h.key?(:prioritize_exact_match) || h.key?('prioritize_exact_match')

  # Fall back to the String key only when the Symbol lookup is nil. Using
  # `h[:k] || h['k']` here would discard an explicit `false`, making it
  # impossible to turn the option off through the Symbol key.
  raw = h[:prioritize_exact_match]
  raw = h['prioritize_exact_match'] if raw.nil?

  out[:prioritize_exact_match] = raw.nil? ? nil : coerce_boolean_strict(raw, :prioritize_exact_match)
end
768
+
769
# Validate the +query_by_weights+ ranking option and copy it into +out+.
#
# The value is read from the Symbol key, falling back to the String key.
# Field names are stringified and stripped; entries whose name is blank are
# dropped. Each weight is converted with +Integer()+ and must be >= 0.
# +out+ is only written once every entry has been validated.
#
# @param h [Hash] raw ranking options
# @param out [Hash] receives +:query_by_weights+ as { String => Integer }
# @return [Hash, nil] the normalized weights, or nil when the option is skipped
# @raise [SearchEngine::Errors::InvalidOption] when the value is not a Hash,
#   or a weight is not convertible to an Integer or is negative
def handle_query_by_weights_option!(h, out)
  return unless h.key?(:query_by_weights) || h.key?('query_by_weights')

  weights = h[:query_by_weights] || h['query_by_weights']
  return if weights.nil?

  doc_url = 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/ranking#weights'
  unless weights.is_a?(Hash)
    raise SearchEngine::Errors::InvalidOption.new(
      'InvalidOption: query_by_weights must be a Hash of { field => Integer }',
      doc: doc_url
    )
  end

  out[:query_by_weights] = weights.each_with_object({}) do |(field, value), acc|
    field_name = field.to_s.strip
    next if field_name.empty?

    weight =
      begin
        Integer(value)
      rescue ArgumentError, TypeError
        raise SearchEngine::Errors::InvalidOption.new(
          "InvalidOption: weight for #{field.inspect} must be an Integer >= 0",
          doc: doc_url,
          details: { field: field, weight: value }
        )
      end

    if weight < 0
      raise SearchEngine::Errors::InvalidOption.new(
        "InvalidOption: weight for #{field.inspect} must be >= 0",
        doc: doc_url,
        details: { field: field, weight: value }
      )
    end

    acc[field_name] = weight
  end
end
806
+
807
# Turn a scalar, nil or (one-level nested) list of ids into a flat Array of
# non-blank, stripped Strings. Order and duplicates are preserved.
#
# @param values [Object, Array, nil] raw ids
# @return [Array<String>] stringified ids with nils and blanks removed
def normalize_curation_ids(values)
  Array(values).flatten(1).each_with_object([]) do |entry, ids|
    next if entry.nil?

    id = entry.to_s.strip
    ids << id unless id.empty?
  end
end
811
+
812
# Turn a scalar, nil or (one-level nested) list of tags into a flat Array of
# unique, non-blank, stripped Strings, keeping the first occurrence of each.
#
# @param values [Object, Array, nil] raw override tags
# @return [Array<String>] de-duplicated tags in first-seen order
def normalize_curation_tags(values)
  tags = Array(values).flatten(1).compact.map { |v| v.to_s.strip }.reject(&:empty?)
  # Array#uniq keeps first occurrences in order, replacing the quadratic
  # `acc.include?` scan with a hash-based pass.
  tags.uniq
end
816
+
817
# Normalize a curation options Hash.
#
# nil and anything that reports itself as empty yield nil. Each list option is
# read from the Symbol key, falling back to the String key; the
# +filter_curated_hits+ flag is picked by key presence so an explicit false
# survives, and is passed through +coerce_boolean_strict+ unless nil.
#
# @param value [Hash, nil] raw curation options
# @return [Hash, nil] Hash with :pinned, :hidden, :override_tags and
#   :filter_curated_hits, or nil for nil/empty input
# @raise [ArgumentError] when +value+ is non-empty and not a Hash
def normalize_curation_input(value)
  blank = value.nil? || (value.respond_to?(:empty?) && value.empty?)
  return nil if blank
  raise ArgumentError, 'curation must be a Hash' unless value.is_a?(Hash)

  lookup = ->(sym, str) { value[sym] || value[str] }

  pinned_ids = normalize_curation_ids(lookup.call(:pinned, 'pinned'))
  hidden_ids = normalize_curation_ids(lookup.call(:hidden, 'hidden'))
  override_tags = normalize_curation_tags(lookup.call(:override_tags, 'override_tags'))

  filter_flag = if value.key?(:filter_curated_hits)
                  value[:filter_curated_hits]
                else
                  value['filter_curated_hits']
                end
  filter_flag = coerce_boolean_strict(filter_flag, :filter_curated_hits) unless filter_flag.nil?

  {
    pinned: pinned_ids,
    hidden: hidden_ids,
    override_tags: override_tags,
    filter_curated_hits: filter_flag
  }
end
830
+
831
# Reject facet field names that start with "$" or contain a ".".
#
# @param name [String] field name to check
# @param context [String] prefix used in the error message
# @return [nil] when the name is acceptable
# @raise [SearchEngine::Errors::InvalidParams] for "$"-prefixed or dotted names
def validate_facet_field!(name, context: 'facet_by')
  base_field = !name.start_with?('$') && !name.include?('.')
  return if base_field

  message = %(#{context}: supports base fields only (got #{name.inspect}))
  raise SearchEngine::Errors::InvalidParams.new(
    message,
    doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/faceting#supported-options',
    details: { field: name }
  )
end
840
+
841
# Raise when +name+ is not a key of the attributes map.
#
# The check is skipped when +safe_attributes_map+ returns nil or an empty
# map. Otherwise +suggest_fields+ is asked for alternatives, which are folded
# into a "did you mean" suffix on the error message.
#
# @param name [String, Symbol] field name to check
# @return [nil] when the field is known or no attributes map is available
# @raise [SearchEngine::Errors::UnknownField] when the field is not in the map
def ensure_known_field!(name)
  known = safe_attributes_map
  return if known.nil? || known.empty?
  return if known.key?(name.to_sym)

  candidates = suggest_fields(name.to_sym, known.keys.map(&:to_sym))
  did_you_mean =
    case candidates.length
    when 0
      ''
    when 1
      " (did you mean :#{candidates.first}?)"
    else
      # All but the last are comma-joined; the last is introduced by "or".
      leading = candidates[0...-1].map { |candidate| ":#{candidate}" }
      " (did you mean #{leading.join(', ')}, or :#{candidates.last}?)"
    end

  raise SearchEngine::Errors::UnknownField,
        "UnknownField: unknown field #{name.inspect} for #{klass_name_for_inspect}#{did_you_mean}"
end
858
+
859
# Reject any non-nil +:sort+ option for facets.
#
# @param sort [Object, nil] the requested facet sort
# @return [nil] when no sort was requested
# @raise [SearchEngine::Errors::InvalidParams] for any non-nil value
def validate_facet_sort!(sort)
  return if sort.nil?

  message = "facet_by: option :sort is not supported by Typesense facets (got #{sort.inspect})"
  raise SearchEngine::Errors::InvalidParams.new(
    message,
    hint: 'Supported: default count-desc only at present.',
    doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/faceting#supported-options',
    details: { sort: sort }
  )
end
869
+
870
# Reject any non-nil +:stats+ option for facets.
#
# @param stats [Object, nil] the requested facet stats option
# @return [nil] when no stats option was given
# @raise [SearchEngine::Errors::InvalidParams] for any non-nil value
def validate_facet_stats!(stats)
  return if stats.nil?

  raise SearchEngine::Errors::InvalidParams.new(
    'facet_by: option :stats is not supported at present',
    details: { stats: stats },
    doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/faceting#supported-options'
  )
end
879
+
880
# Convert the facet +max_values+ option to a positive Integer.
#
# @param max_values [Object, nil] raw cap; nil means "no cap"
# @return [Integer, nil] the cap, or nil when +max_values+ is nil
# @raise [SearchEngine::Errors::InvalidParams] when +Integer()+ rejects the
#   value or the result is below 1
def parse_facet_cap!(max_values)
  return nil if max_values.nil?

  begin
    limit = Integer(max_values)
    unless limit >= 1
      raise SearchEngine::Errors::InvalidParams, 'facet_by: max_values must be >= 1'
    end

    limit
  rescue ArgumentError, TypeError
    raise SearchEngine::Errors::InvalidParams, 'facet_by: max_values must be an Integer or nil'
  end
end
890
+
891
# Reject a facet query expression that contains exactly one of "[" and "]".
#
# Only the presence of each bracket is compared; counts and ordering are not
# inspected.
#
# @param expr [String] facet query expression
# @return [nil] when both or neither bracket character is present
# @raise [SearchEngine::Errors::InvalidParams] when only one of them is present
def validate_range_brackets!(expr)
  has_open = expr.include?('[')
  has_close = expr.include?(']')
  return if has_open == has_close

  message = %(facet_query: invalid range syntax #{expr.inspect} (unbalanced brackets))
  raise SearchEngine::Errors::InvalidParams.new(
    message,
    hint: 'Use shapes like "[0..9]", "[10..19]"',
    doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/faceting#facet-query-expressions',
    details: { expr: expr }
  )
end
901
+ end
902
+ end
903
+ end