search-engine-for-typesense 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +148 -0
  4. data/app/search_engine/search_engine/app_info.rb +11 -0
  5. data/app/search_engine/search_engine/index_partition_job.rb +170 -0
  6. data/lib/generators/search_engine/install/install_generator.rb +20 -0
  7. data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
  8. data/lib/generators/search_engine/model/model_generator.rb +86 -0
  9. data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
  10. data/lib/search-engine-for-typesense.rb +12 -0
  11. data/lib/search_engine/active_record_syncable.rb +247 -0
  12. data/lib/search_engine/admin/stopwords.rb +125 -0
  13. data/lib/search_engine/admin/synonyms.rb +125 -0
  14. data/lib/search_engine/admin.rb +12 -0
  15. data/lib/search_engine/ast/and.rb +52 -0
  16. data/lib/search_engine/ast/binary_op.rb +75 -0
  17. data/lib/search_engine/ast/eq.rb +19 -0
  18. data/lib/search_engine/ast/group.rb +18 -0
  19. data/lib/search_engine/ast/gt.rb +12 -0
  20. data/lib/search_engine/ast/gte.rb +12 -0
  21. data/lib/search_engine/ast/in.rb +28 -0
  22. data/lib/search_engine/ast/lt.rb +12 -0
  23. data/lib/search_engine/ast/lte.rb +12 -0
  24. data/lib/search_engine/ast/matches.rb +55 -0
  25. data/lib/search_engine/ast/node.rb +176 -0
  26. data/lib/search_engine/ast/not_eq.rb +13 -0
  27. data/lib/search_engine/ast/not_in.rb +24 -0
  28. data/lib/search_engine/ast/or.rb +52 -0
  29. data/lib/search_engine/ast/prefix.rb +51 -0
  30. data/lib/search_engine/ast/raw.rb +41 -0
  31. data/lib/search_engine/ast/unary_op.rb +43 -0
  32. data/lib/search_engine/ast.rb +101 -0
  33. data/lib/search_engine/base/creation.rb +727 -0
  34. data/lib/search_engine/base/deletion.rb +80 -0
  35. data/lib/search_engine/base/display_coercions.rb +36 -0
  36. data/lib/search_engine/base/hydration.rb +312 -0
  37. data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
  38. data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
  39. data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
  40. data/lib/search_engine/base/index_maintenance.rb +459 -0
  41. data/lib/search_engine/base/indexing_dsl.rb +255 -0
  42. data/lib/search_engine/base/joins.rb +479 -0
  43. data/lib/search_engine/base/model_dsl.rb +472 -0
  44. data/lib/search_engine/base/presets.rb +43 -0
  45. data/lib/search_engine/base/pretty_printer.rb +315 -0
  46. data/lib/search_engine/base/relation_delegation.rb +42 -0
  47. data/lib/search_engine/base/scopes.rb +113 -0
  48. data/lib/search_engine/base/updating.rb +92 -0
  49. data/lib/search_engine/base.rb +38 -0
  50. data/lib/search_engine/bulk.rb +284 -0
  51. data/lib/search_engine/cache.rb +33 -0
  52. data/lib/search_engine/cascade.rb +531 -0
  53. data/lib/search_engine/cli/doctor.rb +631 -0
  54. data/lib/search_engine/cli/support.rb +217 -0
  55. data/lib/search_engine/cli.rb +222 -0
  56. data/lib/search_engine/client/http_adapter.rb +63 -0
  57. data/lib/search_engine/client/request_builder.rb +92 -0
  58. data/lib/search_engine/client/services/base.rb +74 -0
  59. data/lib/search_engine/client/services/collections.rb +161 -0
  60. data/lib/search_engine/client/services/documents.rb +214 -0
  61. data/lib/search_engine/client/services/operations.rb +152 -0
  62. data/lib/search_engine/client/services/search.rb +190 -0
  63. data/lib/search_engine/client/services.rb +29 -0
  64. data/lib/search_engine/client.rb +765 -0
  65. data/lib/search_engine/client_options.rb +20 -0
  66. data/lib/search_engine/collection_resolver.rb +191 -0
  67. data/lib/search_engine/collections_graph.rb +330 -0
  68. data/lib/search_engine/compiled_params.rb +143 -0
  69. data/lib/search_engine/compiler.rb +383 -0
  70. data/lib/search_engine/config/observability.rb +27 -0
  71. data/lib/search_engine/config/presets.rb +92 -0
  72. data/lib/search_engine/config/selection.rb +16 -0
  73. data/lib/search_engine/config/typesense.rb +48 -0
  74. data/lib/search_engine/config/validators.rb +97 -0
  75. data/lib/search_engine/config.rb +917 -0
  76. data/lib/search_engine/console_helpers.rb +130 -0
  77. data/lib/search_engine/deletion.rb +103 -0
  78. data/lib/search_engine/dispatcher.rb +125 -0
  79. data/lib/search_engine/dsl/parser.rb +582 -0
  80. data/lib/search_engine/engine.rb +167 -0
  81. data/lib/search_engine/errors.rb +290 -0
  82. data/lib/search_engine/filters/sanitizer.rb +189 -0
  83. data/lib/search_engine/hydration/materializers.rb +808 -0
  84. data/lib/search_engine/hydration/selection_context.rb +96 -0
  85. data/lib/search_engine/indexer/batch_planner.rb +76 -0
  86. data/lib/search_engine/indexer/bulk_import.rb +626 -0
  87. data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
  88. data/lib/search_engine/indexer/retry_policy.rb +103 -0
  89. data/lib/search_engine/indexer.rb +747 -0
  90. data/lib/search_engine/instrumentation.rb +308 -0
  91. data/lib/search_engine/joins/guard.rb +202 -0
  92. data/lib/search_engine/joins/resolver.rb +95 -0
  93. data/lib/search_engine/logging/color.rb +78 -0
  94. data/lib/search_engine/logging/format_helpers.rb +92 -0
  95. data/lib/search_engine/logging/partition_progress.rb +53 -0
  96. data/lib/search_engine/logging_subscriber.rb +388 -0
  97. data/lib/search_engine/mapper.rb +785 -0
  98. data/lib/search_engine/multi.rb +286 -0
  99. data/lib/search_engine/multi_result.rb +186 -0
  100. data/lib/search_engine/notifications/compact_logger.rb +675 -0
  101. data/lib/search_engine/observability.rb +162 -0
  102. data/lib/search_engine/operations.rb +58 -0
  103. data/lib/search_engine/otel.rb +227 -0
  104. data/lib/search_engine/partitioner.rb +128 -0
  105. data/lib/search_engine/ranking_plan.rb +118 -0
  106. data/lib/search_engine/registry.rb +158 -0
  107. data/lib/search_engine/relation/compiler.rb +711 -0
  108. data/lib/search_engine/relation/deletion.rb +37 -0
  109. data/lib/search_engine/relation/dsl/filters.rb +624 -0
  110. data/lib/search_engine/relation/dsl/selection.rb +240 -0
  111. data/lib/search_engine/relation/dsl.rb +903 -0
  112. data/lib/search_engine/relation/dx/dry_run.rb +59 -0
  113. data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
  114. data/lib/search_engine/relation/dx.rb +231 -0
  115. data/lib/search_engine/relation/materializers.rb +118 -0
  116. data/lib/search_engine/relation/options.rb +138 -0
  117. data/lib/search_engine/relation/state.rb +274 -0
  118. data/lib/search_engine/relation/updating.rb +44 -0
  119. data/lib/search_engine/relation.rb +623 -0
  120. data/lib/search_engine/result.rb +664 -0
  121. data/lib/search_engine/schema.rb +1083 -0
  122. data/lib/search_engine/sources/active_record_source.rb +185 -0
  123. data/lib/search_engine/sources/base.rb +62 -0
  124. data/lib/search_engine/sources/lambda_source.rb +55 -0
  125. data/lib/search_engine/sources/sql_source.rb +196 -0
  126. data/lib/search_engine/sources.rb +71 -0
  127. data/lib/search_engine/stale_rules.rb +160 -0
  128. data/lib/search_engine/test/minitest_assertions.rb +57 -0
  129. data/lib/search_engine/test/offline_client.rb +134 -0
  130. data/lib/search_engine/test/rspec_matchers.rb +77 -0
  131. data/lib/search_engine/test/stub_client.rb +201 -0
  132. data/lib/search_engine/test.rb +66 -0
  133. data/lib/search_engine/test_autoload.rb +8 -0
  134. data/lib/search_engine/update.rb +35 -0
  135. data/lib/search_engine/version.rb +7 -0
  136. data/lib/search_engine.rb +332 -0
  137. data/lib/tasks/search_engine.rake +501 -0
  138. data/lib/tasks/search_engine_doctor.rake +16 -0
  139. metadata +225 -0
@@ -0,0 +1,711 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SearchEngine
4
+ class Relation
5
+ # Compile immutable relation state and options into Typesense body params.
6
+ # This module is pure/deterministic and avoids any network I/O.
7
+ module Compiler
8
# Compile immutable relation state and options into Typesense body params.
#
# Reads +@state+ and +SearchEngine.config+, then delegates each concern
# (query basics, filter/sort, field selection, highlighting, faceting,
# curation, pagination, grouping, infix, ranking, join context, presets and
# synonym/stopword flags) to a dedicated helper that fills +params+.
# Underscore-prefixed keys (+:_join+, +:_runtime_flags+, +:_hits+) are
# internal previews attached for DX surfaces.
#
# @return [SearchEngine::CompiledParams] the compiled params wrapper
# @see `https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/query-dsl`
# @see `https://typesense.org/docs/latest/api/documents.html#search-document`
def to_typesense_params
  cfg = SearchEngine.config
  opts = @state[:options] || {}

  params = {}
  runtime_flags = {}

  # Query basics
  apply_query_basics!(params, opts, cfg)

  # Filters and sorting
  ast_nodes = Array(@state[:ast]).flatten.compact
  filter_str = assign_filter_by!(params, ast_nodes)

  orders = Array(@state[:orders])
  sort_str = assign_sort_by!(params, orders)

  # Field selection and instrumentation
  include_str, exclude_str = compile_selection_fields!(params)
  instrument_selection_compile(include_str, exclude_str)

  # Highlighting
  apply_highlighting!(params)

  # Faceting
  apply_faceting!(params)

  # Curation (body params only)
  apply_curation!(params)

  # Pagination and early limit (compiler mapping)
  hits_info = apply_pagination_and_hit_limits!(params)

  # Grouping
  apply_grouping!(params)

  # Keep infix last for stability; include when configured or overridden
  apply_infix!(params, opts, cfg)

  # Ranking & typo tuning — authoritative mapping
  apply_ranking!(params)

  # Internal join context (for downstream components; may be stripped before HTTP).
  # The timer starts here, so the reported duration covers only the join-context build.
  compile_started_ms = SearchEngine::Instrumentation.monotonic_ms
  join_ctx = build_join_context(ast_nodes: ast_nodes, orders: orders)
  params[:_join] = join_ctx unless join_ctx.nil? || join_ctx.empty?
  instrument_join_compile(join_ctx, include_str, filter_str, sort_str, compile_started_ms)

  # Preset emission and merge strategies (may return a new, reduced hash)
  params = apply_presets!(params)

  # Synonyms/Stopwords toggles
  apply_text_processing_flags!(params, runtime_flags)

  # Attach internal-only runtime flags preview
  params[:_runtime_flags] = runtime_flags unless runtime_flags.empty?

  # Attach internal-only hit limits preview for DX surfaces; stripped client-side
  attach_hits_info!(params, hits_info)

  SearchEngine::CompiledParams.new(params)
end
79
+
80
# Produce the filter_by string, preferring AST nodes over legacy fragments.
#
# With a non-empty +ast_nodes+ the result of SearchEngine::Compiler.compile is
# used (nil when it stringifies to empty); the legacy +@state[:filters]+
# fragments are consulted only when there are no AST nodes at all.
# @param ast_nodes [Array<SearchEngine::AST::Node>]
# @return [String, nil] a Typesense filter string or nil when absent
# @see `https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/compiler`
def compiled_filter_by(ast_nodes)
  if ast_nodes.empty?
    legacy = Array(@state[:filters])
    return legacy.empty? ? nil : legacy.join(' && ')
  end

  expression = SearchEngine::Compiler.compile(ast_nodes, klass: @klass)
  expression.to_s.empty? ? nil : expression
end
97
+
98
# Join normalized order entries into a sort_by value.
# @param orders [Array<String>]
# @return [String, nil] comma-separated sort tokens, or nil when there are none
def compiled_sort_by(orders)
  tokens = Array(orders)
  tokens.join(',') unless tokens.empty?
end
107
+
108
# Build the include_fields string: nested "$assoc(f1,f2)" segments first
# (in +@state[:select_nested_order]+ order, inner fields sorted), followed by
# the root fields from +@state[:select]+ minus +@state[:exclude]+.
#
# A nested segment is skipped when its association is not in +joins_list+,
# has no included fields, carries the "__all" exclusion sentinel, or ends up
# with no fields after subtracting its nested excludes.
# @return [String] comma-separated segments (empty string when nothing applies)
def compile_include_fields_string
  nested_includes = @state[:select_nested] || {}
  nested_excludes = @state[:exclude_nested] || {}
  root_includes = Array(@state[:select])
  root_excludes = Array(@state[:exclude])
  joined = joins_list

  nested = Array(@state[:select_nested_order]).map do |assoc|
    next unless joined.include?(assoc)

    wanted = Array(nested_includes[assoc])
    next if wanted.empty?

    unwanted = Array(nested_excludes[assoc])
    # Full-association exclusion sentinel wins over any nested include
    next if unwanted.any? { |field| field.to_s == '__all' }

    names = (wanted - unwanted).map(&:to_s).reject(&:empty?).sort
    "$#{assoc}(#{names.join(',')})" unless names.empty?
  end.compact

  root = root_includes.empty? ? [] : root_includes - root_excludes
  (nested + root).join(',')
end
143
+
144
# Build the exclude_fields string: nested association segments first
# (in +@state[:exclude_nested_order]+ order), then root excludes.
#
# An association carrying the "__all" sentinel always emits
# "$assoc(*,doc_updated_at)". Otherwise a nested exclude segment is emitted
# only when that association has no nested includes. Root excludes are
# appended only when there are no root includes in +@state[:select]+.
# @return [String] comma-separated segments (empty string when nothing applies)
def compile_exclude_fields_string
  nested_excludes = @state[:exclude_nested] || {}
  nested_includes = @state[:select_nested] || {}

  segments = Array(@state[:exclude_nested_order]).map do |assoc|
    raw = Array(nested_excludes[assoc])
    next "$#{assoc}(*,doc_updated_at)" if raw.any? { |field| field.to_s == '__all' }
    next if Array(nested_includes[assoc]).any?

    names = raw.map(&:to_s).reject(&:empty?).sort
    "$#{assoc}(#{names.join(',')})" unless names.empty?
  end.compact

  root_excludes = Array(@state[:exclude])
  segments.concat(root_excludes) if Array(@state[:select]).empty?
  segments.join(',')
end
178
+
179
# Assemble the internal join context hash from the applied joins.
#
# The result may contain +:assocs+ (de-duplicated +@state[:joins]+),
# +:fields_by_assoc+ (non-empty nested select fields as strings) and
# +:referenced_in+ (associations referenced by nested includes, filters and
# sorts). Empty sections are omitted; an empty hash is returned when no joins
# are applied.
# @param ast_nodes [Array<SearchEngine::AST::Node>]
# @param orders [Array<String>]
# @return [Hash]
def build_join_context(ast_nodes:, orders:)
  assocs = Array(@state[:joins]).uniq
  return {} if assocs.empty?

  nested_map = @state[:select_nested] || {}

  fields_by_assoc = assocs.each_with_object({}) do |assoc, acc|
    names = Array(nested_map[assoc]).map(&:to_s).reject(&:empty?)
    acc[assoc] = names unless names.empty?
  end

  referenced_in = {
    include: Array(@state[:select_nested_order]).select { |assoc| Array(nested_map[assoc]).any? },
    filter: extract_assocs_from_ast(ast_nodes),
    sort: extract_assocs_from_orders(orders)
  }.reject { |_usage, refs| refs.empty? }

  context = { assocs: assocs }
  context[:fields_by_assoc] = fields_by_assoc unless fields_by_assoc.empty?
  context[:referenced_in] = referenced_in unless referenced_in.empty?
  context
end
214
+
215
# Collect association names referenced as "$assoc.field" by AST node fields.
#
# Nodes are visited depth-first (a node before its children, children in
# order); anything that is not a SearchEngine::AST::Node is ignored.
# @param nodes [Array<SearchEngine::AST::Node>]
# @return [Array<Symbol>] unique assoc names in first-mention order
def extract_assocs_from_ast(nodes)
  pending = Array(nodes).flatten.compact
  found = []

  until pending.empty?
    node = pending.shift
    next unless node.is_a?(SearchEngine::AST::Node)

    if node.respond_to?(:field)
      field = node.field.to_s
      captured = field.start_with?('$') ? field[/^\$(\w+)\./, 1] : nil
      if captured
        name = captured.to_sym
        found << name unless found.include?(name)
      end
    end

    # Children go to the front so traversal stays pre-order
    pending.unshift(*Array(node.children))
  end

  found
end
243
+
244
# Collect association names referenced as "$assoc.field:dir" in order entries.
# @param orders [Array<String>]
# @return [Array<Symbol>] unique assoc names in first-mention order
def extract_assocs_from_orders(orders)
  Array(orders).flatten.compact.each_with_object([]) do |entry, names|
    # Only the part before the first ':' is the field reference
    field = entry.to_s.split(':', 2).first
    next unless field&.start_with?('$')

    captured = field[/^\$(\w+)\./, 1]
    next unless captured

    name = captured.to_sym
    names << name unless names.include?(name)
  end
end
264
+
265
+ # Helpers for inspect/DX
266
# Render a filter_by value through the DX FriendlyWhere renderer.
# @param filter_by [Object] value handed unchanged to the renderer
# @return [Object] whatever FriendlyWhere.render returns
def friendly_where(filter_by)
  renderer = SearchEngine::Relation::Dx::FriendlyWhere
  renderer.render(filter_by)
end
269
+
270
# Append a "page/per" line to +lines+ when either pagination value is set.
# A missing side is rendered as an empty string.
# @param lines [Array<String>] mutated in place
# @param params [Hash] read for +:page+ and +:per_page+
# @return [Array<String>, nil] +lines+ when a line was added, otherwise nil
def add_pagination_line!(lines, params)
  page = params[:page]
  per = params[:per_page]
  return unless page || per

  left = page || ''
  right = per || ''
  lines << " page/per: #{left}/#{right}"
end
283
+
284
# Append selection summary tokens for inspect output: the root select count
# plus truncated include/exclude field strings when present.
# @param parts [Array<String>] mutated in place
# @param compiled [Hash] read for +:include_fields+ and +:exclude_fields+
# @return [Array<String>, nil] +parts+ when the exclude token was added, else nil
def append_selection_inspect_parts(parts, compiled)
  present = ->(value) { value && !value.to_s.empty? }

  root_count = Array(@state[:select]).length
  parts << "select=#{root_count}" if root_count.positive?

  included = compiled[:include_fields]
  parts << %(sel="#{truncate_for_inspect(included)}") if present.call(included)

  excluded = compiled[:exclude_fields]
  parts << %(xsel="#{truncate_for_inspect(excluded)}") if present.call(excluded)
end
293
+
294
# Append "select"/"exclude" explain lines for non-blank field selections.
# @param lines [Array<String>] mutated in place
# @param params [Hash] read for +:include_fields+ and +:exclude_fields+
# @return [Array<String>] +lines+
def append_selection_explain_lines(lines, params)
  [[:include_fields, 'select'], [:exclude_fields, 'exclude']].each do |key, label|
    value = params[key]
    lines << " #{label}: #{value}" if value && !value.to_s.strip.empty?
  end
  lines
end
302
+
303
# Append explain lines describing +@state[:curation]+: pinned and hidden ids,
# override tags and the filter_curated_hits flag. Blank ids/tags are dropped;
# the flag line is emitted whenever the flag is not nil.
# @param lines [Array<String>] mutated in place
# @return [Array<String>] +lines+
def append_curation_explain_lines(lines)
  curation = @state[:curation]
  return lines unless curation

  { 'Pinned' => :pinned, 'Hidden' => :hidden, 'Override tags' => :override_tags }.each do |label, key|
    values = Array(curation[key]).map(&:to_s).reject(&:empty?)
    lines << " #{label}: #{values.join(', ')}" unless values.empty?
  end

  flag = curation[:filter_curated_hits]
  lines << " Filter curated hits: #{flag}" unless flag.nil?
  lines
end
318
+
319
# Append a "selection:" line summarising root-level selection: the effective
# includes (select minus exclude) as sel=..., and the excludes as xsel=...
# @param lines [Array<String>] mutated in place
# @return [Array<String>, nil] +lines+, or nil when there is no root selection
def add_effective_selection_tokens!(lines)
  wanted = Array(@state[:select]).map(&:to_s)
  unwanted = Array(@state[:exclude]).map(&:to_s)
  return if wanted.empty? && unwanted.empty?

  effective = wanted - unwanted
  tokens = ['selection:']
  tokens << "sel=#{effective.join(',')}" unless effective.empty?
  tokens << "xsel=#{unwanted.join(',')}" unless unwanted.empty?
  lines << " #{tokens.join(' ')}"
end
330
+
331
# Append a "preset" explain line with the preset name, its mode (default
# :merge) and, when +params[:_preset_conflicts]+ is non-empty, the sorted
# list of dropped keys.
# @param lines [Array<String>] mutated in place
# @param params [Hash] read for +:_preset_conflicts+
# @return [Array<String>] +lines+
def append_preset_explain_line(lines, params)
  name = @state[:preset_name]
  return lines unless name

  mode = @state[:preset_mode] || :merge
  dropped = Array(params[:_preset_conflicts]).map(&:to_s).sort
  suffix = dropped.empty? ? '' : " dropped: #{dropped.join(',')}"
  lines << " preset: #{name} (mode=#{mode}#{suffix})"
  lines
end
343
+
344
# Emit a "search_engine.preset.conflict" event listing the conflicting keys.
# Does nothing when there are no conflicts; any StandardError raised while
# instrumenting is swallowed so compilation is never affected.
# @param mode [Symbol]
# @param name [String]
# @param conflicts [Array<Symbol>]
# @return [void]
def instrument_preset_conflicts(mode, name, conflicts)
  keys = Array(conflicts)
  return if keys.empty?

  SearchEngine::Instrumentation.instrument(
    'search_engine.preset.conflict',
    { keys: keys.map(&:to_sym).sort, mode: mode, preset_name: name, count: keys.size }
  ) {}
rescue StandardError
  nil
end
362
+
363
+ private
364
+
365
# Set +:q+ (defaulting to '*') and, when resolvable, +:query_by+.
# query_by precedence: relation option, then the model's +query_by+ (errors
# from the model are ignored), then +cfg.default_query_by+.
# @param params [Hash] mutated in place
# @param opts [Hash] relation options passed to +option_value+
# @param cfg [Object] config responding to +default_query_by+
def apply_query_basics!(params, opts, cfg)
  query = option_value(opts, :q) || '*'
  model_default =
    begin
      @klass.respond_to?(:query_by) ? @klass.query_by : nil
    rescue StandardError
      nil
    end
  fields = option_value(opts, :query_by) || model_default || cfg.default_query_by

  params[:q] = query
  params[:query_by] = fields if fields
end
376
+
377
# Compile the filter string, rewrite join tokens, and store it under
# +:filter_by+ when present.
# @param params [Hash] mutated in place
# @param ast_nodes [Array<SearchEngine::AST::Node>]
# @return [String, nil] the final filter string
def assign_filter_by!(params, ast_nodes)
  rewritten = map_join_tokens(compiled_filter_by(ast_nodes))
  params[:filter_by] = rewritten if rewritten
  rewritten
end
383
+
384
# Compile the sort string, rewrite join tokens, and store it under
# +:sort_by+ when present.
# @param params [Hash] mutated in place
# @param orders [Array<String>]
# @return [String, nil] the final sort string
def assign_sort_by!(params, orders)
  rewritten = map_join_tokens(compiled_sort_by(orders))
  params[:sort_by] = rewritten if rewritten
  rewritten
end
390
+
391
# Compile include/exclude field strings, rewrite join tokens in both, and
# store each non-blank one under +:include_fields+ / +:exclude_fields+.
# @param params [Hash] mutated in place
# @return [Array(String, String)] the rewritten include and exclude strings
def compile_selection_fields!(params)
  raw_include = compile_include_fields_string
  raw_exclude = compile_exclude_fields_string
  included, excluded = [raw_include, raw_exclude].map { |raw| map_join_tokens(raw) }

  params[:include_fields] = included unless included.to_s.strip.empty?
  params[:exclude_fields] = excluded unless excluded.to_s.strip.empty?

  [included, excluded]
end
402
+
403
# Rewrite "$assoc" tokens to "$collection" using +join_token_rewrites+.
# Only tokens immediately followed by "." or "(" are rewritten, so longer
# names sharing a prefix are left alone. nil/blank input, or an empty rewrite
# table, returns +value+ unchanged.
# @param value [String, nil]
# @return [String, nil]
def map_join_tokens(value)
  return value if value.nil?

  text = value.to_s
  return value if text.strip.empty?

  table = join_token_rewrites
  return value if table.empty?

  table.reduce(text.dup) do |acc, (assoc, collection)|
    target = collection.to_s
    token = assoc.to_s
    next acc if target.strip.empty? || token.empty? || target == token

    acc.gsub(/\$#{Regexp.escape(token)}(?=[.(])/, "$#{target}")
  end
end
424
+
425
# Map each applied join association to its configured collection name.
# Associations whose config lookup raises, or whose collection is blank, are
# skipped. Returns an empty hash when the model lacks +join_for+ or no joins
# are applied.
# @return [Hash{Symbol => String}]
def join_token_rewrites
  return {} unless @klass.respond_to?(:join_for)

  joined = joins_list
  return {} if joined.empty?

  joined.each_with_object({}) do |assoc, table|
    begin
      name = @klass.join_for(assoc)[:collection].to_s
      table[assoc.to_sym] = name unless name.strip.empty?
    rescue StandardError
      next
    end
  end
end
441
+
442
# Emit a "search_engine.selection.compile" event with counts derived from the
# compiled include/exclude strings: number of included fields, number of
# excluded fields, and number of distinct nested associations referenced.
# Any StandardError is swallowed so observability never breaks compilation.
# @param include_str [String, nil] compiled include_fields value
# @param exclude_str [String, nil] compiled exclude_fields value
# @return [Object, nil] result of the instrument call, or nil on error
def instrument_selection_compile(include_str, exclude_str)
  included_count, include_assocs = selection_segment_stats(include_str)
  excluded_count, exclude_assocs = selection_segment_stats(exclude_str)

  s_payload = {
    include_count: included_count,
    exclude_count: excluded_count,
    nested_assoc_count: (include_assocs + exclude_assocs).uniq.length
  }
  SearchEngine::Instrumentation.instrument('search_engine.selection.compile', s_payload) {}
rescue StandardError
  # swallow observability errors
end

# Count fields and collect nested association names in a compiled selection
# string such as "$authors(first,last),title".
#
# Segments are tokenized so that commas inside "$assoc(...)" do not split the
# nested segment; a plain comma split would break multi-field nested segments
# apart and hide their association from the count.
# @param fields_str [String, nil]
# @return [Array(Integer, Array<String>)] field count and assoc names (may repeat)
def selection_segment_stats(fields_str)
  text = fields_str.to_s
  return [0, []] if text.strip.empty?

  count = 0
  assocs = []
  text.scan(/\s*\$\w+\([^)]*\)\s*|[^,]+/) do |segment|
    seg = segment.strip
    next if seg.empty?

    if (m = seg.match(/\A\$(\w+)\(([^)]*)\)\z/))
      assocs << m[1]
      count += m[2].split(',').map(&:strip).reject(&:empty?).length
    else
      count += 1
    end
  end
  [count, assocs]
end
486
+
487
# Copy highlight settings from +@state[:highlight]+ into Typesense params:
# field lists are joined with commas (blank names dropped), tags are set when
# truthy, and the numeric knobs are set whenever they are not nil.
# @param params [Hash] mutated in place
def apply_highlighting!(params)
  highlight = @state[:highlight]
  return unless highlight

  { highlight_fields: :fields, highlight_full_fields: :full_fields }.each do |param_key, state_key|
    names = Array(highlight[state_key]).map(&:to_s).reject(&:empty?)
    params[param_key] = names.join(',') unless names.empty?
  end

  params[:highlight_start_tag] = highlight[:start_tag] if highlight[:start_tag]
  params[:highlight_end_tag] = highlight[:end_tag] if highlight[:end_tag]

  affix = highlight[:affix_tokens]
  params[:highlight_affix_num_tokens] = affix unless affix.nil?
  threshold = highlight[:snippet_threshold]
  params[:snippet_threshold] = threshold unless threshold.nil?
end
502
+
503
# Copy curation state into Typesense params (pinned/hidden hits, override
# tags, filter_curated_hits), attach an internal +:_curation+ segment when the
# flag key is present, then emit curation instrumentation.
# @param params [Hash] mutated in place
def apply_curation!(params)
  curation = @state[:curation]
  return unless curation

  pinned, hidden, tags = %i[pinned hidden override_tags].map do |key|
    Array(curation[key]).map(&:to_s).reject(&:empty?)
  end
  flag = curation[:filter_curated_hits]

  params[:pinned_hits] = pinned.join(',') unless pinned.empty?
  params[:hidden_hits] = hidden.join(',') unless hidden.empty?
  params[:override_tags] = tags.join(',') unless tags.empty?
  params[:filter_curated_hits] = flag unless flag.nil?
  # Expose a compact curation meta segment for callers (not sent over HTTP)
  params[:_curation] = { filter_curated_hits: flag } if curation.key?(:filter_curated_hits)

  instrument_curation_compile(pinned, hidden, tags, curation)
end
520
+
521
# Emit "search_engine.curation.compile" with the non-nil curation facts, and
# additionally "search_engine.curation.conflict" when an id is both pinned
# and hidden. Any StandardError is swallowed.
# @param pinned [Array<String>]
# @param hidden [Array<String>]
# @param tags [Array<String>]
# @param cur [Hash] raw curation state
def instrument_curation_compile(pinned, hidden, tags, cur)
  payload = {}
  payload[:pinned_count] = pinned.size if pinned.size.positive?
  payload[:hidden_count] = hidden.size if hidden.size.positive?
  payload[:has_override_tags] = true if tags.any?
  flag = cur.key?(:filter_curated_hits) ? cur[:filter_curated_hits] : nil
  payload[:filter_curated_hits] = flag unless flag.nil?
  SearchEngine::Instrumentation.instrument('search_engine.curation.compile', payload) {}

  shared = pinned & hidden
  return unless shared.any?

  SearchEngine::Instrumentation.instrument(
    'search_engine.curation.conflict',
    { type: :overlap, count: shared.size }
  ) {}
rescue StandardError
  # swallow observability errors
end
540
+
541
# Write +:page+ / +:per_page+ from +compute_pagination+ into +params+, capping
# per_page at +@state[:hit_limits][:early_limit]+ when that limit is set.
# @param params [Hash] mutated in place
# @return [Hash] hit-limit info: +:per_adjusted+ and +:early_limit+ when an
#   early limit is configured, otherwise empty
def apply_pagination_and_hit_limits!(params)
  info = {}
  paging = compute_pagination

  limits = @state[:hit_limits]
  early = limits && limits[:early_limit]
  if early
    cap = early.to_i
    over_cap = paging.key?(:per_page) && paging[:per_page].to_i > cap
    paging = paging.merge(per_page: cap) if over_cap
    info[:per_adjusted] = over_cap
    info[:early_limit] = cap
  end

  params[:page] = paging[:page] if paging.key?(:page)
  params[:per_page] = paging[:per_page] if paging.key?(:per_page)
  info
end
557
+
558
# Copy grouping state into +:group_by+, +:group_limit+ and
# +:group_missing_values+ (only when a field is set), then emit grouping
# instrumentation.
# @param params [Hash] mutated in place
def apply_grouping!(params)
  grouping = @state[:grouping]
  return unless grouping

  field = grouping[:field]
  limit = grouping[:limit]
  missing = grouping[:missing_values]

  unless field.nil? || field == false
    params[:group_by] = field.to_s
    params[:group_limit] = limit if limit
    params[:group_missing_values] = true if missing
  end

  instrument_grouping_compile(field, limit, missing)
end
574
+
575
# Emit "search_engine.grouping.compile" with the collection name and the
# non-nil grouping settings. Any StandardError is swallowed.
# @param field [Symbol, String, nil]
# @param limit [Integer, nil]
# @param missing_values [Boolean, nil]
def instrument_grouping_compile(field, limit, missing_values)
  details = { collection: klass_name_for_inspect }
  details[:field] = field.to_s unless field.nil?
  details[:limit] = limit
  details[:missing_values] = missing_values
  SearchEngine::Instrumentation.instrument('search_engine.grouping.compile', details.compact) {}
rescue StandardError
  # swallow observability errors
end
586
+
587
# Set +:infix+ from the relation option, falling back to +cfg.default_infix+;
# nothing is written when neither yields a truthy value.
# @param params [Hash] mutated in place
# @param opts [Hash] relation options passed to +option_value+
# @param cfg [Object] config responding to +default_infix+
def apply_infix!(params, opts, cfg)
  mode = option_value(opts, :infix)
  mode ||= cfg.default_infix
  params[:infix] = mode if mode
end
591
+
592
# Merge ranking/typo params produced by SearchEngine::RankingPlan into
# +params+ when +@state[:ranking]+ is set.
# @param params [Hash] mutated in place; +:query_by+ is passed to the plan
# @raise [SearchEngine::Errors::Error] re-raised untouched
# @raise [SearchEngine::Errors::InvalidOption] wrapping any other StandardError
def apply_ranking!(params)
  ranking = @state[:ranking]
  return unless ranking

  extra = SearchEngine::RankingPlan.new(
    relation: self,
    query_by: params[:query_by],
    ranking: ranking
  ).params
  params.merge!(extra) unless extra.empty?
rescue SearchEngine::Errors::Error
  # Domain errors already carry their own message
  raise
rescue StandardError => error
  raise SearchEngine::Errors::InvalidOption.new(
    "InvalidOption: ranking options could not be compiled (#{error.class}: #{error.message})",
    doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/ranking#options'
  )
end
606
+
607
# Emit a "search_engine.joins.compile" event describing the compiled join
# context: association names, where they are referenced, the lengths of the
# compiled include/filter/sort strings and the elapsed time since
# +compile_started_ms+. Any StandardError is swallowed.
#
# The instrument call is given an empty block, matching every other
# instrumentation call in this module, so an implementation that yields does
# not fail and get silently swallowed here.
# @param join_ctx [Hash, nil] output of build_join_context (nil treated as empty)
# @param include_str [String, nil]
# @param filter_str [String, nil]
# @param sort_str [String, nil]
# @param compile_started_ms [Numeric, nil] monotonic start timestamp
def instrument_join_compile(join_ctx, include_str, filter_str, sort_str, compile_started_ms)
  ctx = join_ctx || {}
  assocs = Array(ctx[:assocs]).map(&:to_s)
  used = ctx[:referenced_in] || {}
  used_in = {}
  %i[include filter sort].each do |k|
    arr = Array(used[k]).map(&:to_s)
    used_in[k] = arr unless arr.empty?
  end

  payload = {
    collection: klass_name_for_inspect,
    join_count: assocs.size,
    assocs: (assocs unless assocs.empty?),
    used_in: (used_in unless used_in.empty?),
    include_len: (include_str.to_s.length unless include_str.to_s.strip.empty?),
    filter_len: (filter_str.to_s.length unless filter_str.to_s.strip.empty?),
    sort_len: (sort_str.to_s.length unless sort_str.to_s.strip.empty?),
    duration_ms: (SearchEngine::Instrumentation.monotonic_ms - compile_started_ms if compile_started_ms),
    has_joins: !assocs.empty?
  }
  SearchEngine::Instrumentation.instrument('search_engine.joins.compile', payload) {}
rescue StandardError
  # swallow observability errors
end
631
+
632
# Apply the configured preset to +params+ according to the preset mode.
#
# - no preset name: +params+ is returned untouched
# - +:only+: returns a NEW hash containing just ESSENTIAL_PARAM_KEYS,
#   +:preset+ and (when present) the internal +:_join+ context
# - +:lock+: removes every key found in the configured locked-domains set,
#   records the removed keys under +:_preset_conflicts+ and instruments them
# - any other mode (default +:merge+): +params+ plus +:preset+
# @param params [Hash] mutated in place (except in +:only+ mode)
# @return [Hash] the params hash to use from here on
def apply_presets!(params)
  return params unless (pn = @state[:preset_name])

  pmode = (@state[:preset_mode] || :merge).to_sym
  params[:preset] = pn

  case pmode
  when :only
    allowed = ESSENTIAL_PARAM_KEYS
    minimal = {}
    (allowed + [:preset]).each do |k|
      minimal[k] = params[k] if params.key?(k)
    end
    # Preserve internal join context if present for observability
    minimal[:_join] = params[:_join] if params.key?(:_join)
    minimal
  when :lock
    locked = SearchEngine.config.presets.locked_domains_set
    # Collect first, delete afterwards: never mutate the hash while iterating it
    conflicts = params.keys.select { |k| locked.include?(k) }
    conflicts.each { |k| params.delete(k) }
    params[:_preset_conflicts] = conflicts unless conflicts.empty?

    instrument_preset_conflicts(pmode, pn, conflicts)
    params
  else
    params
  end
end
665
+
666
# Attach the internal +:_hits+ preview to +params+ when hit limits are
# configured, adding the +:max+ limit (as Integer) to +hits_info+ first.
# @param params [Hash] mutated in place
# @param hits_info [Hash] mutated in place
def attach_hits_info!(params, hits_info)
  limits = @state[:hit_limits]
  return unless limits

  max = limits[:max]
  hits_info[:max] = max.to_i if max
  params[:_hits] = hits_info unless hits_info.empty?
end
672
+
673
# Copy faceting state into Typesense params:
# - +:facet_by+ from the non-blank +@state[:facet_fields]+
# - +:max_facet_values+ as the largest positive Integer-coercible entry of
#   +@state[:facet_max_values]+ (entries that cannot be coerced are skipped)
# - +:facet_query+ as comma-joined "field:expr" tokens
# @param params [Hash] mutated in place
def apply_faceting!(params)
  fields = Array(@state[:facet_fields]).map(&:to_s).reject(&:empty?)
  params[:facet_by] = fields.join(',') unless fields.empty?

  numeric_caps = Array(@state[:facet_max_values]).compact.each_with_object([]) do |raw, acc|
    begin
      acc << Integer(raw)
    rescue ArgumentError, TypeError
      next # skip invalid cap
    end
  end
  largest = numeric_caps.max
  params[:max_facet_values] = largest if largest&.positive?

  queries = Array(@state[:facet_queries])
  return unless queries.any?

  tokens = queries.map { |query| "#{query[:field]}:#{query[:expr]}" }
  params[:facet_query] = tokens.join(',') unless tokens.empty?
end
696
+
697
# Map the synonym/stopword toggles from state into params and runtime flags.
# +:enable_synonyms+ mirrors +@state[:use_synonyms]+, while
# +:remove_stop_words+ is the negation of +@state[:use_stopwords]+. A nil
# toggle leaves the corresponding keys unset.
# @param params [Hash] mutated in place
# @param runtime_flags [Hash] mutated in place
def apply_text_processing_flags!(params, runtime_flags)
  synonyms = @state[:use_synonyms]
  unless synonyms.nil?
    params[:enable_synonyms] = synonyms
    runtime_flags[:use_synonyms] = synonyms
  end

  stopwords = @state[:use_stopwords]
  return if stopwords.nil?

  params[:remove_stop_words] = !stopwords
  runtime_flags[:use_stopwords] = stopwords
end
709
+ end
710
+ end
711
+ end