search-engine-for-typesense 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +148 -0
  4. data/app/search_engine/search_engine/app_info.rb +11 -0
  5. data/app/search_engine/search_engine/index_partition_job.rb +170 -0
  6. data/lib/generators/search_engine/install/install_generator.rb +20 -0
  7. data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
  8. data/lib/generators/search_engine/model/model_generator.rb +86 -0
  9. data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
  10. data/lib/search-engine-for-typesense.rb +12 -0
  11. data/lib/search_engine/active_record_syncable.rb +247 -0
  12. data/lib/search_engine/admin/stopwords.rb +125 -0
  13. data/lib/search_engine/admin/synonyms.rb +125 -0
  14. data/lib/search_engine/admin.rb +12 -0
  15. data/lib/search_engine/ast/and.rb +52 -0
  16. data/lib/search_engine/ast/binary_op.rb +75 -0
  17. data/lib/search_engine/ast/eq.rb +19 -0
  18. data/lib/search_engine/ast/group.rb +18 -0
  19. data/lib/search_engine/ast/gt.rb +12 -0
  20. data/lib/search_engine/ast/gte.rb +12 -0
  21. data/lib/search_engine/ast/in.rb +28 -0
  22. data/lib/search_engine/ast/lt.rb +12 -0
  23. data/lib/search_engine/ast/lte.rb +12 -0
  24. data/lib/search_engine/ast/matches.rb +55 -0
  25. data/lib/search_engine/ast/node.rb +176 -0
  26. data/lib/search_engine/ast/not_eq.rb +13 -0
  27. data/lib/search_engine/ast/not_in.rb +24 -0
  28. data/lib/search_engine/ast/or.rb +52 -0
  29. data/lib/search_engine/ast/prefix.rb +51 -0
  30. data/lib/search_engine/ast/raw.rb +41 -0
  31. data/lib/search_engine/ast/unary_op.rb +43 -0
  32. data/lib/search_engine/ast.rb +101 -0
  33. data/lib/search_engine/base/creation.rb +727 -0
  34. data/lib/search_engine/base/deletion.rb +80 -0
  35. data/lib/search_engine/base/display_coercions.rb +36 -0
  36. data/lib/search_engine/base/hydration.rb +312 -0
  37. data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
  38. data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
  39. data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
  40. data/lib/search_engine/base/index_maintenance.rb +459 -0
  41. data/lib/search_engine/base/indexing_dsl.rb +255 -0
  42. data/lib/search_engine/base/joins.rb +479 -0
  43. data/lib/search_engine/base/model_dsl.rb +472 -0
  44. data/lib/search_engine/base/presets.rb +43 -0
  45. data/lib/search_engine/base/pretty_printer.rb +315 -0
  46. data/lib/search_engine/base/relation_delegation.rb +42 -0
  47. data/lib/search_engine/base/scopes.rb +113 -0
  48. data/lib/search_engine/base/updating.rb +92 -0
  49. data/lib/search_engine/base.rb +38 -0
  50. data/lib/search_engine/bulk.rb +284 -0
  51. data/lib/search_engine/cache.rb +33 -0
  52. data/lib/search_engine/cascade.rb +531 -0
  53. data/lib/search_engine/cli/doctor.rb +631 -0
  54. data/lib/search_engine/cli/support.rb +217 -0
  55. data/lib/search_engine/cli.rb +222 -0
  56. data/lib/search_engine/client/http_adapter.rb +63 -0
  57. data/lib/search_engine/client/request_builder.rb +92 -0
  58. data/lib/search_engine/client/services/base.rb +74 -0
  59. data/lib/search_engine/client/services/collections.rb +161 -0
  60. data/lib/search_engine/client/services/documents.rb +214 -0
  61. data/lib/search_engine/client/services/operations.rb +152 -0
  62. data/lib/search_engine/client/services/search.rb +190 -0
  63. data/lib/search_engine/client/services.rb +29 -0
  64. data/lib/search_engine/client.rb +765 -0
  65. data/lib/search_engine/client_options.rb +20 -0
  66. data/lib/search_engine/collection_resolver.rb +191 -0
  67. data/lib/search_engine/collections_graph.rb +330 -0
  68. data/lib/search_engine/compiled_params.rb +143 -0
  69. data/lib/search_engine/compiler.rb +383 -0
  70. data/lib/search_engine/config/observability.rb +27 -0
  71. data/lib/search_engine/config/presets.rb +92 -0
  72. data/lib/search_engine/config/selection.rb +16 -0
  73. data/lib/search_engine/config/typesense.rb +48 -0
  74. data/lib/search_engine/config/validators.rb +97 -0
  75. data/lib/search_engine/config.rb +917 -0
  76. data/lib/search_engine/console_helpers.rb +130 -0
  77. data/lib/search_engine/deletion.rb +103 -0
  78. data/lib/search_engine/dispatcher.rb +125 -0
  79. data/lib/search_engine/dsl/parser.rb +582 -0
  80. data/lib/search_engine/engine.rb +167 -0
  81. data/lib/search_engine/errors.rb +290 -0
  82. data/lib/search_engine/filters/sanitizer.rb +189 -0
  83. data/lib/search_engine/hydration/materializers.rb +808 -0
  84. data/lib/search_engine/hydration/selection_context.rb +96 -0
  85. data/lib/search_engine/indexer/batch_planner.rb +76 -0
  86. data/lib/search_engine/indexer/bulk_import.rb +626 -0
  87. data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
  88. data/lib/search_engine/indexer/retry_policy.rb +103 -0
  89. data/lib/search_engine/indexer.rb +747 -0
  90. data/lib/search_engine/instrumentation.rb +308 -0
  91. data/lib/search_engine/joins/guard.rb +202 -0
  92. data/lib/search_engine/joins/resolver.rb +95 -0
  93. data/lib/search_engine/logging/color.rb +78 -0
  94. data/lib/search_engine/logging/format_helpers.rb +92 -0
  95. data/lib/search_engine/logging/partition_progress.rb +53 -0
  96. data/lib/search_engine/logging_subscriber.rb +388 -0
  97. data/lib/search_engine/mapper.rb +785 -0
  98. data/lib/search_engine/multi.rb +286 -0
  99. data/lib/search_engine/multi_result.rb +186 -0
  100. data/lib/search_engine/notifications/compact_logger.rb +675 -0
  101. data/lib/search_engine/observability.rb +162 -0
  102. data/lib/search_engine/operations.rb +58 -0
  103. data/lib/search_engine/otel.rb +227 -0
  104. data/lib/search_engine/partitioner.rb +128 -0
  105. data/lib/search_engine/ranking_plan.rb +118 -0
  106. data/lib/search_engine/registry.rb +158 -0
  107. data/lib/search_engine/relation/compiler.rb +711 -0
  108. data/lib/search_engine/relation/deletion.rb +37 -0
  109. data/lib/search_engine/relation/dsl/filters.rb +624 -0
  110. data/lib/search_engine/relation/dsl/selection.rb +240 -0
  111. data/lib/search_engine/relation/dsl.rb +903 -0
  112. data/lib/search_engine/relation/dx/dry_run.rb +59 -0
  113. data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
  114. data/lib/search_engine/relation/dx.rb +231 -0
  115. data/lib/search_engine/relation/materializers.rb +118 -0
  116. data/lib/search_engine/relation/options.rb +138 -0
  117. data/lib/search_engine/relation/state.rb +274 -0
  118. data/lib/search_engine/relation/updating.rb +44 -0
  119. data/lib/search_engine/relation.rb +623 -0
  120. data/lib/search_engine/result.rb +664 -0
  121. data/lib/search_engine/schema.rb +1083 -0
  122. data/lib/search_engine/sources/active_record_source.rb +185 -0
  123. data/lib/search_engine/sources/base.rb +62 -0
  124. data/lib/search_engine/sources/lambda_source.rb +55 -0
  125. data/lib/search_engine/sources/sql_source.rb +196 -0
  126. data/lib/search_engine/sources.rb +71 -0
  127. data/lib/search_engine/stale_rules.rb +160 -0
  128. data/lib/search_engine/test/minitest_assertions.rb +57 -0
  129. data/lib/search_engine/test/offline_client.rb +134 -0
  130. data/lib/search_engine/test/rspec_matchers.rb +77 -0
  131. data/lib/search_engine/test/stub_client.rb +201 -0
  132. data/lib/search_engine/test.rb +66 -0
  133. data/lib/search_engine/test_autoload.rb +8 -0
  134. data/lib/search_engine/update.rb +35 -0
  135. data/lib/search_engine/version.rb +7 -0
  136. data/lib/search_engine.rb +332 -0
  137. data/lib/tasks/search_engine.rake +501 -0
  138. data/lib/tasks/search_engine_doctor.rake +16 -0
  139. metadata +225 -0
@@ -0,0 +1,664 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SearchEngine
4
+ # Result wraps a Typesense search response and exposes hydrated hits.
5
+ #
6
+ # Hydration converts each hit's document into either an instance of the
7
+ # provided model class or a generic OpenStruct when no class is available.
8
+ #
9
+ # - Enumeration yields hydrated objects (includes Enumerable)
10
+ # - Metadata readers: {#found}, {#out_of}, {#facets}, {#raw}
11
+ # - Selection is respected implicitly by hydrating only keys present in the
12
+ # returned document; no missing attributes are synthesized.
13
+ #
14
+ # Unknown collections: when +klass+ is +nil+, hydration falls back to
15
+ # OpenStruct.
16
+ class Result
17
+ include Enumerable
18
+
19
# Immutable lightweight group record for grouped responses.
#
# Both the key Hash and the hits Array are copied before being frozen, so
# building a Group never freezes the caller's own objects.
#
# @!attribute [r] key
#   @return [Hash{String=>Object}] mapping of field name to group value
# @!attribute [r] hits
#   @return [Array<Object>] hydrated hits within the group
# @!attribute [r] size
#   @return [Integer] number of hits in the group
class Group
  attr_reader :key, :hits

  # @param key [Hash{String=>Object}, nil] group key; +nil+ becomes an empty Hash
  # @param hits [Array<Object>, Object, nil] hits; coerced with Kernel#Array
  def initialize(key:, hits:)
    @key = (key || {}).dup.freeze
    # Array() returns its argument untouched when it already is an Array, so
    # copy first; otherwise the caller's array would be frozen in place.
    @hits = Array(hits).dup.freeze
    freeze
  end

  # @return [Integer] number of hits in the group
  def size
    @hits.size
  end

  # @return [String]
  def inspect
    "#<SearchEngine::Result::Group key=#{key.inspect} size=#{size}>"
  end

  # Value equality: same key and same hits.
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    other.is_a?(Group) && other.key == key && other.hits == hits
  end

  # Strict equality used by Hash lookups; kept consistent with {#hash}.
  # @param other [Object]
  # @return [Boolean]
  def eql?(other)
    other.is_a?(Group) && other.key.eql?(key) && other.hits.eql?(hits)
  end

  # @return [Integer] hash derived from key and hits so equal groups collide
  def hash
    [Group, key, hits].hash
  end
end
52
+
53
+ # @!attribute [r] hits
+ #   @return [Array<Object>] hydrated hits (frozen internal array)
+ # @!attribute [r] found
+ #   @return [Integer, nil] number of documents that matched the search
+ # @!attribute [r] out_of
+ #   @return [Integer, nil] number of documents searched
+ # @!attribute [r] raw
+ #   @return [Hash] raw Typesense response (unmodified)
+ # Facet counts as returned by Typesense stay inside +raw+; use {#facets} for the parsed form.
58
+ attr_reader :hits, :found, :out_of, :raw
59
+
60
# Build a new result wrapper and freeze it.
#
# Facets and (for grouped responses) groups are computed eagerly because the
# instance is frozen at the end of construction.
#
# @param raw [Hash] Parsed Typesense response ("hits"/"grouped_hits", "found", "out_of", "facet_counts")
# @param klass [Class, nil] Optional model class used to hydrate each document
# @param selection [Hash, nil] Optional selection context for strict missing checks
# @param facets [Hash, nil] Optional facets context carrying declared facet queries/labels
# @param highlight [Hash, nil] Optional highlight context carrying configured tags and knobs
def initialize(raw, klass: nil, selection: nil, facets: nil, highlight: nil)
  require 'ostruct'

  @raw = raw || {}
  @found = @raw['found'] || @raw[:found]
  @out_of = @raw['out_of'] || @raw[:out_of]
  # The raw facet_counts stay in @raw; the parsed form is memoized below.
  @klass = klass
  @selection_ctx = selection if selection
  @facets_ctx = facets if facets
  @highlight_ctx = highlight if highlight

  @__groups_memo = nil
  # Must happen before #freeze: no instance variable can be assigned afterwards.
  @__facets_parsed_memo = build_parsed_facets(@raw, @facets_ctx).freeze

  if grouped?
    built = build_groups
    @__groups_memo = built.freeze
    # For grouped responses, #hits exposes the first hit of every group.
    @hits = built.map { |group| group.hits.first }.compact.freeze
    instrument_group_parse(built)
  else
    raw_hits = Array(@raw['hits'] || @raw[:hits])
    collected = raw_hits.each_with_object([]) do |raw_hit, acc|
      entry = symbolize_hit(raw_hit)
      document = entry[:document]
      next unless document

      obj = hydrate(document)
      attach_highlighting!(obj, entry)
      acc << obj
    end
    @hits = collected.freeze
  end

  freeze
end
104
+
105
# Iterate over hydrated hits.
#
# @yieldparam obj [Object] hydrated object
# @return [Enumerator] when no block is given
# @return [Array<Object>] the internal frozen hits array when a block is given
def each(&block)
  block ? @hits.each(&block) : @hits.each
end
113
+
114
# @return [Array<Object>] a new, unfrozen Array holding the same hydrated hits
def to_a
  Array.new(@hits)
end
118
+
119
# @return [Integer] number of hydrated hits held by this result
def size
  @hits.length
end
123
+
124
# @return [Boolean] true when this result holds no hydrated hits
def empty?
  @hits.size.zero?
end
128
+
129
# Whether this result represents a grouped response.
# True only when the raw payload carries a "grouped_hits" entry (String or
# Symbol key) that is an Array.
# @return [Boolean]
def grouped?
  (@raw['grouped_hits'] || @raw[:grouped_hits]).is_a?(Array)
end
136
+
137
# Groups for grouped responses. Returns an empty Array when not grouped.
#
# The returned Array is always a fresh, mutable copy, so callers may sort or
# append to it without touching the memoized list. Whether the Group objects
# themselves are immutable is decided by Group, which is defined outside
# this method.
# @return [Array<SearchEngine::Result::Group>]
def groups
  # Previously the ungrouped branch returned a frozen literal while the
  # grouped branch returned an unfrozen dup; both now behave the same.
  grouped? ? @__groups_memo.dup : []
end
145
+
146
# Enumerate over groups; yields nothing when the result is not grouped.
#
# @yieldparam group [SearchEngine::Result::Group]
# @return [Enumerator] when no block is given
# @return [Array<SearchEngine::Result::Group>] the array returned by {#groups} when a block is given
def each_group(&block)
  if block
    groups.each(&block)
  else
    enum_for(:each_group)
  end
end
155
+
156
# Number of groups present in this result page.
# Returns 0 when the response is not grouped.
# @return [Integer]
# @example
#   res = SearchEngine::Product.group_by(:brand_id, limit: 1).execute
#   res.groups_count #=> number of groups in this page
def groups_count
  grouped? ? @__groups_memo.size : 0
end
167
+
168
# Total documents found by the backend for this query (not page-limited).
# Returns the value captured from the raw response's "found" entry at
# construction time, or +nil+ when the response did not carry one.
# @return [Integer, nil]
# @example
#   res = SearchEngine::Product.group_by(:brand_id, limit: 1).execute
#   res.total_found #=> total documents found
def total_found
  @found
end
177
+
178
# Total number of groups for this query.
#
# Prefers a total exposed by the backend payload; otherwise falls back to the
# number of groups in the current page (same value as {#groups_count}).
# Returns +nil+ when the response is not grouped.
# @return [Integer, nil]
# @example
#   res = SearchEngine::Product.group_by(:brand_id, limit: 1).execute
#   res.total_groups #=> global groups if available; else groups_count (page-scoped)
def total_groups
  return unless grouped?

  backend_total = detect_total_groups_from_raw(@raw)
  return backend_total unless backend_total.nil?

  @__groups_memo.size
end
192
+
193
# First group in this page, or +nil+ when there are no groups or the
# response is not grouped. Returns the memoized group itself (no copy).
# @return [SearchEngine::Result::Group, nil]
def first_group
  @__groups_memo.first if grouped?
end
201
+
202
# Last group in this page, or +nil+ when there are no groups or the
# response is not grouped. Returns the memoized group itself (no copy).
# @return [SearchEngine::Result::Group, nil]
def last_group
  @__groups_memo.last if grouped?
end
210
+
211
# Facets helpers
# ---------------
#
# Parsed facet counts as a Hash mapping field => [ { value:, count:, highlighted:, label: } ].
# Returns an empty Hash when no facets are present.
#
# The Hash, every per-field Array and every value Hash are copies, so callers
# can mutate the result freely without affecting the memoized facets. (A
# plain +dup+ of the memo would still share its frozen arrays and its value
# hashes.)
# @return [Hash{String=>Array<Hash{Symbol=>Object}>}]
def facets
  parse_facets.each_with_object({}) do |(field, values), copy|
    copy[field] = values.map(&:dup)
  end
end
222
+
223
# Facet values for a given field name.
#
# Returns copies of the value hashes in a new Array, so mutating the result
# never alters the memoized facets. Unknown fields yield an empty Array.
# @param name [#to_s]
# @return [Array<Hash{Symbol=>Object}>]
def facet_values(name)
  (parse_facets[name.to_s] || []).map(&:dup)
end
231
+
232
# Convenience lookup: value => count for a given facet field.
# When the same value appears more than once, the later count wins.
# @param name [#to_s]
# @return [Hash{Object=>Integer}]
def facet_value_map(name)
  facet_values(name).map { |entry| [entry[:value], entry[:count]] }.to_h
end
238
+
239
+ private
240
+
241
# Per-hit highlighting mixin: added onto hydrated objects.
#
# Reads two instance variables injected on the hit:
# +@__se_highlights_map__+ (normalized highlights by field) and
# +@__se_highlight_ctx__+ (highlight options such as tags and thresholds).
module HitHighlighting
  # @return [Hash{String=>Array<Hash{Symbol=>Object}>}] shallow copy of the
  #   normalized highlights by field; empty Hash when none were attached
  def highlights
    map = instance_variable_get(:@__se_highlights_map__)
    map ? map.dup : {}
  end

  # Return a sanitized HTML snippet or full highlighted value for a field.
  #
  # Prefers an entry flagged as a snippet (or, with +full: true+, an entry
  # not flagged as a snippet) and falls back to the first entry for the field.
  #
  # @param field [Symbol, String]
  # @param full [Boolean] when true, prefer full highlighted value
  # @return [String, nil] sanitized HTML (SafeBuffer when ActiveSupport is
  #   present); +nil+ when the hit has no highlight for +field+
  def snippet_for(field, full: false)
    map = instance_variable_get(:@__se_highlights_map__)
    return nil unless map && field

    list = map[field.to_s]
    return nil unless Array(list).any?

    ctx = instance_variable_get(:@__se_highlight_ctx__)
    wanted_flag = !full
    entry = list.find { |h| h[:snippet] == wanted_flag } || list.first
    return nil unless entry

    html = SearchEngine::Result.send(:sanitize_highlight_html, entry[:value].to_s, ctx)
    threshold = ctx && ctx[:snippet_threshold]

    # Use the value as-is when it already is a server snippet, when the full
    # value was requested, or when no snippet threshold is configured.
    if entry[:snippet] == true || full || threshold.nil?
      return SearchEngine::Result.send(:wrap_safe_if_rails, html)
    end

    # Compute a minimal snippet when the server didn't provide one.
    matched = Array(entry[:matched_tokens]).map(&:to_s)
    snippet = SearchEngine::Result.send(:compute_snippet_from_full, html, matched, ctx)
    SearchEngine::Result.send(:wrap_safe_if_rails, snippet)
  end
end
286
+
287
# Memoized parsed facets, or a frozen empty Hash when the memo is unset.
# @return [Hash{String=>Array<Hash{Symbol=>Object}>}]
def parse_facets
  memo = @__facets_parsed_memo
  memo || {}.freeze
end
290
+
291
# Parse the response's "facet_counts" section into field => value list.
#
# Entries without a field name are skipped. When +ctx+ declares facet
# queries for a field, the matching values get their labels annotated.
# Each per-field list is frozen; the returned Hash itself is not.
#
# @param raw [Hash, nil] raw response (String or Symbol keys)
# @param ctx [Hash, nil] facets context; only +ctx[:queries]+ is read here
# @return [Hash{String=>Array<Hash{Symbol=>Object}>}]
def build_parsed_facets(raw, ctx)
  facet_entries = Array((raw && (raw['facet_counts'] || raw[:facet_counts])) || [])

  facet_entries.each_with_object({}) do |entry, parsed|
    field = (entry['field_name'] || entry[:field_name]).to_s
    next if field.empty?

    list = build_facet_value_list(Array(entry['counts'] || entry[:counts]))

    if ctx && Array(ctx[:queries]).any?
      for_field = Array(ctx[:queries]).select { |q| (q[:field] || q['field']).to_s == field }
      annotate_labels_for_field!(list, for_field) if for_field.any?
    end

    parsed[field] = list.freeze
  end
end
311
+
312
# Normalize raw facet count entries into Symbol-keyed hashes.
#
# A missing count becomes 0; the count is passed through Kernel#Integer, so
# a non-numeric count raises. +label+ always starts as +nil+.
#
# @param values [Array<Hash>, nil] raw "counts" entries (String or Symbol keys)
# @return [Array<Hash{Symbol=>Object}>]
def build_facet_value_list(values)
  Array(values).map do |item|
    raw_count = item['count'] || item[:count]
    {
      value: item['value'] || item[:value],
      count: Integer(raw_count || 0),
      highlighted: item['highlighted'] || item[:highlighted],
      label: nil
    }
  end
end
320
+
321
# Set +:label+ on each facet value from the first query whose expression
# (compared as a String) equals the value; values without a matching query
# get +nil+. Mutates the hashes in +list+.
#
# @param list [Array<Hash{Symbol=>Object}>] facet value hashes
# @param queries [Array<Hash>] declared facet queries (String or Symbol keys)
# @return [Array<Hash{Symbol=>Object}>] +list+
def annotate_labels_for_field!(list, queries)
  list.each do |facet_value|
    wanted = facet_value[:value].to_s
    hit = queries.find { |query| (query[:expr] || query['expr']).to_s == wanted }
    facet_value[:label] = hit ? (hit[:label] || hit['label'] || nil) : nil
  end
end
328
+
329
# Attempt to read a total groups count from the raw payload using common keys.
#
# Keys are probed in a fixed order (String key first, then Symbol). The first
# value that is an Integer or a String of decimal digits wins; anything else
# is skipped. Returns +nil+ when no key yields such a value, or when +raw+
# cannot be read at all.
#
# @param raw [Hash]
# @return [Integer, nil]
def detect_total_groups_from_raw(raw)
  keys = %w[total_groups group_count groups_count found_groups total_group_count total_grouped total_group_matches]
  keys.each do |key|
    val = raw[key] || raw[key.to_sym]
    next if val.nil?
    return val if val.is_a?(Integer)
    # Parse explicitly in base 10: a bare Integer("010") is read as octal (8)
    # and Integer("09") raises, which used to be swallowed into nil.
    return Integer(val, 10) if val.is_a?(String) && val.match?(/\A-?\d+\z/)
  end
  nil
rescue StandardError
  nil
end
344
+
345
# Hydrate a single Typesense document (Hash) into a Ruby object.
#
# Runs the strict-missing check on the document's keys first, then:
# - without +@klass+: wraps the document in an OpenStruct;
# - when +@klass+ responds to +from_document+: delegates to it;
# - otherwise: builds +@klass.new+ and assigns every document key as an
#   instance variable (no reader methods are generated).
#
# @param doc [Hash]
# @return [Object]
def hydrate(doc)
  present = doc.is_a?(Hash) ? doc.keys.map(&:to_s) : []
  enforce_strict_missing_if_needed!(present)

  return OpenStruct.new(doc) unless @klass
  return @klass.from_document(doc) if @klass.respond_to?(:from_document)

  instance = @klass.new
  doc.each { |key, value| instance.instance_variable_set(ivar_name(key), value) }
  instance
end
373
+
374
# Build Group objects from the raw grouped response.
# Keeps the backend order; every sub-hit that carries a document is hydrated
# once and gets its highlighting attached. Sub-hits without a document are
# skipped.
# @return [Array<SearchEngine::Result::Group>]
def build_groups
  raw_groups = @raw['grouped_hits'] || @raw[:grouped_hits] || []
  fields = group_by_fields_from_raw

  raw_groups.map do |entry|
    key = build_group_key_hash(fields, Array(entry['group_key'] || entry[:group_key]))

    members = Array(entry['hits'] || entry[:hits]).each_with_object([]) do |sub, acc|
      doc = sub && (sub['document'] || sub[:document])
      next unless doc

      obj = hydrate(doc)
      attach_highlighting!(obj, symbolize_hit(sub))
      acc << obj
    end

    Group.new(key: key, hits: members)
  end
end
399
+
400
# Derive group_by fields from echoed request params when available.
#
# Looks for "request_params" and then "search_params" (String or Symbol
# keys) and splits their comma-separated "group_by" String. Returns an Array
# of non-empty, stripped field names; empty when the params are absent or
# "group_by" is not a non-blank String.
# @return [Array<String>]
def group_by_fields_from_raw
  params = @raw['request_params'] || @raw[:request_params] || @raw['search_params'] || @raw[:search_params]
  return [] unless params

  group_by = params['group_by'] || params[:group_by]
  return [] if !group_by.is_a?(String) || group_by.strip.empty?

  group_by.split(',').map(&:strip).reject(&:empty?)
end
411
+
412
# Build a Hash mapping field names to coerced group key values.
#
# - no values: empty Hash;
# - known fields: fields are paired with values positionally, stopping at the
#   shorter of the two;
# - unknown fields: a single value is stored under "group", several values
#   under "group_0", "group_1", ...
#
# @param fields [Array<String>]
# @param values [Array<Object>]
# @return [Hash{String=>Object}]
def build_group_key_hash(fields, values)
  return {} if values.empty?

  if fields.any?
    paired = fields.first(values.size).zip(values)
    return paired.each_with_object({}) { |(field, raw), key| key[field.to_s] = coerce_group_value(raw) }
  end

  return { 'group' => coerce_group_value(values.first) } if values.size == 1

  values.each_with_index.each_with_object({}) do |(raw, position), key|
    key["group_#{position}"] = coerce_group_value(raw)
  end
end
435
+
436
# Best-effort coercion for common scalar types.
#
# - +nil+ stays +nil+;
# - +true+/+false+ and anything whose +to_s+ is "true"/"false" become booleans;
# - Strings made only of decimal digits (optional leading "-") become Integers;
# - Strings of the form digits.digits (optional leading "-") become Floats;
# - everything else is returned unchanged.
#
# @param value [Object]
# @return [Object]
def coerce_group_value(value)
  return nil if value.nil?
  return true if value == true || value.to_s == 'true'
  return false if value == false || value.to_s == 'false'
  return value unless value.is_a?(String)

  # Force base 10: without it Integer("010") is read as octal (8) and
  # Integer("08") raises ArgumentError for keys with leading zeros.
  return Integer(value, 10) if value.match?(/\A-?\d+\z/)
  return Float(value) if value.match?(/\A-?\d+\.\d+\z/)

  value
end
447
+
448
# Instance-variable name ("@key") for a document key, cached per key so the
# String is built only once.
#
# The cache is created lazily on first use, which assigns an instance
# variable on the receiver.
# @param key [String, Symbol]
# @return [String]
def ivar_name(key)
  cache = (@ivar_prefix_cache ||= {})
  cache[key] ||= "@#{key}"
end
452
+
453
# Emit the 'search_engine.result.grouped_parsed' instrumentation event with
# the collection (when +@klass+ exposes one), the number of groups and the
# average group size (0.0 when there are no groups).
#
# Errors raised while reading the collection are swallowed and reported as
# a +nil+ collection.
# @param groups [Array<#size>]
# @return [Object] whatever SearchEngine::Instrumentation.instrument returns
def instrument_group_parse(groups)
  groups_total = groups.size
  hits_total = groups.sum(&:size)
  average = groups_total.positive? ? hits_total.to_f / groups_total : 0.0

  collection = begin
    @klass.collection if @klass.respond_to?(:collection)
  rescue StandardError
    nil
  end

  SearchEngine::Instrumentation.instrument(
    'search_engine.result.grouped_parsed',
    collection: collection,
    groups_count: groups_total,
    avg_group_size: average
  )
end
470
+
471
# Enforce strict-missing behavior when enabled.
#
# Active only when the selection context has +strict_missing: true+. Computes
# missing = requested_root - present_keys and raises when that is non-empty.
# The message lists at most three missing fields plus a "(+N more)" suffix.
#
# @param present_keys [Array<String>] keys present in the document
# @return [nil] when nothing is missing or strictness is off
# @raise [SearchEngine::Errors::MissingField] when requested fields are absent
def enforce_strict_missing_if_needed!(present_keys)
  ctx = @selection_ctx || {}
  return unless ctx[:strict_missing] == true

  requested = Array(ctx[:requested_root]).map(&:to_s).reject(&:empty?)
  return if requested.empty?

  missing = requested - present_keys
  return if missing.empty?

  model_name = begin
    @klass&.name || 'Object'
  rescue StandardError
    'Object'
  end

  shown = missing.first(3)
  hidden = missing.size - shown.size
  listing = shown.map { |f| %("#{f}") }.join(', ')
  listing += " (+#{hidden} more)" if hidden.positive?

  msg = "MissingField: requested fields absent for #{model_name}: #{listing}. " \
        'They may be excluded by selection or upstream Typesense mapping. ' \
        'Fix by adjusting select/exclude/reselect, relaxing strictness, or ' \
        'ensuring the mapping includes these fields.'

  raise SearchEngine::Errors::MissingField.new(
    msg,
    hint: 'Adjust select/exclude or disable strict_missing to avoid raising.',
    doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/field-selection#strict-vs-lenient-selection',
    details: { requested: requested, present_keys: present_keys }
  )
end
507
+
508
+ # --- Highlight internals -------------------------------------------------
509
+
510
# Shallow copy of a hit with its top-level String keys turned into Symbols.
# Non-String keys are kept as they are; nested values are not touched.
# Returns an empty Hash for non-Hash input or when conversion fails.
#
# @param h [Hash, Object]
# @return [Hash]
def symbolize_hit(h)
  return {} unless h.is_a?(Hash)

  h.each_with_object({}) do |(key, value), out|
    out[key.is_a?(String) ? key.to_sym : key] = value
  end
rescue StandardError
  {}
end
519
+
520
# Attach normalized highlights and the highlight context to a hydrated hit.
#
# Does nothing when the hit has no highlights or normalization yields
# nothing. Otherwise extends +obj+ with HitHighlighting (once) and stores
# the data in instance variables on it. Any error leaves +obj+ as it was at
# that point and is swallowed.
#
# @param obj [Object] hydrated hit
# @param hit_entry [Hash] hit with Symbol keys; only +:highlights+ is read
# @return [Object] +obj+
def attach_highlighting!(obj, hit_entry)
  raw_list = Array(hit_entry[:highlights])
  map = raw_list.empty? ? {} : normalize_highlights(raw_list)
  return obj if map.empty?

  # Extend only once per object.
  obj.extend(HitHighlighting) unless obj.singleton_class.include?(HitHighlighting)
  obj.instance_variable_set(:@__se_highlights_map__, map)
  obj.instance_variable_set(:@__se_highlight_ctx__, safe_highlight_ctx)
  obj
rescue StandardError
  obj
end
535
+
536
# Sanitized copy of the highlight context handed to hydrated hits.
#
# Copies only the known keys: +:fields+/+:full_fields+ as Arrays of non-empty
# Strings, +:start_tag+/+:end_tag+ as Strings (each only when truthy), and
# +:affix_tokens+/+:snippet_threshold+ as-is whenever the key is present.
# Returns an empty Hash when the context is not a Hash or on any error.
#
# @return [Hash{Symbol=>Object}]
def safe_highlight_ctx
  ctx = @highlight_ctx || {}
  return {} unless ctx.is_a?(Hash)

  {}.tap do |out|
    %i[fields full_fields].each do |name|
      out[name] = Array(ctx[name]).map(&:to_s).reject(&:empty?) if ctx[name]
    end
    %i[start_tag end_tag].each do |name|
      out[name] = ctx[name].to_s if ctx[name]
    end
    %i[affix_tokens snippet_threshold].each do |name|
      out[name] = ctx[name] if ctx.key?(name)
    end
  end
rescue StandardError
  {}
end
551
+
552
# Normalize raw highlight entries into field => [ { value:, matched_tokens:, snippet: } ].
#
# For each entry the "snippet" is preferred over the "value"; +snippet:+
# records whether a snippet was present. Entries that are not Hashes or have
# no field name are skipped, so one malformed entry no longer discards the
# highlights of every other field. Any remaining error yields an empty Hash.
#
# @param list [Array<Hash>, nil] raw highlights (String or Symbol keys)
# @return [Hash{String=>Array<Hash{Symbol=>Object}>}]
def normalize_highlights(list)
  Array(list).each_with_object({}) do |h, result|
    next unless h.is_a?(Hash)

    field = (h['field'] || h[:field]).to_s
    next if field.empty?

    snippet = h['snippet'] || h[:snippet]
    tokens = h['matched_tokens'] || h[:matched_tokens] || []

    (result[field] ||= []) << {
      value: (snippet || h['value'] || h[:value]).to_s,
      matched_tokens: Array(tokens).map(&:to_s),
      snippet: !snippet.nil?
    }
  end
rescue StandardError
  {}
end
576
+
577
class << self
  # Escape +text+ for HTML output while keeping highlight tags intact.
  #
  # Highlight tags (the configured ones plus the literal <mark>/</mark>) are
  # swapped for placeholders, everything else is HTML-escaped, and the
  # placeholders are restored as the configured tags.
  #
  # On any error the method fails closed: it returns the fully escaped text
  # (highlight tags included) rather than the raw input, because callers may
  # mark the result as HTML-safe.
  #
  # @param text [#to_s]
  # @param ctx [Hash, nil] highlight context; reads +:start_tag+ and +:end_tag+
  # @return [String]
  def sanitize_highlight_html(text, ctx)
    require 'cgi'

    s = text.to_s
    return '' if s.empty?

    start_tag = ((ctx && ctx[:start_tag]) || '<mark>').to_s
    end_tag = ((ctx && ctx[:end_tag]) || '</mark>').to_s
    start_placeholder = "\u0001__SE_HL_START__\u0001"
    end_placeholder = "\u0001__SE_HL_END__\u0001"

    # Normalize known tokens from server (<mark>) and configured ones
    [start_tag, '<mark>'].uniq.each { |tok| s = s.gsub(tok, start_placeholder) }
    [end_tag, '</mark>'].uniq.each { |tok| s = s.gsub(tok, end_placeholder) }

    # Escape everything, then restore only the configured tags. Block form
    # inserts the tags literally (no backslash interpretation).
    CGI.escapeHTML(s)
       .gsub(start_placeholder) { start_tag }
       .gsub(end_placeholder) { end_tag }
  rescue StandardError
    CGI.escapeHTML(text.to_s.scrub)
  end

  # Build a minimal snippet around the first matched token when the server
  # didn't provide one.
  #
  # +html+ is treated as already-sanitized markup: tags are stripped and HTML
  # entities decoded before the window is cut, so the final sanitizing pass
  # escapes the text exactly once (no "&amp;amp;"). The window spans
  # +ctx[:affix_tokens]+ tokens on each side of the match (8 when unset or
  # not positive). Falls back to the whole sanitized text when there is
  # nothing to match.
  #
  # @param html [#to_s]
  # @param matched_tokens [Array<#to_s>]
  # @param ctx [Hash, nil] reads +:affix_tokens+, +:start_tag+, +:end_tag+
  # @return [String]
  def compute_snippet_from_full(html, matched_tokens, ctx)
    require 'cgi'

    plain = CGI.unescapeHTML(strip_tags_preserving_space(html))
    tokens = tokenize(plain)
    # First non-blank matched token is the highlight target
    target = Array(matched_tokens).map(&:to_s).find { |t| !t.strip.empty? }
    if tokens.empty? || target.nil?
      return sanitize_highlight_html(CGI.unescapeHTML(html.to_s), ctx)
    end

    needle = target.downcase
    idx = tokens.index { |t| t.downcase.include?(needle) } || 0
    window = (ctx && ctx[:affix_tokens]).to_i
    window = 8 unless window.positive?
    left = [idx - window, 0].max
    right = [idx + window, tokens.size - 1].min
    segment = tokens[left..right].join(' ')

    # Wrap the first occurrence with configured tags
    start_tag = ((ctx && ctx[:start_tag]) || '<mark>').to_s
    end_tag = ((ctx && ctx[:end_tag]) || '</mark>').to_s
    highlighted = segment.sub(/#{Regexp.escape(target)}/i) { |match| "#{start_tag}#{match}#{end_tag}" }
    sanitize_highlight_html(highlighted, ctx)
  rescue StandardError
    sanitize_highlight_html(html, ctx)
  end

  # Remove anything that looks like an HTML tag (+<...>+). Tags are removed
  # outright; no whitespace is inserted in their place.
  # @param html [#to_s]
  # @return [String]
  def strip_tags_preserving_space(html)
    html.to_s.gsub(/<[^>]*>/, '')
  rescue StandardError
    html.to_s
  end

  # Split text on runs of whitespace, dropping empty tokens.
  # @param text [#to_s]
  # @return [Array<String>]
  def tokenize(text)
    s = text.to_s
    return [] if s.empty?

    s.split(/\s+/).reject(&:empty?)
  end

  # Wrap in ActiveSupport::SafeBuffer when that class is loaded; otherwise
  # return a plain String.
  # @param html [#to_s]
  # @return [String]
  def wrap_safe_if_rails(html)
    if defined?(ActiveSupport::SafeBuffer)
      ActiveSupport::SafeBuffer.new(html.to_s)
    else
      html.to_s
    end
  end
end
663
+ end
664
+ end