search-engine-for-typesense 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +148 -0
- data/app/search_engine/search_engine/app_info.rb +11 -0
- data/app/search_engine/search_engine/index_partition_job.rb +170 -0
- data/lib/generators/search_engine/install/install_generator.rb +20 -0
- data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
- data/lib/generators/search_engine/model/model_generator.rb +86 -0
- data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
- data/lib/search-engine-for-typesense.rb +12 -0
- data/lib/search_engine/active_record_syncable.rb +247 -0
- data/lib/search_engine/admin/stopwords.rb +125 -0
- data/lib/search_engine/admin/synonyms.rb +125 -0
- data/lib/search_engine/admin.rb +12 -0
- data/lib/search_engine/ast/and.rb +52 -0
- data/lib/search_engine/ast/binary_op.rb +75 -0
- data/lib/search_engine/ast/eq.rb +19 -0
- data/lib/search_engine/ast/group.rb +18 -0
- data/lib/search_engine/ast/gt.rb +12 -0
- data/lib/search_engine/ast/gte.rb +12 -0
- data/lib/search_engine/ast/in.rb +28 -0
- data/lib/search_engine/ast/lt.rb +12 -0
- data/lib/search_engine/ast/lte.rb +12 -0
- data/lib/search_engine/ast/matches.rb +55 -0
- data/lib/search_engine/ast/node.rb +176 -0
- data/lib/search_engine/ast/not_eq.rb +13 -0
- data/lib/search_engine/ast/not_in.rb +24 -0
- data/lib/search_engine/ast/or.rb +52 -0
- data/lib/search_engine/ast/prefix.rb +51 -0
- data/lib/search_engine/ast/raw.rb +41 -0
- data/lib/search_engine/ast/unary_op.rb +43 -0
- data/lib/search_engine/ast.rb +101 -0
- data/lib/search_engine/base/creation.rb +727 -0
- data/lib/search_engine/base/deletion.rb +80 -0
- data/lib/search_engine/base/display_coercions.rb +36 -0
- data/lib/search_engine/base/hydration.rb +312 -0
- data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
- data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
- data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
- data/lib/search_engine/base/index_maintenance.rb +459 -0
- data/lib/search_engine/base/indexing_dsl.rb +255 -0
- data/lib/search_engine/base/joins.rb +479 -0
- data/lib/search_engine/base/model_dsl.rb +472 -0
- data/lib/search_engine/base/presets.rb +43 -0
- data/lib/search_engine/base/pretty_printer.rb +315 -0
- data/lib/search_engine/base/relation_delegation.rb +42 -0
- data/lib/search_engine/base/scopes.rb +113 -0
- data/lib/search_engine/base/updating.rb +92 -0
- data/lib/search_engine/base.rb +38 -0
- data/lib/search_engine/bulk.rb +284 -0
- data/lib/search_engine/cache.rb +33 -0
- data/lib/search_engine/cascade.rb +531 -0
- data/lib/search_engine/cli/doctor.rb +631 -0
- data/lib/search_engine/cli/support.rb +217 -0
- data/lib/search_engine/cli.rb +222 -0
- data/lib/search_engine/client/http_adapter.rb +63 -0
- data/lib/search_engine/client/request_builder.rb +92 -0
- data/lib/search_engine/client/services/base.rb +74 -0
- data/lib/search_engine/client/services/collections.rb +161 -0
- data/lib/search_engine/client/services/documents.rb +214 -0
- data/lib/search_engine/client/services/operations.rb +152 -0
- data/lib/search_engine/client/services/search.rb +190 -0
- data/lib/search_engine/client/services.rb +29 -0
- data/lib/search_engine/client.rb +765 -0
- data/lib/search_engine/client_options.rb +20 -0
- data/lib/search_engine/collection_resolver.rb +191 -0
- data/lib/search_engine/collections_graph.rb +330 -0
- data/lib/search_engine/compiled_params.rb +143 -0
- data/lib/search_engine/compiler.rb +383 -0
- data/lib/search_engine/config/observability.rb +27 -0
- data/lib/search_engine/config/presets.rb +92 -0
- data/lib/search_engine/config/selection.rb +16 -0
- data/lib/search_engine/config/typesense.rb +48 -0
- data/lib/search_engine/config/validators.rb +97 -0
- data/lib/search_engine/config.rb +917 -0
- data/lib/search_engine/console_helpers.rb +130 -0
- data/lib/search_engine/deletion.rb +103 -0
- data/lib/search_engine/dispatcher.rb +125 -0
- data/lib/search_engine/dsl/parser.rb +582 -0
- data/lib/search_engine/engine.rb +167 -0
- data/lib/search_engine/errors.rb +290 -0
- data/lib/search_engine/filters/sanitizer.rb +189 -0
- data/lib/search_engine/hydration/materializers.rb +808 -0
- data/lib/search_engine/hydration/selection_context.rb +96 -0
- data/lib/search_engine/indexer/batch_planner.rb +76 -0
- data/lib/search_engine/indexer/bulk_import.rb +626 -0
- data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
- data/lib/search_engine/indexer/retry_policy.rb +103 -0
- data/lib/search_engine/indexer.rb +747 -0
- data/lib/search_engine/instrumentation.rb +308 -0
- data/lib/search_engine/joins/guard.rb +202 -0
- data/lib/search_engine/joins/resolver.rb +95 -0
- data/lib/search_engine/logging/color.rb +78 -0
- data/lib/search_engine/logging/format_helpers.rb +92 -0
- data/lib/search_engine/logging/partition_progress.rb +53 -0
- data/lib/search_engine/logging_subscriber.rb +388 -0
- data/lib/search_engine/mapper.rb +785 -0
- data/lib/search_engine/multi.rb +286 -0
- data/lib/search_engine/multi_result.rb +186 -0
- data/lib/search_engine/notifications/compact_logger.rb +675 -0
- data/lib/search_engine/observability.rb +162 -0
- data/lib/search_engine/operations.rb +58 -0
- data/lib/search_engine/otel.rb +227 -0
- data/lib/search_engine/partitioner.rb +128 -0
- data/lib/search_engine/ranking_plan.rb +118 -0
- data/lib/search_engine/registry.rb +158 -0
- data/lib/search_engine/relation/compiler.rb +711 -0
- data/lib/search_engine/relation/deletion.rb +37 -0
- data/lib/search_engine/relation/dsl/filters.rb +624 -0
- data/lib/search_engine/relation/dsl/selection.rb +240 -0
- data/lib/search_engine/relation/dsl.rb +903 -0
- data/lib/search_engine/relation/dx/dry_run.rb +59 -0
- data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
- data/lib/search_engine/relation/dx.rb +231 -0
- data/lib/search_engine/relation/materializers.rb +118 -0
- data/lib/search_engine/relation/options.rb +138 -0
- data/lib/search_engine/relation/state.rb +274 -0
- data/lib/search_engine/relation/updating.rb +44 -0
- data/lib/search_engine/relation.rb +623 -0
- data/lib/search_engine/result.rb +664 -0
- data/lib/search_engine/schema.rb +1083 -0
- data/lib/search_engine/sources/active_record_source.rb +185 -0
- data/lib/search_engine/sources/base.rb +62 -0
- data/lib/search_engine/sources/lambda_source.rb +55 -0
- data/lib/search_engine/sources/sql_source.rb +196 -0
- data/lib/search_engine/sources.rb +71 -0
- data/lib/search_engine/stale_rules.rb +160 -0
- data/lib/search_engine/test/minitest_assertions.rb +57 -0
- data/lib/search_engine/test/offline_client.rb +134 -0
- data/lib/search_engine/test/rspec_matchers.rb +77 -0
- data/lib/search_engine/test/stub_client.rb +201 -0
- data/lib/search_engine/test.rb +66 -0
- data/lib/search_engine/test_autoload.rb +8 -0
- data/lib/search_engine/update.rb +35 -0
- data/lib/search_engine/version.rb +7 -0
- data/lib/search_engine.rb +332 -0
- data/lib/tasks/search_engine.rake +501 -0
- data/lib/tasks/search_engine_doctor.rake +16 -0
- metadata +225 -0
|
@@ -0,0 +1,664 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
|
|
4
|
+
# Result wraps a Typesense search response and exposes hydrated hits.
|
|
5
|
+
#
|
|
6
|
+
# Hydration converts each hit's document into either an instance of the
|
|
7
|
+
# provided model class or a generic OpenStruct when no class is available.
|
|
8
|
+
#
|
|
9
|
+
# - Enumeration yields hydrated objects (includes Enumerable)
|
|
10
|
+
# - Metadata readers: {#found}, {#out_of}, {#facets}, {#raw}
|
|
11
|
+
# - Selection is respected implicitly by hydrating only keys present in the
|
|
12
|
+
# returned document; no missing attributes are synthesized.
|
|
13
|
+
#
|
|
14
|
+
# Unknown collections: when +klass+ is +nil+, hydration falls back to
|
|
15
|
+
# OpenStruct.
|
|
16
|
+
class Result
|
|
17
|
+
include Enumerable
|
|
18
|
+
|
|
19
|
+
# Immutable lightweight group record for grouped responses.
|
|
20
|
+
#
|
|
21
|
+
# @!attribute [r] key
|
|
22
|
+
# @return [Hash{String=>Object}] mapping of field name to group value
|
|
23
|
+
# @!attribute [r] hits
|
|
24
|
+
# @return [Array<Object>] hydrated hits within the group
|
|
25
|
+
# @!attribute [r] size
|
|
26
|
+
# @return [Integer] number of hits in the group
|
|
27
|
+
class Group
  attr_reader :key, :hits

  # Snapshot the group key and hits, then freeze the instance.
  #
  # Both arguments are copied before being frozen so the caller's own
  # Hash/Array is never frozen as a side effect.
  #
  # @param key [Hash{String=>Object}, nil] +nil+ is treated as an empty Hash
  # @param hits [Array<Object>, nil] coerced with Kernel#Array
  def initialize(key:, hits:)
    @key = (key || {}).dup.freeze
    # Array(hits) returns the very same object when +hits+ is already an
    # Array, so duplicate it before freezing.
    @hits = Array(hits).dup.freeze
    freeze
  end

  # @return [Integer] number of hits in the group
  def size
    @hits.size
  end

  # @return [String]
  def inspect
    "#<SearchEngine::Result::Group key=#{key.inspect} size=#{size}>"
  end

  # Two groups are equal when both their key and their hits are equal.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    other.is_a?(Group) && other.key == key && other.hits == hits
  end
  # Keep Hash/Set membership consistent with #==.
  alias eql? ==

  # @return [Integer] hash code derived from the same fields #== compares
  def hash
    [self.class, key, hits].hash
  end
end
|
|
52
|
+
|
|
53
|
+
# @return [Array<Object>] hydrated hits (frozen internal array)
|
|
54
|
+
# @return [Integer] number of documents that matched the search
|
|
55
|
+
# @return [Integer] number of documents searched
|
|
56
|
+
# @return [Array<Hash>, nil] facet counts as returned by Typesense
|
|
57
|
+
# @return [Hash] raw Typesense response (unmodified)
|
|
58
|
+
attr_reader :hits, :found, :out_of, :raw
|
|
59
|
+
|
|
60
|
+
# Build a new result wrapper.
|
|
61
|
+
#
|
|
62
|
+
# @param raw [Hash] Parsed Typesense response ("hits"/"grouped_hits", "found", "out_of", "facet_counts")
|
|
63
|
+
# @param klass [Class, nil] Optional model class used to hydrate each document
|
|
64
|
+
# @param selection [Hash, nil] Optional selection context for strict missing checks
|
|
65
|
+
# @param facets [Hash, nil] Optional facets context carrying declared facet queries/labels
|
|
66
|
+
# @param highlight [Hash, nil] Optional highlight context carrying configured tags and knobs
|
|
67
|
+
def initialize(raw, klass: nil, selection: nil, facets: nil, highlight: nil)
  require 'ostruct'

  @raw = raw || {}
  @found = @raw['found'] || @raw[:found]
  @out_of = @raw['out_of'] || @raw[:out_of]
  @klass = klass
  # Context ivars are only assigned when a context was actually supplied;
  # the raw facet_counts stay in @raw and are exposed through #facets.
  @selection_ctx = selection if selection
  @facets_ctx = facets if facets
  @highlight_ctx = highlight if highlight

  @__groups_memo = nil
  # Parsed eagerly: the instance is frozen below, so nothing can be memoized later.
  @__facets_parsed_memo = build_parsed_facets(@raw, @facets_ctx).freeze

  if grouped?
    built = build_groups
    @__groups_memo = built.freeze
    # For grouped responses #hits exposes the first hit of every group.
    @hits = built.map { |group| group.hits.first }.compact.freeze
    instrument_group_parse(built)
  else
    rows = Array(@raw['hits'] || @raw[:hits]).map { |hit| symbolize_hit(hit) }
    collected = rows.each_with_object([]) do |row, acc|
      document = row[:document]
      next unless document

      record = hydrate(document)
      attach_highlighting!(record, row)
      acc << record
    end
    @hits = collected.freeze
  end

  freeze
end
|
|
104
|
+
|
|
105
|
+
# Iterate over hydrated hits.
|
|
106
|
+
# @yieldparam obj [Object] hydrated object
|
|
107
|
+
# @return [Enumerator] when no block is given
|
|
108
|
+
def each(&block)
  # Without a block, Array#each hands back an Enumerator over the hits.
  block ? @hits.each(&block) : @hits.each
end
|
|
113
|
+
|
|
114
|
+
# @return [Array<Object>] a shallow copy of hydrated hits
|
|
115
|
+
def to_a
  # A fresh, unfrozen Array; the hit objects themselves are shared.
  Array.new(@hits)
end
|
|
118
|
+
|
|
119
|
+
# @return [Integer]
|
|
120
|
+
def size
  # Counts the hydrated hits held by this result.
  @hits.length
end
|
|
123
|
+
|
|
124
|
+
# @return [Boolean]
|
|
125
|
+
def empty?
  # True when no hit was hydrated.
  @hits.size.zero?
end
|
|
128
|
+
|
|
129
|
+
# Whether this result represents a grouped response.
|
|
130
|
+
# Detection prefers presence and Array-ness of a grouped section.
|
|
131
|
+
# @return [Boolean]
|
|
132
|
+
def grouped?
  # String key first, Symbol key as fallback; only an Array counts as grouped.
  (@raw['grouped_hits'] || @raw[:grouped_hits]).is_a?(Array)
end
|
|
136
|
+
|
|
137
|
+
# Groups for grouped responses. Returns an empty Array when not grouped.
|
|
138
|
+
# The returned Array is frozen; each Group is immutable.
|
|
139
|
+
# @return [Array<SearchEngine::Result::Group>]
|
|
140
|
+
def groups
  # Hand out the memoized Array itself: it was frozen in the constructor, so
  # the grouped path now returns a frozen Array just like the ungrouped path,
  # without allocating a copy on every call.
  grouped? ? @__groups_memo : [].freeze
end
|
|
145
|
+
|
|
146
|
+
# Enumerate over groups. Returns an Enumerator when no block given.
|
|
147
|
+
# Empty enumerator when not grouped.
|
|
148
|
+
# @yieldparam group [SearchEngine::Result::Group]
|
|
149
|
+
# @return [Enumerator]
|
|
150
|
+
def each_group(&block)
  # Blockless calls get a lazy Enumerator bound to this method.
  block ? groups.each(&block) : enum_for(:each_group)
end
|
|
155
|
+
|
|
156
|
+
# Number of groups present in this result page.
|
|
157
|
+
# When grouping is disabled, returns 0.
|
|
158
|
+
# @return [Integer]
|
|
159
|
+
# @example
|
|
160
|
+
# res = SearchEngine::Product.group_by(:brand_id, limit: 1).execute
|
|
161
|
+
# res.groups_count #=> number of groups in this page
|
|
162
|
+
def groups_count
  # Page-scoped count; ungrouped results report zero groups.
  grouped? ? @__groups_memo.size : 0
end
|
|
167
|
+
|
|
168
|
+
# Total documents found by the backend for this query (not page-limited).
|
|
169
|
+
# Reads the backend-provided scalar (e.g., Typesense's `found`).
|
|
170
|
+
# @return [Integer, nil]
|
|
171
|
+
# @example
|
|
172
|
+
# res = SearchEngine::Product.group_by(:brand_id, limit: 1).execute
|
|
173
|
+
# res.total_found #=> total documents found
|
|
174
|
+
def total_found
  # Same scalar the constructor read from the response's "found" key.
  @found
end
|
|
177
|
+
|
|
178
|
+
# Total number of groups for this query.
|
|
179
|
+
# If the backend exposes a total groups count, returns that value.
|
|
180
|
+
# Otherwise, falls back to the number of groups in the current page
|
|
181
|
+
# (i.e., {#groups_count}). When grouping is disabled, returns +nil+.
|
|
182
|
+
# @return [Integer, nil]
|
|
183
|
+
# @example
|
|
184
|
+
# res = SearchEngine::Product.group_by(:brand_id, limit: 1).execute
|
|
185
|
+
# res.total_groups #=> global groups if available; else groups_count (page-scoped)
|
|
186
|
+
def total_groups
  return unless grouped?

  # An Integer is never falsy, so +||+ only falls through when the payload
  # carries no usable total.
  detect_total_groups_from_raw(@raw) || @__groups_memo.size
end
|
|
192
|
+
|
|
193
|
+
# First group in this page or +nil+ when there are no groups.
|
|
194
|
+
# Returns a reference to the memoized group; no new objects are allocated.
|
|
195
|
+
# @return [SearchEngine::Result::Group, nil]
|
|
196
|
+
def first_group
  # Evaluates to nil when the response is not grouped.
  @__groups_memo.first if grouped?
end
|
|
201
|
+
|
|
202
|
+
# Last group in this page or +nil+ when there are no groups.
|
|
203
|
+
# Returns a reference to the memoized group; no new objects are allocated.
|
|
204
|
+
# @return [SearchEngine::Result::Group, nil]
|
|
205
|
+
def last_group
  # Evaluates to nil when the response is not grouped.
  @__groups_memo.last if grouped?
end
|
|
210
|
+
|
|
211
|
+
# Facets helpers
|
|
212
|
+
# ---------------
|
|
213
|
+
#
|
|
214
|
+
# Parse Typesense facet_counts into a stable Hash mapping field => [ { value:, count:, highlighted:, label: } ].
|
|
215
|
+
# Returns an empty Hash when no facets are present.
|
|
216
|
+
# Arrays/hashes in the returned structure are defensive copies and can be safely mutated by callers.
|
|
217
|
+
# @return [Hash{String=>Array<Hash{Symbol=>Object}>}]
|
|
218
|
+
def facets
  # Copy the value Hashes as well as the containers. The memoized per-field
  # Arrays are frozen and their Hashes are shared, so a shallow +dup+ would
  # either raise FrozenError on list mutation or let callers corrupt the memo.
  parse_facets.each_with_object({}) do |(field, values), copy|
    copy[field] = values.map(&:dup)
  end
end
|
|
222
|
+
|
|
223
|
+
# Facet values for a given field name.
|
|
224
|
+
# @param name [#to_s]
|
|
225
|
+
# @return [Array<Hash{Symbol=>Object}>]
|
|
226
|
+
def facet_values(name)
  # Duplicate every value Hash, not just the Array, so callers cannot mutate
  # the memoized facet data through the returned list.
  (parse_facets[name.to_s] || []).map(&:dup)
end
|
|
231
|
+
|
|
232
|
+
# Optional convenience: map of value => count for a given facet field.
|
|
233
|
+
# @param name [#to_s]
|
|
234
|
+
# @return [Hash{Object=>Integer}]
|
|
235
|
+
def facet_value_map(name)
  counts = {}
  # Later entries win when the same value appears more than once.
  facet_values(name).each { |facet| counts[facet[:value]] = facet[:count] }
  counts
end
|
|
238
|
+
|
|
239
|
+
private
|
|
240
|
+
|
|
241
|
+
# Per-hit highlighting mixin: added onto hydrated objects.
|
|
242
|
+
module HitHighlighting
  # @return [Hash{String=>Array<Hash{Symbol=>Object}>}] normalized highlights by field
  #   (a shallow copy; empty Hash when the hit carries no highlights)
  def highlights
    map = instance_variable_get(:@__se_highlights_map__)
    map ? map.dup : {}
  end

  # Return a sanitized HTML snippet or full highlighted value for a field.
  #
  # @param field [Symbol, String]
  # @param full [Boolean] when true, prefer full highlighted value
  # @return [String, nil] HTML-safe string (SafeBuffer when ActiveSupport is
  #   present); +nil+ when the hit has no highlight entry for +field+
  def snippet_for(field, full: false)
    map = instance_variable_get(:@__se_highlights_map__)
    return nil unless map && field

    list = map[field.to_s]
    return nil unless Array(list).any?

    ctx = instance_variable_get(:@__se_highlight_ctx__)
    # Prefer the entry kind that was asked for, else take whatever exists.
    entry = if full
              list.find { |h| h[:snippet] == false } || list.first
            else
              list.find { |h| h[:snippet] == true } || list.first
            end
    return nil unless entry

    value = entry[:value].to_s
    threshold = ctx && ctx[:snippet_threshold]

    # Server-provided snippet, full value requested, or no threshold set:
    # return the sanitized value as-is.
    if entry[:snippet] == true || full || threshold.nil?
      html = SearchEngine::Result.send(:sanitize_highlight_html, value, ctx)
      return SearchEngine::Result.send(:wrap_safe_if_rails, html)
    end

    # Compute a minimal snippet when the server didn't provide one. The raw
    # value is passed on purpose: compute_snippet_from_full sanitizes its own
    # result, so handing it already-escaped HTML would escape entities twice
    # (e.g. "&" -> "&amp;amp;").
    matched = Array(entry[:matched_tokens]).map(&:to_s)
    snippet = SearchEngine::Result.send(:compute_snippet_from_full, value, matched, ctx)
    SearchEngine::Result.send(:wrap_safe_if_rails, snippet)
  end
end
|
|
286
|
+
|
|
287
|
+
# Return the facet structure memoized by the constructor, or a frozen empty
# Hash when no memo is set.
def parse_facets
  return @__facets_parsed_memo if @__facets_parsed_memo

  {}.freeze
end
|
|
290
|
+
|
|
291
|
+
# Turn the raw "facet_counts" section into a Hash of field name => frozen
# list of value Hashes, annotating labels from +ctx[:queries]+ when present.
#
# @param raw [Hash, nil] raw response
# @param ctx [Hash, nil] facets context
# @return [Hash{String=>Array<Hash>}]
def build_parsed_facets(raw, ctx)
  facet_entries = Array((raw && (raw['facet_counts'] || raw[:facet_counts])) || [])

  facet_entries.each_with_object({}) do |entry, parsed|
    field = (entry['field_name'] || entry[:field_name]).to_s
    # Entries without a field name are ignored.
    next if field.empty?

    list = build_facet_value_list(Array(entry['counts'] || entry[:counts]))

    declared = ctx ? Array(ctx[:queries]) : []
    if declared.any?
      relevant = declared.select { |query| (query[:field] || query['field']).to_s == field }
      annotate_labels_for_field!(list, relevant) if relevant.any?
    end

    parsed[field] = list.freeze
  end
end
|
|
311
|
+
|
|
312
|
+
# Normalize raw facet count entries (String or Symbol keys) into Hashes with
# :value, :count, :highlighted and a blank :label.
#
# @param values [Array<Hash>, nil]
# @return [Array<Hash{Symbol=>Object}>]
def build_facet_value_list(values)
  Array(values).map do |item|
    raw_count = item['count'] || item[:count]
    {
      value: item['value'] || item[:value],
      # A missing count becomes 0; anything else goes through Kernel#Integer.
      count: Integer(raw_count || 0),
      highlighted: item['highlighted'] || item[:highlighted],
      label: nil
    }
  end
end
|
|
320
|
+
|
|
321
|
+
# Set :label on every facet value whose stringified :value equals the
# "expr" of one of +queries+; values without a match get a nil label.
#
# @param list [Array<Hash>] facet value Hashes (mutated in place)
# @param queries [Array<Hash>] query descriptors with expr/label keys
# @return [Array<Hash>] +list+
def annotate_labels_for_field!(list, queries)
  list.each do |facet|
    wanted = facet[:value].to_s
    hit = queries.find { |query| (query[:expr] || query['expr']).to_s == wanted }
    facet[:label] = hit ? (hit[:label] || hit['label'] || nil) : nil
  end
end
|
|
328
|
+
|
|
329
|
+
# Attempt to read a total groups count from the raw payload using common keys.
|
|
330
|
+
# Returns +nil+ when the backend does not provide a value.
|
|
331
|
+
# @param raw [Hash]
|
|
332
|
+
# @return [Integer, nil]
|
|
333
|
+
def detect_total_groups_from_raw(raw)
  keys = %w[total_groups group_count groups_count found_groups total_group_count total_grouped total_group_matches]
  keys.each do |key|
    val = raw[key] || raw[key.to_sym]
    return val if val.is_a?(Integer)
    # Explicit base 10: without it Kernel#Integer reads a leading zero as an
    # octal prefix ("010" => 8) and raises on digits like "08".
    return Integer(val, 10) if val.is_a?(String) && val.match?(/\A-?\d+\z/)
  end
  nil
rescue StandardError
  # Best effort: an unreadable payload (e.g. not a Hash) means "no total".
  nil
end
|
|
344
|
+
|
|
345
|
+
# Hydrate a single Typesense document (Hash) into a Ruby object.
|
|
346
|
+
#
|
|
347
|
+
# If +@klass+ is present, an instance of that class is allocated and each
|
|
348
|
+
# document key is assigned as an instance variable on the object. No reader
|
|
349
|
+
# methods are generated; callers may access via the model's own readers (if
|
|
350
|
+
# defined) or via reflection. Unknown keys are permitted.
|
|
351
|
+
#
|
|
352
|
+
# If +@klass+ is +nil+, an OpenStruct is created with the same keys.
|
|
353
|
+
#
|
|
354
|
+
# @param doc [Hash]
|
|
355
|
+
# @return [Object]
|
|
356
|
+
def hydrate(doc)
  present = doc.is_a?(Hash) ? doc.keys.map(&:to_s) : []
  enforce_strict_missing_if_needed!(present)

  # No model class: generic OpenStruct.
  return OpenStruct.new(doc) unless @klass
  # A model-provided factory takes precedence over ivar assignment.
  return @klass.from_document(doc) if @klass.respond_to?(:from_document)

  instance = @klass.new
  doc.each { |key, value| instance.instance_variable_set(ivar_name(key), value) }
  instance
end
|
|
373
|
+
|
|
374
|
+
# Build Group objects from the raw grouped response.
|
|
375
|
+
# Preserves backend order and hydrates documents once.
|
|
376
|
+
# @return [Array<SearchEngine::Result::Group>]
|
|
377
|
+
def build_groups
  sections = @raw['grouped_hits'] || @raw[:grouped_hits] || []
  field_names = group_by_fields_from_raw

  sections.map do |section|
    key = build_group_key_hash(field_names, Array(section['group_key'] || section[:group_key]))

    members = Array(section['hits'] || section[:hits]).each_with_object([]) do |hit, acc|
      document = hit && (hit['document'] || hit[:document])
      # Sub-hits without a document are skipped.
      next unless document

      record = hydrate(document)
      attach_highlighting!(record, symbolize_hit(hit))
      acc << record
    end

    Group.new(key: key, hits: members)
  end
end
|
|
399
|
+
|
|
400
|
+
# Derive group_by fields from echoed request params when available.
|
|
401
|
+
# Returns an Array of field names (Strings). Empty when unknown.
|
|
402
|
+
def group_by_fields_from_raw
  echoed = @raw['request_params'] || @raw[:request_params] || @raw['search_params'] || @raw[:search_params]
  return [] unless echoed

  spec = echoed['group_by'] || echoed[:group_by]
  # Only a non-blank String can name fields.
  return [] if !spec.is_a?(String) || spec.strip.empty?

  spec.split(',').map(&:strip).reject(&:empty?)
end
|
|
411
|
+
|
|
412
|
+
# Build a Hash mapping field names to coerced group key values.
|
|
413
|
+
# Falls back to a single-field synthetic key when fields are unknown.
|
|
414
|
+
def build_group_key_hash(fields, values)
  return {} if values.empty?

  if fields.any?
    # Pair each known field with the value at the same position; fields
    # beyond the number of values are dropped.
    return fields.take(values.size).zip(values).each_with_object({}) do |(field, raw_value), key|
      key[field.to_s] = coerce_group_value(raw_value)
    end
  end

  # Field names unknown: synthesize "group" or "group_<index>" keys.
  return { 'group' => coerce_group_value(values.first) } if values.size == 1

  values.each_with_index.each_with_object({}) do |(raw_value, position), key|
    key["group_#{position}"] = coerce_group_value(raw_value)
  end
end
|
|
435
|
+
|
|
436
|
+
# Best-effort coercion for common scalar types.
|
|
437
|
+
def coerce_group_value(value)
  return nil if value.nil?

  return true if value == true || value.to_s == 'true'
  return false if value == false || value.to_s == 'false'
  # Numeric coercion only applies to Strings.
  return value unless value.is_a?(String)

  # Explicit base 10: without it Kernel#Integer treats a leading zero as an
  # octal prefix, so "010" became 8 and "08540" raised ArgumentError.
  return Integer(value, 10) if value.match?(/\A-?\d+\z/)
  return Float(value) if value.match?(/\A-?\d+\.\d+\z/)

  value
end
|
|
447
|
+
|
|
448
|
+
# Map a document key to its "@key" instance-variable name, caching the
# String per key.
def ivar_name(key)
  cache = (@ivar_prefix_cache ||= {})
  cache.fetch(key) { cache[key] = "@#{key}" }
end
|
|
452
|
+
|
|
453
|
+
# Emit the 'search_engine.result.grouped_parsed' event with the number of
# groups and the average group size.
#
# @param groups [Array<SearchEngine::Result::Group>]
def instrument_group_parse(groups)
  group_total = groups.size
  hit_total = groups.sum(&:size)
  average = group_total.positive? ? hit_total.to_f / group_total : 0.0

  # The collection name is optional; any failure resolving it yields nil.
  collection =
    begin
      @klass.collection if @klass.respond_to?(:collection)
    rescue StandardError
      nil
    end

  SearchEngine::Instrumentation.instrument(
    'search_engine.result.grouped_parsed',
    collection: collection,
    groups_count: group_total,
    avg_group_size: average
  )
end
|
|
470
|
+
|
|
471
|
+
# Enforce strict-missing behavior when enabled.
|
|
472
|
+
# Computes missing = requested_root − present_keys and raises when non-empty.
|
|
473
|
+
def enforce_strict_missing_if_needed!(present_keys)
  ctx = @selection_ctx || {}
  # Only an explicit +true+ turns strict mode on.
  return unless ctx[:strict_missing] == true

  requested = Array(ctx[:requested_root]).map(&:to_s).reject(&:empty?)
  return if requested.empty?

  missing = requested - present_keys
  return if missing.empty?

  model_name =
    begin
      @klass&.name || 'Object'
    rescue StandardError
      'Object'
    end

  # Show at most three field names and summarize the remainder.
  shown = missing.first(3)
  extra = missing.size - shown.size
  listing = shown.map { |field| %("#{field}") }.join(', ')
  listing += " (+#{extra} more)" if extra.positive?

  msg = 'MissingField: requested fields absent for ' \
        "#{model_name}: #{listing}. " \
        'They may be excluded by selection or upstream Typesense mapping. ' \
        'Fix by adjusting select/exclude/reselect, relaxing strictness, or ' \
        'ensuring the mapping includes these fields.'
  raise SearchEngine::Errors::MissingField.new(
    msg,
    hint: 'Adjust select/exclude or disable strict_missing to avoid raising.',
    doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/field-selection#strict-vs-lenient-selection',
    details: { requested: requested, present_keys: present_keys }
  )
end
|
|
507
|
+
|
|
508
|
+
# --- Highlight internals -------------------------------------------------
|
|
509
|
+
|
|
510
|
+
# Shallow copy of a hit Hash with String keys converted to Symbols; other
# keys and all values are kept as they are. Non-Hash input gives {}.
def symbolize_hit(h)
  return {} unless h.is_a?(Hash)

  h.each_with_object({}) do |(name, payload), copy|
    copy[name.is_a?(String) ? name.to_sym : name] = payload
  end
rescue StandardError
  {}
end
|
|
519
|
+
|
|
520
|
+
# Extend a hydrated object with HitHighlighting and store its normalized
# highlights plus the highlight context on it. Always returns +obj+; any
# StandardError raised along the way is swallowed.
def attach_highlighting!(obj, hit_entry)
  raw_highlights = Array(hit_entry[:highlights])
  return obj if raw_highlights.empty?

  normalized = normalize_highlights(raw_highlights)
  unless normalized.empty?
    # Extend only once per object.
    obj.extend(HitHighlighting) unless obj.singleton_class.include?(HitHighlighting)
    obj.instance_variable_set(:@__se_highlights_map__, normalized)
    obj.instance_variable_set(:@__se_highlight_ctx__, safe_highlight_ctx)
  end
  obj
rescue StandardError
  obj
end
|
|
535
|
+
|
|
536
|
+
# Build a fresh Hash from the highlight context holding only the known keys,
# with field lists and tags stringified. Returns {} for a missing/non-Hash
# context or on any StandardError.
def safe_highlight_ctx
  source = @highlight_ctx || {}
  return {} unless source.is_a?(Hash)

  clean = {}
  %i[fields full_fields].each do |name|
    clean[name] = Array(source[name]).map(&:to_s).reject(&:empty?) if source[name]
  end
  %i[start_tag end_tag].each do |name|
    clean[name] = source[name].to_s if source[name]
  end
  # These two are copied whenever the key exists, even with a nil value.
  %i[affix_tokens snippet_threshold].each do |name|
    clean[name] = source[name] if source.key?(name)
  end
  clean
rescue StandardError
  {}
end
|
|
551
|
+
|
|
552
|
+
# Group raw highlight entries by field name.
#
# Each entry becomes { value:, matched_tokens:, snippet: }, where :value is
# the entry's snippet (or its full value when no snippet is present) and
# :snippet tells which of the two it was.
#
# @param list [Array<Hash>, nil] raw highlight entries (String or Symbol keys)
# @return [Hash{String=>Array<Hash{Symbol=>Object}>}] {} on any StandardError
def normalize_highlights(list)
  result = {}
  Array(list).each do |h|
    # Skip malformed entries instead of letting the blanket rescue below
    # throw away every other highlight of the hit.
    next unless h.is_a?(Hash)

    field = (h['field'] || h[:field]).to_s
    next if field.empty?

    snippet = h['snippet'] || h[:snippet]
    value = snippet || h['value'] || h[:value]
    tokens = h['matched_tokens'] || h[:matched_tokens] || []

    (result[field] ||= []) << {
      value: value.to_s,
      matched_tokens: Array(tokens).map(&:to_s),
      snippet: !snippet.nil?
    }
  end

  result
rescue StandardError
  {}
end
|
|
576
|
+
|
|
577
|
+
class << self
  # Escape +text+ as HTML while keeping only the highlight tags intact.
  #
  # Allowed tags (the configured pair plus the default <mark> pair) are
  # swapped for placeholders, everything is escaped, then the placeholders
  # are restored as the configured tags.
  #
  # @param text [#to_s]
  # @param ctx [Hash, nil] may carry :start_tag / :end_tag
  # @return [String] escaped HTML; '' for empty input
  def sanitize_highlight_html(text, ctx)
    # Loaded first so the rescue below can always escape.
    require 'cgi'

    s = text.to_s
    return '' if s.empty?

    start_tag = ((ctx && ctx[:start_tag]) || '<mark>').to_s
    end_tag = ((ctx && ctx[:end_tag]) || '</mark>').to_s
    placeholders = {
      start: "\u0001__SE_HL_START__\u0001",
      end: "\u0001__SE_HL_END__\u0001"
    }

    # Normalize known tokens from server (<mark>) and configured ones. Empty
    # tags are dropped: gsub('') would insert a placeholder between every
    # character and stop the real tags from matching.
    start_tokens = [start_tag, '<mark>'].uniq.reject(&:empty?)
    end_tokens = [end_tag, '</mark>'].uniq.reject(&:empty?)
    start_tokens.each { |tok| s = s.gsub(tok, placeholders[:start]) }
    end_tokens.each { |tok| s = s.gsub(tok, placeholders[:end]) }

    escaped = CGI.escapeHTML(s)

    # Restore configured tags; any other tags remain escaped
    escaped = escaped.gsub(placeholders[:start], start_tag)
    escaped.gsub(placeholders[:end], end_tag)
  rescue StandardError
    # Fail closed: HitHighlighting#snippet_for passes this result to
    # wrap_safe_if_rails, so raw text must never be returned from here.
    CGI.escapeHTML(text.to_s.scrub)
  end

  # Build a minimal snippet around first matched token when server didn't provide one
  #
  # @param html [#to_s] highlighted value to cut the snippet from
  # @param matched_tokens [Array<String>]
  # @param ctx [Hash, nil] may carry :affix_tokens, :start_tag, :end_tag
  # @return [String] sanitized snippet, or the sanitized input when no
  #   snippet can be built
  def compute_snippet_from_full(html, matched_tokens, ctx)
    plain = strip_tags_preserving_space(html)
    return sanitize_highlight_html(html, ctx) if plain.empty?

    tokens = tokenize(plain)
    return sanitize_highlight_html(html, ctx) if tokens.empty?

    # First non-blank matched token; located case-insensitively below
    target = matched_tokens.find { |t| !t.to_s.strip.empty? }
    return sanitize_highlight_html(html, ctx) unless target

    target_down = target.downcase
    idx = tokens.index { |t| t.downcase.include?(target_down) } || 0
    window = (ctx && ctx[:affix_tokens]).to_i
    # Missing or non-positive window falls back to 8 tokens per side.
    window = 8 unless window.positive?
    left = [idx - window, 0].max
    right = [idx + window, tokens.size - 1].min
    segment = tokens[left..right].join(' ')
    # Wrap the first occurrence with configured tags
    start_tag = (ctx && ctx[:start_tag]) || '<mark>'
    end_tag = (ctx && ctx[:end_tag]) || '</mark>'
    highlighted = segment.sub(/(#{Regexp.escape(target)})/i, "#{start_tag}\\1#{end_tag}")
    sanitize_highlight_html(highlighted, ctx)
  rescue StandardError
    sanitize_highlight_html(html, ctx)
  end

  # Remove anything that looks like an HTML tag (<...>) from +html+.
  #
  # @param html [#to_s]
  # @return [String]
  def strip_tags_preserving_space(html)
    html.to_s.gsub(/<[^>]*>/, '')
  rescue StandardError
    html.to_s
  end

  # Split +text+ on whitespace runs, dropping empty tokens (leading
  # whitespace produces one).
  #
  # @param text [#to_s]
  # @return [Array<String>]
  def tokenize(text)
    text.to_s.split(/\s+/).reject(&:empty?)
  end

  # Wrap +html+ in ActiveSupport::SafeBuffer when that class is loaded;
  # otherwise return it as a plain String.
  #
  # @param html [#to_s]
  # @return [String]
  def wrap_safe_if_rails(html)
    if defined?(ActiveSupport::SafeBuffer)
      ActiveSupport::SafeBuffer.new(html.to_s)
    else
      html.to_s
    end
  end
end
|
|
663
|
+
end
|
|
664
|
+
end
|