search-engine-for-typesense 30.1.6.1 → 30.1.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 228f70410334d2a324508d0d44dfd31aadbcb4a957f28425909108490a3129a2
|
|
4
|
+
data.tar.gz: 89fd3243af9aec91757d9819136ce9b0fd783845ff1ff6e6fe72467106d20ecb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 42e556e2b25d40fd5ba85cb1ae1961223420d6affb6f532936642a48159dc013a8297398cba3724ef798f2cab05a0819140c9038d415b0035c7f9a95b2408e07
|
|
7
|
+
data.tar.gz: ea3aba9866079ade74da2712d73f6e2a7169cc0a013dff5814ccbc7034789e9f2d6ae38979e49761210e46e4a61f329fde3ae9bb356f02b643b14899a0d24d9d
|
|
@@ -96,7 +96,9 @@ module SearchEngine
|
|
|
96
96
|
#
|
|
97
97
|
# @param name [#to_sym]
|
|
98
98
|
# @param type [Object] type descriptor (e.g., :string, :integer)
|
|
99
|
-
# @param index [Boolean, nil] when false, omit from compiled Typesense schema (still
|
|
99
|
+
# @param index [Boolean, nil] when false, omit from compiled Typesense schema (still
|
|
100
|
+
# hydrated/displayed). Fields referenced by an embedding's `from:` are kept in the
|
|
101
|
+
# schema with Typesense-native `"index": false` instead of being omitted entirely.
|
|
100
102
|
# @param locale [String, nil]
|
|
101
103
|
# @param optional [Boolean, nil]
|
|
102
104
|
# @param sort [Boolean, nil]
|
data/lib/search_engine/mapper.rb
CHANGED
|
@@ -566,7 +566,7 @@ module SearchEngine
|
|
|
566
566
|
base_fields.reject! { |fname| fname.to_s.include?('.') }
|
|
567
567
|
required = base_fields.to_set
|
|
568
568
|
opts.each do |fname, o|
|
|
569
|
-
next unless o.is_a?(Hash) && o[:optional]
|
|
569
|
+
next unless o.is_a?(Hash) && (o[:optional] || o[:index] == false)
|
|
570
570
|
|
|
571
571
|
required.delete(fname.to_sym)
|
|
572
572
|
end
|
|
@@ -11,8 +11,10 @@ module SearchEngine
|
|
|
11
11
|
# Perform a vector (semantic / hybrid / ANN) search on an embedding field.
|
|
12
12
|
#
|
|
13
13
|
# Last call wins — Typesense supports a single `vector_query` per search.
|
|
14
|
+
# When +field+ is omitted the sole embedding declared on the model is
|
|
15
|
+
# used automatically; raises when the model has zero or multiple embeddings.
|
|
14
16
|
#
|
|
15
|
-
# @param field [Symbol, String] embedding field name
|
|
17
|
+
# @param field [Symbol, String, nil] embedding field name (auto-resolved when nil)
|
|
16
18
|
# @param k [Integer, nil] number of nearest neighbors
|
|
17
19
|
# @param alpha [Float, nil] hybrid blend weight (0.0 = keyword, 1.0 = vector)
|
|
18
20
|
# @param query [Array<Numeric>, nil] explicit embedding vector
|
|
@@ -24,11 +26,12 @@ module SearchEngine
|
|
|
24
26
|
# @param ef [Integer, nil] HNSW ef override
|
|
25
27
|
# @param flat_search_cutoff [Integer, nil] brute-force threshold
|
|
26
28
|
# @return [SearchEngine::Relation]
|
|
27
|
-
def vector_search(field, k: nil, alpha: nil, query: nil, id: nil,
|
|
29
|
+
def vector_search(field = nil, k: nil, alpha: nil, query: nil, id: nil,
|
|
28
30
|
distance_threshold: nil, queries: nil, weights: nil,
|
|
29
31
|
ef: nil, flat_search_cutoff: nil)
|
|
32
|
+
resolved = resolve_vector_field(field)
|
|
30
33
|
normalized = normalize_vector_search(
|
|
31
|
-
|
|
34
|
+
resolved, k: k, alpha: alpha, query: query, id: id,
|
|
32
35
|
distance_threshold: distance_threshold, queries: queries,
|
|
33
36
|
weights: weights, ef: ef, flat_search_cutoff: flat_search_cutoff
|
|
34
37
|
)
|
|
@@ -38,13 +41,15 @@ module SearchEngine
|
|
|
38
41
|
# Find documents similar to a given document ID.
|
|
39
42
|
#
|
|
40
43
|
# Sugar over `vector_search` with `id:`.
|
|
44
|
+
# When +field+ is omitted the sole embedding declared on the model is
|
|
45
|
+
# used automatically (same resolution as {#vector_search}).
|
|
41
46
|
#
|
|
42
47
|
# @param document_id [#to_s] ID of the reference document
|
|
43
|
-
# @param field [Symbol, String] embedding field name
|
|
48
|
+
# @param field [Symbol, String, nil] embedding field name (auto-resolved when nil)
|
|
44
49
|
# @param k [Integer, nil] number of nearest neighbors
|
|
45
50
|
# @param distance_threshold [Float, nil] max cosine distance
|
|
46
51
|
# @return [SearchEngine::Relation]
|
|
47
|
-
def find_similar(document_id, field
|
|
52
|
+
def find_similar(document_id, field: nil, k: nil, distance_threshold: nil)
|
|
48
53
|
vector_search(field, id: document_id, k: k, distance_threshold: distance_threshold)
|
|
49
54
|
end
|
|
50
55
|
|
|
@@ -297,6 +302,40 @@ module SearchEngine
|
|
|
297
302
|
)
|
|
298
303
|
end
|
|
299
304
|
|
|
305
|
+
# -- Field resolution ---------------------------------------------------
|
|
306
|
+
|
|
307
|
+
# Auto-resolve the embedding field when none is given explicitly.
|
|
308
|
+
# Returns the field as-is when provided, or the sole embedding field
|
|
309
|
+
# declared on the model. Raises when resolution is ambiguous or impossible.
|
|
310
|
+
#
|
|
311
|
+
# @param field [Symbol, String, nil] explicit field or nil for auto-resolution
|
|
312
|
+
# @return [Symbol, String]
|
|
313
|
+
def resolve_vector_field(field)
|
|
314
|
+
return field unless field.nil?
|
|
315
|
+
|
|
316
|
+
embeddings = vector_embeddings_map
|
|
317
|
+
case embeddings.size
|
|
318
|
+
when 1
|
|
319
|
+
embeddings.each_key.first
|
|
320
|
+
when 0
|
|
321
|
+
raise SearchEngine::Errors::InvalidVectorQuery.new(
|
|
322
|
+
"InvalidVectorQuery: cannot auto-resolve embedding field \u2014 " \
|
|
323
|
+
"no embeddings declared on #{klass_name_for_inspect}",
|
|
324
|
+
hint: 'Declare one with `embedding` in your model DSL',
|
|
325
|
+
doc: VECTOR_SEARCH_DOC_URL
|
|
326
|
+
)
|
|
327
|
+
else
|
|
328
|
+
fields_list = embeddings.keys.map { |k| ":#{k}" }.join(', ')
|
|
329
|
+
raise SearchEngine::Errors::InvalidVectorQuery.new(
|
|
330
|
+
"InvalidVectorQuery: cannot auto-resolve embedding field \u2014 " \
|
|
331
|
+
"#{klass_name_for_inspect} has multiple embeddings (#{fields_list})",
|
|
332
|
+
hint: 'Pass the field explicitly: .vector_search(:field_name)',
|
|
333
|
+
doc: VECTOR_SEARCH_DOC_URL,
|
|
334
|
+
details: { available_embeddings: embeddings.keys }
|
|
335
|
+
)
|
|
336
|
+
end
|
|
337
|
+
end
|
|
338
|
+
|
|
300
339
|
# -- Helpers ------------------------------------------------------------
|
|
301
340
|
|
|
302
341
|
def vector_embeddings_map
|
data/lib/search_engine/schema.rb
CHANGED
|
@@ -35,7 +35,7 @@ module SearchEngine
|
|
|
35
35
|
}.freeze
|
|
36
36
|
|
|
37
37
|
FIELD_COMPARE_KEYS = %i[
|
|
38
|
-
type reference async_reference locale sort optional infix facet
|
|
38
|
+
type reference async_reference locale sort optional index infix facet
|
|
39
39
|
embed num_dim hnsw_params
|
|
40
40
|
].freeze
|
|
41
41
|
PHYSICAL_SUFFIX_RE = /_\d{8}_\d{6}_\d{3}\z/
|
|
@@ -44,9 +44,14 @@ module SearchEngine
|
|
|
44
44
|
# Build a Typesense-compatible schema hash from a model class DSL.
|
|
45
45
|
#
|
|
46
46
|
# The output includes only keys that are supported and declared via the DSL.
|
|
47
|
-
# Fields explicitly marked with `index: false` are
|
|
48
|
-
#
|
|
49
|
-
#
|
|
47
|
+
# Fields explicitly marked with `index: false` are generally omitted from
|
|
48
|
+
# the compiled schema (they can still be sent in documents and will be
|
|
49
|
+
# hydrated/displayed, but are not indexed in memory).
|
|
50
|
+
#
|
|
51
|
+
# **Exception:** fields with `index: false` that are referenced by an
|
|
52
|
+
# embedding's `from:` list are included with Typesense-native
|
|
53
|
+
# `"index": false` and `"optional": true` so the embedding model can
|
|
54
|
+
# read their values without keyword-indexing them.
|
|
50
55
|
#
|
|
51
56
|
# @param klass [Class] model class inheriting from {SearchEngine::Base}
|
|
52
57
|
# @return [Hash] frozen schema hash with symbol keys
|
|
@@ -462,6 +467,7 @@ module SearchEngine
|
|
|
462
467
|
attribute_options = klass.respond_to?(:attribute_options) ? (klass.attribute_options || {}) : {}
|
|
463
468
|
references_by_local_key = build_references_by_local_key(klass, client: client)
|
|
464
469
|
async_reference_by_local_key = build_async_reference_by_local_key(klass)
|
|
470
|
+
embed_source_fields = collect_embed_source_fields(attribute_options)
|
|
465
471
|
|
|
466
472
|
fields_array = []
|
|
467
473
|
needs_nested_fields = false
|
|
@@ -470,14 +476,16 @@ module SearchEngine
|
|
|
470
476
|
validate_attribute_type!(attribute_name, type_descriptor)
|
|
471
477
|
|
|
472
478
|
opts = attribute_options[attribute_name.to_sym] || {}
|
|
473
|
-
# Skip non-indexed attributes and any nested fields under a non-indexed base
|
|
479
|
+
# Skip non-indexed attributes and any nested fields under a non-indexed base,
|
|
480
|
+
# unless the field is referenced by an embedding's from: list.
|
|
474
481
|
base_index_false = false
|
|
475
482
|
if attribute_name.to_s.include?('.')
|
|
476
483
|
base_sym = attribute_name.to_s.split('.', 2).first.to_sym
|
|
477
484
|
base_opts = attribute_options[base_sym] || {}
|
|
478
485
|
base_index_false = (base_opts[:index] == false)
|
|
479
486
|
end
|
|
480
|
-
|
|
487
|
+
effectively_unindexed = opts[:index] == false || base_index_false
|
|
488
|
+
next if effectively_unindexed && !embed_source_fields.include?(attribute_name.to_sym)
|
|
481
489
|
|
|
482
490
|
ts_type = typesense_type_for(type_descriptor)
|
|
483
491
|
|
|
@@ -536,14 +544,34 @@ module SearchEngine
|
|
|
536
544
|
%w[object object[]].include?(ts_type)
|
|
537
545
|
end
|
|
538
546
|
|
|
547
|
+
# Collect field names referenced by any embedding's from: list.
|
|
548
|
+
# @param attribute_options [Hash] model attribute options
|
|
549
|
+
# @return [Set<Symbol>] field names used as embedding sources
|
|
550
|
+
def collect_embed_source_fields(attribute_options)
|
|
551
|
+
sources = Set.new
|
|
552
|
+
attribute_options.each_value do |opts|
|
|
553
|
+
next unless opts.is_a?(Hash)
|
|
554
|
+
|
|
555
|
+
embed = opts[:embed]
|
|
556
|
+
next unless embed.is_a?(Hash)
|
|
557
|
+
|
|
558
|
+
from = embed[:from] || embed['from']
|
|
559
|
+
Array(from).each { |f| sources << f.to_sym }
|
|
560
|
+
end
|
|
561
|
+
sources
|
|
562
|
+
end
|
|
563
|
+
private :collect_embed_source_fields
|
|
564
|
+
|
|
539
565
|
def build_field_entry(attribute_name, ts_type, references_by_local_key, async_reference_by_local_key, opts)
|
|
566
|
+
unindexed = opts[:index] == false
|
|
540
567
|
{
|
|
541
568
|
name: attribute_name.to_s,
|
|
542
569
|
type: ts_type,
|
|
543
570
|
**{
|
|
571
|
+
index: unindexed ? false : nil,
|
|
544
572
|
locale: opts[:locale],
|
|
545
573
|
sort: opts[:sort],
|
|
546
|
-
optional: opts[:optional],
|
|
574
|
+
optional: unindexed ? true : opts[:optional],
|
|
547
575
|
infix: opts[:infix],
|
|
548
576
|
facet: opts[:facet],
|
|
549
577
|
reference: references_by_local_key[attribute_name.to_sym],
|
|
@@ -621,7 +649,7 @@ module SearchEngine
|
|
|
621
649
|
entry = { name: fname, type: normalize_type(ftype) }
|
|
622
650
|
entry[:reference] = fref.to_s unless fref.nil? || fref.to_s.strip.empty?
|
|
623
651
|
|
|
624
|
-
%i[locale sort optional infix facet async_reference].each do |k|
|
|
652
|
+
%i[locale sort optional index infix facet async_reference].each do |k|
|
|
625
653
|
val = field[k] || field[k.to_s]
|
|
626
654
|
entry[k] = val unless val.nil?
|
|
627
655
|
end
|