zvec-ruby 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +3 -0
- data/lib/zvec/active_record.rb +50 -3
- data/lib/zvec/collection.rb +245 -8
- data/lib/zvec/data_types.rb +123 -1
- data/lib/zvec/doc.rb +89 -1
- data/lib/zvec/query.rb +102 -5
- data/lib/zvec/ruby_llm.rb +79 -7
- data/lib/zvec/schema.rb +130 -3
- data/lib/zvec/version.rb +1 -1
- data/lib/zvec.rb +12 -0
- data/test/test_edge_cases.rb +380 -0
- data/test/test_helper.rb +9 -0
- data/test/test_validation.rb +11 -11
- data/test/test_version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 968f5cc5abadbda9360603f7fdc1c618b036171cb5f58c0a61e69c22cb511ea0
|
|
4
|
+
data.tar.gz: 9e1113dde47dbcba7d8b9fbdfb247b4f9cd68d6a14e77974541cb660ee45a63e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f722f02332f1f4c95b64e307ca021488c770070953912d077781b9338a41ff124c11e7645b7e50245afcb37b3d289b1c5ea67aac97e65b406a0d893cb03239f8
|
|
7
|
+
data.tar.gz: 555229b48ff063d7f59e1b97de38ad8f06b87befd425d6b05222bebccbb8374394d8b58c275f9db18266a4c5c6cfae860029890c30f194572adb1e0c18be8dd7
|
data/Rakefile
CHANGED
|
@@ -40,6 +40,9 @@ Rake::TestTask.new(:test_pure) do |t|
|
|
|
40
40
|
"test/test_schema.rb",
|
|
41
41
|
"test/test_doc.rb",
|
|
42
42
|
"test/test_query.rb",
|
|
43
|
+
"test/test_type_detection.rb",
|
|
44
|
+
"test/test_validation.rb",
|
|
45
|
+
"test/test_edge_cases.rb",
|
|
43
46
|
"test/test_active_record.rb",
|
|
44
47
|
]
|
|
45
48
|
t.warning = true
|
data/lib/zvec/active_record.rb
CHANGED
|
@@ -5,7 +5,10 @@ module Zvec
|
|
|
5
5
|
module ActiveRecord
|
|
6
6
|
# Rails concern that adds vector search capabilities to ActiveRecord models.
|
|
7
7
|
#
|
|
8
|
-
#
|
|
8
|
+
# When included in a model, call +vectorize+ to configure which text field
|
|
9
|
+
# to embed, the vector dimension, and the embedding function.
|
|
10
|
+
#
|
|
11
|
+
# @example Basic usage
|
|
9
12
|
# class Article < ApplicationRecord
|
|
10
13
|
# include Zvec::ActiveRecord::Vectorize
|
|
11
14
|
#
|
|
@@ -15,13 +18,30 @@ module Zvec
|
|
|
15
18
|
# embed_with: ->(text) { OpenAI.embed(text) }
|
|
16
19
|
# end
|
|
17
20
|
#
|
|
18
|
-
#
|
|
19
|
-
#
|
|
21
|
+
# @example Searching
|
|
22
|
+
# Article.vector_search("Ruby programming", top_k: 5)
|
|
23
|
+
# Article.vector_search([0.1, 0.2, ...], top_k: 5, embed: false)
|
|
24
|
+
#
|
|
25
|
+
# @example Instance methods
|
|
26
|
+
# article.zvec_update_embedding! # re-embed and store
|
|
27
|
+
# article.zvec_remove_embedding! # remove from vector store
|
|
28
|
+
# article.zvec_embedding # fetch stored embedding doc
|
|
20
29
|
#
|
|
21
30
|
module Vectorize
|
|
22
31
|
extend ActiveSupport::Concern
|
|
23
32
|
|
|
24
33
|
class_methods do
|
|
34
|
+
# Configure vector search for this model.
|
|
35
|
+
#
|
|
36
|
+
# @param field [String, Symbol] the text field to embed
|
|
37
|
+
# @param dimensions [Integer] the vector dimension
|
|
38
|
+
# @param prefix [String, nil] collection prefix (defaults to table_name)
|
|
39
|
+
# @param embed_with [Proc, nil] a callable that takes text and returns
|
|
40
|
+
# a vector Array (e.g., +-> (text) { OpenAI.embed(text) }+)
|
|
41
|
+
# @param metric [Symbol] similarity metric (+:cosine+, +:l2+, or +:ip+)
|
|
42
|
+
# @param zvec_path [String, nil] path for the zvec collection
|
|
43
|
+
# (defaults to +tmp/zvec/<prefix>+)
|
|
44
|
+
# @return [void]
|
|
25
45
|
def vectorize(field, dimensions:, prefix: nil, embed_with: nil,
|
|
26
46
|
metric: :cosine, zvec_path: nil)
|
|
27
47
|
prefix ||= table_name
|
|
@@ -46,7 +66,12 @@ module Zvec
|
|
|
46
66
|
end
|
|
47
67
|
end
|
|
48
68
|
|
|
69
|
+
# Instance methods mixed into the model.
|
|
49
70
|
module InstanceMethods
|
|
71
|
+
# Re-embed the configured text field and store the embedding.
|
|
72
|
+
#
|
|
73
|
+
# @return [void]
|
|
74
|
+
# @raise [Zvec::Error] if no +embed_with+ function is configured
|
|
50
75
|
def zvec_update_embedding!
|
|
51
76
|
cfg = self.class.zvec_config
|
|
52
77
|
text = send(cfg[:field])
|
|
@@ -61,19 +86,29 @@ module Zvec
|
|
|
61
86
|
store.flush
|
|
62
87
|
end
|
|
63
88
|
|
|
89
|
+
# Remove this record's embedding from the vector store.
|
|
90
|
+
#
|
|
91
|
+
# @return [void]
|
|
64
92
|
def zvec_remove_embedding!
|
|
65
93
|
self.class.zvec_store.delete(id.to_s)
|
|
66
94
|
rescue
|
|
67
95
|
# Silently ignore if document doesn't exist
|
|
68
96
|
end
|
|
69
97
|
|
|
98
|
+
# Fetch this record's stored embedding document.
|
|
99
|
+
#
|
|
100
|
+
# @return [Zvec::Doc, nil] the stored document, or nil if not found
|
|
70
101
|
def zvec_embedding
|
|
71
102
|
result = self.class.zvec_store.fetch(id.to_s)
|
|
72
103
|
result[id.to_s]
|
|
73
104
|
end
|
|
74
105
|
end
|
|
75
106
|
|
|
107
|
+
# Class methods mixed into the model.
|
|
76
108
|
module SearchMethods
|
|
109
|
+
# Access the shared {Zvec::RubyLLM::Store} instance for this model.
|
|
110
|
+
#
|
|
111
|
+
# @return [Zvec::RubyLLM::Store]
|
|
77
112
|
def zvec_store
|
|
78
113
|
@zvec_store ||= begin
|
|
79
114
|
cfg = zvec_config
|
|
@@ -85,6 +120,18 @@ module Zvec
|
|
|
85
120
|
end
|
|
86
121
|
end
|
|
87
122
|
|
|
123
|
+
# Search for records by vector similarity.
|
|
124
|
+
#
|
|
125
|
+
# When +query+ is a String and +embed+ is true, the configured
|
|
126
|
+
# +embed_with+ function is called to convert it to a vector first.
|
|
127
|
+
#
|
|
128
|
+
# @param query [Array<Numeric>, String] query vector or text to embed
|
|
129
|
+
# @param top_k [Integer] maximum number of results (default: 10)
|
|
130
|
+
# @param embed [Boolean] whether to embed a String query (default: true)
|
|
131
|
+
# @return [Array<ActiveRecord::Base>] matching records, each with a
|
|
132
|
+
# +zvec_score+ singleton method returning the similarity score
|
|
133
|
+
# @raise [ArgumentError] if query is a String but no +embed_with+ is
|
|
134
|
+
# configured
|
|
88
135
|
def vector_search(query, top_k: 10, embed: true)
|
|
89
136
|
cfg = zvec_config
|
|
90
137
|
|
data/lib/zvec/collection.rb
CHANGED
|
@@ -1,17 +1,57 @@
|
|
|
1
1
|
require "monitor"
|
|
2
2
|
|
|
3
3
|
module Zvec
|
|
4
|
+
# A vector collection backed by the zvec C++ engine. Provides CRUD
|
|
5
|
+
# operations, vector similarity search, and index management.
|
|
6
|
+
#
|
|
7
|
+
# Collections must be explicitly closed via {#close} before they can be
|
|
8
|
+
# reopened from the same path. Use the +closed?+ method to check state.
|
|
9
|
+
#
|
|
10
|
+
# All mutating operations are thread-safe (protected by a Monitor).
|
|
11
|
+
#
|
|
12
|
+
# @example Create, populate, search, and close
|
|
13
|
+
# schema = Zvec::Schema.new("articles") do
|
|
14
|
+
# string "title"
|
|
15
|
+
# vector "embedding", dimension: 4,
|
|
16
|
+
# index: Zvec::Ext::HnswIndexParams.new(Zvec::COSINE)
|
|
17
|
+
# end
|
|
18
|
+
#
|
|
19
|
+
# col = Zvec::Collection.create_and_open("/tmp/articles", schema)
|
|
20
|
+
# col.add(pk: "1", title: "Hello", embedding: [0.1, 0.2, 0.3, 0.4])
|
|
21
|
+
# results = col.search([0.1, 0.2, 0.3, 0.4], top_k: 5)
|
|
22
|
+
# col.close
|
|
23
|
+
#
|
|
24
|
+
# @example Reopen an existing collection
|
|
25
|
+
# col = Zvec::Collection.open("/tmp/articles")
|
|
26
|
+
# puts col.doc_count
|
|
27
|
+
# col.close
|
|
28
|
+
#
|
|
4
29
|
class Collection
|
|
30
|
+
# @return [Zvec::Schema, nil] the schema, if provided at creation time
|
|
5
31
|
attr_reader :schema
|
|
6
32
|
|
|
33
|
+
# @param ext_collection [Ext::Collection] the underlying C++ collection
|
|
34
|
+
# @param schema [Zvec::Schema, nil] optional schema for type-aware access
|
|
35
|
+
# @param name [String, nil] optional collection name
|
|
7
36
|
def initialize(ext_collection, schema: nil, name: nil)
|
|
8
37
|
@ext = ext_collection
|
|
9
38
|
@schema = schema
|
|
10
39
|
@name = name
|
|
11
40
|
@monitor = Monitor.new
|
|
41
|
+
@closed = false
|
|
12
42
|
end
|
|
13
43
|
|
|
14
|
-
# Create a new collection and open it.
|
|
44
|
+
# Create a new collection on disk and open it.
|
|
45
|
+
#
|
|
46
|
+
# @param path [String] directory path for the collection data
|
|
47
|
+
# @param schema [Zvec::Schema] the collection schema
|
|
48
|
+
# @param read_only [Boolean] open in read-only mode
|
|
49
|
+
# @param enable_mmap [Boolean] use memory-mapped I/O (default: true)
|
|
50
|
+
# @return [Zvec::Collection]
|
|
51
|
+
# @raise [ArgumentError] if path is blank or schema is not a Zvec::Schema
|
|
52
|
+
#
|
|
53
|
+
# @example
|
|
54
|
+
# col = Zvec::Collection.create_and_open("/tmp/my_col", schema)
|
|
15
55
|
def self.create_and_open(path, schema, read_only: false, enable_mmap: true)
|
|
16
56
|
validate_path!(path)
|
|
17
57
|
raise ArgumentError, "schema must be a Zvec::Schema" unless schema.is_a?(Schema)
|
|
@@ -23,7 +63,16 @@ module Zvec
|
|
|
23
63
|
new(ext, schema: schema, name: schema.name)
|
|
24
64
|
end
|
|
25
65
|
|
|
26
|
-
# Open an existing collection.
|
|
66
|
+
# Open an existing collection from disk.
|
|
67
|
+
#
|
|
68
|
+
# @param path [String] directory path of an existing collection
|
|
69
|
+
# @param read_only [Boolean] open in read-only mode
|
|
70
|
+
# @param enable_mmap [Boolean] use memory-mapped I/O (default: true)
|
|
71
|
+
# @return [Zvec::Collection]
|
|
72
|
+
# @raise [ArgumentError] if path is blank
|
|
73
|
+
#
|
|
74
|
+
# @example
|
|
75
|
+
# col = Zvec::Collection.open("/tmp/my_col", read_only: true)
|
|
27
76
|
def self.open(path, read_only: false, enable_mmap: true)
|
|
28
77
|
validate_path!(path)
|
|
29
78
|
|
|
@@ -34,25 +83,74 @@ module Zvec
|
|
|
34
83
|
new(ext)
|
|
35
84
|
end
|
|
36
85
|
|
|
86
|
+
# @return [String, nil] the collection name (from schema or explicit)
|
|
37
87
|
def collection_name
|
|
38
88
|
@name || (@schema ? @schema.name : nil)
|
|
39
89
|
end
|
|
40
90
|
|
|
91
|
+
# @return [String] the on-disk path of the collection
|
|
41
92
|
def path
|
|
42
93
|
@ext.path
|
|
43
94
|
end
|
|
44
95
|
|
|
96
|
+
# @return [Ext::CollectionStats] collection statistics
|
|
97
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
45
98
|
def stats
|
|
99
|
+
ensure_open!
|
|
46
100
|
@ext.stats
|
|
47
101
|
end
|
|
48
102
|
|
|
103
|
+
# @return [Integer] the number of documents in the collection
|
|
104
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
49
105
|
def doc_count
|
|
106
|
+
ensure_open!
|
|
50
107
|
@ext.stats.doc_count
|
|
51
108
|
end
|
|
52
109
|
|
|
110
|
+
# @return [Boolean] true if the collection has been closed
|
|
111
|
+
def closed?
|
|
112
|
+
@closed
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# Close the collection, releasing the underlying C++ resources.
|
|
116
|
+
# The collection must be closed before it can be reopened from the
|
|
117
|
+
# same path.
|
|
118
|
+
#
|
|
119
|
+
# @return [void]
|
|
120
|
+
# @raise [Zvec::CollectionError] if already closed
|
|
121
|
+
#
|
|
122
|
+
# @example
|
|
123
|
+
# col.close
|
|
124
|
+
# col.closed? #=> true
|
|
125
|
+
def close
|
|
126
|
+
raise CollectionError, "#{error_prefix}Collection is already closed" if @closed
|
|
127
|
+
|
|
128
|
+
@monitor.synchronize do
|
|
129
|
+
begin
|
|
130
|
+
@ext.close
|
|
131
|
+
rescue NoMethodError
|
|
132
|
+
# C++ extension may not expose a close method; the GC will handle it.
|
|
133
|
+
end
|
|
134
|
+
@closed = true
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
|
|
53
138
|
# --- DDL ---
|
|
54
139
|
|
|
140
|
+
# Create an index on a field.
|
|
141
|
+
#
|
|
142
|
+
# @param field_name [String, Symbol] the field to index
|
|
143
|
+
# @param index_params [Ext::HnswIndexParams, Ext::FlatIndexParams,
|
|
144
|
+
# Ext::IVFIndexParams, Ext::InvertIndexParams] index configuration
|
|
145
|
+
# @return [self]
|
|
146
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
147
|
+
# @raise [ArgumentError] if field_name is blank
|
|
148
|
+
#
|
|
149
|
+
# @example
|
|
150
|
+
# col.create_index("embedding",
|
|
151
|
+
# Ext::HnswIndexParams.new(Zvec::COSINE, m: 32, ef_construction: 400))
|
|
55
152
|
def create_index(field_name, index_params)
|
|
153
|
+
ensure_open!
|
|
56
154
|
raise ArgumentError, "field_name must be a non-empty string" if field_name.nil? || field_name.to_s.strip.empty?
|
|
57
155
|
|
|
58
156
|
@monitor.synchronize do
|
|
@@ -61,7 +159,14 @@ module Zvec
|
|
|
61
159
|
self
|
|
62
160
|
end
|
|
63
161
|
|
|
162
|
+
# Drop an index on a field.
|
|
163
|
+
#
|
|
164
|
+
# @param field_name [String, Symbol] the field whose index to drop
|
|
165
|
+
# @return [self]
|
|
166
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
167
|
+
# @raise [ArgumentError] if field_name is blank
|
|
64
168
|
def drop_index(field_name)
|
|
169
|
+
ensure_open!
|
|
65
170
|
raise ArgumentError, "field_name must be a non-empty string" if field_name.nil? || field_name.to_s.strip.empty?
|
|
66
171
|
|
|
67
172
|
@monitor.synchronize do
|
|
@@ -70,23 +175,54 @@ module Zvec
|
|
|
70
175
|
self
|
|
71
176
|
end
|
|
72
177
|
|
|
178
|
+
# Optimize the collection (compact segments, rebuild indexes).
|
|
179
|
+
#
|
|
180
|
+
# @return [self]
|
|
181
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
73
182
|
def optimize
|
|
183
|
+
ensure_open!
|
|
74
184
|
@monitor.synchronize { @ext.optimize }
|
|
75
185
|
self
|
|
76
186
|
end
|
|
77
187
|
|
|
188
|
+
# Flush pending writes to disk.
|
|
189
|
+
#
|
|
190
|
+
# @return [self]
|
|
191
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
78
192
|
def flush
|
|
193
|
+
ensure_open!
|
|
79
194
|
@monitor.synchronize { @ext.flush }
|
|
80
195
|
self
|
|
81
196
|
end
|
|
82
197
|
|
|
198
|
+
# Destroy the collection, removing all data from disk.
|
|
199
|
+
#
|
|
200
|
+
# @return [void]
|
|
201
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
83
202
|
def destroy
|
|
84
|
-
|
|
203
|
+
ensure_open!
|
|
204
|
+
@monitor.synchronize do
|
|
205
|
+
@ext.destroy
|
|
206
|
+
@closed = true
|
|
207
|
+
end
|
|
85
208
|
end
|
|
86
209
|
|
|
87
210
|
# --- DML ---
|
|
88
211
|
|
|
212
|
+
# Insert one or more documents.
|
|
213
|
+
#
|
|
214
|
+
# @param docs [Zvec::Doc, Array<Zvec::Doc>] document(s) to insert
|
|
215
|
+
# @return [Array<Array(Boolean, String)>] write results
|
|
216
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
217
|
+
# @raise [ArgumentError] if docs are not Zvec::Doc instances
|
|
218
|
+
# @raise [Zvec::Error] if any write fails
|
|
219
|
+
#
|
|
220
|
+
# @example
|
|
221
|
+
# doc = Zvec::Doc.new(pk: "1", schema: schema)
|
|
222
|
+
# doc["title"] = "Hello"
|
|
223
|
+
# col.insert(doc)
|
|
89
224
|
def insert(docs)
|
|
225
|
+
ensure_open!
|
|
90
226
|
docs = [docs] unless docs.is_a?(Array)
|
|
91
227
|
validate_docs!(docs)
|
|
92
228
|
ext_docs = docs.map { |d| d.is_a?(Doc) ? d.ext_doc : d }
|
|
@@ -94,7 +230,14 @@ module Zvec
|
|
|
94
230
|
check_write_results!(results)
|
|
95
231
|
end
|
|
96
232
|
|
|
233
|
+
# Upsert (insert or update) one or more documents.
|
|
234
|
+
#
|
|
235
|
+
# @param docs [Zvec::Doc, Array<Zvec::Doc>] document(s) to upsert
|
|
236
|
+
# @return [Array<Array(Boolean, String)>] write results
|
|
237
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
238
|
+
# @raise [Zvec::Error] if any write fails
|
|
97
239
|
def upsert(docs)
|
|
240
|
+
ensure_open!
|
|
98
241
|
docs = [docs] unless docs.is_a?(Array)
|
|
99
242
|
validate_docs!(docs)
|
|
100
243
|
ext_docs = docs.map { |d| d.is_a?(Doc) ? d.ext_doc : d }
|
|
@@ -102,7 +245,14 @@ module Zvec
|
|
|
102
245
|
check_write_results!(results)
|
|
103
246
|
end
|
|
104
247
|
|
|
248
|
+
# Update one or more existing documents.
|
|
249
|
+
#
|
|
250
|
+
# @param docs [Zvec::Doc, Array<Zvec::Doc>] document(s) to update
|
|
251
|
+
# @return [Array<Array(Boolean, String)>] write results
|
|
252
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
253
|
+
# @raise [Zvec::Error] if any write fails
|
|
105
254
|
def update(docs)
|
|
255
|
+
ensure_open!
|
|
106
256
|
docs = [docs] unless docs.is_a?(Array)
|
|
107
257
|
validate_docs!(docs)
|
|
108
258
|
ext_docs = docs.map { |d| d.is_a?(Doc) ? d.ext_doc : d }
|
|
@@ -110,7 +260,18 @@ module Zvec
|
|
|
110
260
|
check_write_results!(results)
|
|
111
261
|
end
|
|
112
262
|
|
|
263
|
+
# Delete documents by primary key(s).
|
|
264
|
+
#
|
|
265
|
+
# @param pks [Array<String>] one or more primary keys to delete
|
|
266
|
+
# @return [Array<Array(Boolean, String)>] write results
|
|
267
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
268
|
+
# @raise [ArgumentError] if no primary keys are provided
|
|
269
|
+
# @raise [Zvec::Error] if any write fails
|
|
270
|
+
#
|
|
271
|
+
# @example
|
|
272
|
+
# col.delete("doc-1", "doc-2")
|
|
113
273
|
def delete(*pks)
|
|
274
|
+
ensure_open!
|
|
114
275
|
pks = pks.flatten
|
|
115
276
|
raise ArgumentError, "#{error_prefix}No primary keys provided for delete" if pks.empty?
|
|
116
277
|
pks = pks.map(&:to_s)
|
|
@@ -118,15 +279,49 @@ module Zvec
|
|
|
118
279
|
check_write_results!(results)
|
|
119
280
|
end
|
|
120
281
|
|
|
282
|
+
# Delete documents matching a filter expression.
|
|
283
|
+
#
|
|
284
|
+
# @param filter [String] the filter expression (see {VectorQuery} for syntax)
|
|
285
|
+
# @return [void]
|
|
286
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
287
|
+
# @raise [ArgumentError] if filter is blank
|
|
288
|
+
#
|
|
289
|
+
# @example
|
|
290
|
+
# col.delete_by_filter("year < 2020")
|
|
121
291
|
def delete_by_filter(filter)
|
|
292
|
+
ensure_open!
|
|
122
293
|
raise ArgumentError, "#{error_prefix}filter must be a non-empty string" if filter.nil? || filter.to_s.strip.empty?
|
|
123
294
|
@monitor.synchronize { @ext.delete_by_filter(filter) }
|
|
124
295
|
end
|
|
125
296
|
|
|
126
297
|
# --- DQL ---
|
|
127
298
|
|
|
299
|
+
# Execute a vector similarity search with full control over parameters.
|
|
300
|
+
#
|
|
301
|
+
# @param field_name [String, Symbol] the vector field to search
|
|
302
|
+
# @param vector [Array<Numeric>] the query vector
|
|
303
|
+
# @param topk [Integer] maximum number of results (default: 10)
|
|
304
|
+
# @param filter [String, nil] optional filter expression
|
|
305
|
+
# @param include_vector [Boolean] include stored vectors in results
|
|
306
|
+
# @param output_fields [Array<String>, nil] specific fields to return
|
|
307
|
+
# @param query_params [Ext::HnswQueryParams, Ext::IVFQueryParams,
|
|
308
|
+
# Ext::FlatQueryParams, nil] search tuning params
|
|
309
|
+
# @return [Array<Zvec::Doc>] result documents with +pk+ and +score+ set
|
|
310
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
311
|
+
# @raise [ArgumentError] if vector is empty or contains non-numeric elements
|
|
312
|
+
# @raise [Zvec::DimensionError] if vector dimension doesn't match schema
|
|
313
|
+
#
|
|
314
|
+
# @example
|
|
315
|
+
# results = col.query(
|
|
316
|
+
# field_name: "embedding",
|
|
317
|
+
# vector: [0.1, 0.2, 0.3, 0.4],
|
|
318
|
+
# topk: 5,
|
|
319
|
+
# filter: "year > 2024"
|
|
320
|
+
# )
|
|
321
|
+
# results.each { |doc| puts "#{doc.pk}: #{doc.score}" }
|
|
128
322
|
def query(field_name:, vector:, topk: 10, filter: nil,
|
|
129
323
|
include_vector: false, output_fields: nil, query_params: nil)
|
|
324
|
+
ensure_open!
|
|
130
325
|
validate_query_vector!(vector, field_name)
|
|
131
326
|
|
|
132
327
|
vq = VectorQuery.new(
|
|
@@ -148,7 +343,18 @@ module Zvec
|
|
|
148
343
|
end
|
|
149
344
|
end
|
|
150
345
|
|
|
346
|
+
# Fetch documents by primary key(s).
|
|
347
|
+
#
|
|
348
|
+
# @param pks [Array<String>] one or more primary keys
|
|
349
|
+
# @return [Hash{String => Zvec::Doc}] mapping of pk to document
|
|
350
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
351
|
+
# @raise [ArgumentError] if no primary keys provided
|
|
352
|
+
#
|
|
353
|
+
# @example
|
|
354
|
+
# docs = col.fetch("doc-1", "doc-2")
|
|
355
|
+
# docs["doc-1"]["title"] #=> "Hello"
|
|
151
356
|
def fetch(*pks)
|
|
357
|
+
ensure_open!
|
|
152
358
|
pks = pks.flatten
|
|
153
359
|
raise ArgumentError, "#{error_prefix}No primary keys provided for fetch" if pks.empty?
|
|
154
360
|
pks = pks.map(&:to_s)
|
|
@@ -158,15 +364,41 @@ module Zvec
|
|
|
158
364
|
end
|
|
159
365
|
end
|
|
160
366
|
|
|
161
|
-
# Convenience
|
|
367
|
+
# Convenience method to insert a document from keyword arguments.
|
|
368
|
+
#
|
|
369
|
+
# @param pk [String, Integer] the primary key (required)
|
|
370
|
+
# @param fields [Hash] field name/value pairs
|
|
371
|
+
# @return [Array] write results
|
|
372
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
373
|
+
# @raise [ArgumentError] if pk is nil
|
|
374
|
+
#
|
|
375
|
+
# @example
|
|
376
|
+
# col.add(pk: "1", title: "Hello", embedding: [0.1, 0.2, 0.3, 0.4])
|
|
162
377
|
def add(pk:, **fields)
|
|
378
|
+
ensure_open!
|
|
163
379
|
raise ArgumentError, "#{error_prefix}pk must not be nil" if pk.nil?
|
|
164
380
|
doc = Doc.new(pk: pk, fields: fields, schema: @schema)
|
|
165
381
|
insert(doc)
|
|
166
382
|
end
|
|
167
383
|
|
|
168
|
-
# Convenience
|
|
384
|
+
# Convenience method for simple vector similarity search.
|
|
385
|
+
#
|
|
386
|
+
# Auto-detects the vector field from the schema if not specified.
|
|
387
|
+
#
|
|
388
|
+
# @param vector [Array<Numeric>] the query vector
|
|
389
|
+
# @param field [String, Symbol, nil] vector field name (auto-detected if nil)
|
|
390
|
+
# @param top_k [Integer] number of results (default: 10)
|
|
391
|
+
# @param filter [String, nil] optional filter expression
|
|
392
|
+
# @return [Array<Zvec::Doc>] result documents
|
|
393
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
394
|
+
# @raise [Zvec::Error] if no vector fields exist in the schema
|
|
395
|
+
#
|
|
396
|
+
# @example
|
|
397
|
+
# results = col.search([0.1, 0.2, 0.3, 0.4], top_k: 5)
|
|
398
|
+
# results.first.pk #=> "doc-1"
|
|
399
|
+
# results.first.score #=> 0.95
|
|
169
400
|
def search(vector, field: nil, top_k: 10, filter: nil)
|
|
401
|
+
ensure_open!
|
|
170
402
|
raise ArgumentError, "#{error_prefix}vector must be a non-empty Array" unless vector.is_a?(Array) && !vector.empty?
|
|
171
403
|
|
|
172
404
|
# Auto-detect vector field if not specified
|
|
@@ -174,11 +406,11 @@ module Zvec
|
|
|
174
406
|
unless fname
|
|
175
407
|
if @schema
|
|
176
408
|
vfield = @schema.ext_schema.vector_fields.first
|
|
177
|
-
raise
|
|
409
|
+
raise CollectionError, "#{error_prefix}No vector fields in schema" unless vfield
|
|
178
410
|
fname = vfield.name
|
|
179
411
|
else
|
|
180
412
|
vfields = @ext.schema.vector_fields
|
|
181
|
-
raise
|
|
413
|
+
raise CollectionError, "#{error_prefix}No vector fields in schema" if vfields.empty?
|
|
182
414
|
fname = vfields.first.name
|
|
183
415
|
end
|
|
184
416
|
end
|
|
@@ -191,6 +423,11 @@ module Zvec
|
|
|
191
423
|
raise ArgumentError, "path must be a non-empty string" if path.nil? || path.to_s.strip.empty?
|
|
192
424
|
end
|
|
193
425
|
|
|
426
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
427
|
+
def ensure_open!
|
|
428
|
+
raise CollectionError, "#{error_prefix}Collection is closed" if @closed
|
|
429
|
+
end
|
|
430
|
+
|
|
194
431
|
def error_prefix
|
|
195
432
|
cn = collection_name
|
|
196
433
|
cn ? "[Collection '#{cn}'] " : ""
|
|
@@ -231,7 +468,7 @@ module Zvec
|
|
|
231
468
|
def check_write_results!(results)
|
|
232
469
|
results.each do |ok, msg|
|
|
233
470
|
error_msg = msg.nil? || msg.empty? ? "Write operation failed" : msg
|
|
234
|
-
raise
|
|
471
|
+
raise CollectionError, "#{error_prefix}#{error_msg}" unless ok
|
|
235
472
|
end
|
|
236
473
|
results
|
|
237
474
|
end
|