zvec-ruby 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/Rakefile +3 -0
- data/lib/zvec/active_record.rb +51 -4
- data/lib/zvec/collection.rb +344 -33
- data/lib/zvec/data_types.rb +250 -0
- data/lib/zvec/doc.rb +119 -10
- data/lib/zvec/query.rb +110 -1
- data/lib/zvec/ruby_llm.rb +79 -7
- data/lib/zvec/schema.rb +145 -1
- data/lib/zvec/version.rb +1 -1
- data/lib/zvec.rb +13 -0
- data/test/test_edge_cases.rb +380 -0
- data/test/test_helper.rb +102 -3
- data/test/test_type_detection.rb +258 -0
- data/test/test_validation.rb +305 -0
- data/test/test_version.rb +1 -1
- metadata +4 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 968f5cc5abadbda9360603f7fdc1c618b036171cb5f58c0a61e69c22cb511ea0
|
|
4
|
+
data.tar.gz: 9e1113dde47dbcba7d8b9fbdfb247b4f9cd68d6a14e77974541cb660ee45a63e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f722f02332f1f4c95b64e307ca021488c770070953912d077781b9338a41ff124c11e7645b7e50245afcb37b3d289b1c5ea67aac97e65b406a0d893cb03239f8
|
|
7
|
+
data.tar.gz: 555229b48ff063d7f59e1b97de38ad8f06b87befd425d6b05222bebccbb8374394d8b58c275f9db18266a4c5c6cfae860029890c30f194572adb1e0c18be8dd7
|
data/README.md
CHANGED
|
@@ -15,11 +15,11 @@ Precompiled native gems are available for:
|
|
|
15
15
|
|
|
16
16
|
```ruby
|
|
17
17
|
# Gemfile
|
|
18
|
-
gem "zvec"
|
|
18
|
+
gem "zvec-ruby"
|
|
19
19
|
```
|
|
20
20
|
|
|
21
21
|
```bash
|
|
22
|
-
gem install zvec
|
|
22
|
+
gem install zvec-ruby
|
|
23
23
|
```
|
|
24
24
|
|
|
25
25
|
No compiler or build tools needed — the gem ships with the native extension and all zvec dependencies statically linked.
|
|
@@ -36,7 +36,7 @@ cmake .. -DCMAKE_BUILD_TYPE=Release
|
|
|
36
36
|
make -j$(nproc)
|
|
37
37
|
|
|
38
38
|
# 2. Install the gem with ZVEC_DIR pointing to the build
|
|
39
|
-
ZVEC_DIR=/tmp/zvec gem install zvec
|
|
39
|
+
ZVEC_DIR=/tmp/zvec gem install zvec-ruby
|
|
40
40
|
```
|
|
41
41
|
|
|
42
42
|
Or using the included helper script:
|
data/Rakefile
CHANGED
|
@@ -40,6 +40,9 @@ Rake::TestTask.new(:test_pure) do |t|
|
|
|
40
40
|
"test/test_schema.rb",
|
|
41
41
|
"test/test_doc.rb",
|
|
42
42
|
"test/test_query.rb",
|
|
43
|
+
"test/test_type_detection.rb",
|
|
44
|
+
"test/test_validation.rb",
|
|
45
|
+
"test/test_edge_cases.rb",
|
|
43
46
|
"test/test_active_record.rb",
|
|
44
47
|
]
|
|
45
48
|
t.warning = true
|
data/lib/zvec/active_record.rb
CHANGED
|
@@ -5,7 +5,10 @@ module Zvec
|
|
|
5
5
|
module ActiveRecord
|
|
6
6
|
# Rails concern that adds vector search capabilities to ActiveRecord models.
|
|
7
7
|
#
|
|
8
|
-
#
|
|
8
|
+
# When included in a model, call +vectorize+ to configure which text field
|
|
9
|
+
# to embed, the vector dimension, and the embedding function.
|
|
10
|
+
#
|
|
11
|
+
# @example Basic usage
|
|
9
12
|
# class Article < ApplicationRecord
|
|
10
13
|
# include Zvec::ActiveRecord::Vectorize
|
|
11
14
|
#
|
|
@@ -15,13 +18,30 @@ module Zvec
|
|
|
15
18
|
# embed_with: ->(text) { OpenAI.embed(text) }
|
|
16
19
|
# end
|
|
17
20
|
#
|
|
18
|
-
#
|
|
19
|
-
#
|
|
21
|
+
# @example Searching
|
|
22
|
+
# Article.vector_search("Ruby programming", top_k: 5)
|
|
23
|
+
# Article.vector_search([0.1, 0.2, ...], top_k: 5, embed: false)
|
|
24
|
+
#
|
|
25
|
+
# @example Instance methods
|
|
26
|
+
# article.zvec_update_embedding! # re-embed and store
|
|
27
|
+
# article.zvec_remove_embedding! # remove from vector store
|
|
28
|
+
# article.zvec_embedding # fetch stored embedding doc
|
|
20
29
|
#
|
|
21
30
|
module Vectorize
|
|
22
31
|
extend ActiveSupport::Concern
|
|
23
32
|
|
|
24
33
|
class_methods do
|
|
34
|
+
# Configure vector search for this model.
|
|
35
|
+
#
|
|
36
|
+
# @param field [String, Symbol] the text field to embed
|
|
37
|
+
# @param dimensions [Integer] the vector dimension
|
|
38
|
+
# @param prefix [String, nil] collection prefix (defaults to table_name)
|
|
39
|
+
# @param embed_with [Proc, nil] a callable that takes text and returns
|
|
40
|
+
# a vector Array (e.g., +-> (text) { OpenAI.embed(text) }+)
|
|
41
|
+
# @param metric [Symbol] similarity metric (+:cosine+, +:l2+, or +:ip+)
|
|
42
|
+
# @param zvec_path [String, nil] path for the zvec collection
|
|
43
|
+
# (defaults to +tmp/zvec/<prefix>+)
|
|
44
|
+
# @return [void]
|
|
25
45
|
def vectorize(field, dimensions:, prefix: nil, embed_with: nil,
|
|
26
46
|
metric: :cosine, zvec_path: nil)
|
|
27
47
|
prefix ||= table_name
|
|
@@ -46,7 +66,12 @@ module Zvec
|
|
|
46
66
|
end
|
|
47
67
|
end
|
|
48
68
|
|
|
69
|
+
# Instance methods mixed into the model.
|
|
49
70
|
module InstanceMethods
|
|
71
|
+
# Re-embed the configured text field and store the embedding.
|
|
72
|
+
#
|
|
73
|
+
# @return [void]
|
|
74
|
+
# @raise [Zvec::Error] if no +embed_with+ function is configured
|
|
50
75
|
def zvec_update_embedding!
|
|
51
76
|
cfg = self.class.zvec_config
|
|
52
77
|
text = send(cfg[:field])
|
|
@@ -61,19 +86,29 @@ module Zvec
|
|
|
61
86
|
store.flush
|
|
62
87
|
end
|
|
63
88
|
|
|
89
|
+
# Remove this record's embedding from the vector store.
|
|
90
|
+
#
|
|
91
|
+
# @return [void]
|
|
64
92
|
def zvec_remove_embedding!
|
|
65
93
|
self.class.zvec_store.delete(id.to_s)
|
|
66
|
-
rescue
|
|
94
|
+
rescue
|
|
67
95
|
# Silently ignore if document doesn't exist
|
|
68
96
|
end
|
|
69
97
|
|
|
98
|
+
# Fetch this record's stored embedding document.
|
|
99
|
+
#
|
|
100
|
+
# @return [Zvec::Doc, nil] the stored document, or nil if not found
|
|
70
101
|
def zvec_embedding
|
|
71
102
|
result = self.class.zvec_store.fetch(id.to_s)
|
|
72
103
|
result[id.to_s]
|
|
73
104
|
end
|
|
74
105
|
end
|
|
75
106
|
|
|
107
|
+
# Class methods mixed into the model.
|
|
76
108
|
module SearchMethods
|
|
109
|
+
# Access the shared {Zvec::RubyLLM::Store} instance for this model.
|
|
110
|
+
#
|
|
111
|
+
# @return [Zvec::RubyLLM::Store]
|
|
77
112
|
def zvec_store
|
|
78
113
|
@zvec_store ||= begin
|
|
79
114
|
cfg = zvec_config
|
|
@@ -85,6 +120,18 @@ module Zvec
|
|
|
85
120
|
end
|
|
86
121
|
end
|
|
87
122
|
|
|
123
|
+
# Search for records by vector similarity.
|
|
124
|
+
#
|
|
125
|
+
# When +query+ is a String and +embed+ is true, the configured
|
|
126
|
+
# +embed_with+ function is called to convert it to a vector first.
|
|
127
|
+
#
|
|
128
|
+
# @param query [Array<Numeric>, String] query vector or text to embed
|
|
129
|
+
# @param top_k [Integer] maximum number of results (default: 10)
|
|
130
|
+
# @param embed [Boolean] whether to embed a String query (default: true)
|
|
131
|
+
# @return [Array<ActiveRecord::Base>] matching records, each with a
|
|
132
|
+
# +zvec_score+ singleton method returning the similarity score
|
|
133
|
+
# @raise [ArgumentError] if query is a String but no +embed_with+ is
|
|
134
|
+
# configured
|
|
88
135
|
def vector_search(query, top_k: 10, embed: true)
|
|
89
136
|
cfg = zvec_config
|
|
90
137
|
|
data/lib/zvec/collection.rb
CHANGED
|
@@ -1,23 +1,81 @@
|
|
|
1
|
+
require "monitor"
|
|
2
|
+
|
|
1
3
|
module Zvec
|
|
4
|
+
# A vector collection backed by the zvec C++ engine. Provides CRUD
|
|
5
|
+
# operations, vector similarity search, and index management.
|
|
6
|
+
#
|
|
7
|
+
# Collections must be explicitly closed via {#close} before they can be
|
|
8
|
+
# reopened from the same path. Use the +closed?+ method to check state.
|
|
9
|
+
#
|
|
10
|
+
# All mutating operations are thread-safe (protected by a Monitor).
|
|
11
|
+
#
|
|
12
|
+
# @example Create, populate, search, and close
|
|
13
|
+
# schema = Zvec::Schema.new("articles") do
|
|
14
|
+
# string "title"
|
|
15
|
+
# vector "embedding", dimension: 4,
|
|
16
|
+
# index: Zvec::Ext::HnswIndexParams.new(Zvec::COSINE)
|
|
17
|
+
# end
|
|
18
|
+
#
|
|
19
|
+
# col = Zvec::Collection.create_and_open("/tmp/articles", schema)
|
|
20
|
+
# col.add(pk: "1", title: "Hello", embedding: [0.1, 0.2, 0.3, 0.4])
|
|
21
|
+
# results = col.search([0.1, 0.2, 0.3, 0.4], top_k: 5)
|
|
22
|
+
# col.close
|
|
23
|
+
#
|
|
24
|
+
# @example Reopen an existing collection
|
|
25
|
+
# col = Zvec::Collection.open("/tmp/articles")
|
|
26
|
+
# puts col.doc_count
|
|
27
|
+
# col.close
|
|
28
|
+
#
|
|
2
29
|
class Collection
|
|
30
|
+
# @return [Zvec::Schema, nil] the schema, if provided at creation time
|
|
3
31
|
attr_reader :schema
|
|
4
32
|
|
|
5
|
-
|
|
33
|
+
# @param ext_collection [Ext::Collection] the underlying C++ collection
|
|
34
|
+
# @param schema [Zvec::Schema, nil] optional schema for type-aware access
|
|
35
|
+
# @param name [String, nil] optional collection name
|
|
36
|
+
def initialize(ext_collection, schema: nil, name: nil)
|
|
6
37
|
@ext = ext_collection
|
|
7
38
|
@schema = schema
|
|
39
|
+
@name = name
|
|
40
|
+
@monitor = Monitor.new
|
|
41
|
+
@closed = false
|
|
8
42
|
end
|
|
9
43
|
|
|
10
|
-
# Create a new collection and open it.
|
|
44
|
+
# Create a new collection on disk and open it.
|
|
45
|
+
#
|
|
46
|
+
# @param path [String] directory path for the collection data
|
|
47
|
+
# @param schema [Zvec::Schema] the collection schema
|
|
48
|
+
# @param read_only [Boolean] open in read-only mode
|
|
49
|
+
# @param enable_mmap [Boolean] use memory-mapped I/O (default: true)
|
|
50
|
+
# @return [Zvec::Collection]
|
|
51
|
+
# @raise [ArgumentError] if path is blank or schema is not a Zvec::Schema
|
|
52
|
+
#
|
|
53
|
+
# @example
|
|
54
|
+
# col = Zvec::Collection.create_and_open("/tmp/my_col", schema)
|
|
11
55
|
def self.create_and_open(path, schema, read_only: false, enable_mmap: true)
|
|
56
|
+
validate_path!(path)
|
|
57
|
+
raise ArgumentError, "schema must be a Zvec::Schema" unless schema.is_a?(Schema)
|
|
58
|
+
|
|
12
59
|
opts = Ext::CollectionOptions.new
|
|
13
60
|
opts.read_only = read_only
|
|
14
61
|
opts.enable_mmap = enable_mmap
|
|
15
62
|
ext = Ext::Collection.create_and_open(path, schema.ext_schema, opts)
|
|
16
|
-
new(ext, schema: schema)
|
|
63
|
+
new(ext, schema: schema, name: schema.name)
|
|
17
64
|
end
|
|
18
65
|
|
|
19
|
-
# Open an existing collection.
|
|
66
|
+
# Open an existing collection from disk.
|
|
67
|
+
#
|
|
68
|
+
# @param path [String] directory path of an existing collection
|
|
69
|
+
# @param read_only [Boolean] open in read-only mode
|
|
70
|
+
# @param enable_mmap [Boolean] use memory-mapped I/O (default: true)
|
|
71
|
+
# @return [Zvec::Collection]
|
|
72
|
+
# @raise [ArgumentError] if path is blank
|
|
73
|
+
#
|
|
74
|
+
# @example
|
|
75
|
+
# col = Zvec::Collection.open("/tmp/my_col", read_only: true)
|
|
20
76
|
def self.open(path, read_only: false, enable_mmap: true)
|
|
77
|
+
validate_path!(path)
|
|
78
|
+
|
|
21
79
|
opts = Ext::CollectionOptions.new
|
|
22
80
|
opts.read_only = read_only
|
|
23
81
|
opts.enable_mmap = enable_mmap
|
|
@@ -25,81 +83,247 @@ module Zvec
|
|
|
25
83
|
new(ext)
|
|
26
84
|
end
|
|
27
85
|
|
|
86
|
+
# @return [String, nil] the collection name (from schema or explicit)
|
|
87
|
+
def collection_name
|
|
88
|
+
@name || (@schema ? @schema.name : nil)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# @return [String] the on-disk path of the collection
|
|
28
92
|
def path
|
|
29
93
|
@ext.path
|
|
30
94
|
end
|
|
31
95
|
|
|
96
|
+
# @return [Ext::CollectionStats] collection statistics
|
|
97
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
32
98
|
def stats
|
|
99
|
+
ensure_open!
|
|
33
100
|
@ext.stats
|
|
34
101
|
end
|
|
35
102
|
|
|
103
|
+
# @return [Integer] the number of documents in the collection
|
|
104
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
36
105
|
def doc_count
|
|
106
|
+
ensure_open!
|
|
37
107
|
@ext.stats.doc_count
|
|
38
108
|
end
|
|
39
109
|
|
|
110
|
+
# @return [Boolean] true if the collection has been closed
|
|
111
|
+
def closed?
|
|
112
|
+
@closed
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# Close the collection, releasing the underlying C++ resources.
|
|
116
|
+
# The collection must be closed before it can be reopened from the
|
|
117
|
+
# same path.
|
|
118
|
+
#
|
|
119
|
+
# @return [void]
|
|
120
|
+
# @raise [Zvec::CollectionError] if already closed
|
|
121
|
+
#
|
|
122
|
+
# @example
|
|
123
|
+
# col.close
|
|
124
|
+
# col.closed? #=> true
|
|
125
|
+
def close
|
|
126
|
+
raise CollectionError, "#{error_prefix}Collection is already closed" if @closed
|
|
127
|
+
|
|
128
|
+
@monitor.synchronize do
|
|
129
|
+
begin
|
|
130
|
+
@ext.close
|
|
131
|
+
rescue NoMethodError
|
|
132
|
+
# C++ extension may not expose a close method; the GC will handle it.
|
|
133
|
+
end
|
|
134
|
+
@closed = true
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
|
|
40
138
|
# --- DDL ---
|
|
41
139
|
|
|
140
|
+
# Create an index on a field.
|
|
141
|
+
#
|
|
142
|
+
# @param field_name [String, Symbol] the field to index
|
|
143
|
+
# @param index_params [Ext::HnswIndexParams, Ext::FlatIndexParams,
|
|
144
|
+
# Ext::IVFIndexParams, Ext::InvertIndexParams] index configuration
|
|
145
|
+
# @return [self]
|
|
146
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
147
|
+
# @raise [ArgumentError] if field_name is blank
|
|
148
|
+
#
|
|
149
|
+
# @example
|
|
150
|
+
# col.create_index("embedding",
|
|
151
|
+
# Ext::HnswIndexParams.new(Zvec::COSINE, m: 32, ef_construction: 400))
|
|
42
152
|
def create_index(field_name, index_params)
|
|
43
|
-
|
|
153
|
+
ensure_open!
|
|
154
|
+
raise ArgumentError, "field_name must be a non-empty string" if field_name.nil? || field_name.to_s.strip.empty?
|
|
155
|
+
|
|
156
|
+
@monitor.synchronize do
|
|
157
|
+
@ext.create_index(field_name.to_s, index_params)
|
|
158
|
+
end
|
|
44
159
|
self
|
|
45
160
|
end
|
|
46
161
|
|
|
162
|
+
# Drop an index on a field.
|
|
163
|
+
#
|
|
164
|
+
# @param field_name [String, Symbol] the field whose index to drop
|
|
165
|
+
# @return [self]
|
|
166
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
167
|
+
# @raise [ArgumentError] if field_name is blank
|
|
47
168
|
def drop_index(field_name)
|
|
48
|
-
|
|
169
|
+
ensure_open!
|
|
170
|
+
raise ArgumentError, "field_name must be a non-empty string" if field_name.nil? || field_name.to_s.strip.empty?
|
|
171
|
+
|
|
172
|
+
@monitor.synchronize do
|
|
173
|
+
@ext.drop_index(field_name.to_s)
|
|
174
|
+
end
|
|
49
175
|
self
|
|
50
176
|
end
|
|
51
177
|
|
|
178
|
+
# Optimize the collection (compact segments, rebuild indexes).
|
|
179
|
+
#
|
|
180
|
+
# @return [self]
|
|
181
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
52
182
|
def optimize
|
|
53
|
-
|
|
183
|
+
ensure_open!
|
|
184
|
+
@monitor.synchronize { @ext.optimize }
|
|
54
185
|
self
|
|
55
186
|
end
|
|
56
187
|
|
|
188
|
+
# Flush pending writes to disk.
|
|
189
|
+
#
|
|
190
|
+
# @return [self]
|
|
191
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
57
192
|
def flush
|
|
58
|
-
|
|
193
|
+
ensure_open!
|
|
194
|
+
@monitor.synchronize { @ext.flush }
|
|
59
195
|
self
|
|
60
196
|
end
|
|
61
197
|
|
|
198
|
+
# Destroy the collection, removing all data from disk.
|
|
199
|
+
#
|
|
200
|
+
# @return [void]
|
|
201
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
62
202
|
def destroy
|
|
63
|
-
|
|
203
|
+
ensure_open!
|
|
204
|
+
@monitor.synchronize do
|
|
205
|
+
@ext.destroy
|
|
206
|
+
@closed = true
|
|
207
|
+
end
|
|
64
208
|
end
|
|
65
209
|
|
|
66
210
|
# --- DML ---
|
|
67
211
|
|
|
212
|
+
# Insert one or more documents.
|
|
213
|
+
#
|
|
214
|
+
# @param docs [Zvec::Doc, Array<Zvec::Doc>] document(s) to insert
|
|
215
|
+
# @return [Array<Array(Boolean, String)>] write results
|
|
216
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
217
|
+
# @raise [ArgumentError] if docs are not Zvec::Doc instances
|
|
218
|
+
# @raise [Zvec::Error] if any write fails
|
|
219
|
+
#
|
|
220
|
+
# @example
|
|
221
|
+
# doc = Zvec::Doc.new(pk: "1", schema: schema)
|
|
222
|
+
# doc["title"] = "Hello"
|
|
223
|
+
# col.insert(doc)
|
|
68
224
|
def insert(docs)
|
|
225
|
+
ensure_open!
|
|
69
226
|
docs = [docs] unless docs.is_a?(Array)
|
|
227
|
+
validate_docs!(docs)
|
|
70
228
|
ext_docs = docs.map { |d| d.is_a?(Doc) ? d.ext_doc : d }
|
|
71
|
-
results = @ext.insert(ext_docs)
|
|
229
|
+
results = @monitor.synchronize { @ext.insert(ext_docs) }
|
|
72
230
|
check_write_results!(results)
|
|
73
231
|
end
|
|
74
232
|
|
|
233
|
+
# Upsert (insert or update) one or more documents.
|
|
234
|
+
#
|
|
235
|
+
# @param docs [Zvec::Doc, Array<Zvec::Doc>] document(s) to upsert
|
|
236
|
+
# @return [Array<Array(Boolean, String)>] write results
|
|
237
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
238
|
+
# @raise [Zvec::Error] if any write fails
|
|
75
239
|
def upsert(docs)
|
|
240
|
+
ensure_open!
|
|
76
241
|
docs = [docs] unless docs.is_a?(Array)
|
|
242
|
+
validate_docs!(docs)
|
|
77
243
|
ext_docs = docs.map { |d| d.is_a?(Doc) ? d.ext_doc : d }
|
|
78
|
-
results = @ext.upsert(ext_docs)
|
|
244
|
+
results = @monitor.synchronize { @ext.upsert(ext_docs) }
|
|
79
245
|
check_write_results!(results)
|
|
80
246
|
end
|
|
81
247
|
|
|
248
|
+
# Update one or more existing documents.
|
|
249
|
+
#
|
|
250
|
+
# @param docs [Zvec::Doc, Array<Zvec::Doc>] document(s) to update
|
|
251
|
+
# @return [Array<Array(Boolean, String)>] write results
|
|
252
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
253
|
+
# @raise [Zvec::Error] if any write fails
|
|
82
254
|
def update(docs)
|
|
255
|
+
ensure_open!
|
|
83
256
|
docs = [docs] unless docs.is_a?(Array)
|
|
257
|
+
validate_docs!(docs)
|
|
84
258
|
ext_docs = docs.map { |d| d.is_a?(Doc) ? d.ext_doc : d }
|
|
85
|
-
results = @ext.update(ext_docs)
|
|
259
|
+
results = @monitor.synchronize { @ext.update(ext_docs) }
|
|
86
260
|
check_write_results!(results)
|
|
87
261
|
end
|
|
88
262
|
|
|
263
|
+
# Delete documents by primary key(s).
|
|
264
|
+
#
|
|
265
|
+
# @param pks [Array<String>] one or more primary keys to delete
|
|
266
|
+
# @return [Array<Array(Boolean, String)>] write results
|
|
267
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
268
|
+
# @raise [ArgumentError] if no primary keys are provided
|
|
269
|
+
# @raise [Zvec::Error] if any write fails
|
|
270
|
+
#
|
|
271
|
+
# @example
|
|
272
|
+
# col.delete("doc-1", "doc-2")
|
|
89
273
|
def delete(*pks)
|
|
90
|
-
|
|
91
|
-
|
|
274
|
+
ensure_open!
|
|
275
|
+
pks = pks.flatten
|
|
276
|
+
raise ArgumentError, "#{error_prefix}No primary keys provided for delete" if pks.empty?
|
|
277
|
+
pks = pks.map(&:to_s)
|
|
278
|
+
results = @monitor.synchronize { @ext.delete_pks(pks) }
|
|
92
279
|
check_write_results!(results)
|
|
93
280
|
end
|
|
94
281
|
|
|
282
|
+
# Delete documents matching a filter expression.
|
|
283
|
+
#
|
|
284
|
+
# @param filter [String] the filter expression (see {VectorQuery} for syntax)
|
|
285
|
+
# @return [void]
|
|
286
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
287
|
+
# @raise [ArgumentError] if filter is blank
|
|
288
|
+
#
|
|
289
|
+
# @example
|
|
290
|
+
# col.delete_by_filter("year < 2020")
|
|
95
291
|
def delete_by_filter(filter)
|
|
96
|
-
|
|
292
|
+
ensure_open!
|
|
293
|
+
raise ArgumentError, "#{error_prefix}filter must be a non-empty string" if filter.nil? || filter.to_s.strip.empty?
|
|
294
|
+
@monitor.synchronize { @ext.delete_by_filter(filter) }
|
|
97
295
|
end
|
|
98
296
|
|
|
99
297
|
# --- DQL ---
|
|
100
298
|
|
|
299
|
+
# Execute a vector similarity search with full control over parameters.
|
|
300
|
+
#
|
|
301
|
+
# @param field_name [String, Symbol] the vector field to search
|
|
302
|
+
# @param vector [Array<Numeric>] the query vector
|
|
303
|
+
# @param topk [Integer] maximum number of results (default: 10)
|
|
304
|
+
# @param filter [String, nil] optional filter expression
|
|
305
|
+
# @param include_vector [Boolean] include stored vectors in results
|
|
306
|
+
# @param output_fields [Array<String>, nil] specific fields to return
|
|
307
|
+
# @param query_params [Ext::HnswQueryParams, Ext::IVFQueryParams,
|
|
308
|
+
# Ext::FlatQueryParams, nil] search tuning params
|
|
309
|
+
# @return [Array<Zvec::Doc>] result documents with +pk+ and +score+ set
|
|
310
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
311
|
+
# @raise [ArgumentError] if vector is empty or contains non-numeric elements
|
|
312
|
+
# @raise [Zvec::DimensionError] if vector dimension doesn't match schema
|
|
313
|
+
#
|
|
314
|
+
# @example
|
|
315
|
+
# results = col.query(
|
|
316
|
+
# field_name: "embedding",
|
|
317
|
+
# vector: [0.1, 0.2, 0.3, 0.4],
|
|
318
|
+
# topk: 5,
|
|
319
|
+
# filter: "year > 2024"
|
|
320
|
+
# )
|
|
321
|
+
# results.each { |doc| puts "#{doc.pk}: #{doc.score}" }
|
|
101
322
|
def query(field_name:, vector:, topk: 10, filter: nil,
|
|
102
323
|
include_vector: false, output_fields: nil, query_params: nil)
|
|
324
|
+
ensure_open!
|
|
325
|
+
validate_query_vector!(vector, field_name)
|
|
326
|
+
|
|
103
327
|
vq = VectorQuery.new(
|
|
104
328
|
field_name: field_name,
|
|
105
329
|
vector: vector,
|
|
@@ -109,7 +333,7 @@ module Zvec
|
|
|
109
333
|
output_fields: output_fields,
|
|
110
334
|
query_params: query_params
|
|
111
335
|
)
|
|
112
|
-
raw_results = @ext.query(vq.ext_query)
|
|
336
|
+
raw_results = @monitor.synchronize { @ext.query(vq.ext_query) }
|
|
113
337
|
raw_results.map do |h|
|
|
114
338
|
Doc.new(
|
|
115
339
|
pk: h["pk"],
|
|
@@ -119,47 +343,134 @@ module Zvec
|
|
|
119
343
|
end
|
|
120
344
|
end
|
|
121
345
|
|
|
346
|
+
# Fetch documents by primary key(s).
|
|
347
|
+
#
|
|
348
|
+
# @param pks [Array<String>] one or more primary keys
|
|
349
|
+
# @return [Hash{String => Zvec::Doc}] mapping of pk to document
|
|
350
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
351
|
+
# @raise [ArgumentError] if no primary keys provided
|
|
352
|
+
#
|
|
353
|
+
# @example
|
|
354
|
+
# docs = col.fetch("doc-1", "doc-2")
|
|
355
|
+
# docs["doc-1"]["title"] #=> "Hello"
|
|
122
356
|
def fetch(*pks)
|
|
123
|
-
|
|
124
|
-
|
|
357
|
+
ensure_open!
|
|
358
|
+
pks = pks.flatten
|
|
359
|
+
raise ArgumentError, "#{error_prefix}No primary keys provided for fetch" if pks.empty?
|
|
360
|
+
pks = pks.map(&:to_s)
|
|
361
|
+
raw = @monitor.synchronize { @ext.fetch(pks) }
|
|
125
362
|
raw.transform_values do |h|
|
|
126
363
|
Doc.new(pk: nil, fields: h, schema: @schema)
|
|
127
364
|
end
|
|
128
365
|
end
|
|
129
366
|
|
|
130
|
-
# Convenience
|
|
367
|
+
# Convenience method to insert a document from keyword arguments.
|
|
368
|
+
#
|
|
369
|
+
# @param pk [String, Integer] the primary key (required)
|
|
370
|
+
# @param fields [Hash] field name/value pairs
|
|
371
|
+
# @return [Array] write results
|
|
372
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
373
|
+
# @raise [ArgumentError] if pk is nil
|
|
374
|
+
#
|
|
375
|
+
# @example
|
|
376
|
+
# col.add(pk: "1", title: "Hello", embedding: [0.1, 0.2, 0.3, 0.4])
|
|
131
377
|
def add(pk:, **fields)
|
|
378
|
+
ensure_open!
|
|
379
|
+
raise ArgumentError, "#{error_prefix}pk must not be nil" if pk.nil?
|
|
132
380
|
doc = Doc.new(pk: pk, fields: fields, schema: @schema)
|
|
133
381
|
insert(doc)
|
|
134
382
|
end
|
|
135
383
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
#
|
|
384
|
+
# Convenience method for simple vector similarity search.
|
|
385
|
+
#
|
|
386
|
+
# Auto-detects the vector field from the schema if not specified.
|
|
387
|
+
#
|
|
388
|
+
# @param vector [Array<Numeric>] the query vector
|
|
389
|
+
# @param field [String, Symbol, nil] vector field name (auto-detected if nil)
|
|
390
|
+
# @param top_k [Integer] number of results (default: 10)
|
|
391
|
+
# @param filter [String, nil] optional filter expression
|
|
392
|
+
# @return [Array<Zvec::Doc>] result documents
|
|
393
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
394
|
+
# @raise [Zvec::Error] if no vector fields exist in the schema
|
|
395
|
+
#
|
|
396
|
+
# @example
|
|
397
|
+
# results = col.search([0.1, 0.2, 0.3, 0.4], top_k: 5)
|
|
398
|
+
# results.first.pk #=> "doc-1"
|
|
399
|
+
# results.first.score #=> 0.95
|
|
148
400
|
def search(vector, field: nil, top_k: 10, filter: nil)
|
|
401
|
+
ensure_open!
|
|
402
|
+
raise ArgumentError, "#{error_prefix}vector must be a non-empty Array" unless vector.is_a?(Array) && !vector.empty?
|
|
403
|
+
|
|
149
404
|
# Auto-detect vector field if not specified
|
|
150
405
|
fname = field&.to_s
|
|
151
406
|
unless fname
|
|
152
407
|
if @schema
|
|
153
408
|
vfield = @schema.ext_schema.vector_fields.first
|
|
154
|
-
raise
|
|
409
|
+
raise CollectionError, "#{error_prefix}No vector fields in schema" unless vfield
|
|
155
410
|
fname = vfield.name
|
|
156
411
|
else
|
|
157
412
|
vfields = @ext.schema.vector_fields
|
|
158
|
-
raise
|
|
413
|
+
raise CollectionError, "#{error_prefix}No vector fields in schema" if vfields.empty?
|
|
159
414
|
fname = vfields.first.name
|
|
160
415
|
end
|
|
161
416
|
end
|
|
162
417
|
query(field_name: fname, vector: vector, topk: top_k, filter: filter)
|
|
163
418
|
end
|
|
419
|
+
|
|
420
|
+
private
|
|
421
|
+
|
|
422
|
+
def self.validate_path!(path)
|
|
423
|
+
raise ArgumentError, "path must be a non-empty string" if path.nil? || path.to_s.strip.empty?
|
|
424
|
+
end
|
|
425
|
+
|
|
426
|
+
# @raise [Zvec::CollectionError] if the collection is closed
|
|
427
|
+
def ensure_open!
|
|
428
|
+
raise CollectionError, "#{error_prefix}Collection is closed" if @closed
|
|
429
|
+
end
|
|
430
|
+
|
|
431
|
+
def error_prefix
|
|
432
|
+
cn = collection_name
|
|
433
|
+
cn ? "[Collection '#{cn}'] " : ""
|
|
434
|
+
end
|
|
435
|
+
|
|
436
|
+
def validate_docs!(docs)
|
|
437
|
+
docs.each_with_index do |doc, i|
|
|
438
|
+
unless doc.is_a?(Doc) || doc.is_a?(Ext::Doc)
|
|
439
|
+
raise ArgumentError,
|
|
440
|
+
"#{error_prefix}Expected Zvec::Doc at index #{i}, got #{doc.class}"
|
|
441
|
+
end
|
|
442
|
+
end
|
|
443
|
+
end
|
|
444
|
+
|
|
445
|
+
def validate_query_vector!(vector, field_name)
|
|
446
|
+
raise ArgumentError, "#{error_prefix}vector must be a non-empty Array" unless vector.is_a?(Array) && !vector.empty?
|
|
447
|
+
|
|
448
|
+
vector.each_with_index do |v, i|
|
|
449
|
+
unless v.is_a?(Numeric)
|
|
450
|
+
raise ArgumentError,
|
|
451
|
+
"#{error_prefix}Query vector for field '#{field_name}' contains non-numeric element at index #{i}: #{v.inspect}"
|
|
452
|
+
end
|
|
453
|
+
end
|
|
454
|
+
|
|
455
|
+
# Dimension check against schema
|
|
456
|
+
return unless @schema
|
|
457
|
+
|
|
458
|
+
expected_dim = @schema.field_dimension(field_name.to_s)
|
|
459
|
+
return unless expected_dim
|
|
460
|
+
|
|
461
|
+
if vector.size != expected_dim
|
|
462
|
+
raise DimensionError,
|
|
463
|
+
"#{error_prefix}Query vector dimension mismatch for field '#{field_name}': " \
|
|
464
|
+
"expected #{expected_dim}, got #{vector.size}"
|
|
465
|
+
end
|
|
466
|
+
end
|
|
467
|
+
|
|
468
|
+
def check_write_results!(results)
|
|
469
|
+
results.each do |ok, msg|
|
|
470
|
+
error_msg = msg.nil? || msg.empty? ? "Write operation failed" : msg
|
|
471
|
+
raise CollectionError, "#{error_prefix}#{error_msg}" unless ok
|
|
472
|
+
end
|
|
473
|
+
results
|
|
474
|
+
end
|
|
164
475
|
end
|
|
165
476
|
end
|