zvec-ruby 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9d2d9ae7fa3290edf87996ab690b06669c130f7d40c8f551c936fec0fcef7f21
4
- data.tar.gz: 2ae81deaa2c80feecadeb7bf0a78431af054aee5dfc095f96f3069b45f5cab05
3
+ metadata.gz: 968f5cc5abadbda9360603f7fdc1c618b036171cb5f58c0a61e69c22cb511ea0
4
+ data.tar.gz: 9e1113dde47dbcba7d8b9fbdfb247b4f9cd68d6a14e77974541cb660ee45a63e
5
5
  SHA512:
6
- metadata.gz: bda7fd52e20ab379efedace6180aca4925ef4136d0de0a8da068b96fc628ef1d7d98d0fcdad368f646098c06596bc6a81aedd85c41b94d4e2f51457a04b45a2a
7
- data.tar.gz: 52adf87fc675871312342561fc5f1b04fa38519f2787d9e33680e3adecda84d9ab801c977dee97e26a077a6b2f4a6545a57e6110caf562a07a86e547b591195a
6
+ metadata.gz: f722f02332f1f4c95b64e307ca021488c770070953912d077781b9338a41ff124c11e7645b7e50245afcb37b3d289b1c5ea67aac97e65b406a0d893cb03239f8
7
+ data.tar.gz: 555229b48ff063d7f59e1b97de38ad8f06b87befd425d6b05222bebccbb8374394d8b58c275f9db18266a4c5c6cfae860029890c30f194572adb1e0c18be8dd7
data/README.md CHANGED
@@ -15,11 +15,11 @@ Precompiled native gems are available for:
15
15
 
16
16
  ```ruby
17
17
  # Gemfile
18
- gem "zvec"
18
+ gem "zvec-ruby"
19
19
  ```
20
20
 
21
21
  ```bash
22
- gem install zvec
22
+ gem install zvec-ruby
23
23
  ```
24
24
 
25
25
  No compiler or build tools needed — the gem ships with the native extension and all zvec dependencies statically linked.
@@ -36,7 +36,7 @@ cmake .. -DCMAKE_BUILD_TYPE=Release
36
36
  make -j$(nproc)
37
37
 
38
38
  # 2. Install the gem with ZVEC_DIR pointing to the build
39
- ZVEC_DIR=/tmp/zvec gem install zvec
39
+ ZVEC_DIR=/tmp/zvec gem install zvec-ruby
40
40
  ```
41
41
 
42
42
  Or using the included helper script:
data/Rakefile CHANGED
@@ -40,6 +40,9 @@ Rake::TestTask.new(:test_pure) do |t|
40
40
  "test/test_schema.rb",
41
41
  "test/test_doc.rb",
42
42
  "test/test_query.rb",
43
+ "test/test_type_detection.rb",
44
+ "test/test_validation.rb",
45
+ "test/test_edge_cases.rb",
43
46
  "test/test_active_record.rb",
44
47
  ]
45
48
  t.warning = true
@@ -5,7 +5,10 @@ module Zvec
5
5
  module ActiveRecord
6
6
  # Rails concern that adds vector search capabilities to ActiveRecord models.
7
7
  #
8
- # Usage:
8
+ # When included in a model, call +vectorize+ to configure which text field
9
+ # to embed, the vector dimension, and the embedding function.
10
+ #
11
+ # @example Basic usage
9
12
  # class Article < ApplicationRecord
10
13
  # include Zvec::ActiveRecord::Vectorize
11
14
  #
@@ -15,13 +18,30 @@ module Zvec
15
18
  # embed_with: ->(text) { OpenAI.embed(text) }
16
19
  # end
17
20
  #
18
- # Article.vector_search([0.1, 0.2, ...], top_k: 5)
19
- # article.update_embedding!
21
+ # @example Searching
22
+ # Article.vector_search("Ruby programming", top_k: 5)
23
+ # Article.vector_search([0.1, 0.2, ...], top_k: 5, embed: false)
24
+ #
25
+ # @example Instance methods
26
+ # article.zvec_update_embedding! # re-embed and store
27
+ # article.zvec_remove_embedding! # remove from vector store
28
+ # article.zvec_embedding # fetch stored embedding doc
20
29
  #
21
30
  module Vectorize
22
31
  extend ActiveSupport::Concern
23
32
 
24
33
  class_methods do
34
+ # Configure vector search for this model.
35
+ #
36
+ # @param field [String, Symbol] the text field to embed
37
+ # @param dimensions [Integer] the vector dimension
38
+ # @param prefix [String, nil] collection prefix (defaults to table_name)
39
+ # @param embed_with [Proc, nil] a callable that takes text and returns
40
+ # a vector Array (e.g., +-> (text) { OpenAI.embed(text) }+)
41
+ # @param metric [Symbol] similarity metric (+:cosine+, +:l2+, or +:ip+)
42
+ # @param zvec_path [String, nil] path for the zvec collection
43
+ # (defaults to +tmp/zvec/<prefix>+)
44
+ # @return [void]
25
45
  def vectorize(field, dimensions:, prefix: nil, embed_with: nil,
26
46
  metric: :cosine, zvec_path: nil)
27
47
  prefix ||= table_name
@@ -46,7 +66,12 @@ module Zvec
46
66
  end
47
67
  end
48
68
 
69
+ # Instance methods mixed into the model.
49
70
  module InstanceMethods
71
+ # Re-embed the configured text field and store the embedding.
72
+ #
73
+ # @return [void]
74
+ # @raise [Zvec::Error] if no +embed_with+ function is configured
50
75
  def zvec_update_embedding!
51
76
  cfg = self.class.zvec_config
52
77
  text = send(cfg[:field])
@@ -61,19 +86,29 @@ module Zvec
61
86
  store.flush
62
87
  end
63
88
 
89
+ # Remove this record's embedding from the vector store.
90
+ #
91
+ # @return [void]
64
92
  def zvec_remove_embedding!
65
93
  self.class.zvec_store.delete(id.to_s)
66
- rescue => e
94
+ rescue
67
95
  # Silently ignore if document doesn't exist
68
96
  end
69
97
 
98
+ # Fetch this record's stored embedding document.
99
+ #
100
+ # @return [Zvec::Doc, nil] the stored document, or nil if not found
70
101
  def zvec_embedding
71
102
  result = self.class.zvec_store.fetch(id.to_s)
72
103
  result[id.to_s]
73
104
  end
74
105
  end
75
106
 
107
+ # Class methods mixed into the model.
76
108
  module SearchMethods
109
+ # Access the shared {Zvec::RubyLLM::Store} instance for this model.
110
+ #
111
+ # @return [Zvec::RubyLLM::Store]
77
112
  def zvec_store
78
113
  @zvec_store ||= begin
79
114
  cfg = zvec_config
@@ -85,6 +120,18 @@ module Zvec
85
120
  end
86
121
  end
87
122
 
123
+ # Search for records by vector similarity.
124
+ #
125
+ # When +query+ is a String and +embed+ is true, the configured
126
+ # +embed_with+ function is called to convert it to a vector first.
127
+ #
128
+ # @param query [Array<Numeric>, String] query vector or text to embed
129
+ # @param top_k [Integer] maximum number of results (default: 10)
130
+ # @param embed [Boolean] whether to embed a String query (default: true)
131
+ # @return [Array<ActiveRecord::Base>] matching records, each with a
132
+ # +zvec_score+ singleton method returning the similarity score
133
+ # @raise [ArgumentError] if query is a String but no +embed_with+ is
134
+ # configured
88
135
  def vector_search(query, top_k: 10, embed: true)
89
136
  cfg = zvec_config
90
137
 
@@ -1,23 +1,81 @@
1
+ require "monitor"
2
+
1
3
  module Zvec
4
+ # A vector collection backed by the zvec C++ engine. Provides CRUD
5
+ # operations, vector similarity search, and index management.
6
+ #
7
+ # Collections must be explicitly closed via {#close} before they can be
8
+ # reopened from the same path. Use the +closed?+ method to check state.
9
+ #
10
+ # All mutating operations are thread-safe (protected by a Monitor).
11
+ #
12
+ # @example Create, populate, search, and close
13
+ # schema = Zvec::Schema.new("articles") do
14
+ # string "title"
15
+ # vector "embedding", dimension: 4,
16
+ # index: Zvec::Ext::HnswIndexParams.new(Zvec::COSINE)
17
+ # end
18
+ #
19
+ # col = Zvec::Collection.create_and_open("/tmp/articles", schema)
20
+ # col.add(pk: "1", title: "Hello", embedding: [0.1, 0.2, 0.3, 0.4])
21
+ # results = col.search([0.1, 0.2, 0.3, 0.4], top_k: 5)
22
+ # col.close
23
+ #
24
+ # @example Reopen an existing collection
25
+ # col = Zvec::Collection.open("/tmp/articles")
26
+ # puts col.doc_count
27
+ # col.close
28
+ #
2
29
  class Collection
30
+ # @return [Zvec::Schema, nil] the schema, if provided at creation time
3
31
  attr_reader :schema
4
32
 
5
- def initialize(ext_collection, schema: nil)
33
+ # @param ext_collection [Ext::Collection] the underlying C++ collection
34
+ # @param schema [Zvec::Schema, nil] optional schema for type-aware access
35
+ # @param name [String, nil] optional collection name
36
+ def initialize(ext_collection, schema: nil, name: nil)
6
37
  @ext = ext_collection
7
38
  @schema = schema
39
+ @name = name
40
+ @monitor = Monitor.new
41
+ @closed = false
8
42
  end
9
43
 
10
- # Create a new collection and open it.
44
+ # Create a new collection on disk and open it.
45
+ #
46
+ # @param path [String] directory path for the collection data
47
+ # @param schema [Zvec::Schema] the collection schema
48
+ # @param read_only [Boolean] open in read-only mode
49
+ # @param enable_mmap [Boolean] use memory-mapped I/O (default: true)
50
+ # @return [Zvec::Collection]
51
+ # @raise [ArgumentError] if path is blank or schema is not a Zvec::Schema
52
+ #
53
+ # @example
54
+ # col = Zvec::Collection.create_and_open("/tmp/my_col", schema)
11
55
  def self.create_and_open(path, schema, read_only: false, enable_mmap: true)
56
+ validate_path!(path)
57
+ raise ArgumentError, "schema must be a Zvec::Schema" unless schema.is_a?(Schema)
58
+
12
59
  opts = Ext::CollectionOptions.new
13
60
  opts.read_only = read_only
14
61
  opts.enable_mmap = enable_mmap
15
62
  ext = Ext::Collection.create_and_open(path, schema.ext_schema, opts)
16
- new(ext, schema: schema)
63
+ new(ext, schema: schema, name: schema.name)
17
64
  end
18
65
 
19
- # Open an existing collection.
66
+ # Open an existing collection from disk.
67
+ #
68
+ # @param path [String] directory path of an existing collection
69
+ # @param read_only [Boolean] open in read-only mode
70
+ # @param enable_mmap [Boolean] use memory-mapped I/O (default: true)
71
+ # @return [Zvec::Collection]
72
+ # @raise [ArgumentError] if path is blank
73
+ #
74
+ # @example
75
+ # col = Zvec::Collection.open("/tmp/my_col", read_only: true)
20
76
  def self.open(path, read_only: false, enable_mmap: true)
77
+ validate_path!(path)
78
+
21
79
  opts = Ext::CollectionOptions.new
22
80
  opts.read_only = read_only
23
81
  opts.enable_mmap = enable_mmap
@@ -25,81 +83,247 @@ module Zvec
25
83
  new(ext)
26
84
  end
27
85
 
86
+ # @return [String, nil] the collection name (from schema or explicit)
87
+ def collection_name
88
+ @name || (@schema ? @schema.name : nil)
89
+ end
90
+
91
+ # @return [String] the on-disk path of the collection
28
92
  def path
29
93
  @ext.path
30
94
  end
31
95
 
96
+ # @return [Ext::CollectionStats] collection statistics
97
+ # @raise [Zvec::CollectionError] if the collection is closed
32
98
  def stats
99
+ ensure_open!
33
100
  @ext.stats
34
101
  end
35
102
 
103
+ # @return [Integer] the number of documents in the collection
104
+ # @raise [Zvec::CollectionError] if the collection is closed
36
105
  def doc_count
106
+ ensure_open!
37
107
  @ext.stats.doc_count
38
108
  end
39
109
 
110
+ # @return [Boolean] true if the collection has been closed
111
+ def closed?
112
+ @closed
113
+ end
114
+
115
+ # Close the collection, releasing the underlying C++ resources.
116
+ # The collection must be closed before it can be reopened from the
117
+ # same path.
118
+ #
119
+ # @return [void]
120
+ # @raise [Zvec::CollectionError] if already closed
121
+ #
122
+ # @example
123
+ # col.close
124
+ # col.closed? #=> true
125
+ def close
126
+ raise CollectionError, "#{error_prefix}Collection is already closed" if @closed
127
+
128
+ @monitor.synchronize do
129
+ begin
130
+ @ext.close
131
+ rescue NoMethodError
132
+ # C++ extension may not expose a close method; the GC will handle it.
133
+ end
134
+ @closed = true
135
+ end
136
+ end
137
+
40
138
  # --- DDL ---
41
139
 
140
+ # Create an index on a field.
141
+ #
142
+ # @param field_name [String, Symbol] the field to index
143
+ # @param index_params [Ext::HnswIndexParams, Ext::FlatIndexParams,
144
+ # Ext::IVFIndexParams, Ext::InvertIndexParams] index configuration
145
+ # @return [self]
146
+ # @raise [Zvec::CollectionError] if the collection is closed
147
+ # @raise [ArgumentError] if field_name is blank
148
+ #
149
+ # @example
150
+ # col.create_index("embedding",
151
+ # Ext::HnswIndexParams.new(Zvec::COSINE, m: 32, ef_construction: 400))
42
152
  def create_index(field_name, index_params)
43
- @ext.create_index(field_name.to_s, index_params)
153
+ ensure_open!
154
+ raise ArgumentError, "field_name must be a non-empty string" if field_name.nil? || field_name.to_s.strip.empty?
155
+
156
+ @monitor.synchronize do
157
+ @ext.create_index(field_name.to_s, index_params)
158
+ end
44
159
  self
45
160
  end
46
161
 
162
+ # Drop an index on a field.
163
+ #
164
+ # @param field_name [String, Symbol] the field whose index to drop
165
+ # @return [self]
166
+ # @raise [Zvec::CollectionError] if the collection is closed
167
+ # @raise [ArgumentError] if field_name is blank
47
168
  def drop_index(field_name)
48
- @ext.drop_index(field_name.to_s)
169
+ ensure_open!
170
+ raise ArgumentError, "field_name must be a non-empty string" if field_name.nil? || field_name.to_s.strip.empty?
171
+
172
+ @monitor.synchronize do
173
+ @ext.drop_index(field_name.to_s)
174
+ end
49
175
  self
50
176
  end
51
177
 
178
+ # Optimize the collection (compact segments, rebuild indexes).
179
+ #
180
+ # @return [self]
181
+ # @raise [Zvec::CollectionError] if the collection is closed
52
182
  def optimize
53
- @ext.optimize
183
+ ensure_open!
184
+ @monitor.synchronize { @ext.optimize }
54
185
  self
55
186
  end
56
187
 
188
+ # Flush pending writes to disk.
189
+ #
190
+ # @return [self]
191
+ # @raise [Zvec::CollectionError] if the collection is closed
57
192
  def flush
58
- @ext.flush
193
+ ensure_open!
194
+ @monitor.synchronize { @ext.flush }
59
195
  self
60
196
  end
61
197
 
198
+ # Destroy the collection, removing all data from disk.
199
+ #
200
+ # @return [void]
201
+ # @raise [Zvec::CollectionError] if the collection is closed
62
202
  def destroy
63
- @ext.destroy
203
+ ensure_open!
204
+ @monitor.synchronize do
205
+ @ext.destroy
206
+ @closed = true
207
+ end
64
208
  end
65
209
 
66
210
  # --- DML ---
67
211
 
212
+ # Insert one or more documents.
213
+ #
214
+ # @param docs [Zvec::Doc, Array<Zvec::Doc>] document(s) to insert
215
+ # @return [Array<Array(Boolean, String)>] write results
216
+ # @raise [Zvec::CollectionError] if the collection is closed
217
+ # @raise [ArgumentError] if docs are not Zvec::Doc instances
218
+ # @raise [Zvec::Error] if any write fails
219
+ #
220
+ # @example
221
+ # doc = Zvec::Doc.new(pk: "1", schema: schema)
222
+ # doc["title"] = "Hello"
223
+ # col.insert(doc)
68
224
  def insert(docs)
225
+ ensure_open!
69
226
  docs = [docs] unless docs.is_a?(Array)
227
+ validate_docs!(docs)
70
228
  ext_docs = docs.map { |d| d.is_a?(Doc) ? d.ext_doc : d }
71
- results = @ext.insert(ext_docs)
229
+ results = @monitor.synchronize { @ext.insert(ext_docs) }
72
230
  check_write_results!(results)
73
231
  end
74
232
 
233
+ # Upsert (insert or update) one or more documents.
234
+ #
235
+ # @param docs [Zvec::Doc, Array<Zvec::Doc>] document(s) to upsert
236
+ # @return [Array<Array(Boolean, String)>] write results
237
+ # @raise [Zvec::CollectionError] if the collection is closed
238
+ # @raise [Zvec::Error] if any write fails
75
239
  def upsert(docs)
240
+ ensure_open!
76
241
  docs = [docs] unless docs.is_a?(Array)
242
+ validate_docs!(docs)
77
243
  ext_docs = docs.map { |d| d.is_a?(Doc) ? d.ext_doc : d }
78
- results = @ext.upsert(ext_docs)
244
+ results = @monitor.synchronize { @ext.upsert(ext_docs) }
79
245
  check_write_results!(results)
80
246
  end
81
247
 
248
+ # Update one or more existing documents.
249
+ #
250
+ # @param docs [Zvec::Doc, Array<Zvec::Doc>] document(s) to update
251
+ # @return [Array<Array(Boolean, String)>] write results
252
+ # @raise [Zvec::CollectionError] if the collection is closed
253
+ # @raise [Zvec::Error] if any write fails
82
254
  def update(docs)
255
+ ensure_open!
83
256
  docs = [docs] unless docs.is_a?(Array)
257
+ validate_docs!(docs)
84
258
  ext_docs = docs.map { |d| d.is_a?(Doc) ? d.ext_doc : d }
85
- results = @ext.update(ext_docs)
259
+ results = @monitor.synchronize { @ext.update(ext_docs) }
86
260
  check_write_results!(results)
87
261
  end
88
262
 
263
+ # Delete documents by primary key(s).
264
+ #
265
+ # @param pks [Array<String>] one or more primary keys to delete
266
+ # @return [Array<Array(Boolean, String)>] write results
267
+ # @raise [Zvec::CollectionError] if the collection is closed
268
+ # @raise [ArgumentError] if no primary keys are provided
269
+ # @raise [Zvec::Error] if any write fails
270
+ #
271
+ # @example
272
+ # col.delete("doc-1", "doc-2")
89
273
  def delete(*pks)
90
- pks = pks.flatten.map(&:to_s)
91
- results = @ext.delete_pks(pks)
274
+ ensure_open!
275
+ pks = pks.flatten
276
+ raise ArgumentError, "#{error_prefix}No primary keys provided for delete" if pks.empty?
277
+ pks = pks.map(&:to_s)
278
+ results = @monitor.synchronize { @ext.delete_pks(pks) }
92
279
  check_write_results!(results)
93
280
  end
94
281
 
282
+ # Delete documents matching a filter expression.
283
+ #
284
+ # @param filter [String] the filter expression (see {VectorQuery} for syntax)
285
+ # @return [void]
286
+ # @raise [Zvec::CollectionError] if the collection is closed
287
+ # @raise [ArgumentError] if filter is blank
288
+ #
289
+ # @example
290
+ # col.delete_by_filter("year < 2020")
95
291
  def delete_by_filter(filter)
96
- @ext.delete_by_filter(filter)
292
+ ensure_open!
293
+ raise ArgumentError, "#{error_prefix}filter must be a non-empty string" if filter.nil? || filter.to_s.strip.empty?
294
+ @monitor.synchronize { @ext.delete_by_filter(filter) }
97
295
  end
98
296
 
99
297
  # --- DQL ---
100
298
 
299
+ # Execute a vector similarity search with full control over parameters.
300
+ #
301
+ # @param field_name [String, Symbol] the vector field to search
302
+ # @param vector [Array<Numeric>] the query vector
303
+ # @param topk [Integer] maximum number of results (default: 10)
304
+ # @param filter [String, nil] optional filter expression
305
+ # @param include_vector [Boolean] include stored vectors in results
306
+ # @param output_fields [Array<String>, nil] specific fields to return
307
+ # @param query_params [Ext::HnswQueryParams, Ext::IVFQueryParams,
308
+ # Ext::FlatQueryParams, nil] search tuning params
309
+ # @return [Array<Zvec::Doc>] result documents with +pk+ and +score+ set
310
+ # @raise [Zvec::CollectionError] if the collection is closed
311
+ # @raise [ArgumentError] if vector is empty or contains non-numeric elements
312
+ # @raise [Zvec::DimensionError] if vector dimension doesn't match schema
313
+ #
314
+ # @example
315
+ # results = col.query(
316
+ # field_name: "embedding",
317
+ # vector: [0.1, 0.2, 0.3, 0.4],
318
+ # topk: 5,
319
+ # filter: "year > 2024"
320
+ # )
321
+ # results.each { |doc| puts "#{doc.pk}: #{doc.score}" }
101
322
  def query(field_name:, vector:, topk: 10, filter: nil,
102
323
  include_vector: false, output_fields: nil, query_params: nil)
324
+ ensure_open!
325
+ validate_query_vector!(vector, field_name)
326
+
103
327
  vq = VectorQuery.new(
104
328
  field_name: field_name,
105
329
  vector: vector,
@@ -109,7 +333,7 @@ module Zvec
109
333
  output_fields: output_fields,
110
334
  query_params: query_params
111
335
  )
112
- raw_results = @ext.query(vq.ext_query)
336
+ raw_results = @monitor.synchronize { @ext.query(vq.ext_query) }
113
337
  raw_results.map do |h|
114
338
  Doc.new(
115
339
  pk: h["pk"],
@@ -119,47 +343,134 @@ module Zvec
119
343
  end
120
344
  end
121
345
 
346
+ # Fetch documents by primary key(s).
347
+ #
348
+ # @param pks [Array<String>] one or more primary keys
349
+ # @return [Hash{String => Zvec::Doc}] mapping of pk to document
350
+ # @raise [Zvec::CollectionError] if the collection is closed
351
+ # @raise [ArgumentError] if no primary keys provided
352
+ #
353
+ # @example
354
+ # docs = col.fetch("doc-1", "doc-2")
355
+ # docs["doc-1"]["title"] #=> "Hello"
122
356
  def fetch(*pks)
123
- pks = pks.flatten.map(&:to_s)
124
- raw = @ext.fetch(pks)
357
+ ensure_open!
358
+ pks = pks.flatten
359
+ raise ArgumentError, "#{error_prefix}No primary keys provided for fetch" if pks.empty?
360
+ pks = pks.map(&:to_s)
361
+ raw = @monitor.synchronize { @ext.fetch(pks) }
125
362
  raw.transform_values do |h|
126
363
  Doc.new(pk: nil, fields: h, schema: @schema)
127
364
  end
128
365
  end
129
366
 
130
- # Convenience: insert a hash directly
367
+ # Convenience method to insert a document from keyword arguments.
368
+ #
369
+ # @param pk [String, Integer] the primary key (required)
370
+ # @param fields [Hash] field name/value pairs
371
+ # @return [Array] write results
372
+ # @raise [Zvec::CollectionError] if the collection is closed
373
+ # @raise [ArgumentError] if pk is nil
374
+ #
375
+ # @example
376
+ # col.add(pk: "1", title: "Hello", embedding: [0.1, 0.2, 0.3, 0.4])
131
377
  def add(pk:, **fields)
378
+ ensure_open!
379
+ raise ArgumentError, "#{error_prefix}pk must not be nil" if pk.nil?
132
380
  doc = Doc.new(pk: pk, fields: fields, schema: @schema)
133
381
  insert(doc)
134
382
  end
135
383
 
136
- private
137
-
138
- def check_write_results!(results)
139
- results.each do |ok, msg|
140
- raise Error, (msg.empty? ? "Write operation failed" : msg) unless ok
141
- end
142
- results
143
- end
144
-
145
- public
146
-
147
- # Convenience: search with simpler API
384
+ # Convenience method for simple vector similarity search.
385
+ #
386
+ # Auto-detects the vector field from the schema if not specified.
387
+ #
388
+ # @param vector [Array<Numeric>] the query vector
389
+ # @param field [String, Symbol, nil] vector field name (auto-detected if nil)
390
+ # @param top_k [Integer] number of results (default: 10)
391
+ # @param filter [String, nil] optional filter expression
392
+ # @return [Array<Zvec::Doc>] result documents
393
+ # @raise [Zvec::CollectionError] if the collection is closed
394
+ # @raise [Zvec::Error] if no vector fields exist in the schema
395
+ #
396
+ # @example
397
+ # results = col.search([0.1, 0.2, 0.3, 0.4], top_k: 5)
398
+ # results.first.pk #=> "doc-1"
399
+ # results.first.score #=> 0.95
148
400
  def search(vector, field: nil, top_k: 10, filter: nil)
401
+ ensure_open!
402
+ raise ArgumentError, "#{error_prefix}vector must be a non-empty Array" unless vector.is_a?(Array) && !vector.empty?
403
+
149
404
  # Auto-detect vector field if not specified
150
405
  fname = field&.to_s
151
406
  unless fname
152
407
  if @schema
153
408
  vfield = @schema.ext_schema.vector_fields.first
154
- raise Error, "No vector fields in schema" unless vfield
409
+ raise CollectionError, "#{error_prefix}No vector fields in schema" unless vfield
155
410
  fname = vfield.name
156
411
  else
157
412
  vfields = @ext.schema.vector_fields
158
- raise Error, "No vector fields in schema" if vfields.empty?
413
+ raise CollectionError, "#{error_prefix}No vector fields in schema" if vfields.empty?
159
414
  fname = vfields.first.name
160
415
  end
161
416
  end
162
417
  query(field_name: fname, vector: vector, topk: top_k, filter: filter)
163
418
  end
419
+
420
+ private
421
+
422
+ def self.validate_path!(path)
423
+ raise ArgumentError, "path must be a non-empty string" if path.nil? || path.to_s.strip.empty?
424
+ end
425
+
426
+ # @raise [Zvec::CollectionError] if the collection is closed
427
+ def ensure_open!
428
+ raise CollectionError, "#{error_prefix}Collection is closed" if @closed
429
+ end
430
+
431
+ def error_prefix
432
+ cn = collection_name
433
+ cn ? "[Collection '#{cn}'] " : ""
434
+ end
435
+
436
+ def validate_docs!(docs)
437
+ docs.each_with_index do |doc, i|
438
+ unless doc.is_a?(Doc) || doc.is_a?(Ext::Doc)
439
+ raise ArgumentError,
440
+ "#{error_prefix}Expected Zvec::Doc at index #{i}, got #{doc.class}"
441
+ end
442
+ end
443
+ end
444
+
445
+ def validate_query_vector!(vector, field_name)
446
+ raise ArgumentError, "#{error_prefix}vector must be a non-empty Array" unless vector.is_a?(Array) && !vector.empty?
447
+
448
+ vector.each_with_index do |v, i|
449
+ unless v.is_a?(Numeric)
450
+ raise ArgumentError,
451
+ "#{error_prefix}Query vector for field '#{field_name}' contains non-numeric element at index #{i}: #{v.inspect}"
452
+ end
453
+ end
454
+
455
+ # Dimension check against schema
456
+ return unless @schema
457
+
458
+ expected_dim = @schema.field_dimension(field_name.to_s)
459
+ return unless expected_dim
460
+
461
+ if vector.size != expected_dim
462
+ raise DimensionError,
463
+ "#{error_prefix}Query vector dimension mismatch for field '#{field_name}': " \
464
+ "expected #{expected_dim}, got #{vector.size}"
465
+ end
466
+ end
467
+
468
+ def check_write_results!(results)
469
+ results.each do |ok, msg|
470
+ error_msg = msg.nil? || msg.empty? ? "Write operation failed" : msg
471
+ raise CollectionError, "#{error_prefix}#{error_msg}" unless ok
472
+ end
473
+ results
474
+ end
164
475
  end
165
476
  end