zvec-ruby 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,380 @@
1
+ require_relative "test_helper"
2
+
3
# Edge-case tests for Zvec::Doc, Zvec::Schema, Zvec::VectorQuery,
# Zvec::Collection and Zvec::DataTypes: empty and oversized vectors, unusual
# field names, Unicode content, the error hierarchy, the collection
# close/destroy lifecycle, value coercion and type detection.
class TestEdgeCases < Minitest::Test
  include TempDirHelper

  # --- Empty vectors ---

  def test_empty_vector_in_doc_without_schema
    doc = Zvec::Doc.new
    doc["vec"] = []
    stored = doc["vec"]
    # Both nil and [] are accepted read-backs for an empty vector.
    assert(stored.nil? || stored == [], "expected nil or [], got #{stored.inspect}")
  end

  def test_empty_vector_in_doc_with_schema
    schema = Zvec::Schema.new("test") { vector "embedding", dimension: 4 }
    doc = Zvec::Doc.new(schema: schema)
    # Empty vectors should be accepted (no dimension to check)
    doc["embedding"] = []
    # Reaching this line means the assignment did not raise; record that
    # explicitly so the test does not finish with zero assertions.
    pass
  end

  # --- Single-element vectors ---

  def test_single_element_vector
    doc = Zvec::Doc.new
    doc["vec"] = [42.0]
    stored = doc["vec"]
    assert_kind_of Array, stored
    assert_equal 1, stored.size
    assert_in_delta 42.0, stored[0], 0.001
  end

  def test_single_element_vector_with_schema
    schema = Zvec::Schema.new("test") { vector "embedding", dimension: 1 }
    doc = Zvec::Doc.new(schema: schema)
    doc["embedding"] = [3.14]
    stored = doc["embedding"]
    assert_equal 1, stored.size
    assert_in_delta 3.14, stored[0], 0.001
  end

  def test_single_element_vector_query
    query = Zvec::VectorQuery.new(field_name: "vec", vector: [1.0])
    assert_kind_of Zvec::VectorQuery, query
  end

  # --- Very large dimension vectors ---

  def test_large_dimension_schema
    schema = Zvec::Schema.new("test") { vector "big_vec", dimension: 10_000 }
    assert schema.has_field?("big_vec")
    assert_equal 10_000, schema.field_dimension("big_vec")
  end

  def test_large_dimension_doc
    schema = Zvec::Schema.new("test") { vector "big_vec", dimension: 10_000 }
    doc = Zvec::Doc.new(schema: schema)
    doc["big_vec"] = Array.new(10_000) { |i| i.to_f / 10_000 }
    stored = doc["big_vec"]
    assert_equal 10_000, stored.size
    assert_in_delta 0.0, stored[0], 0.001
    assert_in_delta 0.9999, stored[9_999], 0.001
  end

  def test_large_dimension_mismatch
    schema = Zvec::Schema.new("test") { vector "big_vec", dimension: 10_000 }
    doc = Zvec::Doc.new(schema: schema)
    small_vector = [1.0, 2.0, 3.0]
    assert_raises(Zvec::DimensionError) { doc["big_vec"] = small_vector }
  end

  def test_large_dimension_query
    big_vector = Array.new(10_000) { 0.1 }
    query = Zvec::VectorQuery.new(field_name: "big_vec", vector: big_vector)
    assert_kind_of Zvec::VectorQuery, query
  end

  # --- Special characters in field names ---

  def test_special_chars_in_field_name
    doc = Zvec::Doc.new
    doc["field-with-dashes"] = "dash"
    assert_equal "dash", doc["field-with-dashes"]
  end

  def test_dot_in_field_name
    doc = Zvec::Doc.new
    doc["meta.title"] = "dotted"
    assert_equal "dotted", doc["meta.title"]
  end

  def test_underscore_field_name
    doc = Zvec::Doc.new
    doc["_private_field"] = "private"
    assert_equal "private", doc["_private_field"]
  end

  def test_numeric_string_field_name
    doc = Zvec::Doc.new
    doc["123"] = "numeric name"
    assert_equal "numeric name", doc["123"]
  end

  def test_space_in_field_name
    doc = Zvec::Doc.new
    doc["field name"] = "spaced"
    assert_equal "spaced", doc["field name"]
  end

  def test_special_chars_in_schema_field_name
    schema = Zvec::Schema.new("test") do
      string "field-with-dashes"
      string "meta.title"
      string "_private"
    end
    assert schema.has_field?("field-with-dashes")
    assert schema.has_field?("meta.title")
    assert schema.has_field?("_private")
  end

  # --- Unicode field names ---

  def test_unicode_field_name
    doc = Zvec::Doc.new
    doc["\u30BF\u30A4\u30C8\u30EB"] = "Japanese title field"
    assert_equal "Japanese title field", doc["\u30BF\u30A4\u30C8\u30EB"]
  end

  def test_emoji_field_name
    doc = Zvec::Doc.new
    doc["\u{1F680}rocket"] = "launched"
    assert_equal "launched", doc["\u{1F680}rocket"]
  end

  def test_chinese_field_name
    doc = Zvec::Doc.new
    doc["\u6807\u9898"] = "Chinese title"
    assert_equal "Chinese title", doc["\u6807\u9898"]
  end

  def test_unicode_schema_field_name
    schema = Zvec::Schema.new("test") { string "\u30BF\u30A4\u30C8\u30EB" }
    assert schema.has_field?("\u30BF\u30A4\u30C8\u30EB")
    assert_equal Zvec::DataTypes::STRING, schema.field_type("\u30BF\u30A4\u30C8\u30EB")
  end

  # --- Unicode values ---

  def test_unicode_string_value
    doc = Zvec::Doc.new
    doc["title"] = "\u3053\u3093\u306B\u3061\u306F\u4E16\u754C"
    assert_equal "\u3053\u3093\u306B\u3061\u306F\u4E16\u754C", doc["title"]
  end

  def test_unicode_in_string_array
    doc = Zvec::Doc.new
    doc["tags"] = ["\u30BF\u30B0\u4E00", "\u30BF\u30B0\u4E8C"]
    assert_equal ["\u30BF\u30B0\u4E00", "\u30BF\u30B0\u4E8C"], doc["tags"]
  end

  # --- Custom exception classes ---

  def test_schema_error_is_zvec_error
    assert_operator Zvec::SchemaError, :<, Zvec::Error
  end

  def test_query_error_is_zvec_error
    assert_operator Zvec::QueryError, :<, Zvec::Error
  end

  def test_collection_error_is_zvec_error
    assert_operator Zvec::CollectionError, :<, Zvec::Error
  end

  def test_dimension_error_is_zvec_error
    assert_operator Zvec::DimensionError, :<, Zvec::Error
  end

  def test_schema_error_raised_for_nil_name
    assert_raises(Zvec::SchemaError) { Zvec::Schema.new(nil) }
  end

  def test_schema_error_raised_for_empty_field
    schema = Zvec::Schema.new("test")
    assert_raises(Zvec::SchemaError) { schema.field("", Zvec::DataTypes::STRING) }
  end

  def test_query_error_raised_for_bad_vector
    assert_raises(Zvec::QueryError) do
      Zvec::VectorQuery.new(field_name: "vec", vector: [])
    end
  end

  def test_query_error_raised_for_nil_field
    assert_raises(Zvec::QueryError) do
      Zvec::VectorQuery.new(field_name: nil, vector: [1.0])
    end
  end

  # --- Collection close/reopen lifecycle ---

  def test_collection_closed_flag
    with_collection do |col|
      refute_predicate col, :closed?
      col.close
      assert_predicate col, :closed?
    end
  end

  def test_collection_close_prevents_operations
    with_collection do |col|
      col.close

      # Argument-free operations, checked in the same order as before; the
      # message names the operation so a failure is easy to attribute.
      %i[doc_count stats flush optimize].each do |operation|
        assert_raises(Zvec::CollectionError, "#{operation} should raise once closed") do
          col.public_send(operation)
        end
      end
      assert_raises(Zvec::CollectionError) do
        col.add(pk: "x", title: "t", embedding: [1.0, 2.0, 3.0, 4.0])
      end
      assert_raises(Zvec::CollectionError) do
        col.search([1.0, 2.0, 3.0, 4.0])
      end
    end
  end

  def test_collection_double_close_raises
    with_collection do |col|
      col.close
      assert_raises(Zvec::CollectionError) { col.close }
    end
  end

  def test_collection_destroy_marks_closed
    with_collection do |col|
      col.destroy
      assert_predicate col, :closed?
    end
  end

  # --- Edge cases in coercion ---

  def test_coerce_empty_string_to_integer_raises
    assert_raises(ArgumentError) do
      Zvec::DataTypes.coerce_value("", Zvec::Ext::DataType::INT64)
    end
  end

  def test_coerce_empty_string_to_float_raises
    assert_raises(ArgumentError) do
      Zvec::DataTypes.coerce_value("", Zvec::Ext::DataType::DOUBLE)
    end
  end

  def test_coerce_very_large_integer
    coerced = Zvec::DataTypes.coerce_value(2**62, Zvec::Ext::DataType::INT64)
    assert_equal 2**62, coerced
  end

  def test_coerce_negative_float_to_integer
    coerced = Zvec::DataTypes.coerce_value(-3.7, Zvec::Ext::DataType::INT64)
    assert_equal(-3, coerced)
  end

  # --- Edge cases in type detection ---

  def test_detect_type_mixed_array_starts_with_float
    # Mixed arrays: type detected from first non-nil element
    assert_equal Zvec::Ext::DataType::VECTOR_FP32, Zvec::DataTypes.detect_type([1.0, "mixed"])
  end

  def test_detect_type_symbol
    # Symbols are not a recognized type, returns nil
    assert_nil Zvec::DataTypes.detect_type(:symbol)
  end

  def test_detect_type_hash
    # Hashes are not a recognized type, returns nil
    assert_nil Zvec::DataTypes.detect_type({ a: 1 })
  end

  # --- Collection with special character schema names ---

  def test_schema_with_unicode_name
    schema = Zvec::Schema.new("\u30C6\u30B9\u30C8") do
      string "title"
      vector "embedding", dimension: 4
    end
    assert_equal "\u30C6\u30B9\u30C8", schema.name
  end

  def test_schema_with_hyphenated_name
    schema = Zvec::Schema.new("my-collection") { string "title" }
    assert_equal "my-collection", schema.name
  end

  # --- Doc with many fields ---

  def test_doc_with_many_fields
    doc = Zvec::Doc.new
    100.times { |i| doc["field_#{i}"] = "value_#{i}" }
    assert_equal 100, doc.field_names.size
    assert_equal "value_0", doc["field_0"]
    assert_equal "value_99", doc["field_99"]
  end

  # --- Vector with all zeros ---

  def test_zero_vector
    doc = Zvec::Doc.new
    doc["vec"] = [0.0, 0.0, 0.0, 0.0]
    assert_equal [0.0, 0.0, 0.0, 0.0], doc["vec"]
  end

  # --- Vector with negative values ---

  def test_negative_vector
    doc = Zvec::Doc.new
    doc["vec"] = [-1.0, -2.5, -0.001, 0.0]
    stored = doc["vec"]
    assert_in_delta(-1.0, stored[0], 0.001)
    assert_in_delta(-2.5, stored[1], 0.001)
    assert_in_delta(-0.001, stored[2], 0.001)
  end

  # --- Query with very large topk ---

  def test_query_with_large_topk
    query = Zvec::VectorQuery.new(field_name: "vec", vector: [1.0], topk: 100_000)
    assert_equal 100_000, query.ext_query.topk
  end

  # --- Doc pk edge cases ---

  def test_doc_empty_string_pk
    doc = Zvec::Doc.new(pk: "")
    assert_equal "", doc.pk
  end

  def test_doc_very_long_pk
    long_pk = "x" * 10_000
    doc = Zvec::Doc.new(pk: long_pk)
    assert_equal long_pk, doc.pk
  end

  private

  # Schema shared by the collection lifecycle tests: one string field and one
  # 4-dimensional vector field.
  def make_schema
    Zvec::Schema.new("edge_test") do
      string "title"
      vector "embedding", dimension: 4
    end
  end

  # Creates a collection under a fresh temp dir (via TempDirHelper) and yields
  # it to the block; the collection only needs to live for the block's duration.
  def with_collection
    with_temp_dir("zvec_edge") do |dir|
      yield Zvec::Collection.create_and_open("#{dir}/col", make_schema)
    end
  end
end
data/test/test_helper.rb CHANGED
@@ -14,6 +14,10 @@ rescue LoadError
14
14
  # Minimal stubs so pure-Ruby logic can be tested without the compiled extension.
15
15
  module Zvec
16
16
  class Error < StandardError; end
17
+ class DimensionError < Error; end
18
+ class SchemaError < Error; end
19
+ class QueryError < Error; end
20
+ class CollectionError < Error; end
17
21
 
18
22
  module Ext
19
23
  # Stub enums as simple modules with constants
@@ -68,9 +72,9 @@ rescue LoadError
68
72
  def get_int64(f); @fields[f].is_a?(Integer) ? @fields[f] : nil; end
69
73
  def get_float(f); @fields[f].is_a?(Float) ? @fields[f] : nil; end
70
74
  def get_double(f); @fields[f].is_a?(Float) ? @fields[f] : nil; end
71
- def get_float_vector(f); @fields[f].is_a?(Array) && @fields[f].first.is_a?(Float) ? @fields[f] : nil; end
75
# Returns the Array stored under +f+ when it is empty or its first element is
# a Float; nil for anything else. Empty arrays are reported as float vectors.
def get_float_vector(f)
  value = @fields[f]
  return nil unless value.is_a?(Array)

  value if value.empty? || value.first.is_a?(Float)
end
72
76
  def get_double_vector(f); get_float_vector(f); end
73
- def get_string_array(f); @fields[f].is_a?(Array) && @fields[f].first.is_a?(String) ? @fields[f] : nil; end
77
# Returns the Array stored under +f+ when it is non-empty and its first
# element is a String; nil otherwise (including for empty arrays).
def get_string_array(f)
  value = @fields[f]
  return nil unless value.is_a?(Array)

  value if !value.empty? && value.first.is_a?(String)
end
74
78
  def to_s; "[pk:#{@pk}, score:#{@score}, fields:#{@fields.size}]"; end
75
79
  end
76
80
 
@@ -90,7 +94,6 @@ rescue LoadError
90
94
  def has_field?(n); @fields.key?(n); end
91
95
  def field_names; @fields.keys; end
92
96
  def all_field_names; @fields.keys; end
93
- alias_method :field_names, :all_field_names
94
97
  def fields; @fields.values; end
95
98
  def vector_fields; @fields.values.select(&:vector_field?); end
96
99
  def forward_fields; @fields.values.reject(&:vector_field?); end
@@ -118,6 +121,15 @@ rescue LoadError
118
121
  def initialize(ef: 200); @ef = ef; end
119
122
  end
120
123
 
124
# Stub of the IVF query-parameter object: it only records +nprobe+.
class IVFQueryParams
  attr_reader :nprobe

  # @param nprobe [Object] stored verbatim; defaults to 10
  def initialize(nprobe: 10)
    @nprobe = nprobe
  end
end
128
+
129
# Stub of the flat-index query-parameter object; it carries no settings.
class FlatQueryParams
  # Takes no arguments, so callers cannot pass options by mistake.
  def initialize
  end
end
132
+
121
133
  class CollectionOptions
122
134
  attr_accessor :read_only, :enable_mmap, :max_buffer_size
123
135
  def initialize; @read_only = false; @enable_mmap = true; @max_buffer_size = 64 * 1024 * 1024; end
@@ -131,6 +143,9 @@ rescue LoadError
131
143
  def set_query_vector(arr); @query_vector = arr; end
132
144
  def set_output_fields(f); @output_fields = f; end
133
145
  def set_query_params(p); @query_params = p; end
146
# HNSW-specific setter; the stub stores +p+ in the same @query_params slot
# used by the generic setter and returns it.
def set_hnsw_query_params(p)
  @query_params = p
end
147
# IVF-specific setter; the stub stores +p+ in the same @query_params slot
# used by the generic setter and returns it.
def set_ivf_query_params(p)
  @query_params = p
end
148
# Flat-index setter; the stub stores +p+ in the same @query_params slot used
# by the generic setter and returns it.
def set_flat_query_params(p)
  @query_params = p
end
134
149
  alias_method :include_vector?, :include_vector
135
150
  end
136
151
 
@@ -147,6 +162,89 @@ rescue LoadError
147
162
  def message; @msg; end
148
163
  def to_s; @ok ? "OK" : @msg; end
149
164
  end
165
+
166
# Stub Collection for pure-Ruby testing of the wrapper layer.
#
# Documents live in an in-memory Hash keyed by primary key. Write operations
# always report success as one [true, ""] pair per input, and index/flush/
# optimize/destroy calls are no-ops.
class Collection
  # Typed getters tried, in this order, when #query flattens a stored doc.
  FIELD_GETTERS = %i[
    get_string get_int64 get_float get_double get_bool
    get_float_vector get_string_array
  ].freeze

  attr_reader :path_value, :schema_value, :docs

  def initialize
    @docs = {}
    @path_value = ""
    @schema_value = nil
    @closed = false
  end

  # Builds a stub collection remembering +path+ and +ext_schema+.
  # +opts+ is accepted for signature parity and ignored.
  def self.create_and_open(path, ext_schema, opts)
    new.tap do |collection|
      collection.instance_variable_set(:@path_value, path)
      collection.instance_variable_set(:@schema_value, ext_schema)
    end
  end

  # Builds a stub collection remembering +path+ only (no schema).
  # +opts+ is accepted for signature parity and ignored.
  def self.open(path, opts)
    new.tap { |collection| collection.instance_variable_set(:@path_value, path) }
  end

  def path
    @path_value
  end

  def schema
    @schema_value
  end

  def closed?
    @closed
  end

  def close
    @closed = true
  end

  # Returns a fresh CollectionStats whose doc_count is the number of stored docs.
  def stats
    CollectionStats.new.tap { |snapshot| snapshot.doc_count = @docs.size }
  end

  # Stores every doc under its pk, replacing any existing entry.
  def insert(ext_docs)
    store_all(ext_docs)
  end

  # Same storage behaviour as #insert in this stub.
  def upsert(ext_docs)
    store_all(ext_docs)
  end

  # Replaces only docs whose pk is already stored; still reports success for
  # every input doc.
  def update(ext_docs)
    ext_docs.each { |doc| @docs[doc.pk] = doc if @docs.key?(doc.pk) }
    success_for(ext_docs)
  end

  # Removes the given primary keys; unknown keys are ignored.
  def delete_pks(pks)
    pks.each { |pk| @docs.delete(pk) }
    success_for(pks)
  end

  def delete_by_filter(filter)
    # no-op in stub
  end

  # Returns up to vq.topk stored docs as Hashes with "pk", a fixed "score" of
  # 0.95, and one entry per doc field.
  def query(vq)
    @docs.values.first(vq.topk).map do |doc|
      row = { "pk" => doc.pk, "score" => 0.95 }
      doc.field_names.each { |name| row[name] = field_value(doc, name) }
      row
    end
  end

  # Returns a Hash of pk => doc for the requested keys that exist.
  def fetch(pks)
    pks.each_with_object({}) do |pk, found|
      found[pk] = @docs[pk] if @docs.key?(pk)
    end
  end

  def create_index(field_name, index_params); end
  def drop_index(field_name); end
  def optimize; end
  def flush; end
  def destroy; end

  private

  # Shared body of #insert and #upsert.
  def store_all(ext_docs)
    ext_docs.each { |doc| @docs[doc.pk] = doc }
    success_for(ext_docs)
  end

  # One [ok, message] success pair per input item.
  def success_for(items)
    items.map { |_| [true, ""] }
  end

  # First non-nil result among the typed getters. Comparing against nil
  # (rather than chaining with ||) keeps a stored `false` boolean intact
  # instead of letting it fall through to the later getters and become nil.
  def field_value(doc, name)
    FIELD_GETTERS.each do |getter|
      value = doc.public_send(getter, name)
      return value unless value.nil?
    end
    nil
  end
end
150
248
  end
151
249
 
152
250
  require_relative "../lib/zvec/version"
@@ -154,6 +252,7 @@ rescue LoadError
154
252
  require_relative "../lib/zvec/schema"
155
253
  require_relative "../lib/zvec/doc"
156
254
  require_relative "../lib/zvec/query"
255
+ require_relative "../lib/zvec/collection"
157
256
 
158
257
  include DataTypes
159
258
  end