zvec-ruby 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +3 -0
- data/lib/zvec/active_record.rb +50 -3
- data/lib/zvec/collection.rb +245 -8
- data/lib/zvec/data_types.rb +123 -1
- data/lib/zvec/doc.rb +89 -1
- data/lib/zvec/query.rb +102 -5
- data/lib/zvec/ruby_llm.rb +79 -7
- data/lib/zvec/schema.rb +130 -3
- data/lib/zvec/version.rb +1 -1
- data/lib/zvec.rb +12 -0
- data/test/test_edge_cases.rb +380 -0
- data/test/test_helper.rb +9 -0
- data/test/test_validation.rb +11 -11
- data/test/test_version.rb +1 -1
- metadata +2 -1
data/lib/zvec/schema.rb
CHANGED
|
@@ -1,9 +1,50 @@
|
|
|
1
1
|
module Zvec
|
|
2
|
+
# Defines the structure of a collection: its name, fields, types, and
|
|
3
|
+
# vector dimensions.
|
|
4
|
+
#
|
|
5
|
+
# Schemas are append-only once created -- fields can be added (in the
|
|
6
|
+
# DSL block or later via {#field}) but not removed afterward.
|
|
7
|
+
#
|
|
8
|
+
# @example Creating a schema with a DSL block
|
|
9
|
+
# schema = Zvec::Schema.new("articles") do
|
|
10
|
+
# string "title"
|
|
11
|
+
# string "body", nullable: true
|
|
12
|
+
# int32 "year"
|
|
13
|
+
# float "rating"
|
|
14
|
+
# bool "published"
|
|
15
|
+
# vector "embedding", dimension: 384,
|
|
16
|
+
# index: Zvec::Ext::HnswIndexParams.new(Zvec::COSINE)
|
|
17
|
+
# end
|
|
18
|
+
#
|
|
19
|
+
# @example Binary vector field
|
|
20
|
+
# schema = Zvec::Schema.new("hashes") do
|
|
21
|
+
# field "hash_vec", DataTypes::BINARY, dimension: 128
|
|
22
|
+
# end
|
|
23
|
+
#
|
|
24
|
+
# @example Sparse vector field
|
|
25
|
+
# schema = Zvec::Schema.new("sparse_docs") do
|
|
26
|
+
# field "tfidf", DataTypes::SPARSE_VECTOR_FP32, dimension: 30000
|
|
27
|
+
# end
|
|
28
|
+
#
|
|
2
29
|
class Schema
|
|
30
|
+
# @return [Ext::CollectionSchema] the underlying C++ schema object
|
|
3
31
|
attr_reader :ext_schema
|
|
4
32
|
|
|
33
|
+
# Create a new schema.
|
|
34
|
+
#
|
|
35
|
+
# @param name [String, Symbol] the collection name (must be non-empty)
|
|
36
|
+
# @yield optional DSL block evaluated in the schema's context
|
|
37
|
+
# @raise [Zvec::SchemaError] if name is nil or blank
|
|
38
|
+
#
|
|
39
|
+
# @example
|
|
40
|
+
# schema = Zvec::Schema.new("my_collection") do
|
|
41
|
+
# string "title"
|
|
42
|
+
# vector "embedding", dimension: 128
|
|
43
|
+
# end
|
|
5
44
|
def initialize(name, &block)
|
|
6
|
-
|
|
45
|
+
if name.nil? || name.to_s.strip.empty?
|
|
46
|
+
raise SchemaError, "Schema name must be a non-empty string"
|
|
47
|
+
end
|
|
7
48
|
|
|
8
49
|
@ext_schema = Ext::CollectionSchema.new(name.to_s)
|
|
9
50
|
@field_types = {}
|
|
@@ -11,9 +52,25 @@ module Zvec
|
|
|
11
52
|
instance_eval(&block) if block
|
|
12
53
|
end
|
|
13
54
|
|
|
55
|
+
# Add a field with an explicit data type.
|
|
56
|
+
#
|
|
57
|
+
# @param name [String, Symbol] the field name (must be non-empty)
|
|
58
|
+
# @param type [Symbol] a DataTypes constant (e.g., +DataTypes::STRING+)
|
|
59
|
+
# @param dimension [Integer, nil] required for vector fields
|
|
60
|
+
# @param nullable [Boolean] whether the field allows null values
|
|
61
|
+
# @param index [Ext::HnswIndexParams, Ext::FlatIndexParams, Ext::IVFIndexParams, nil]
|
|
62
|
+
# optional index parameters for this field
|
|
63
|
+
# @return [self] for method chaining
|
|
64
|
+
# @raise [Zvec::SchemaError] if field name is blank
|
|
65
|
+
#
|
|
66
|
+
# @example
|
|
67
|
+
# schema.field("tags", DataTypes::ARRAY_STRING)
|
|
68
|
+
# schema.field("embedding", DataTypes::VECTOR_FP32, dimension: 128)
|
|
14
69
|
def field(name, type, dimension: nil, nullable: false, index: nil)
|
|
15
70
|
name = name.to_s
|
|
16
|
-
|
|
71
|
+
if name.strip.empty?
|
|
72
|
+
raise SchemaError, "Field name must be a non-empty string"
|
|
73
|
+
end
|
|
17
74
|
|
|
18
75
|
fs = Ext::FieldSchema.new(name, type)
|
|
19
76
|
fs.dimension = dimension if dimension
|
|
@@ -25,61 +82,131 @@ module Zvec
|
|
|
25
82
|
self
|
|
26
83
|
end
|
|
27
84
|
|
|
85
|
+
# Add a dense vector field. Defaults to FP32 precision.
|
|
86
|
+
#
|
|
87
|
+
# @param name [String, Symbol] the field name
|
|
88
|
+
# @param dimension [Integer] the vector dimension (must be > 0)
|
|
89
|
+
# @param type [Symbol] vector data type (default: {DataTypes::VECTOR_FP32}).
|
|
90
|
+
# Also accepts {DataTypes::VECTOR_FP64}, {DataTypes::VECTOR_FP16},
|
|
91
|
+
# or {DataTypes::VECTOR_INT8}.
|
|
92
|
+
# @param index [Ext::HnswIndexParams, Ext::FlatIndexParams, Ext::IVFIndexParams, nil]
|
|
93
|
+
# optional index parameters
|
|
94
|
+
# @return [self]
|
|
95
|
+
# @raise [ArgumentError] if dimension is not a positive integer
|
|
96
|
+
#
|
|
97
|
+
# @example Standard FP32 vector with HNSW index
|
|
98
|
+
# schema.vector "embedding", dimension: 384,
|
|
99
|
+
# index: Ext::HnswIndexParams.new(Zvec::COSINE)
|
|
100
|
+
#
|
|
101
|
+
# @example FP16 vector (half memory)
|
|
102
|
+
# schema.vector "embedding", dimension: 384,
|
|
103
|
+
# type: DataTypes::VECTOR_FP16
|
|
104
|
+
#
|
|
105
|
+
# @example INT8 quantized vector (minimal memory)
|
|
106
|
+
# schema.vector "embedding", dimension: 384,
|
|
107
|
+
# type: DataTypes::VECTOR_INT8
|
|
28
108
|
def vector(name, dimension:, type: DataTypes::VECTOR_FP32, index: nil)
|
|
29
109
|
raise ArgumentError, "Vector dimension must be a positive integer, got #{dimension.inspect}" unless dimension.is_a?(Integer) && dimension > 0
|
|
30
110
|
|
|
31
111
|
field(name, type, dimension: dimension, index: index)
|
|
32
112
|
end
|
|
33
113
|
|
|
114
|
+
# Add a string field.
|
|
115
|
+
#
|
|
116
|
+
# @param name [String, Symbol] the field name
|
|
117
|
+
# @param opts [Hash] options passed to {#field} (+nullable:+, +index:+)
|
|
118
|
+
# @return [self]
|
|
34
119
|
def string(name, **opts)
|
|
35
120
|
field(name, DataTypes::STRING, **opts)
|
|
36
121
|
end
|
|
37
122
|
|
|
123
|
+
# Add a 32-bit integer field.
|
|
124
|
+
#
|
|
125
|
+
# @param name [String, Symbol] the field name
|
|
126
|
+
# @param opts [Hash] options passed to {#field}
|
|
127
|
+
# @return [self]
|
|
38
128
|
def int32(name, **opts)
|
|
39
129
|
field(name, DataTypes::INT32, **opts)
|
|
40
130
|
end
|
|
41
131
|
|
|
132
|
+
# Add a 64-bit integer field.
|
|
133
|
+
#
|
|
134
|
+
# @param name [String, Symbol] the field name
|
|
135
|
+
# @param opts [Hash] options passed to {#field}
|
|
136
|
+
# @return [self]
|
|
42
137
|
def int64(name, **opts)
|
|
43
138
|
field(name, DataTypes::INT64, **opts)
|
|
44
139
|
end
|
|
45
140
|
|
|
141
|
+
# Add a 32-bit float field.
|
|
142
|
+
#
|
|
143
|
+
# @param name [String, Symbol] the field name
|
|
144
|
+
# @param opts [Hash] options passed to {#field}
|
|
145
|
+
# @return [self]
|
|
46
146
|
def float(name, **opts)
|
|
47
147
|
field(name, DataTypes::FLOAT, **opts)
|
|
48
148
|
end
|
|
49
149
|
|
|
150
|
+
# Add a 64-bit double field.
|
|
151
|
+
#
|
|
152
|
+
# @param name [String, Symbol] the field name
|
|
153
|
+
# @param opts [Hash] options passed to {#field}
|
|
154
|
+
# @return [self]
|
|
50
155
|
def double(name, **opts)
|
|
51
156
|
field(name, DataTypes::DOUBLE, **opts)
|
|
52
157
|
end
|
|
53
158
|
|
|
159
|
+
# Add a boolean field.
|
|
160
|
+
#
|
|
161
|
+
# @param name [String, Symbol] the field name
|
|
162
|
+
# @param opts [Hash] options passed to {#field}
|
|
163
|
+
# @return [self]
|
|
54
164
|
def bool(name, **opts)
|
|
55
165
|
field(name, DataTypes::BOOL, **opts)
|
|
56
166
|
end
|
|
57
167
|
|
|
168
|
+
# @return [String] the collection name
|
|
58
169
|
def name
|
|
59
170
|
@ext_schema.name
|
|
60
171
|
end
|
|
61
172
|
|
|
173
|
+
# @return [Array<String>] all field names in this schema
|
|
62
174
|
def field_names
|
|
63
175
|
@ext_schema.field_names
|
|
64
176
|
end
|
|
65
177
|
|
|
178
|
+
# Look up the data type of a field by name.
|
|
179
|
+
#
|
|
180
|
+
# @param name [String, Symbol] the field name
|
|
181
|
+
# @return [Symbol, nil] the data type constant, or nil if not found
|
|
66
182
|
def field_type(name)
|
|
67
183
|
@field_types[name.to_s]
|
|
68
184
|
end
|
|
69
185
|
|
|
186
|
+
# Look up the dimension of a vector field.
|
|
187
|
+
#
|
|
188
|
+
# @param name [String, Symbol] the field name
|
|
189
|
+
# @return [Integer, nil] the dimension, or nil if the field is not a vector
|
|
70
190
|
def field_dimension(name)
|
|
71
191
|
@field_dimensions[name.to_s]
|
|
72
192
|
end
|
|
73
193
|
|
|
194
|
+
# Check whether a field exists in the schema.
|
|
195
|
+
#
|
|
196
|
+
# @param name [String, Symbol] the field name
|
|
197
|
+
# @return [Boolean]
|
|
74
198
|
def has_field?(name)
|
|
75
199
|
@ext_schema.has_field?(name.to_s)
|
|
76
200
|
end
|
|
77
201
|
|
|
78
|
-
# Returns
|
|
202
|
+
# Returns a hash of vector field names to their dimensions.
|
|
203
|
+
#
|
|
204
|
+
# @return [Hash{String => Integer}] e.g. +{"embedding" => 384}+
|
|
79
205
|
def vector_fields_with_dimensions
|
|
80
206
|
@field_dimensions.select { |name, _| DataTypes::VECTOR_TYPES.include?(@field_types[name]) }
|
|
81
207
|
end
|
|
82
208
|
|
|
209
|
+
# @return [String] human-readable representation of the schema
|
|
83
210
|
def to_s
|
|
84
211
|
@ext_schema.to_s
|
|
85
212
|
end
|
data/lib/zvec/version.rb
CHANGED
data/lib/zvec.rb
CHANGED
|
@@ -16,8 +16,20 @@ require_relative "zvec/query"
|
|
|
16
16
|
require_relative "zvec/collection"
|
|
17
17
|
|
|
18
18
|
module Zvec
|
|
19
|
+
# Base error class for all Zvec errors.
|
|
19
20
|
class Error < StandardError; end
|
|
21
|
+
|
|
22
|
+
# Raised when vector dimensions do not match the expected schema dimension.
|
|
20
23
|
class DimensionError < Error; end
|
|
21
24
|
|
|
25
|
+
# Raised for schema definition errors (invalid field names, types, etc.).
|
|
26
|
+
class SchemaError < Error; end
|
|
27
|
+
|
|
28
|
+
# Raised for query construction or execution errors.
|
|
29
|
+
class QueryError < Error; end
|
|
30
|
+
|
|
31
|
+
# Raised for collection lifecycle errors (open/close/reopen issues).
|
|
32
|
+
class CollectionError < Error; end
|
|
33
|
+
|
|
22
34
|
include DataTypes
|
|
23
35
|
end
|
data/test/test_edge_cases.rb
ADDED
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
|
|
3
|
+
class TestEdgeCases < Minitest::Test
|
|
4
|
+
include TempDirHelper
|
|
5
|
+
|
|
6
|
+
# --- Empty vectors ---
|
|
7
|
+
|
|
8
|
+
def test_empty_vector_in_doc_without_schema
|
|
9
|
+
doc = Zvec::Doc.new
|
|
10
|
+
doc["vec"] = []
|
|
11
|
+
result = doc["vec"]
|
|
12
|
+
assert(result.nil? || result == [])
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def test_empty_vector_in_doc_with_schema
|
|
16
|
+
schema = Zvec::Schema.new("test") do
|
|
17
|
+
vector "embedding", dimension: 4
|
|
18
|
+
end
|
|
19
|
+
doc = Zvec::Doc.new(schema: schema)
|
|
20
|
+
# Empty vectors should be accepted (no dimension to check)
|
|
21
|
+
doc["embedding"] = []
|
|
22
|
+
# Should not raise
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# --- Single-element vectors ---
|
|
26
|
+
|
|
27
|
+
def test_single_element_vector
|
|
28
|
+
doc = Zvec::Doc.new
|
|
29
|
+
doc["vec"] = [42.0]
|
|
30
|
+
result = doc["vec"]
|
|
31
|
+
assert_kind_of Array, result
|
|
32
|
+
assert_equal 1, result.size
|
|
33
|
+
assert_in_delta 42.0, result[0], 0.001
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def test_single_element_vector_with_schema
|
|
37
|
+
schema = Zvec::Schema.new("test") do
|
|
38
|
+
vector "embedding", dimension: 1
|
|
39
|
+
end
|
|
40
|
+
doc = Zvec::Doc.new(schema: schema)
|
|
41
|
+
doc["embedding"] = [3.14]
|
|
42
|
+
result = doc["embedding"]
|
|
43
|
+
assert_equal 1, result.size
|
|
44
|
+
assert_in_delta 3.14, result[0], 0.001
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def test_single_element_vector_query
|
|
48
|
+
q = Zvec::VectorQuery.new(field_name: "vec", vector: [1.0])
|
|
49
|
+
assert_kind_of Zvec::VectorQuery, q
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# --- Very large dimension vectors ---
|
|
53
|
+
|
|
54
|
+
def test_large_dimension_schema
|
|
55
|
+
schema = Zvec::Schema.new("test") do
|
|
56
|
+
vector "big_vec", dimension: 10_000
|
|
57
|
+
end
|
|
58
|
+
assert schema.has_field?("big_vec")
|
|
59
|
+
assert_equal 10_000, schema.field_dimension("big_vec")
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def test_large_dimension_doc
|
|
63
|
+
schema = Zvec::Schema.new("test") do
|
|
64
|
+
vector "big_vec", dimension: 10_000
|
|
65
|
+
end
|
|
66
|
+
doc = Zvec::Doc.new(schema: schema)
|
|
67
|
+
big_vector = Array.new(10_000) { |i| i.to_f / 10_000 }
|
|
68
|
+
doc["big_vec"] = big_vector
|
|
69
|
+
result = doc["big_vec"]
|
|
70
|
+
assert_equal 10_000, result.size
|
|
71
|
+
assert_in_delta 0.0, result[0], 0.001
|
|
72
|
+
assert_in_delta 0.9999, result[9_999], 0.001
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def test_large_dimension_mismatch
|
|
76
|
+
schema = Zvec::Schema.new("test") do
|
|
77
|
+
vector "big_vec", dimension: 10_000
|
|
78
|
+
end
|
|
79
|
+
doc = Zvec::Doc.new(schema: schema)
|
|
80
|
+
small_vector = [1.0, 2.0, 3.0]
|
|
81
|
+
assert_raises(Zvec::DimensionError) do
|
|
82
|
+
doc["big_vec"] = small_vector
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def test_large_dimension_query
|
|
87
|
+
big_vector = Array.new(10_000) { 0.1 }
|
|
88
|
+
q = Zvec::VectorQuery.new(field_name: "big_vec", vector: big_vector)
|
|
89
|
+
assert_kind_of Zvec::VectorQuery, q
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
# --- Special characters in field names ---
|
|
93
|
+
|
|
94
|
+
def test_special_chars_in_field_name
|
|
95
|
+
doc = Zvec::Doc.new
|
|
96
|
+
doc["field-with-dashes"] = "dash"
|
|
97
|
+
assert_equal "dash", doc["field-with-dashes"]
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def test_dot_in_field_name
|
|
101
|
+
doc = Zvec::Doc.new
|
|
102
|
+
doc["meta.title"] = "dotted"
|
|
103
|
+
assert_equal "dotted", doc["meta.title"]
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def test_underscore_field_name
|
|
107
|
+
doc = Zvec::Doc.new
|
|
108
|
+
doc["_private_field"] = "private"
|
|
109
|
+
assert_equal "private", doc["_private_field"]
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
def test_numeric_string_field_name
|
|
113
|
+
doc = Zvec::Doc.new
|
|
114
|
+
doc["123"] = "numeric name"
|
|
115
|
+
assert_equal "numeric name", doc["123"]
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def test_space_in_field_name
|
|
119
|
+
doc = Zvec::Doc.new
|
|
120
|
+
doc["field name"] = "spaced"
|
|
121
|
+
assert_equal "spaced", doc["field name"]
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
def test_special_chars_in_schema_field_name
|
|
125
|
+
schema = Zvec::Schema.new("test") do
|
|
126
|
+
string "field-with-dashes"
|
|
127
|
+
string "meta.title"
|
|
128
|
+
string "_private"
|
|
129
|
+
end
|
|
130
|
+
assert schema.has_field?("field-with-dashes")
|
|
131
|
+
assert schema.has_field?("meta.title")
|
|
132
|
+
assert schema.has_field?("_private")
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
# --- Unicode field names ---
|
|
136
|
+
|
|
137
|
+
def test_unicode_field_name
|
|
138
|
+
doc = Zvec::Doc.new
|
|
139
|
+
doc["\u30BF\u30A4\u30C8\u30EB"] = "Japanese title field"
|
|
140
|
+
assert_equal "Japanese title field", doc["\u30BF\u30A4\u30C8\u30EB"]
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def test_emoji_field_name
|
|
144
|
+
doc = Zvec::Doc.new
|
|
145
|
+
doc["\u{1F680}rocket"] = "launched"
|
|
146
|
+
assert_equal "launched", doc["\u{1F680}rocket"]
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
def test_chinese_field_name
|
|
150
|
+
doc = Zvec::Doc.new
|
|
151
|
+
doc["\u6807\u9898"] = "Chinese title"
|
|
152
|
+
assert_equal "Chinese title", doc["\u6807\u9898"]
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
def test_unicode_schema_field_name
|
|
156
|
+
schema = Zvec::Schema.new("test") do
|
|
157
|
+
string "\u30BF\u30A4\u30C8\u30EB"
|
|
158
|
+
end
|
|
159
|
+
assert schema.has_field?("\u30BF\u30A4\u30C8\u30EB")
|
|
160
|
+
assert_equal Zvec::DataTypes::STRING, schema.field_type("\u30BF\u30A4\u30C8\u30EB")
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
# --- Unicode values ---
|
|
164
|
+
|
|
165
|
+
def test_unicode_string_value
|
|
166
|
+
doc = Zvec::Doc.new
|
|
167
|
+
doc["title"] = "\u3053\u3093\u306B\u3061\u306F\u4E16\u754C"
|
|
168
|
+
assert_equal "\u3053\u3093\u306B\u3061\u306F\u4E16\u754C", doc["title"]
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
def test_unicode_in_string_array
|
|
172
|
+
doc = Zvec::Doc.new
|
|
173
|
+
doc["tags"] = ["\u30BF\u30B0\u4E00", "\u30BF\u30B0\u4E8C"]
|
|
174
|
+
result = doc["tags"]
|
|
175
|
+
assert_equal ["\u30BF\u30B0\u4E00", "\u30BF\u30B0\u4E8C"], result
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
# --- Custom exception classes ---
|
|
179
|
+
|
|
180
|
+
def test_schema_error_is_zvec_error
|
|
181
|
+
assert Zvec::SchemaError < Zvec::Error
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
def test_query_error_is_zvec_error
|
|
185
|
+
assert Zvec::QueryError < Zvec::Error
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
def test_collection_error_is_zvec_error
|
|
189
|
+
assert Zvec::CollectionError < Zvec::Error
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
def test_dimension_error_is_zvec_error
|
|
193
|
+
assert Zvec::DimensionError < Zvec::Error
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
def test_schema_error_raised_for_nil_name
|
|
197
|
+
assert_raises(Zvec::SchemaError) { Zvec::Schema.new(nil) }
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
def test_schema_error_raised_for_empty_field
|
|
201
|
+
schema = Zvec::Schema.new("test")
|
|
202
|
+
assert_raises(Zvec::SchemaError) { schema.field("", Zvec::DataTypes::STRING) }
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
def test_query_error_raised_for_bad_vector
|
|
206
|
+
assert_raises(Zvec::QueryError) do
|
|
207
|
+
Zvec::VectorQuery.new(field_name: "vec", vector: [])
|
|
208
|
+
end
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
def test_query_error_raised_for_nil_field
|
|
212
|
+
assert_raises(Zvec::QueryError) do
|
|
213
|
+
Zvec::VectorQuery.new(field_name: nil, vector: [1.0])
|
|
214
|
+
end
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
# --- Collection close/reopen lifecycle ---
|
|
218
|
+
|
|
219
|
+
def test_collection_closed_flag
|
|
220
|
+
with_temp_dir("zvec_edge") do |dir|
|
|
221
|
+
col = Zvec::Collection.create_and_open("#{dir}/col", make_schema)
|
|
222
|
+
refute col.closed?
|
|
223
|
+
col.close
|
|
224
|
+
assert col.closed?
|
|
225
|
+
end
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
def test_collection_close_prevents_operations
|
|
229
|
+
with_temp_dir("zvec_edge") do |dir|
|
|
230
|
+
col = Zvec::Collection.create_and_open("#{dir}/col", make_schema)
|
|
231
|
+
col.close
|
|
232
|
+
|
|
233
|
+
assert_raises(Zvec::CollectionError) { col.doc_count }
|
|
234
|
+
assert_raises(Zvec::CollectionError) { col.stats }
|
|
235
|
+
assert_raises(Zvec::CollectionError) { col.flush }
|
|
236
|
+
assert_raises(Zvec::CollectionError) { col.optimize }
|
|
237
|
+
assert_raises(Zvec::CollectionError) do
|
|
238
|
+
col.add(pk: "x", title: "t", embedding: [1.0, 2.0, 3.0, 4.0])
|
|
239
|
+
end
|
|
240
|
+
assert_raises(Zvec::CollectionError) do
|
|
241
|
+
col.search([1.0, 2.0, 3.0, 4.0])
|
|
242
|
+
end
|
|
243
|
+
end
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
def test_collection_double_close_raises
|
|
247
|
+
with_temp_dir("zvec_edge") do |dir|
|
|
248
|
+
col = Zvec::Collection.create_and_open("#{dir}/col", make_schema)
|
|
249
|
+
col.close
|
|
250
|
+
assert_raises(Zvec::CollectionError) { col.close }
|
|
251
|
+
end
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
def test_collection_destroy_marks_closed
|
|
255
|
+
with_temp_dir("zvec_edge") do |dir|
|
|
256
|
+
col = Zvec::Collection.create_and_open("#{dir}/col", make_schema)
|
|
257
|
+
col.destroy
|
|
258
|
+
assert col.closed?
|
|
259
|
+
end
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
# --- Edge cases in coercion ---
|
|
263
|
+
|
|
264
|
+
def test_coerce_empty_string_to_integer_raises
|
|
265
|
+
assert_raises(ArgumentError) do
|
|
266
|
+
Zvec::DataTypes.coerce_value("", Zvec::Ext::DataType::INT64)
|
|
267
|
+
end
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
def test_coerce_empty_string_to_float_raises
|
|
271
|
+
assert_raises(ArgumentError) do
|
|
272
|
+
Zvec::DataTypes.coerce_value("", Zvec::Ext::DataType::DOUBLE)
|
|
273
|
+
end
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
def test_coerce_very_large_integer
|
|
277
|
+
result = Zvec::DataTypes.coerce_value(2**62, Zvec::Ext::DataType::INT64)
|
|
278
|
+
assert_equal 2**62, result
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
def test_coerce_negative_float_to_integer
|
|
282
|
+
result = Zvec::DataTypes.coerce_value(-3.7, Zvec::Ext::DataType::INT64)
|
|
283
|
+
assert_equal(-3, result)
|
|
284
|
+
end
|
|
285
|
+
|
|
286
|
+
# --- Edge cases in type detection ---
|
|
287
|
+
|
|
288
|
+
def test_detect_type_mixed_array_starts_with_float
|
|
289
|
+
# Mixed arrays: type detected from first non-nil element
|
|
290
|
+
assert_equal Zvec::Ext::DataType::VECTOR_FP32, Zvec::DataTypes.detect_type([1.0, "mixed"])
|
|
291
|
+
end
|
|
292
|
+
|
|
293
|
+
def test_detect_type_symbol
|
|
294
|
+
# Symbols are not a recognized type, returns nil
|
|
295
|
+
assert_nil Zvec::DataTypes.detect_type(:symbol)
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
def test_detect_type_hash
|
|
299
|
+
# Hashes are not a recognized type, returns nil
|
|
300
|
+
assert_nil Zvec::DataTypes.detect_type({ a: 1 })
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
# --- Collection with special character schema names ---
|
|
304
|
+
|
|
305
|
+
def test_schema_with_unicode_name
|
|
306
|
+
schema = Zvec::Schema.new("\u30C6\u30B9\u30C8") do
|
|
307
|
+
string "title"
|
|
308
|
+
vector "embedding", dimension: 4
|
|
309
|
+
end
|
|
310
|
+
assert_equal "\u30C6\u30B9\u30C8", schema.name
|
|
311
|
+
end
|
|
312
|
+
|
|
313
|
+
def test_schema_with_hyphenated_name
|
|
314
|
+
schema = Zvec::Schema.new("my-collection") do
|
|
315
|
+
string "title"
|
|
316
|
+
end
|
|
317
|
+
assert_equal "my-collection", schema.name
|
|
318
|
+
end
|
|
319
|
+
|
|
320
|
+
# --- Doc with many fields ---
|
|
321
|
+
|
|
322
|
+
def test_doc_with_many_fields
|
|
323
|
+
doc = Zvec::Doc.new
|
|
324
|
+
100.times do |i|
|
|
325
|
+
doc["field_#{i}"] = "value_#{i}"
|
|
326
|
+
end
|
|
327
|
+
assert_equal 100, doc.field_names.size
|
|
328
|
+
assert_equal "value_0", doc["field_0"]
|
|
329
|
+
assert_equal "value_99", doc["field_99"]
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
# --- Vector with all zeros ---
|
|
333
|
+
|
|
334
|
+
def test_zero_vector
|
|
335
|
+
doc = Zvec::Doc.new
|
|
336
|
+
doc["vec"] = [0.0, 0.0, 0.0, 0.0]
|
|
337
|
+
result = doc["vec"]
|
|
338
|
+
assert_equal [0.0, 0.0, 0.0, 0.0], result
|
|
339
|
+
end
|
|
340
|
+
|
|
341
|
+
# --- Vector with negative values ---
|
|
342
|
+
|
|
343
|
+
def test_negative_vector
|
|
344
|
+
doc = Zvec::Doc.new
|
|
345
|
+
doc["vec"] = [-1.0, -2.5, -0.001, 0.0]
|
|
346
|
+
result = doc["vec"]
|
|
347
|
+
assert_in_delta(-1.0, result[0], 0.001)
|
|
348
|
+
assert_in_delta(-2.5, result[1], 0.001)
|
|
349
|
+
assert_in_delta(-0.001, result[2], 0.001)
|
|
350
|
+
end
|
|
351
|
+
|
|
352
|
+
# --- Query with very large topk ---
|
|
353
|
+
|
|
354
|
+
def test_query_with_large_topk
|
|
355
|
+
q = Zvec::VectorQuery.new(field_name: "vec", vector: [1.0], topk: 100_000)
|
|
356
|
+
assert_equal 100_000, q.ext_query.topk
|
|
357
|
+
end
|
|
358
|
+
|
|
359
|
+
# --- Doc pk edge cases ---
|
|
360
|
+
|
|
361
|
+
def test_doc_empty_string_pk
|
|
362
|
+
doc = Zvec::Doc.new(pk: "")
|
|
363
|
+
assert_equal "", doc.pk
|
|
364
|
+
end
|
|
365
|
+
|
|
366
|
+
def test_doc_very_long_pk
|
|
367
|
+
long_pk = "x" * 10_000
|
|
368
|
+
doc = Zvec::Doc.new(pk: long_pk)
|
|
369
|
+
assert_equal long_pk, doc.pk
|
|
370
|
+
end
|
|
371
|
+
|
|
372
|
+
private
|
|
373
|
+
|
|
374
|
+
def make_schema
|
|
375
|
+
Zvec::Schema.new("edge_test") do
|
|
376
|
+
string "title"
|
|
377
|
+
vector "embedding", dimension: 4
|
|
378
|
+
end
|
|
379
|
+
end
|
|
380
|
+
end
|
data/test/test_helper.rb
CHANGED
|
@@ -15,6 +15,9 @@ rescue LoadError
|
|
|
15
15
|
module Zvec
|
|
16
16
|
class Error < StandardError; end
|
|
17
17
|
class DimensionError < Error; end
|
|
18
|
+
class SchemaError < Error; end
|
|
19
|
+
class QueryError < Error; end
|
|
20
|
+
class CollectionError < Error; end
|
|
18
21
|
|
|
19
22
|
module Ext
|
|
20
23
|
# Stub enums as simple modules with constants
|
|
@@ -168,6 +171,7 @@ rescue LoadError
|
|
|
168
171
|
@docs = {}
|
|
169
172
|
@stats = CollectionStats.new
|
|
170
173
|
@path_value = ""
|
|
174
|
+
@closed = false
|
|
171
175
|
end
|
|
172
176
|
|
|
173
177
|
def self.create_and_open(path, ext_schema, opts)
|
|
@@ -185,6 +189,11 @@ rescue LoadError
|
|
|
185
189
|
|
|
186
190
|
def path; @path_value; end
|
|
187
191
|
def schema; @schema_value; end
|
|
192
|
+
def closed?; @closed; end
|
|
193
|
+
|
|
194
|
+
def close
|
|
195
|
+
@closed = true
|
|
196
|
+
end
|
|
188
197
|
|
|
189
198
|
def stats
|
|
190
199
|
s = CollectionStats.new
|