zvec-ruby 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/lib/zvec/active_record.rb +1 -1
- data/lib/zvec/collection.rb +103 -29
- data/lib/zvec/data_types.rb +128 -0
- data/lib/zvec/doc.rb +30 -9
- data/lib/zvec/query.rb +12 -0
- data/lib/zvec/schema.rb +18 -1
- data/lib/zvec/version.rb +1 -1
- data/lib/zvec.rb +1 -0
- data/test/test_helper.rb +93 -3
- data/test/test_type_detection.rb +258 -0
- data/test/test_validation.rb +305 -0
- data/test/test_version.rb +1 -1
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9ba14d14fe74cef98438fa55ee3852580473329b4c2d06abce25cdbb67ce5f7a
|
|
4
|
+
data.tar.gz: 3c5ef9a204dc0f8b9f787559009b00b6131d30c62e9981b829a67f1a01003364
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3e90824d6ffb928da3b5f6ec7327c01c76101282c10a25c56d501637e4597e4480fe50b609e3b415d3e56d43c0414252a09ecc519941556f6362a7e0aff3d3c8
|
|
7
|
+
data.tar.gz: 7a03160f30fd6ec83511ebeb61ffaeddeea69da3c6b64e1168f0c0a802101e7d34a3303f6071f5085037aec579fe6a833a3526f0cc7ef693b39acfc47a787fe3
|
data/README.md
CHANGED
|
@@ -15,11 +15,11 @@ Precompiled native gems are available for:
|
|
|
15
15
|
|
|
16
16
|
```ruby
|
|
17
17
|
# Gemfile
|
|
18
|
-
gem "zvec"
|
|
18
|
+
gem "zvec-ruby"
|
|
19
19
|
```
|
|
20
20
|
|
|
21
21
|
```bash
|
|
22
|
-
gem install zvec
|
|
22
|
+
gem install zvec-ruby
|
|
23
23
|
```
|
|
24
24
|
|
|
25
25
|
No compiler or build tools needed — the gem ships with the native extension and all zvec dependencies statically linked.
|
|
@@ -36,7 +36,7 @@ cmake .. -DCMAKE_BUILD_TYPE=Release
|
|
|
36
36
|
make -j$(nproc)
|
|
37
37
|
|
|
38
38
|
# 2. Install the gem with ZVEC_DIR pointing to the build
|
|
39
|
-
ZVEC_DIR=/tmp/zvec gem install zvec
|
|
39
|
+
ZVEC_DIR=/tmp/zvec gem install zvec-ruby
|
|
40
40
|
```
|
|
41
41
|
|
|
42
42
|
Or using the included helper script:
|
data/lib/zvec/active_record.rb
CHANGED
data/lib/zvec/collection.rb
CHANGED
|
@@ -1,23 +1,32 @@
|
|
|
1
|
+
require "monitor"
|
|
2
|
+
|
|
1
3
|
module Zvec
|
|
2
4
|
class Collection
|
|
3
5
|
attr_reader :schema
|
|
4
6
|
|
|
5
|
-
def initialize(ext_collection, schema: nil)
|
|
7
|
+
def initialize(ext_collection, schema: nil, name: nil)
|
|
6
8
|
@ext = ext_collection
|
|
7
9
|
@schema = schema
|
|
10
|
+
@name = name
|
|
11
|
+
@monitor = Monitor.new
|
|
8
12
|
end
|
|
9
13
|
|
|
10
14
|
# Create a new collection and open it.
|
|
11
15
|
def self.create_and_open(path, schema, read_only: false, enable_mmap: true)
|
|
16
|
+
validate_path!(path)
|
|
17
|
+
raise ArgumentError, "schema must be a Zvec::Schema" unless schema.is_a?(Schema)
|
|
18
|
+
|
|
12
19
|
opts = Ext::CollectionOptions.new
|
|
13
20
|
opts.read_only = read_only
|
|
14
21
|
opts.enable_mmap = enable_mmap
|
|
15
22
|
ext = Ext::Collection.create_and_open(path, schema.ext_schema, opts)
|
|
16
|
-
new(ext, schema: schema)
|
|
23
|
+
new(ext, schema: schema, name: schema.name)
|
|
17
24
|
end
|
|
18
25
|
|
|
19
26
|
# Open an existing collection.
|
|
20
27
|
def self.open(path, read_only: false, enable_mmap: true)
|
|
28
|
+
validate_path!(path)
|
|
29
|
+
|
|
21
30
|
opts = Ext::CollectionOptions.new
|
|
22
31
|
opts.read_only = read_only
|
|
23
32
|
opts.enable_mmap = enable_mmap
|
|
@@ -25,6 +34,10 @@ module Zvec
|
|
|
25
34
|
new(ext)
|
|
26
35
|
end
|
|
27
36
|
|
|
37
|
+
def collection_name
|
|
38
|
+
@name || (@schema ? @schema.name : nil)
|
|
39
|
+
end
|
|
40
|
+
|
|
28
41
|
def path
|
|
29
42
|
@ext.path
|
|
30
43
|
end
|
|
@@ -40,66 +53,82 @@ module Zvec
|
|
|
40
53
|
# --- DDL ---
|
|
41
54
|
|
|
42
55
|
def create_index(field_name, index_params)
|
|
43
|
-
|
|
56
|
+
raise ArgumentError, "field_name must be a non-empty string" if field_name.nil? || field_name.to_s.strip.empty?
|
|
57
|
+
|
|
58
|
+
@monitor.synchronize do
|
|
59
|
+
@ext.create_index(field_name.to_s, index_params)
|
|
60
|
+
end
|
|
44
61
|
self
|
|
45
62
|
end
|
|
46
63
|
|
|
47
64
|
def drop_index(field_name)
|
|
48
|
-
|
|
65
|
+
raise ArgumentError, "field_name must be a non-empty string" if field_name.nil? || field_name.to_s.strip.empty?
|
|
66
|
+
|
|
67
|
+
@monitor.synchronize do
|
|
68
|
+
@ext.drop_index(field_name.to_s)
|
|
69
|
+
end
|
|
49
70
|
self
|
|
50
71
|
end
|
|
51
72
|
|
|
52
73
|
def optimize
|
|
53
|
-
@ext.optimize
|
|
74
|
+
@monitor.synchronize { @ext.optimize }
|
|
54
75
|
self
|
|
55
76
|
end
|
|
56
77
|
|
|
57
78
|
def flush
|
|
58
|
-
@ext.flush
|
|
79
|
+
@monitor.synchronize { @ext.flush }
|
|
59
80
|
self
|
|
60
81
|
end
|
|
61
82
|
|
|
62
83
|
def destroy
|
|
63
|
-
@ext.destroy
|
|
84
|
+
@monitor.synchronize { @ext.destroy }
|
|
64
85
|
end
|
|
65
86
|
|
|
66
87
|
# --- DML ---
|
|
67
88
|
|
|
68
89
|
def insert(docs)
|
|
69
90
|
docs = [docs] unless docs.is_a?(Array)
|
|
91
|
+
validate_docs!(docs)
|
|
70
92
|
ext_docs = docs.map { |d| d.is_a?(Doc) ? d.ext_doc : d }
|
|
71
|
-
results = @ext.insert(ext_docs)
|
|
93
|
+
results = @monitor.synchronize { @ext.insert(ext_docs) }
|
|
72
94
|
check_write_results!(results)
|
|
73
95
|
end
|
|
74
96
|
|
|
75
97
|
def upsert(docs)
|
|
76
98
|
docs = [docs] unless docs.is_a?(Array)
|
|
99
|
+
validate_docs!(docs)
|
|
77
100
|
ext_docs = docs.map { |d| d.is_a?(Doc) ? d.ext_doc : d }
|
|
78
|
-
results = @ext.upsert(ext_docs)
|
|
101
|
+
results = @monitor.synchronize { @ext.upsert(ext_docs) }
|
|
79
102
|
check_write_results!(results)
|
|
80
103
|
end
|
|
81
104
|
|
|
82
105
|
def update(docs)
|
|
83
106
|
docs = [docs] unless docs.is_a?(Array)
|
|
107
|
+
validate_docs!(docs)
|
|
84
108
|
ext_docs = docs.map { |d| d.is_a?(Doc) ? d.ext_doc : d }
|
|
85
|
-
results = @ext.update(ext_docs)
|
|
109
|
+
results = @monitor.synchronize { @ext.update(ext_docs) }
|
|
86
110
|
check_write_results!(results)
|
|
87
111
|
end
|
|
88
112
|
|
|
89
113
|
def delete(*pks)
|
|
90
|
-
pks = pks.flatten
|
|
91
|
-
|
|
114
|
+
pks = pks.flatten
|
|
115
|
+
raise ArgumentError, "#{error_prefix}No primary keys provided for delete" if pks.empty?
|
|
116
|
+
pks = pks.map(&:to_s)
|
|
117
|
+
results = @monitor.synchronize { @ext.delete_pks(pks) }
|
|
92
118
|
check_write_results!(results)
|
|
93
119
|
end
|
|
94
120
|
|
|
95
121
|
def delete_by_filter(filter)
|
|
96
|
-
|
|
122
|
+
raise ArgumentError, "#{error_prefix}filter must be a non-empty string" if filter.nil? || filter.to_s.strip.empty?
|
|
123
|
+
@monitor.synchronize { @ext.delete_by_filter(filter) }
|
|
97
124
|
end
|
|
98
125
|
|
|
99
126
|
# --- DQL ---
|
|
100
127
|
|
|
101
128
|
def query(field_name:, vector:, topk: 10, filter: nil,
|
|
102
129
|
include_vector: false, output_fields: nil, query_params: nil)
|
|
130
|
+
validate_query_vector!(vector, field_name)
|
|
131
|
+
|
|
103
132
|
vq = VectorQuery.new(
|
|
104
133
|
field_name: field_name,
|
|
105
134
|
vector: vector,
|
|
@@ -109,7 +138,7 @@ module Zvec
|
|
|
109
138
|
output_fields: output_fields,
|
|
110
139
|
query_params: query_params
|
|
111
140
|
)
|
|
112
|
-
raw_results = @ext.query(vq.ext_query)
|
|
141
|
+
raw_results = @monitor.synchronize { @ext.query(vq.ext_query) }
|
|
113
142
|
raw_results.map do |h|
|
|
114
143
|
Doc.new(
|
|
115
144
|
pk: h["pk"],
|
|
@@ -120,8 +149,10 @@ module Zvec
|
|
|
120
149
|
end
|
|
121
150
|
|
|
122
151
|
def fetch(*pks)
|
|
123
|
-
pks = pks.flatten
|
|
124
|
-
|
|
152
|
+
pks = pks.flatten
|
|
153
|
+
raise ArgumentError, "#{error_prefix}No primary keys provided for fetch" if pks.empty?
|
|
154
|
+
pks = pks.map(&:to_s)
|
|
155
|
+
raw = @monitor.synchronize { @ext.fetch(pks) }
|
|
125
156
|
raw.transform_values do |h|
|
|
126
157
|
Doc.new(pk: nil, fields: h, schema: @schema)
|
|
127
158
|
end
|
|
@@ -129,37 +160,80 @@ module Zvec
|
|
|
129
160
|
|
|
130
161
|
# Convenience: insert a hash directly
|
|
131
162
|
def add(pk:, **fields)
|
|
163
|
+
raise ArgumentError, "#{error_prefix}pk must not be nil" if pk.nil?
|
|
132
164
|
doc = Doc.new(pk: pk, fields: fields, schema: @schema)
|
|
133
165
|
insert(doc)
|
|
134
166
|
end
|
|
135
167
|
|
|
136
|
-
private
|
|
137
|
-
|
|
138
|
-
def check_write_results!(results)
|
|
139
|
-
results.each do |ok, msg|
|
|
140
|
-
raise Error, (msg.empty? ? "Write operation failed" : msg) unless ok
|
|
141
|
-
end
|
|
142
|
-
results
|
|
143
|
-
end
|
|
144
|
-
|
|
145
|
-
public
|
|
146
|
-
|
|
147
168
|
# Convenience: search with simpler API
|
|
148
169
|
def search(vector, field: nil, top_k: 10, filter: nil)
|
|
170
|
+
raise ArgumentError, "#{error_prefix}vector must be a non-empty Array" unless vector.is_a?(Array) && !vector.empty?
|
|
171
|
+
|
|
149
172
|
# Auto-detect vector field if not specified
|
|
150
173
|
fname = field&.to_s
|
|
151
174
|
unless fname
|
|
152
175
|
if @schema
|
|
153
176
|
vfield = @schema.ext_schema.vector_fields.first
|
|
154
|
-
raise Error, "No vector fields in schema" unless vfield
|
|
177
|
+
raise Error, "#{error_prefix}No vector fields in schema" unless vfield
|
|
155
178
|
fname = vfield.name
|
|
156
179
|
else
|
|
157
180
|
vfields = @ext.schema.vector_fields
|
|
158
|
-
raise Error, "No vector fields in schema" if vfields.empty?
|
|
181
|
+
raise Error, "#{error_prefix}No vector fields in schema" if vfields.empty?
|
|
159
182
|
fname = vfields.first.name
|
|
160
183
|
end
|
|
161
184
|
end
|
|
162
185
|
query(field_name: fname, vector: vector, topk: top_k, filter: filter)
|
|
163
186
|
end
|
|
187
|
+
|
|
188
|
+
private
|
|
189
|
+
|
|
190
|
+
def self.validate_path!(path)
|
|
191
|
+
raise ArgumentError, "path must be a non-empty string" if path.nil? || path.to_s.strip.empty?
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
def error_prefix
|
|
195
|
+
cn = collection_name
|
|
196
|
+
cn ? "[Collection '#{cn}'] " : ""
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
def validate_docs!(docs)
|
|
200
|
+
docs.each_with_index do |doc, i|
|
|
201
|
+
unless doc.is_a?(Doc) || doc.is_a?(Ext::Doc)
|
|
202
|
+
raise ArgumentError,
|
|
203
|
+
"#{error_prefix}Expected Zvec::Doc at index #{i}, got #{doc.class}"
|
|
204
|
+
end
|
|
205
|
+
end
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
def validate_query_vector!(vector, field_name)
|
|
209
|
+
raise ArgumentError, "#{error_prefix}vector must be a non-empty Array" unless vector.is_a?(Array) && !vector.empty?
|
|
210
|
+
|
|
211
|
+
vector.each_with_index do |v, i|
|
|
212
|
+
unless v.is_a?(Numeric)
|
|
213
|
+
raise ArgumentError,
|
|
214
|
+
"#{error_prefix}Query vector for field '#{field_name}' contains non-numeric element at index #{i}: #{v.inspect}"
|
|
215
|
+
end
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
# Dimension check against schema
|
|
219
|
+
return unless @schema
|
|
220
|
+
|
|
221
|
+
expected_dim = @schema.field_dimension(field_name.to_s)
|
|
222
|
+
return unless expected_dim
|
|
223
|
+
|
|
224
|
+
if vector.size != expected_dim
|
|
225
|
+
raise DimensionError,
|
|
226
|
+
"#{error_prefix}Query vector dimension mismatch for field '#{field_name}': " \
|
|
227
|
+
"expected #{expected_dim}, got #{vector.size}"
|
|
228
|
+
end
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
def check_write_results!(results)
|
|
232
|
+
results.each do |ok, msg|
|
|
233
|
+
error_msg = msg.nil? || msg.empty? ? "Write operation failed" : msg
|
|
234
|
+
raise Error, "#{error_prefix}#{error_msg}" unless ok
|
|
235
|
+
end
|
|
236
|
+
results
|
|
237
|
+
end
|
|
164
238
|
end
|
|
165
239
|
end
|
data/lib/zvec/data_types.rb
CHANGED
|
@@ -31,6 +31,14 @@ module Zvec
|
|
|
31
31
|
IP = Ext::MetricType::IP
|
|
32
32
|
COSINE = Ext::MetricType::COSINE
|
|
33
33
|
|
|
34
|
+
# Vector data types for dimension validation
|
|
35
|
+
VECTOR_TYPES = [
|
|
36
|
+
Ext::DataType::VECTOR_FP32,
|
|
37
|
+
Ext::DataType::VECTOR_FP64,
|
|
38
|
+
Ext::DataType::VECTOR_FP16,
|
|
39
|
+
Ext::DataType::VECTOR_INT8,
|
|
40
|
+
].freeze
|
|
41
|
+
|
|
34
42
|
# Setter dispatch table: DataType -> Doc setter method name
|
|
35
43
|
SETTER_FOR = {
|
|
36
44
|
Ext::DataType::STRING => :set_string,
|
|
@@ -59,5 +67,125 @@ module Zvec
|
|
|
59
67
|
Ext::DataType::VECTOR_FP64 => :get_double_vector,
|
|
60
68
|
Ext::DataType::ARRAY_STRING => :get_string_array,
|
|
61
69
|
}.freeze
|
|
70
|
+
|
|
71
|
+
# Detect the zvec data type for a Ruby value.
|
|
72
|
+
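# Integer maps to INT64 and Float to DOUBLE; every String (including "true"/"false") maps to STRING; nil and unrecognized values return nil; empty or all-nil arrays default to VECTOR_FP32.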
|
|
73
|
+
def self.detect_type(value)
|
|
74
|
+
case value
|
|
75
|
+
when NilClass then nil
|
|
76
|
+
when String then Ext::DataType::STRING
|
|
77
|
+
when Integer then Ext::DataType::INT64
|
|
78
|
+
when Float then Ext::DataType::DOUBLE
|
|
79
|
+
when TrueClass, FalseClass then Ext::DataType::BOOL
|
|
80
|
+
when Array then detect_array_type(value)
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# Coerce a Ruby value into a form suitable for the given zvec data type.
|
|
85
|
+
# Returns the coerced value, or raises ArgumentError on impossible coercion.
|
|
86
|
+
def self.coerce_value(value, target_type, field_name: nil)
|
|
87
|
+
return value if value.nil?
|
|
88
|
+
|
|
89
|
+
ctx = field_name ? " for field '#{field_name}'" : ""
|
|
90
|
+
|
|
91
|
+
case target_type
|
|
92
|
+
when Ext::DataType::STRING
|
|
93
|
+
value.to_s
|
|
94
|
+
when Ext::DataType::BOOL
|
|
95
|
+
coerce_bool(value, ctx)
|
|
96
|
+
when Ext::DataType::INT32, Ext::DataType::INT64,
|
|
97
|
+
Ext::DataType::UINT32, Ext::DataType::UINT64
|
|
98
|
+
coerce_integer(value, ctx)
|
|
99
|
+
when Ext::DataType::FLOAT, Ext::DataType::DOUBLE
|
|
100
|
+
coerce_float(value, ctx)
|
|
101
|
+
when Ext::DataType::VECTOR_FP32, Ext::DataType::VECTOR_FP64
|
|
102
|
+
coerce_float_vector(value, ctx)
|
|
103
|
+
when Ext::DataType::ARRAY_STRING
|
|
104
|
+
coerce_string_array(value, ctx)
|
|
105
|
+
else
|
|
106
|
+
value
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
class << self
|
|
111
|
+
private
|
|
112
|
+
|
|
113
|
+
def detect_array_type(arr)
|
|
114
|
+
return Ext::DataType::VECTOR_FP32 if arr.empty?
|
|
115
|
+
|
|
116
|
+
first_non_nil = arr.find { |v| !v.nil? }
|
|
117
|
+
return Ext::DataType::VECTOR_FP32 if first_non_nil.nil?
|
|
118
|
+
|
|
119
|
+
case first_non_nil
|
|
120
|
+
when Float then Ext::DataType::VECTOR_FP32
|
|
121
|
+
when Integer then Ext::DataType::VECTOR_FP32
|
|
122
|
+
when String then Ext::DataType::ARRAY_STRING
|
|
123
|
+
when TrueClass, FalseClass then Ext::DataType::ARRAY_BOOL
|
|
124
|
+
else Ext::DataType::VECTOR_FP32
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def coerce_bool(value, ctx)
|
|
129
|
+
case value
|
|
130
|
+
when TrueClass, FalseClass then value
|
|
131
|
+
when "true", "1" then true
|
|
132
|
+
when "false", "0" then false
|
|
133
|
+
when Integer then !value.zero?
|
|
134
|
+
else
|
|
135
|
+
raise ArgumentError,
|
|
136
|
+
"Cannot coerce #{value.class} (#{value.inspect}) to Bool#{ctx}"
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def coerce_integer(value, ctx)
|
|
141
|
+
case value
|
|
142
|
+
when Integer then value
|
|
143
|
+
when Float then value.to_i
|
|
144
|
+
when String
|
|
145
|
+
Integer(value)
|
|
146
|
+
else
|
|
147
|
+
raise ArgumentError,
|
|
148
|
+
"Cannot coerce #{value.class} (#{value.inspect}) to Integer#{ctx}"
|
|
149
|
+
end
|
|
150
|
+
rescue ::ArgumentError
|
|
151
|
+
raise ArgumentError,
|
|
152
|
+
"Cannot coerce #{value.class} (#{value.inspect}) to Integer#{ctx}"
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
def coerce_float(value, ctx)
|
|
156
|
+
case value
|
|
157
|
+
when Numeric then value.to_f
|
|
158
|
+
when String
|
|
159
|
+
Float(value)
|
|
160
|
+
else
|
|
161
|
+
raise ArgumentError,
|
|
162
|
+
"Cannot coerce #{value.class} (#{value.inspect}) to Float#{ctx}"
|
|
163
|
+
end
|
|
164
|
+
rescue ::ArgumentError
|
|
165
|
+
raise ArgumentError,
|
|
166
|
+
"Cannot coerce #{value.class} (#{value.inspect}) to Float#{ctx}"
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
def coerce_float_vector(value, ctx)
|
|
170
|
+
unless value.is_a?(Array)
|
|
171
|
+
raise ArgumentError, "Expected Array for vector#{ctx}, got #{value.class}"
|
|
172
|
+
end
|
|
173
|
+
value.map do |v|
|
|
174
|
+
next 0.0 if v.nil?
|
|
175
|
+
unless v.is_a?(Numeric)
|
|
176
|
+
raise ArgumentError,
|
|
177
|
+
"Vector#{ctx} contains non-numeric element: #{v.inspect}"
|
|
178
|
+
end
|
|
179
|
+
v.to_f
|
|
180
|
+
end
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
def coerce_string_array(value, ctx)
|
|
184
|
+
unless value.is_a?(Array)
|
|
185
|
+
raise ArgumentError, "Expected Array for string array#{ctx}, got #{value.class}"
|
|
186
|
+
end
|
|
187
|
+
value.map { |v| v.nil? ? "" : v.to_s }
|
|
188
|
+
end
|
|
189
|
+
end
|
|
62
190
|
end
|
|
63
191
|
end
|
data/lib/zvec/doc.rb
CHANGED
|
@@ -31,28 +31,49 @@ module Zvec
|
|
|
31
31
|
|
|
32
32
|
def set(field_name, value)
|
|
33
33
|
field_name = field_name.to_s
|
|
34
|
+
raise ArgumentError, "Field name must be a non-empty string" if field_name.strip.empty?
|
|
35
|
+
|
|
34
36
|
return @ext_doc.set_null(field_name) if value.nil?
|
|
35
37
|
|
|
36
38
|
if @schema
|
|
37
39
|
type = @schema.field_type(field_name)
|
|
38
40
|
if type
|
|
41
|
+
coerced = DataTypes.coerce_value(value, type, field_name: field_name)
|
|
39
42
|
setter = DataTypes::SETTER_FOR[type]
|
|
40
|
-
|
|
43
|
+
if setter
|
|
44
|
+
# Validate vector dimension if schema has dimension info
|
|
45
|
+
if DataTypes::VECTOR_TYPES.include?(type) && coerced.is_a?(Array)
|
|
46
|
+
expected_dim = @schema.field_dimension(field_name)
|
|
47
|
+
if expected_dim && !coerced.empty? && coerced.size != expected_dim
|
|
48
|
+
raise DimensionError,
|
|
49
|
+
"Vector dimension mismatch for field '#{field_name}': " \
|
|
50
|
+
"expected #{expected_dim}, got #{coerced.size}"
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
return @ext_doc.send(setter, field_name, coerced)
|
|
54
|
+
end
|
|
41
55
|
end
|
|
42
56
|
end
|
|
43
57
|
|
|
44
|
-
# Auto-detect type
|
|
58
|
+
# Auto-detect type (schema-less mode)
|
|
45
59
|
case value
|
|
46
|
-
when String
|
|
47
|
-
when Integer
|
|
48
|
-
when Float
|
|
60
|
+
when String then @ext_doc.set_string(field_name, value)
|
|
61
|
+
when Integer then @ext_doc.set_int64(field_name, value)
|
|
62
|
+
when Float then @ext_doc.set_double(field_name, value)
|
|
49
63
|
when TrueClass, FalseClass then @ext_doc.set_bool(field_name, value)
|
|
50
64
|
when Array
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
@ext_doc.set_string_array(field_name, value)
|
|
65
|
+
detected = DataTypes.detect_type(value)
|
|
66
|
+
case detected
|
|
67
|
+
when Ext::DataType::ARRAY_STRING
|
|
68
|
+
@ext_doc.set_string_array(field_name, value.map { |v| v.nil? ? "" : v.to_s })
|
|
69
|
+
else
|
|
70
|
+
# Default: treat as float vector
|
|
71
|
+
coerced = value.map { |v| v.nil? ? 0.0 : v.to_f }
|
|
72
|
+
@ext_doc.set_float_vector(field_name, coerced)
|
|
55
73
|
end
|
|
74
|
+
else
|
|
75
|
+
raise ArgumentError,
|
|
76
|
+
"Unsupported value type #{value.class} for field '#{field_name}'"
|
|
56
77
|
end
|
|
57
78
|
end
|
|
58
79
|
|
data/lib/zvec/query.rb
CHANGED
|
@@ -4,6 +4,18 @@ module Zvec
|
|
|
4
4
|
|
|
5
5
|
def initialize(field_name:, vector:, topk: 10, filter: nil,
|
|
6
6
|
include_vector: false, output_fields: nil, query_params: nil)
|
|
7
|
+
raise ArgumentError, "field_name must be a non-empty string" if field_name.nil? || field_name.to_s.strip.empty?
|
|
8
|
+
raise ArgumentError, "vector must be a non-empty Array" unless vector.is_a?(Array) && !vector.empty?
|
|
9
|
+
raise ArgumentError, "topk must be a positive integer" unless topk.is_a?(Integer) && topk > 0
|
|
10
|
+
|
|
11
|
+
# Validate all vector elements are numeric
|
|
12
|
+
vector.each_with_index do |v, i|
|
|
13
|
+
unless v.is_a?(Numeric)
|
|
14
|
+
raise ArgumentError,
|
|
15
|
+
"Query vector contains non-numeric element at index #{i}: #{v.inspect}"
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
7
19
|
@ext_query = Ext::VectorQuery.new
|
|
8
20
|
@ext_query.field_name = field_name.to_s
|
|
9
21
|
@ext_query.topk = topk
|
data/lib/zvec/schema.rb
CHANGED
|
@@ -3,23 +3,31 @@ module Zvec
|
|
|
3
3
|
attr_reader :ext_schema
|
|
4
4
|
|
|
5
5
|
def initialize(name, &block)
|
|
6
|
-
|
|
6
|
+
raise ArgumentError, "Schema name must be a non-empty string" if name.nil? || name.to_s.strip.empty?
|
|
7
|
+
|
|
8
|
+
@ext_schema = Ext::CollectionSchema.new(name.to_s)
|
|
7
9
|
@field_types = {}
|
|
10
|
+
@field_dimensions = {}
|
|
8
11
|
instance_eval(&block) if block
|
|
9
12
|
end
|
|
10
13
|
|
|
11
14
|
def field(name, type, dimension: nil, nullable: false, index: nil)
|
|
12
15
|
name = name.to_s
|
|
16
|
+
raise ArgumentError, "Field name must be a non-empty string" if name.strip.empty?
|
|
17
|
+
|
|
13
18
|
fs = Ext::FieldSchema.new(name, type)
|
|
14
19
|
fs.dimension = dimension if dimension
|
|
15
20
|
fs.nullable = nullable
|
|
16
21
|
fs.set_index_params(index) if index
|
|
17
22
|
@ext_schema.add_field(fs)
|
|
18
23
|
@field_types[name] = type
|
|
24
|
+
@field_dimensions[name] = dimension if dimension
|
|
19
25
|
self
|
|
20
26
|
end
|
|
21
27
|
|
|
22
28
|
def vector(name, dimension:, type: DataTypes::VECTOR_FP32, index: nil)
|
|
29
|
+
raise ArgumentError, "Vector dimension must be a positive integer, got #{dimension.inspect}" unless dimension.is_a?(Integer) && dimension > 0
|
|
30
|
+
|
|
23
31
|
field(name, type, dimension: dimension, index: index)
|
|
24
32
|
end
|
|
25
33
|
|
|
@@ -59,10 +67,19 @@ module Zvec
|
|
|
59
67
|
@field_types[name.to_s]
|
|
60
68
|
end
|
|
61
69
|
|
|
70
|
+
def field_dimension(name)
|
|
71
|
+
@field_dimensions[name.to_s]
|
|
72
|
+
end
|
|
73
|
+
|
|
62
74
|
def has_field?(name)
|
|
63
75
|
@ext_schema.has_field?(name.to_s)
|
|
64
76
|
end
|
|
65
77
|
|
|
78
|
+
# Returns a Hash of field_name => dimension for all vector fields that declare a dimension.
|
|
79
|
+
def vector_fields_with_dimensions
|
|
80
|
+
@field_dimensions.select { |name, _| DataTypes::VECTOR_TYPES.include?(@field_types[name]) }
|
|
81
|
+
end
|
|
82
|
+
|
|
66
83
|
def to_s
|
|
67
84
|
@ext_schema.to_s
|
|
68
85
|
end
|
data/lib/zvec/version.rb
CHANGED
data/lib/zvec.rb
CHANGED
data/test/test_helper.rb
CHANGED
|
@@ -14,6 +14,7 @@ rescue LoadError
|
|
|
14
14
|
# Minimal stubs so pure-Ruby logic can be tested without the compiled extension.
|
|
15
15
|
module Zvec
|
|
16
16
|
class Error < StandardError; end
|
|
17
|
+
class DimensionError < Error; end
|
|
17
18
|
|
|
18
19
|
module Ext
|
|
19
20
|
# Stub enums as simple modules with constants
|
|
@@ -68,9 +69,9 @@ rescue LoadError
|
|
|
68
69
|
def get_int64(f); @fields[f].is_a?(Integer) ? @fields[f] : nil; end
|
|
69
70
|
def get_float(f); @fields[f].is_a?(Float) ? @fields[f] : nil; end
|
|
70
71
|
def get_double(f); @fields[f].is_a?(Float) ? @fields[f] : nil; end
|
|
71
|
-
def get_float_vector(f); @fields[f].is_a?(Array) && @fields[f].first.is_a?(Float) ? @fields[f] : nil; end
|
|
72
|
+
def get_float_vector(f); @fields[f].is_a?(Array) && (@fields[f].empty? || @fields[f].first.is_a?(Float)) ? @fields[f] : nil; end
|
|
72
73
|
def get_double_vector(f); get_float_vector(f); end
|
|
73
|
-
def get_string_array(f); @fields[f].is_a?(Array) && @fields[f].first.is_a?(String) ? @fields[f] : nil; end
|
|
74
|
+
def get_string_array(f); @fields[f].is_a?(Array) && !@fields[f].empty? && @fields[f].first.is_a?(String) ? @fields[f] : nil; end
|
|
74
75
|
def to_s; "[pk:#{@pk}, score:#{@score}, fields:#{@fields.size}]"; end
|
|
75
76
|
end
|
|
76
77
|
|
|
@@ -90,7 +91,6 @@ rescue LoadError
|
|
|
90
91
|
def has_field?(n); @fields.key?(n); end
|
|
91
92
|
def field_names; @fields.keys; end
|
|
92
93
|
def all_field_names; @fields.keys; end
|
|
93
|
-
alias_method :field_names, :all_field_names
|
|
94
94
|
def fields; @fields.values; end
|
|
95
95
|
def vector_fields; @fields.values.select(&:vector_field?); end
|
|
96
96
|
def forward_fields; @fields.values.reject(&:vector_field?); end
|
|
@@ -118,6 +118,15 @@ rescue LoadError
|
|
|
118
118
|
def initialize(ef: 200); @ef = ef; end
|
|
119
119
|
end
|
|
120
120
|
|
|
121
|
+
class IVFQueryParams
|
|
122
|
+
attr_reader :nprobe
|
|
123
|
+
def initialize(nprobe: 10); @nprobe = nprobe; end
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
class FlatQueryParams
|
|
127
|
+
def initialize; end
|
|
128
|
+
end
|
|
129
|
+
|
|
121
130
|
class CollectionOptions
|
|
122
131
|
attr_accessor :read_only, :enable_mmap, :max_buffer_size
|
|
123
132
|
def initialize; @read_only = false; @enable_mmap = true; @max_buffer_size = 64 * 1024 * 1024; end
|
|
@@ -131,6 +140,9 @@ rescue LoadError
|
|
|
131
140
|
def set_query_vector(arr); @query_vector = arr; end
|
|
132
141
|
def set_output_fields(f); @output_fields = f; end
|
|
133
142
|
def set_query_params(p); @query_params = p; end
|
|
143
|
+
def set_hnsw_query_params(p); @query_params = p; end
|
|
144
|
+
def set_ivf_query_params(p); @query_params = p; end
|
|
145
|
+
def set_flat_query_params(p); @query_params = p; end
|
|
134
146
|
alias_method :include_vector?, :include_vector
|
|
135
147
|
end
|
|
136
148
|
|
|
@@ -147,6 +159,83 @@ rescue LoadError
|
|
|
147
159
|
def message; @msg; end
|
|
148
160
|
def to_s; @ok ? "OK" : @msg; end
|
|
149
161
|
end
|
|
162
|
+
|
|
163
|
+
# Stub Collection for pure-Ruby testing of the wrapper layer
|
|
164
|
+
class Collection
|
|
165
|
+
attr_reader :path_value, :schema_value, :docs
|
|
166
|
+
|
|
167
|
+
def initialize
|
|
168
|
+
@docs = {}
|
|
169
|
+
@stats = CollectionStats.new
|
|
170
|
+
@path_value = ""
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
def self.create_and_open(path, ext_schema, opts)
|
|
174
|
+
c = new
|
|
175
|
+
c.instance_variable_set(:@path_value, path)
|
|
176
|
+
c.instance_variable_set(:@schema_value, ext_schema)
|
|
177
|
+
c
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
def self.open(path, opts)
|
|
181
|
+
c = new
|
|
182
|
+
c.instance_variable_set(:@path_value, path)
|
|
183
|
+
c
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
def path; @path_value; end
|
|
187
|
+
def schema; @schema_value; end
|
|
188
|
+
|
|
189
|
+
def stats
|
|
190
|
+
s = CollectionStats.new
|
|
191
|
+
s.doc_count = @docs.size
|
|
192
|
+
s
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
def insert(ext_docs)
|
|
196
|
+
ext_docs.each { |d| @docs[d.pk] = d }
|
|
197
|
+
ext_docs.map { |_| [true, ""] }
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
def upsert(ext_docs)
|
|
201
|
+
ext_docs.each { |d| @docs[d.pk] = d }
|
|
202
|
+
ext_docs.map { |_| [true, ""] }
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
def update(ext_docs)
|
|
206
|
+
ext_docs.each { |d| @docs[d.pk] = d if @docs.key?(d.pk) }
|
|
207
|
+
ext_docs.map { |_| [true, ""] }
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
def delete_pks(pks)
|
|
211
|
+
pks.each { |pk| @docs.delete(pk) }
|
|
212
|
+
pks.map { |_| [true, ""] }
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
def delete_by_filter(filter)
|
|
216
|
+
# no-op in stub
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
def query(vq)
|
|
220
|
+
@docs.values.first(vq.topk).map do |d|
|
|
221
|
+
h = { "pk" => d.pk, "score" => 0.95 }
|
|
222
|
+
d.field_names.each { |f| h[f] = d.get_string(f) || d.get_int64(f) || d.get_float(f) || d.get_double(f) || d.get_bool(f) || d.get_float_vector(f) || d.get_string_array(f) }
|
|
223
|
+
h
|
|
224
|
+
end
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
def fetch(pks)
|
|
228
|
+
result = {}
|
|
229
|
+
pks.each { |pk| result[pk] = @docs[pk] if @docs.key?(pk) }
|
|
230
|
+
result
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
def create_index(field_name, index_params); end
|
|
234
|
+
def drop_index(field_name); end
|
|
235
|
+
def optimize; end
|
|
236
|
+
def flush; end
|
|
237
|
+
def destroy; end
|
|
238
|
+
end
|
|
150
239
|
end
|
|
151
240
|
|
|
152
241
|
require_relative "../lib/zvec/version"
|
|
@@ -154,6 +243,7 @@ rescue LoadError
|
|
|
154
243
|
require_relative "../lib/zvec/schema"
|
|
155
244
|
require_relative "../lib/zvec/doc"
|
|
156
245
|
require_relative "../lib/zvec/query"
|
|
246
|
+
require_relative "../lib/zvec/collection"
|
|
157
247
|
|
|
158
248
|
include DataTypes
|
|
159
249
|
end
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
|
|
3
|
+
class TestTypeDetection < Minitest::Test
|
|
4
|
+
# --- DataTypes.detect_type ---
|
|
5
|
+
|
|
6
|
+
def test_detect_type_nil
|
|
7
|
+
assert_nil Zvec::DataTypes.detect_type(nil)
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def test_detect_type_string
|
|
11
|
+
assert_equal Zvec::Ext::DataType::STRING, Zvec::DataTypes.detect_type("hello")
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def test_detect_type_integer
|
|
15
|
+
assert_equal Zvec::Ext::DataType::INT64, Zvec::DataTypes.detect_type(42)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def test_detect_type_float
|
|
19
|
+
assert_equal Zvec::Ext::DataType::DOUBLE, Zvec::DataTypes.detect_type(3.14)
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def test_detect_type_true
|
|
23
|
+
assert_equal Zvec::Ext::DataType::BOOL, Zvec::DataTypes.detect_type(true)
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def test_detect_type_false
|
|
27
|
+
assert_equal Zvec::Ext::DataType::BOOL, Zvec::DataTypes.detect_type(false)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# Integer vs Float distinction: 1 should be INT64, 1.0 should be DOUBLE
|
|
31
|
+
def test_detect_type_integer_one
|
|
32
|
+
assert_equal Zvec::Ext::DataType::INT64, Zvec::DataTypes.detect_type(1)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def test_detect_type_float_one
|
|
36
|
+
assert_equal Zvec::Ext::DataType::DOUBLE, Zvec::DataTypes.detect_type(1.0)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# --- Array type detection ---
|
|
40
|
+
|
|
41
|
+
def test_detect_type_empty_array
|
|
42
|
+
# Empty arrays default to VECTOR_FP32
|
|
43
|
+
assert_equal Zvec::Ext::DataType::VECTOR_FP32, Zvec::DataTypes.detect_type([])
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def test_detect_type_float_array
|
|
47
|
+
assert_equal Zvec::Ext::DataType::VECTOR_FP32, Zvec::DataTypes.detect_type([1.0, 2.0])
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def test_detect_type_integer_array
|
|
51
|
+
# Integers in arrays are treated as vectors (float)
|
|
52
|
+
assert_equal Zvec::Ext::DataType::VECTOR_FP32, Zvec::DataTypes.detect_type([1, 2, 3])
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def test_detect_type_string_array
|
|
56
|
+
assert_equal Zvec::Ext::DataType::ARRAY_STRING, Zvec::DataTypes.detect_type(["a", "b"])
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def test_detect_type_bool_array
|
|
60
|
+
assert_equal Zvec::Ext::DataType::ARRAY_BOOL, Zvec::DataTypes.detect_type([true, false])
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def test_detect_type_nil_filled_array
|
|
64
|
+
# Array of all nils defaults to VECTOR_FP32
|
|
65
|
+
assert_equal Zvec::Ext::DataType::VECTOR_FP32, Zvec::DataTypes.detect_type([nil, nil, nil])
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def test_detect_type_array_with_leading_nil
|
|
69
|
+
# Skips nils to find first real element
|
|
70
|
+
assert_equal Zvec::Ext::DataType::ARRAY_STRING, Zvec::DataTypes.detect_type([nil, "hello", "world"])
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
# --- DataTypes.coerce_value ---
|
|
74
|
+
|
|
75
|
+
def test_coerce_nil_returns_nil
|
|
76
|
+
assert_nil Zvec::DataTypes.coerce_value(nil, Zvec::Ext::DataType::STRING)
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def test_coerce_to_string
|
|
80
|
+
assert_equal "42", Zvec::DataTypes.coerce_value(42, Zvec::Ext::DataType::STRING)
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def test_coerce_string_true_to_bool
|
|
84
|
+
assert_equal true, Zvec::DataTypes.coerce_value("true", Zvec::Ext::DataType::BOOL)
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
def test_coerce_string_false_to_bool
|
|
88
|
+
assert_equal false, Zvec::DataTypes.coerce_value("false", Zvec::Ext::DataType::BOOL)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def test_coerce_actual_bool_true
|
|
92
|
+
assert_equal true, Zvec::DataTypes.coerce_value(true, Zvec::Ext::DataType::BOOL)
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
# A genuine false coerced to BOOL is expected to stay false.
def test_coerce_actual_bool_false
  coerced = Zvec::DataTypes.coerce_value(false, Zvec::Ext::DataType::BOOL)
  assert_equal false, coerced
end
|
|
98
|
+
|
|
99
|
+
# The Integer 1 coerced to BOOL is expected to yield true.
def test_coerce_integer_to_bool_nonzero
  coerced = Zvec::DataTypes.coerce_value(1, Zvec::Ext::DataType::BOOL)
  assert_equal true, coerced
end
|
|
102
|
+
|
|
103
|
+
# The Integer 0 coerced to BOOL is expected to yield false.
def test_coerce_integer_to_bool_zero
  coerced = Zvec::DataTypes.coerce_value(0, Zvec::Ext::DataType::BOOL)
  assert_equal false, coerced
end
|
|
106
|
+
|
|
107
|
+
# A string that is neither "true" nor "false" is expected to be rejected
# with ArgumentError when coerced to BOOL.
def test_coerce_invalid_string_to_bool_raises
  assert_raises(ArgumentError) { Zvec::DataTypes.coerce_value("maybe", Zvec::Ext::DataType::BOOL) }
end
|
|
112
|
+
|
|
113
|
+
# 3.7 coerced to INT64 is expected to give 3 (the fraction is dropped,
# not rounded up).
def test_coerce_float_to_integer
  coerced = Zvec::DataTypes.coerce_value(3.7, Zvec::Ext::DataType::INT64)
  assert_equal 3, coerced
end
|
|
116
|
+
|
|
117
|
+
# A numeric string coerced to INT64 is expected to be parsed to its Integer.
def test_coerce_string_to_integer
  coerced = Zvec::DataTypes.coerce_value("42", Zvec::Ext::DataType::INT64)
  assert_equal 42, coerced
end
|
|
120
|
+
|
|
121
|
+
# A non-numeric string is expected to be rejected with ArgumentError when
# coerced to INT64.
def test_coerce_bad_string_to_integer_raises
  assert_raises(ArgumentError) { Zvec::DataTypes.coerce_value("abc", Zvec::Ext::DataType::INT64) }
end
|
|
126
|
+
|
|
127
|
+
# An Integer coerced to DOUBLE is expected to come back numerically equal
# (within 0.001).
def test_coerce_integer_to_float
  coerced = Zvec::DataTypes.coerce_value(42, Zvec::Ext::DataType::DOUBLE)
  assert_in_delta 42.0, coerced, 0.001
end
|
|
130
|
+
|
|
131
|
+
# A decimal string coerced to DOUBLE is expected to be parsed to its value
# (within 0.001).
def test_coerce_string_to_float
  coerced = Zvec::DataTypes.coerce_value("3.14", Zvec::Ext::DataType::DOUBLE)
  assert_in_delta 3.14, coerced, 0.001
end
|
|
134
|
+
|
|
135
|
+
# A non-numeric string is expected to be rejected with ArgumentError when
# coerced to DOUBLE.
def test_coerce_bad_string_to_float_raises
  assert_raises(ArgumentError) { Zvec::DataTypes.coerce_value("xyz", Zvec::Ext::DataType::DOUBLE) }
end
|
|
140
|
+
|
|
141
|
+
# --- Vector coercion ---
|
|
142
|
+
|
|
143
|
+
# Integer elements are expected to come back as Floats when the target is
# VECTOR_FP32.
def test_coerce_integer_array_to_vector
  vector = Zvec::DataTypes.coerce_value([1, 2, 3], Zvec::Ext::DataType::VECTOR_FP32)
  assert_equal [1.0, 2.0, 3.0], vector
end
|
|
147
|
+
|
|
148
|
+
# A nil element inside a vector is expected to be replaced by 0.0.
def test_coerce_nil_in_vector_becomes_zero
  vector = Zvec::DataTypes.coerce_value([1.0, nil, 3.0], Zvec::Ext::DataType::VECTOR_FP32)
  assert_equal [1.0, 0.0, 3.0], vector
end
|
|
152
|
+
|
|
153
|
+
# A non-numeric element inside a vector is expected to trigger ArgumentError.
def test_coerce_non_numeric_in_vector_raises
  assert_raises(ArgumentError) { Zvec::DataTypes.coerce_value([1.0, "bad", 3.0], Zvec::Ext::DataType::VECTOR_FP32) }
end
|
|
158
|
+
|
|
159
|
+
# Passing a non-Array where a vector is required is expected to trigger
# ArgumentError.
def test_coerce_non_array_to_vector_raises
  assert_raises(ArgumentError) { Zvec::DataTypes.coerce_value("not array", Zvec::Ext::DataType::VECTOR_FP32) }
end
|
|
164
|
+
|
|
165
|
+
# --- String array coercion ---
|
|
166
|
+
|
|
167
|
+
# An array of Strings coerced to ARRAY_STRING is expected to be unchanged.
def test_coerce_string_array
  strings = Zvec::DataTypes.coerce_value(["a", "b"], Zvec::Ext::DataType::ARRAY_STRING)
  assert_equal ["a", "b"], strings
end
|
|
171
|
+
|
|
172
|
+
# A nil element inside a string array is expected to be replaced by "".
def test_coerce_nil_in_string_array_becomes_empty
  strings = Zvec::DataTypes.coerce_value(["a", nil, "c"], Zvec::Ext::DataType::ARRAY_STRING)
  assert_equal ["a", "", "c"], strings
end
|
|
176
|
+
|
|
177
|
+
# Passing a non-Array where a string array is required is expected to
# trigger ArgumentError.
def test_coerce_non_array_to_string_array_raises
  assert_raises(ArgumentError) { Zvec::DataTypes.coerce_value("not array", Zvec::Ext::DataType::ARRAY_STRING) }
end
|
|
182
|
+
|
|
183
|
+
# --- Coerce error messages include field name ---
|
|
184
|
+
|
|
185
|
+
# When a field_name is supplied, the ArgumentError message is expected to
# mention that field.
def test_coerce_error_message_includes_field_name
  raised = assert_raises(ArgumentError) do
    Zvec::DataTypes.coerce_value("abc", Zvec::Ext::DataType::INT64, field_name: "count")
  end

  assert_includes raised.message, "count"
end
|
|
191
|
+
|
|
192
|
+
# --- Doc auto-detection edge cases ---
|
|
193
|
+
|
|
194
|
+
# Without a schema, an Integer and a Float stored on the same Doc are each
# expected to read back with their own numeric kind.
def test_doc_auto_detect_integer_vs_float
  record = Zvec::Doc.new
  record["int_val"] = 42
  record["float_val"] = 42.0

  # Per the original note: the Integer goes through set_int64 and the
  # Float through set_double.
  assert_equal 42, record["int_val"]
  assert_in_delta 42.0, record["float_val"], 0.001
end
|
|
202
|
+
|
|
203
|
+
# A true stored on a schema-less Doc is expected to read back as true.
def test_doc_auto_detect_true_boolean
  record = Zvec::Doc.new
  record["flag"] = true

  assert_equal true, record["flag"]
end
|
|
208
|
+
|
|
209
|
+
# A false stored on a schema-less Doc is expected to read back as false.
def test_doc_auto_detect_false_boolean
  record = Zvec::Doc.new
  record["flag"] = false

  assert_equal false, record["flag"]
end
|
|
214
|
+
|
|
215
|
+
# With no schema to request coercion, the text "true" is expected to stay
# a String rather than become a boolean.
def test_doc_string_true_stays_string
  record = Zvec::Doc.new
  record["val"] = "true"

  assert_equal "true", record["val"]
end
|
|
221
|
+
|
|
222
|
+
# With no schema, the text "false" is expected to stay a String.
def test_doc_string_false_stays_string
  record = Zvec::Doc.new
  record["val"] = "false"

  assert_equal "false", record["val"]
end
|
|
227
|
+
|
|
228
|
+
# A float array containing a nil is expected to be stored as a 3-element
# Array whose nil slot reads back as (approximately) 0.0.
def test_doc_nil_value_in_array_auto_detect
  record = Zvec::Doc.new
  record["vec"] = [1.0, nil, 3.0]
  stored = record["vec"]

  assert_kind_of Array, stored
  assert_equal 3, stored.size
  assert_in_delta 0.0, stored[1], 0.001
end
|
|
236
|
+
|
|
237
|
+
# Storing an empty array must not crash; reading it back may give either
# nil or an empty array.
def test_doc_empty_array_auto_detect
  record = Zvec::Doc.new
  record["vec"] = []

  stored = record["vec"]
  assert(stored.nil? || stored == [])
end
|
|
244
|
+
|
|
245
|
+
# With a schema declaring "active" as bool, assigning the text "true" is
# expected to read back as the boolean true.
def test_doc_schema_coerces_string_bool
  bool_schema = Zvec::Schema.new("test") { bool "active" }
  record = Zvec::Doc.new(schema: bool_schema)
  record["active"] = "true"

  assert_equal true, record["active"]
end
|
|
251
|
+
|
|
252
|
+
# With a schema declaring "score" as double, assigning an Integer is
# expected to read back as an equal floating-point value.
def test_doc_schema_coerces_int_to_float
  double_schema = Zvec::Schema.new("test") { double "score" }
  record = Zvec::Doc.new(schema: double_schema)
  record["score"] = 42

  assert_in_delta 42.0, record["score"], 0.001
end
|
|
258
|
+
end
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
require_relative "test_helper"
|
|
2
|
+
|
|
3
|
+
# Exercises argument validation and error reporting across Zvec::Schema,
# Zvec::Doc, Zvec::VectorQuery and Zvec::Collection.
class TestValidation < Minitest::Test
  include TempDirHelper

  # Builds the schema shared by the tests below: four scalar fields plus a
  # 4-dimensional vector field named "embedding".
  def setup
    @schema = Zvec::Schema.new("test_validation") do
      string "title"
      int32 "count"
      float "rating"
      bool "active"
      vector "embedding", dimension: 4
    end
  end

  # --- Schema validation ---

  def test_schema_rejects_nil_name
    assert_raises(ArgumentError) do
      Zvec::Schema.new(nil)
    end
  end

  def test_schema_rejects_empty_name
    assert_raises(ArgumentError) do
      Zvec::Schema.new("")
    end
  end

  def test_schema_rejects_blank_name
    assert_raises(ArgumentError) do
      Zvec::Schema.new(" ")
    end
  end

  def test_schema_field_rejects_empty_name
    # The schema is built outside the assertion so that only the field call
    # is allowed to raise.
    target = Zvec::Schema.new("test")
    assert_raises(ArgumentError) do
      target.field("", Zvec::DataTypes::STRING)
    end
  end

  def test_schema_vector_rejects_zero_dimension
    assert_raises(ArgumentError) { Zvec::Schema.new("test") { vector "v", dimension: 0 } }
  end

  def test_schema_vector_rejects_negative_dimension
    assert_raises(ArgumentError) { Zvec::Schema.new("test") { vector "v", dimension: -1 } }
  end

  def test_schema_vector_rejects_non_integer_dimension
    assert_raises(ArgumentError) { Zvec::Schema.new("test") { vector "v", dimension: 3.5 } }
  end

  def test_schema_field_dimension_tracking
    vector_schema = Zvec::Schema.new("test") { vector "embedding", dimension: 128 }
    assert_equal 128, vector_schema.field_dimension("embedding")
  end

  def test_schema_field_dimension_nil_for_non_vector
    scalar_schema = Zvec::Schema.new("test") { string "title" }
    assert_nil scalar_schema.field_dimension("title")
  end

  def test_schema_vector_fields_with_dimensions
    mixed_schema = Zvec::Schema.new("test") do
      string "title"
      vector "embedding", dimension: 128
      vector "small_embedding", dimension: 32
    end

    dimensions = mixed_schema.vector_fields_with_dimensions
    assert_equal 128, dimensions["embedding"]
    assert_equal 32, dimensions["small_embedding"]
    refute dimensions.key?("title")
  end

  # --- Doc field name validation ---

  def test_doc_set_rejects_empty_field_name
    record = Zvec::Doc.new
    assert_raises(ArgumentError) do
      record.set("", "value")
    end
  end

  def test_doc_set_rejects_blank_field_name
    record = Zvec::Doc.new
    assert_raises(ArgumentError) do
      record.set(" ", "value")
    end
  end

  # --- Dimension validation in Doc ---

  def test_doc_dimension_mismatch_raises_error
    record = Zvec::Doc.new(schema: @schema)
    # Three elements against the schema's declared dimension of 4.
    assert_raises(Zvec::DimensionError) { record["embedding"] = [1.0, 2.0, 3.0] }
  end

  def test_doc_dimension_mismatch_error_message
    record = Zvec::Doc.new(schema: @schema)
    raised = assert_raises(Zvec::DimensionError) { record["embedding"] = [1.0, 2.0] }

    # Field name, expected dimension and actual size, checked in that order.
    ["embedding", "4", "2"].each do |fragment|
      assert_includes raised.message, fragment
    end
  end

  def test_doc_correct_dimension_accepted
    record = Zvec::Doc.new(schema: @schema)
    record["embedding"] = [1.0, 2.0, 3.0, 4.0]

    stored = record["embedding"]
    assert_equal 4, stored.size
  end

  def test_doc_empty_vector_accepted
    # No assertion here: the test passes as long as assigning an empty
    # vector does not raise (there is no dimension to check).
    record = Zvec::Doc.new(schema: @schema)
    record["embedding"] = []
  end

  # --- VectorQuery validation ---

  def test_query_rejects_nil_field_name
    assert_raises(ArgumentError) { Zvec::VectorQuery.new(field_name: nil, vector: [1.0]) }
  end

  def test_query_rejects_empty_field_name
    assert_raises(ArgumentError) { Zvec::VectorQuery.new(field_name: "", vector: [1.0]) }
  end

  def test_query_rejects_empty_vector
    assert_raises(ArgumentError) { Zvec::VectorQuery.new(field_name: "vec", vector: []) }
  end

  def test_query_rejects_non_array_vector
    assert_raises(ArgumentError) { Zvec::VectorQuery.new(field_name: "vec", vector: "not an array") }
  end

  def test_query_rejects_non_numeric_vector_elements
    assert_raises(ArgumentError) { Zvec::VectorQuery.new(field_name: "vec", vector: [1.0, "bad", 3.0]) }
  end

  def test_query_rejects_zero_topk
    assert_raises(ArgumentError) { Zvec::VectorQuery.new(field_name: "vec", vector: [1.0], topk: 0) }
  end

  def test_query_rejects_negative_topk
    assert_raises(ArgumentError) { Zvec::VectorQuery.new(field_name: "vec", vector: [1.0], topk: -1) }
  end

  # --- Collection validation (using stubs) ---

  def test_collection_create_rejects_nil_path
    assert_raises(ArgumentError) { Zvec::Collection.create_and_open(nil, @schema) }
  end

  def test_collection_create_rejects_empty_path
    assert_raises(ArgumentError) { Zvec::Collection.create_and_open("", @schema) }
  end

  def test_collection_create_rejects_non_schema
    assert_raises(ArgumentError) { Zvec::Collection.create_and_open("/tmp/test", "not a schema") }
  end

  def test_collection_open_rejects_nil_path
    assert_raises(ArgumentError) { Zvec::Collection.open(nil) }
  end

  def test_collection_add_rejects_nil_pk
    with_collection do |collection|
      assert_raises(ArgumentError) { collection.add(pk: nil, title: "test") }
    end
  end

  def test_collection_search_rejects_empty_vector
    with_collection do |collection|
      assert_raises(ArgumentError) { collection.search([]) }
    end
  end

  def test_collection_search_rejects_non_array_vector
    with_collection do |collection|
      assert_raises(ArgumentError) { collection.search("not an array") }
    end
  end

  def test_collection_delete_rejects_empty_pks
    with_collection do |collection|
      assert_raises(ArgumentError) { collection.delete }
    end
  end

  def test_collection_fetch_rejects_empty_pks
    with_collection do |collection|
      assert_raises(ArgumentError) { collection.fetch }
    end
  end

  def test_collection_query_dimension_mismatch
    with_collection do |collection|
      assert_raises(Zvec::DimensionError) do
        collection.query(field_name: "embedding", vector: [1.0, 2.0], topk: 5)
      end
    end
  end

  def test_collection_query_dimension_mismatch_message
    with_collection do |collection|
      raised = assert_raises(Zvec::DimensionError) do
        collection.query(field_name: "embedding", vector: [1.0, 2.0], topk: 5)
      end

      # Field name, expected dimension, actual size, then collection name.
      ["embedding", "4", "2", "test_validation"].each do |fragment|
        assert_includes raised.message, fragment
      end
    end
  end

  def test_collection_query_correct_dimension_accepted
    with_collection do |collection|
      collection.add(pk: "d1", title: "Test", count: 1, rating: 1.0, active: true,
                     embedding: [1.0, 2.0, 3.0, 4.0])

      hits = collection.query(field_name: "embedding", vector: [1.0, 2.0, 3.0, 4.0], topk: 5)
      assert_kind_of Array, hits
    end
  end

  # --- Error context includes collection name ---

  def test_error_prefix_includes_collection_name
    with_collection do |collection|
      # Calling delete with no pks is the trigger for the error under test.
      raised = assert_raises(ArgumentError) { collection.delete }
      assert_includes raised.message, "test_validation"
    end
  end

  # --- DimensionError is subclass of Error ---

  def test_dimension_error_is_zvec_error
    assert(Zvec::DimensionError < Zvec::Error)
  end

  # --- Collection name is accessible ---

  def test_collection_name_from_schema
    with_collection do |collection|
      assert_equal "test_validation", collection.collection_name
    end
  end

  # --- Thread safety: collection has a monitor ---

  def test_collection_has_monitor
    with_collection do |collection|
      monitor = collection.instance_variable_get(:@monitor)
      assert_respond_to monitor, :synchronize
    end
  end

  private

  # Yields a collection created from @schema at "<dir>/col", where <dir> is
  # the directory handed out by with_temp_dir (presumably provided by
  # TempDirHelper; its definition is not in this file).
  def with_collection
    with_temp_dir("zvec_val") do |dir|
      yield Zvec::Collection.create_and_open("#{dir}/col", @schema)
    end
  end
end
|
data/test/test_version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: zvec-ruby
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Johannes Dwi Cahyo
|
|
@@ -99,6 +99,8 @@ files:
|
|
|
99
99
|
- test/test_query.rb
|
|
100
100
|
- test/test_ruby_llm_store.rb
|
|
101
101
|
- test/test_schema.rb
|
|
102
|
+
- test/test_type_detection.rb
|
|
103
|
+
- test/test_validation.rb
|
|
102
104
|
- test/test_version.rb
|
|
103
105
|
- zvec.gemspec
|
|
104
106
|
homepage: https://github.com/johannesdwicahyo/zvec-ruby
|