zvec-ruby 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,37 +1,143 @@
1
1
  module Zvec
2
+ # Data type constants, coercion utilities, and dispatch tables for mapping
3
+ # between Ruby types and the underlying C++ zvec engine types.
4
+ #
5
+ # == Scalar Types
6
+ #
7
+ # * {BINARY} -- Raw binary data
8
+ # * {STRING} -- UTF-8 string
9
+ # * {BOOL} -- Boolean (true/false)
10
+ # * {INT32} -- 32-bit signed integer
11
+ # * {INT64} -- 64-bit signed integer
12
+ # * {UINT32} -- 32-bit unsigned integer
13
+ # * {UINT64} -- 64-bit unsigned integer
14
+ # * {FLOAT} -- 32-bit IEEE 754 float
15
+ # * {DOUBLE} -- 64-bit IEEE 754 double
16
+ #
17
+ # == Dense Vector Types
18
+ #
19
+ # Dense vectors store a fixed-length array of numeric values. Choose the
20
+ # precision that balances accuracy vs. memory:
21
+ #
22
+ # * {VECTOR_FP32} -- 32-bit float vector (default, best accuracy)
23
+ # * {VECTOR_FP64} -- 64-bit double vector (highest accuracy, 2x memory)
24
+ # * {VECTOR_FP16} -- 16-bit half-precision vector (half the memory of FP32)
25
+ # * {VECTOR_INT8} -- 8-bit integer vector (smallest, for quantized models)
26
+ #
27
+ # == Sparse Vector Types
28
+ #
29
+ # Sparse vectors store only non-zero elements, ideal for high-dimensional
30
+ # data where most values are zero (e.g., BM25 or TF-IDF features):
31
+ #
32
+ # * {SPARSE_VECTOR_FP32} -- Sparse vector with 32-bit float values
33
+ # * {SPARSE_VECTOR_FP16} -- Sparse vector with 16-bit float values
34
+ #
35
+ # == Binary Vectors
36
+ #
37
+ # Binary vectors use the {BINARY} type and store bit-packed data, useful for
38
+ # binary hash codes or Hamming distance searches.
39
+ #
40
+ # == Array Types
41
+ #
42
+ # * {ARRAY_STRING} -- Array of strings (e.g., tags)
43
+ # * {ARRAY_INT32} -- Array of 32-bit integers
44
+ # * {ARRAY_INT64} -- Array of 64-bit integers
45
+ # * {ARRAY_FLOAT} -- Array of 32-bit floats
46
+ # * {ARRAY_DOUBLE} -- Array of 64-bit doubles
47
+ # * {ARRAY_BOOL} -- Array of booleans
48
+ #
49
+ # == Quantization Types
50
+ #
51
+ # Quantization reduces memory usage and speeds up search at the cost of some
52
+ # accuracy. Specify a quantization type when creating an index:
53
+ #
54
+ # Ext::HnswIndexParams.new(metric, quantize_type: Ext::QuantizeType::INT8)
55
+ #
56
+ # Available quantization types (via +Ext::QuantizeType+):
57
+ #
58
+ # * +FP16+ -- Half-precision (16-bit) quantization. Good balance of speed
59
+ # and accuracy. Halves memory vs. FP32.
60
+ # * +INT8+ -- 8-bit integer quantization. ~4x memory reduction vs. FP32.
61
+ # Slight accuracy loss.
62
+ # * +INT4+ -- 4-bit integer quantization. ~8x memory reduction vs. FP32.
63
+ # Larger accuracy loss, best for large-scale approximate search.
64
+ #
65
+ # == Metric Types
66
+ #
67
+ # * {L2} -- Euclidean (L2) distance. Lower is more similar.
68
+ # * {IP} -- Inner product. Higher is more similar.
69
+ # * {COSINE} -- Cosine similarity. Higher is more similar. Vectors are
70
+ # normalized internally.
71
+ #
2
72
  module DataTypes
3
73
  # Re-export C++ enum values as Ruby-friendly constants
74
+
75
+ # @return [Symbol] Raw binary data type
4
76
  BINARY = Ext::DataType::BINARY
77
+ # @return [Symbol] UTF-8 string data type
5
78
  STRING = Ext::DataType::STRING
79
+ # @return [Symbol] Boolean data type
6
80
  BOOL = Ext::DataType::BOOL
81
+ # @return [Symbol] 32-bit signed integer data type
7
82
  INT32 = Ext::DataType::INT32
83
+ # @return [Symbol] 64-bit signed integer data type
8
84
  INT64 = Ext::DataType::INT64
85
+ # @return [Symbol] 32-bit unsigned integer data type
9
86
  UINT32 = Ext::DataType::UINT32
87
+ # @return [Symbol] 64-bit unsigned integer data type
10
88
  UINT64 = Ext::DataType::UINT64
89
+ # @return [Symbol] 32-bit float data type
11
90
  FLOAT = Ext::DataType::FLOAT
91
+ # @return [Symbol] 64-bit double data type
12
92
  DOUBLE = Ext::DataType::DOUBLE
13
93
 
94
+ # @return [Symbol] 32-bit float dense vector
14
95
  VECTOR_FP32 = Ext::DataType::VECTOR_FP32
96
+ # @return [Symbol] 64-bit double dense vector
15
97
  VECTOR_FP64 = Ext::DataType::VECTOR_FP64
98
+ # @return [Symbol] 16-bit half-precision dense vector
16
99
  VECTOR_FP16 = Ext::DataType::VECTOR_FP16
100
+ # @return [Symbol] 8-bit integer dense vector (quantized)
17
101
  VECTOR_INT8 = Ext::DataType::VECTOR_INT8
18
102
 
103
+ # @return [Symbol] 32-bit float sparse vector
19
104
  SPARSE_VECTOR_FP32 = Ext::DataType::SPARSE_VECTOR_FP32
105
+ # @return [Symbol] 16-bit float sparse vector
20
106
  SPARSE_VECTOR_FP16 = Ext::DataType::SPARSE_VECTOR_FP16
21
107
 
108
+ # @return [Symbol] Array of strings
22
109
  ARRAY_STRING = Ext::DataType::ARRAY_STRING
110
+ # @return [Symbol] Array of 32-bit integers
23
111
  ARRAY_INT32 = Ext::DataType::ARRAY_INT32
112
+ # @return [Symbol] Array of 64-bit integers
24
113
  ARRAY_INT64 = Ext::DataType::ARRAY_INT64
114
+ # @return [Symbol] Array of 32-bit floats
25
115
  ARRAY_FLOAT = Ext::DataType::ARRAY_FLOAT
116
+ # @return [Symbol] Array of 64-bit doubles
26
117
  ARRAY_DOUBLE = Ext::DataType::ARRAY_DOUBLE
118
+ # @return [Symbol] Array of booleans
27
119
  ARRAY_BOOL = Ext::DataType::ARRAY_BOOL
28
120
 
29
121
  # Metric types
122
+
123
+ # @return [Symbol] Euclidean (L2) distance metric
30
124
  L2 = Ext::MetricType::L2
125
+ # @return [Symbol] Inner product metric
31
126
  IP = Ext::MetricType::IP
127
+ # @return [Symbol] Cosine similarity metric
32
128
  COSINE = Ext::MetricType::COSINE
33
129
 
130
+ # Vector data types for dimension validation
131
+ # @return [Array<Symbol>] All dense vector data type constants
132
+ VECTOR_TYPES = [
133
+ Ext::DataType::VECTOR_FP32,
134
+ Ext::DataType::VECTOR_FP64,
135
+ Ext::DataType::VECTOR_FP16,
136
+ Ext::DataType::VECTOR_INT8,
137
+ ].freeze
138
+
34
139
  # Setter dispatch table: DataType -> Doc setter method name
140
+ # @return [Hash{Symbol => Symbol}]
35
141
  SETTER_FOR = {
36
142
  Ext::DataType::STRING => :set_string,
37
143
  Ext::DataType::BOOL => :set_bool,
@@ -46,6 +152,8 @@ module Zvec
46
152
  Ext::DataType::ARRAY_STRING => :set_string_array,
47
153
  }.freeze
48
154
 
155
+ # Getter dispatch table: DataType -> Doc getter method name
156
+ # @return [Hash{Symbol => Symbol}]
49
157
  GETTER_FOR = {
50
158
  Ext::DataType::STRING => :get_string,
51
159
  Ext::DataType::BOOL => :get_bool,
@@ -59,5 +167,147 @@ module Zvec
59
167
  Ext::DataType::VECTOR_FP64 => :get_double_vector,
60
168
  Ext::DataType::ARRAY_STRING => :get_string_array,
61
169
  }.freeze
170
+
171
+ # Detect the zvec data type for a Ruby value.
172
+ #
173
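+ # Distinguishes Integer (INT64) from Float (DOUBLE); Strings always map to STRING (they are never parsed as booleans); nil and unsupported classes yield nil; arrays are classified by their first non-nil element, and empty or all-nil arrays default to VECTOR_FP32.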
174
+ #
175
+ # @param value [Object] the Ruby value to inspect
176
+ # @return [Symbol, nil] the zvec data type constant, or nil when the value is nil or of an unsupported class
177
+ #
178
+ # @example
179
+ # DataTypes.detect_type("hello") #=> Ext::DataType::STRING
180
+ # DataTypes.detect_type(42) #=> Ext::DataType::INT64
181
+ # DataTypes.detect_type([1.0]) #=> Ext::DataType::VECTOR_FP32
182
+ # DataTypes.detect_type(nil) #=> nil
183
+ def self.detect_type(value)
184
+ case value
185
+ when NilClass then nil
186
+ when String then Ext::DataType::STRING
187
+ when Integer then Ext::DataType::INT64
188
+ when Float then Ext::DataType::DOUBLE
189
+ when TrueClass, FalseClass then Ext::DataType::BOOL
190
+ when Array then detect_array_type(value)
191
+ end
192
+ end
193
+
194
+ # Coerce a Ruby value into a form suitable for the given zvec data type.
195
+ #
196
+ # @param value [Object] the value to coerce
197
+ # @param target_type [Symbol] the target zvec data type constant
198
+ # @param field_name [String, nil] optional field name for error messages
199
+ # @return [Object] the coerced value
200
+ # @raise [ArgumentError] if the value cannot be coerced to the target type
201
+ #
202
+ # @example
203
+ # DataTypes.coerce_value(42, Ext::DataType::STRING) #=> "42"
204
+ # DataTypes.coerce_value("3.14", Ext::DataType::DOUBLE) #=> 3.14
205
+ # DataTypes.coerce_value([1, 2], Ext::DataType::VECTOR_FP32) #=> [1.0, 2.0]
206
+ def self.coerce_value(value, target_type, field_name: nil)
207
+ return value if value.nil?
208
+
209
+ ctx = field_name ? " for field '#{field_name}'" : ""
210
+
211
+ case target_type
212
+ when Ext::DataType::STRING
213
+ value.to_s
214
+ when Ext::DataType::BOOL
215
+ coerce_bool(value, ctx)
216
+ when Ext::DataType::INT32, Ext::DataType::INT64,
217
+ Ext::DataType::UINT32, Ext::DataType::UINT64
218
+ coerce_integer(value, ctx)
219
+ when Ext::DataType::FLOAT, Ext::DataType::DOUBLE
220
+ coerce_float(value, ctx)
221
+ when Ext::DataType::VECTOR_FP32, Ext::DataType::VECTOR_FP64
222
+ coerce_float_vector(value, ctx)
223
+ when Ext::DataType::ARRAY_STRING
224
+ coerce_string_array(value, ctx)
225
+ else
226
+ value
227
+ end
228
+ end
229
+
230
+ class << self
231
+ private
232
+
233
+ # @param arr [Array] the array to detect the element type for
234
+ # @return [Symbol] the detected zvec data type
235
+ def detect_array_type(arr)
236
+ return Ext::DataType::VECTOR_FP32 if arr.empty?
237
+
238
+ first_non_nil = arr.find { |v| !v.nil? }
239
+ return Ext::DataType::VECTOR_FP32 if first_non_nil.nil?
240
+
241
+ case first_non_nil
242
+ when Float then Ext::DataType::VECTOR_FP32
243
+ when Integer then Ext::DataType::VECTOR_FP32
244
+ when String then Ext::DataType::ARRAY_STRING
245
+ when TrueClass, FalseClass then Ext::DataType::ARRAY_BOOL
246
+ else Ext::DataType::VECTOR_FP32
247
+ end
248
+ end
249
+
250
+ def coerce_bool(value, ctx)
251
+ case value
252
+ when TrueClass, FalseClass then value
253
+ when "true", "1" then true
254
+ when "false", "0" then false
255
+ when Integer then !value.zero?
256
+ else
257
+ raise ArgumentError,
258
+ "Cannot coerce #{value.class} (#{value.inspect}) to Bool#{ctx}"
259
+ end
260
+ end
261
+
262
+ def coerce_integer(value, ctx)
263
+ case value
264
+ when Integer then value
265
+ when Float then value.to_i
266
+ when String
267
+ Integer(value)
268
+ else
269
+ raise ArgumentError,
270
+ "Cannot coerce #{value.class} (#{value.inspect}) to Integer#{ctx}"
271
+ end
272
+ rescue ::ArgumentError
273
+ raise ArgumentError,
274
+ "Cannot coerce #{value.class} (#{value.inspect}) to Integer#{ctx}"
275
+ end
276
+
277
+ def coerce_float(value, ctx)
278
+ case value
279
+ when Numeric then value.to_f
280
+ when String
281
+ Float(value)
282
+ else
283
+ raise ArgumentError,
284
+ "Cannot coerce #{value.class} (#{value.inspect}) to Float#{ctx}"
285
+ end
286
+ rescue ::ArgumentError
287
+ raise ArgumentError,
288
+ "Cannot coerce #{value.class} (#{value.inspect}) to Float#{ctx}"
289
+ end
290
+
291
+ def coerce_float_vector(value, ctx)
292
+ unless value.is_a?(Array)
293
+ raise ArgumentError, "Expected Array for vector#{ctx}, got #{value.class}"
294
+ end
295
+ value.map do |v|
296
+ next 0.0 if v.nil?
297
+ unless v.is_a?(Numeric)
298
+ raise ArgumentError,
299
+ "Vector#{ctx} contains non-numeric element: #{v.inspect}"
300
+ end
301
+ v.to_f
302
+ end
303
+ end
304
+
305
+ def coerce_string_array(value, ctx)
306
+ unless value.is_a?(Array)
307
+ raise ArgumentError, "Expected Array for string array#{ctx}, got #{value.class}"
308
+ end
309
+ value.map { |v| v.nil? ? "" : v.to_s }
310
+ end
311
+ end
62
312
  end
63
313
  end
data/lib/zvec/doc.rb CHANGED
@@ -1,7 +1,37 @@
1
1
  module Zvec
2
+ # A document (row) in a zvec collection. Wraps the C++ Doc object and
3
+ # provides Ruby-friendly field access with automatic type coercion.
4
+ #
5
+ # Documents can be created with or without a schema. With a schema,
6
+ # values are coerced and validated against declared field types and
7
+ # vector dimensions. Without a schema, types are auto-detected.
8
+ #
9
+ # @example Creating a document with a schema
10
+ # doc = Zvec::Doc.new(pk: "doc-1", schema: schema)
11
+ # doc["title"] = "Hello World"
12
+ # doc["embedding"] = [0.1, 0.2, 0.3, 0.4]
13
+ #
14
+ # @example Schema-less document (types auto-detected)
15
+ # doc = Zvec::Doc.new(pk: "doc-2")
16
+ # doc["name"] = "Alice" # stored as string
17
+ # doc["age"] = 30 # stored as int64
18
+ # doc["score"] = 0.95 # stored as double
19
+ # doc["active"] = true # stored as bool
20
+ # doc["vec"] = [1.0, 2.0] # stored as float vector
21
+ # doc["tags"] = ["a", "b"] # stored as string array
22
+ #
2
23
  class Doc
24
+ # @return [Ext::Doc] the underlying C++ document object
3
25
  attr_reader :ext_doc
4
26
 
27
+ # Create a new document.
28
+ #
29
+ # @param pk [String, Integer, nil] primary key (converted to String)
30
+ # @param fields [Hash{String, Symbol => Object}] initial field values
31
+ # @param schema [Zvec::Schema, nil] optional schema for type validation
32
+ #
33
+ # @example
34
+ # doc = Zvec::Doc.new(pk: "abc", fields: { "title" => "Hello" }, schema: schema)
5
35
  def initialize(pk: nil, fields: {}, schema: nil)
6
36
  @ext_doc = Ext::Doc.new
7
37
  @ext_doc.pk = pk.to_s if pk
@@ -9,53 +39,119 @@ module Zvec
9
39
  fields.each { |k, v| set(k, v) } if schema
10
40
  end
11
41
 
42
+ # @return [String] the primary key
12
43
  def pk
13
44
  @ext_doc.pk
14
45
  end
15
46
 
47
+ # Set the primary key.
48
+ #
49
+ # @param value [String, Integer] the new primary key (converted to String)
50
+ # @return [void]
16
51
  def pk=(value)
17
52
  @ext_doc.pk = value.to_s
18
53
  end
19
54
 
55
+ # @return [Float] the similarity score (set after search queries)
20
56
  def score
21
57
  @score || @ext_doc.score
22
58
  end
23
59
 
60
+ # Read a field value by name (bracket accessor).
61
+ #
62
+ # @param field_name [String, Symbol] the field name
63
+ # @return [Object, nil] the field value, or nil if not set
64
+ #
65
+ # @example
66
+ # doc["title"] #=> "Hello"
24
67
  def [](field_name)
25
68
  get(field_name)
26
69
  end
27
70
 
71
+ # Write a field value by name (bracket accessor).
72
+ #
73
+ # @param field_name [String, Symbol] the field name
74
+ # @param value [Object] the value to set
75
+ # @return [void]
76
+ #
77
+ # @example
78
+ # doc["title"] = "Hello"
28
79
  def []=(field_name, value)
29
80
  set(field_name, value)
30
81
  end
31
82
 
83
+ # Set a field value. When a schema is present, the value is coerced to
84
+ # the declared type and validated. Without a schema, the type is
85
+ # auto-detected from the Ruby value.
86
+ #
87
+ # @param field_name [String, Symbol] the field name (must be non-empty)
88
+ # @param value [Object] the value to set (nil sets the field to null)
89
+ # @return [void]
90
+ # @raise [ArgumentError] if field_name is blank or value type is unsupported
91
+ # @raise [Zvec::DimensionError] if vector dimension doesn't match schema
92
+ #
93
+ # @example
94
+ # doc.set("title", "Hello")
95
+ # doc.set(:count, 42)
96
+ # doc.set("embedding", [0.1, 0.2, 0.3])
32
97
  def set(field_name, value)
33
98
  field_name = field_name.to_s
99
+ raise ArgumentError, "Field name must be a non-empty string" if field_name.strip.empty?
100
+
34
101
  return @ext_doc.set_null(field_name) if value.nil?
35
102
 
36
103
  if @schema
37
104
  type = @schema.field_type(field_name)
38
105
  if type
106
+ coerced = DataTypes.coerce_value(value, type, field_name: field_name)
39
107
  setter = DataTypes::SETTER_FOR[type]
40
- return @ext_doc.send(setter, field_name, value) if setter
108
+ if setter
109
+ # Validate vector dimension if schema has dimension info
110
+ if DataTypes::VECTOR_TYPES.include?(type) && coerced.is_a?(Array)
111
+ expected_dim = @schema.field_dimension(field_name)
112
+ if expected_dim && !coerced.empty? && coerced.size != expected_dim
113
+ raise DimensionError,
114
+ "Vector dimension mismatch for field '#{field_name}': " \
115
+ "expected #{expected_dim}, got #{coerced.size}"
116
+ end
117
+ end
118
+ return @ext_doc.send(setter, field_name, coerced)
119
+ end
41
120
  end
42
121
  end
43
122
 
44
- # Auto-detect type
123
+ # Auto-detect type (schema-less mode)
45
124
  case value
46
- when String then @ext_doc.set_string(field_name, value)
47
- when Integer then @ext_doc.set_int64(field_name, value)
48
- when Float then @ext_doc.set_double(field_name, value)
125
+ when String then @ext_doc.set_string(field_name, value)
126
+ when Integer then @ext_doc.set_int64(field_name, value)
127
+ when Float then @ext_doc.set_double(field_name, value)
49
128
  when TrueClass, FalseClass then @ext_doc.set_bool(field_name, value)
50
129
  when Array
51
- if value.empty? || value.first.is_a?(Float) || value.first.is_a?(Integer)
52
- @ext_doc.set_float_vector(field_name, value.map(&:to_f))
53
- elsif value.first.is_a?(String)
54
- @ext_doc.set_string_array(field_name, value)
130
+ detected = DataTypes.detect_type(value)
131
+ case detected
132
+ when Ext::DataType::ARRAY_STRING
133
+ @ext_doc.set_string_array(field_name, value.map { |v| v.nil? ? "" : v.to_s })
134
+ else
135
+ # Default: treat as float vector
136
+ coerced = value.map { |v| v.nil? ? 0.0 : v.to_f }
137
+ @ext_doc.set_float_vector(field_name, coerced)
55
138
  end
139
+ else
140
+ raise ArgumentError,
141
+ "Unsupported value type #{value.class} for field '#{field_name}'"
56
142
  end
57
143
  end
58
144
 
145
+ # Get a field value by name. Uses the schema getter if available,
146
+ # otherwise tries common types in order.
147
+ #
148
+ # @param field_name [String, Symbol] the field name
149
+ # @return [Object, nil] the value, or nil if not found or null
150
+ #
151
+ # @example
152
+ # doc.get("title") #=> "Hello"
153
+ # doc.get(:embedding) #=> [0.1, 0.2, 0.3]
154
+ # doc.get("missing") #=> nil
59
155
  def get(field_name)
60
156
  field_name = field_name.to_s
61
157
  return nil unless @ext_doc.has?(field_name)
@@ -78,25 +174,38 @@ module Zvec
78
174
  nil
79
175
  end
80
176
 
177
+ # @return [Array<String>] names of all fields set on this document
81
178
  def field_names
82
179
  @ext_doc.field_names
83
180
  end
84
181
 
182
+ # @return [Boolean] true if no fields have been set
85
183
  def empty?
86
184
  @ext_doc.empty?
87
185
  end
88
186
 
187
+ # Convert the document to a plain Ruby Hash.
188
+ #
189
+ # @return [Hash{String => Object}] includes "pk", "score", and all fields
190
+ #
191
+ # @example
192
+ # doc.to_h #=> {"pk" => "doc-1", "score" => 0.95, "title" => "Hello"}
89
193
  def to_h
90
194
  h = { "pk" => pk, "score" => score }
91
195
  field_names.each { |f| h[f] = get(f) }
92
196
  h
93
197
  end
94
198
 
199
+ # @return [String] human-readable representation
95
200
  def to_s
96
201
  @ext_doc.to_s
97
202
  end
98
203
 
99
- # Wrap a C++ Doc::Ptr into a Ruby Doc
204
+ # Wrap a C++ Doc::Ptr into a Ruby Doc.
205
+ #
206
+ # @param ext_doc [Ext::Doc] the C++ document to wrap
207
+ # @param schema [Zvec::Schema, nil] optional schema for type-aware access
208
+ # @return [Zvec::Doc]
100
209
  def self.from_ext(ext_doc, schema: nil)
101
210
  doc = allocate
102
211
  doc.instance_variable_set(:@ext_doc, ext_doc)
data/lib/zvec/query.rb CHANGED
@@ -1,9 +1,118 @@
1
1
  module Zvec
2
+ # Represents a vector similarity search query.
3
+ #
4
+ # == Filter Expression Syntax
5
+ #
6
+ # Filters narrow search results using scalar field conditions. The syntax
7
+ # supports the following operators and combinators:
8
+ #
9
+ # === Comparison Operators
10
+ #
11
+ # field == value # equality
12
+ # field != value # inequality
13
+ # field > value # greater than
14
+ # field >= value # greater than or equal
15
+ # field < value # less than
16
+ # field <= value # less than or equal
17
+ #
18
+ # === Logical Operators
19
+ #
20
+ # expr AND expr # both conditions must match
21
+ # expr OR expr # either condition matches
22
+ # NOT expr # negation
23
+ # (expr) # grouping
24
+ #
25
+ # === Set / Range Operators
26
+ #
27
+ # field IN [v1, v2] # field equals any value in the list
28
+ # field NOT IN [v1] # field does not equal any value in the list
29
+ #
30
+ # === String Operators
31
+ #
32
+ # field LIKE "pattern" # SQL-style LIKE with % and _ wildcards
33
+ #
34
+ # === Examples
35
+ #
36
+ # "year > 2024"
37
+ # "year >= 2020 AND year <= 2025"
38
+ # "category IN ['science', 'tech']"
39
+ # "title LIKE '%Ruby%'"
40
+ # "active == true AND rating > 4.0"
41
+ # "(year > 2020 OR featured == true) AND active == true"
42
+ #
43
+ # @example Basic query
44
+ # query = Zvec::VectorQuery.new(
45
+ # field_name: "embedding",
46
+ # vector: [0.1, 0.2, 0.3, 0.4],
47
+ # topk: 10
48
+ # )
49
+ #
50
+ # @example Query with filter
51
+ # query = Zvec::VectorQuery.new(
52
+ # field_name: "embedding",
53
+ # vector: [0.1, 0.2, 0.3, 0.4],
54
+ # topk: 5,
55
+ # filter: "year > 2024 AND category == 'science'"
56
+ # )
57
+ #
58
+ # @example Query with HNSW search params
59
+ # query = Zvec::VectorQuery.new(
60
+ # field_name: "embedding",
61
+ # vector: [0.1, 0.2, 0.3, 0.4],
62
+ # topk: 10,
63
+ # query_params: Zvec::Ext::HnswQueryParams.new(ef: 300)
64
+ # )
65
+ #
2
66
  class VectorQuery
67
+ # @return [Ext::VectorQuery] the underlying C++ query object
3
68
  attr_reader :ext_query
4
69
 
70
+ # Create a new vector similarity query.
71
+ #
72
+ # @param field_name [String, Symbol] the vector field to search
73
+ # (must be non-empty)
74
+ # @param vector [Array<Numeric>] the query vector (must be non-empty,
75
+ # all elements must be Numeric)
76
+ # @param topk [Integer] number of nearest results to return (must be > 0)
77
+ # @param filter [String, nil] optional filter expression
78
+ # (see class-level docs for syntax)
79
+ # @param include_vector [Boolean] whether to include the stored vectors
80
+ # in results
81
+ # @param output_fields [Array<String>, nil] specific fields to return
82
+ # (nil returns all)
83
+ # @param query_params [Ext::HnswQueryParams, Ext::IVFQueryParams,
84
+ # Ext::FlatQueryParams, nil] optional search-time tuning params
85
+ # @return [VectorQuery]
86
+ # @raise [Zvec::QueryError] if field_name, vector, or topk are invalid
87
+ #
88
+ # @example
89
+ # vq = Zvec::VectorQuery.new(
90
+ # field_name: "embedding",
91
+ # vector: [0.1, 0.2, 0.3],
92
+ # topk: 5,
93
+ # filter: "year > 2024",
94
+ # output_fields: ["title", "year"]
95
+ # )
5
96
  def initialize(field_name:, vector:, topk: 10, filter: nil,
6
97
  include_vector: false, output_fields: nil, query_params: nil)
98
+ if field_name.nil? || field_name.to_s.strip.empty?
99
+ raise QueryError, "field_name must be a non-empty string"
100
+ end
101
+ unless vector.is_a?(Array) && !vector.empty?
102
+ raise QueryError, "vector must be a non-empty Array"
103
+ end
104
+ unless topk.is_a?(Integer) && topk > 0
105
+ raise QueryError, "topk must be a positive integer"
106
+ end
107
+
108
+ # Validate all vector elements are numeric
109
+ vector.each_with_index do |v, i|
110
+ unless v.is_a?(Numeric)
111
+ raise QueryError,
112
+ "Query vector contains non-numeric element at index #{i}: #{v.inspect}"
113
+ end
114
+ end
115
+
7
116
  @ext_query = Ext::VectorQuery.new
8
117
  @ext_query.field_name = field_name.to_s
9
118
  @ext_query.topk = topk
@@ -20,7 +129,7 @@ module Zvec
20
129
  when Ext::FlatQueryParams
21
130
  @ext_query.set_flat_query_params(query_params)
22
131
  else
23
- raise ArgumentError, "Unknown query_params type: #{query_params.class}"
132
+ raise QueryError, "Unknown query_params type: #{query_params.class}"
24
133
  end
25
134
  end
26
135
  end