zvec-ruby 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,37 +1,134 @@
1
1
  module Zvec
2
+ # Data type constants, coercion utilities, and dispatch tables for mapping
3
+ # between Ruby types and the underlying C++ zvec engine types.
4
+ #
5
+ # == Scalar Types
6
+ #
7
+ # * {BINARY} -- Raw binary data
8
+ # * {STRING} -- UTF-8 string
9
+ # * {BOOL} -- Boolean (true/false)
10
+ # * {INT32} -- 32-bit signed integer
11
+ # * {INT64} -- 64-bit signed integer
12
+ # * {UINT32} -- 32-bit unsigned integer
13
+ # * {UINT64} -- 64-bit unsigned integer
14
+ # * {FLOAT} -- 32-bit IEEE 754 float
15
+ # * {DOUBLE} -- 64-bit IEEE 754 double
16
+ #
17
+ # == Dense Vector Types
18
+ #
19
+ # Dense vectors store a fixed-length array of numeric values. Choose the
20
+ # precision that balances accuracy vs. memory:
21
+ #
22
+ # * {VECTOR_FP32} -- 32-bit float vector (default; balances accuracy and memory)
23
+ # * {VECTOR_FP64} -- 64-bit double vector (highest accuracy, 2x memory)
24
+ # * {VECTOR_FP16} -- 16-bit half-precision vector (half the memory of FP32)
25
+ # * {VECTOR_INT8} -- 8-bit integer vector (smallest, for quantized models)
26
+ #
27
+ # == Sparse Vector Types
28
+ #
29
+ # Sparse vectors store only non-zero elements, ideal for high-dimensional
30
+ # data where most values are zero (e.g., BM25 or TF-IDF features):
31
+ #
32
+ # * {SPARSE_VECTOR_FP32} -- Sparse vector with 32-bit float values
33
+ # * {SPARSE_VECTOR_FP16} -- Sparse vector with 16-bit float values
34
+ #
35
+ # == Binary Vectors
36
+ #
37
+ # Binary vectors use the {BINARY} type and store bit-packed data, useful for
38
+ # binary hash codes or Hamming distance searches.
39
+ #
40
+ # == Array Types
41
+ #
42
+ # * {ARRAY_STRING} -- Array of strings (e.g., tags)
43
+ # * {ARRAY_INT32} -- Array of 32-bit integers
44
+ # * {ARRAY_INT64} -- Array of 64-bit integers
45
+ # * {ARRAY_FLOAT} -- Array of 32-bit floats
46
+ # * {ARRAY_DOUBLE} -- Array of 64-bit doubles
47
+ # * {ARRAY_BOOL} -- Array of booleans
48
+ #
49
+ # == Quantization Types
50
+ #
51
+ # Quantization reduces memory usage and speeds up search at the cost of some
52
+ # accuracy. Specify a quantization type when creating an index:
53
+ #
54
+ # Ext::HnswIndexParams.new(metric, quantize_type: Ext::QuantizeType::INT8)
55
+ #
56
+ # Available quantization types (via +Ext::QuantizeType+):
57
+ #
58
+ # * +FP16+ -- Half-precision (16-bit) quantization. Good balance of speed
59
+ # and accuracy. Halves memory vs. FP32.
60
+ # * +INT8+ -- 8-bit integer quantization. ~4x memory reduction vs. FP32.
61
+ # Slight accuracy loss.
62
+ # * +INT4+ -- 4-bit integer quantization. ~8x memory reduction vs. FP32.
63
+ # Larger accuracy loss, best for large-scale approximate search.
64
+ #
65
+ # == Metric Types
66
+ #
67
+ # * {L2} -- Euclidean (L2) distance. Lower is more similar.
68
+ # * {IP} -- Inner product. Higher is more similar.
69
+ # * {COSINE} -- Cosine similarity. Higher is more similar. Vectors are
70
+ # normalized internally.
71
+ #
2
72
  module DataTypes
3
73
  # Re-export C++ enum values as Ruby-friendly constants
74
+
75
+ # @return [Symbol] Raw binary data type
4
76
  BINARY = Ext::DataType::BINARY
77
+ # @return [Symbol] UTF-8 string data type
5
78
  STRING = Ext::DataType::STRING
79
+ # @return [Symbol] Boolean data type
6
80
  BOOL = Ext::DataType::BOOL
81
+ # @return [Symbol] 32-bit signed integer data type
7
82
  INT32 = Ext::DataType::INT32
83
+ # @return [Symbol] 64-bit signed integer data type
8
84
  INT64 = Ext::DataType::INT64
85
+ # @return [Symbol] 32-bit unsigned integer data type
9
86
  UINT32 = Ext::DataType::UINT32
87
+ # @return [Symbol] 64-bit unsigned integer data type
10
88
  UINT64 = Ext::DataType::UINT64
89
+ # @return [Symbol] 32-bit float data type
11
90
  FLOAT = Ext::DataType::FLOAT
91
+ # @return [Symbol] 64-bit double data type
12
92
  DOUBLE = Ext::DataType::DOUBLE
13
93
 
94
+ # @return [Symbol] 32-bit float dense vector
14
95
  VECTOR_FP32 = Ext::DataType::VECTOR_FP32
96
+ # @return [Symbol] 64-bit double dense vector
15
97
  VECTOR_FP64 = Ext::DataType::VECTOR_FP64
98
+ # @return [Symbol] 16-bit half-precision dense vector
16
99
  VECTOR_FP16 = Ext::DataType::VECTOR_FP16
100
+ # @return [Symbol] 8-bit integer dense vector (quantized)
17
101
  VECTOR_INT8 = Ext::DataType::VECTOR_INT8
18
102
 
103
+ # @return [Symbol] 32-bit float sparse vector
19
104
  SPARSE_VECTOR_FP32 = Ext::DataType::SPARSE_VECTOR_FP32
105
+ # @return [Symbol] 16-bit float sparse vector
20
106
  SPARSE_VECTOR_FP16 = Ext::DataType::SPARSE_VECTOR_FP16
21
107
 
108
+ # @return [Symbol] Array of strings
22
109
  ARRAY_STRING = Ext::DataType::ARRAY_STRING
110
+ # @return [Symbol] Array of 32-bit integers
23
111
  ARRAY_INT32 = Ext::DataType::ARRAY_INT32
112
+ # @return [Symbol] Array of 64-bit integers
24
113
  ARRAY_INT64 = Ext::DataType::ARRAY_INT64
114
+ # @return [Symbol] Array of 32-bit floats
25
115
  ARRAY_FLOAT = Ext::DataType::ARRAY_FLOAT
116
+ # @return [Symbol] Array of 64-bit doubles
26
117
  ARRAY_DOUBLE = Ext::DataType::ARRAY_DOUBLE
118
+ # @return [Symbol] Array of booleans
27
119
  ARRAY_BOOL = Ext::DataType::ARRAY_BOOL
28
120
 
29
121
  # Metric types
122
+
123
+ # @return [Symbol] Euclidean (L2) distance metric
30
124
  L2 = Ext::MetricType::L2
125
+ # @return [Symbol] Inner product metric
31
126
  IP = Ext::MetricType::IP
127
+ # @return [Symbol] Cosine similarity metric
32
128
  COSINE = Ext::MetricType::COSINE
33
129
 
34
130
  # Vector data types for dimension validation
131
+ # @return [Array<Symbol>] All dense vector data type constants
35
132
  VECTOR_TYPES = [
36
133
  Ext::DataType::VECTOR_FP32,
37
134
  Ext::DataType::VECTOR_FP64,
@@ -40,6 +137,7 @@ module Zvec
40
137
  ].freeze
41
138
 
42
139
  # Setter dispatch table: DataType -> Doc setter method name
140
+ # @return [Hash{Symbol => Symbol}]
43
141
  SETTER_FOR = {
44
142
  Ext::DataType::STRING => :set_string,
45
143
  Ext::DataType::BOOL => :set_bool,
@@ -54,6 +152,8 @@ module Zvec
54
152
  Ext::DataType::ARRAY_STRING => :set_string_array,
55
153
  }.freeze
56
154
 
155
+ # Getter dispatch table: DataType -> Doc getter method name
156
+ # @return [Hash{Symbol => Symbol}]
57
157
  GETTER_FOR = {
58
158
  Ext::DataType::STRING => :get_string,
59
159
  Ext::DataType::BOOL => :get_bool,
@@ -69,7 +169,17 @@ module Zvec
69
169
  }.freeze
70
170
 
71
171
  # Detect the zvec data type for a Ruby value.
172
+ #
72
173
  # Handles edge cases: Integer vs Float, String booleans, nil, empty arrays.
174
+ #
175
+ # @param value [Object] the Ruby value to inspect
176
+ # @return [Symbol, nil] the zvec data type constant, or nil for nil input
177
+ #
178
+ # @example
179
+ # DataTypes.detect_type("hello") #=> Ext::DataType::STRING
180
+ # DataTypes.detect_type(42) #=> Ext::DataType::INT64
181
+ # DataTypes.detect_type([1.0]) #=> Ext::DataType::VECTOR_FP32
182
+ # DataTypes.detect_type(nil) #=> nil
73
183
  def self.detect_type(value)
74
184
  case value
75
185
  when NilClass then nil
@@ -82,7 +192,17 @@ module Zvec
82
192
  end
83
193
 
84
194
  # Coerce a Ruby value into a form suitable for the given zvec data type.
85
- # Returns the coerced value, or raises ArgumentError on impossible coercion.
195
+ #
196
+ # @param value [Object] the value to coerce
197
+ # @param target_type [Symbol] the target zvec data type constant
198
+ # @param field_name [String, nil] optional field name for error messages
199
+ # @return [Object] the coerced value
200
+ # @raise [ArgumentError] if the value cannot be coerced to the target type
201
+ #
202
+ # @example
203
+ # DataTypes.coerce_value(42, Ext::DataType::STRING) #=> "42"
204
+ # DataTypes.coerce_value("3.14", Ext::DataType::DOUBLE) #=> 3.14
205
+ # DataTypes.coerce_value([1, 2], Ext::DataType::VECTOR_FP32) #=> [1.0, 2.0]
86
206
  def self.coerce_value(value, target_type, field_name: nil)
87
207
  return value if value.nil?
88
208
 
@@ -110,6 +230,8 @@ module Zvec
110
230
  class << self
111
231
  private
112
232
 
233
+ # @param arr [Array] the array to detect the element type for
234
+ # @return [Symbol] the detected zvec data type
113
235
  def detect_array_type(arr)
114
236
  return Ext::DataType::VECTOR_FP32 if arr.empty?
115
237
 
data/lib/zvec/doc.rb CHANGED
@@ -1,7 +1,37 @@
1
1
  module Zvec
2
+ # A document (row) in a zvec collection. Wraps the C++ Doc object and
3
+ # provides Ruby-friendly field access with automatic type coercion.
4
+ #
5
+ # Documents can be created with or without a schema. With a schema,
6
+ # values are coerced and validated against declared field types and
7
+ # vector dimensions. Without a schema, types are auto-detected.
8
+ #
9
+ # @example Creating a document with a schema
10
+ # doc = Zvec::Doc.new(pk: "doc-1", schema: schema)
11
+ # doc["title"] = "Hello World"
12
+ # doc["embedding"] = [0.1, 0.2, 0.3, 0.4]
13
+ #
14
+ # @example Schema-less document (types auto-detected)
15
+ # doc = Zvec::Doc.new(pk: "doc-2")
16
+ # doc["name"] = "Alice" # stored as string
17
+ # doc["age"] = 30 # stored as int64
18
+ # doc["score"] = 0.95 # stored as double
19
+ # doc["active"] = true # stored as bool
20
+ # doc["vec"] = [1.0, 2.0] # stored as float vector
21
+ # doc["tags"] = ["a", "b"] # stored as string array
22
+ #
2
23
  class Doc
24
+ # @return [Ext::Doc] the underlying C++ document object
3
25
  attr_reader :ext_doc
4
26
 
27
+ # Create a new document.
28
+ #
29
+ # @param pk [String, Integer, nil] primary key (converted to String)
30
+ # @param fields [Hash{String, Symbol => Object}] initial field values
31
+ # @param schema [Zvec::Schema, nil] optional schema for type validation
32
+ #
33
+ # @example
34
+ # doc = Zvec::Doc.new(pk: "abc", fields: { "title" => "Hello" }, schema: schema)
5
35
  def initialize(pk: nil, fields: {}, schema: nil)
6
36
  @ext_doc = Ext::Doc.new
7
37
  @ext_doc.pk = pk.to_s if pk
@@ -9,26 +39,61 @@ module Zvec
9
39
  fields.each { |k, v| set(k, v) } if schema
10
40
  end
11
41
 
42
+ # @return [String] the primary key
12
43
  def pk
13
44
  @ext_doc.pk
14
45
  end
15
46
 
47
+ # Set the primary key.
48
+ #
49
+ # @param value [String, Integer] the new primary key (converted to String)
50
+ # @return [void]
16
51
  def pk=(value)
17
52
  @ext_doc.pk = value.to_s
18
53
  end
19
54
 
55
+ # @return [Float] the similarity score (set after search queries)
20
56
  def score
21
57
  @score || @ext_doc.score
22
58
  end
23
59
 
60
+ # Read a field value by name (bracket accessor).
61
+ #
62
+ # @param field_name [String, Symbol] the field name
63
+ # @return [Object, nil] the field value, or nil if not set
64
+ #
65
+ # @example
66
+ # doc["title"] #=> "Hello"
24
67
  def [](field_name)
25
68
  get(field_name)
26
69
  end
27
70
 
71
+ # Write a field value by name (bracket accessor).
72
+ #
73
+ # @param field_name [String, Symbol] the field name
74
+ # @param value [Object] the value to set
75
+ # @return [void]
76
+ #
77
+ # @example
78
+ # doc["title"] = "Hello"
28
79
  def []=(field_name, value)
29
80
  set(field_name, value)
30
81
  end
31
82
 
83
+ # Set a field value. When a schema is present, the value is coerced to
84
+ # the declared type and validated. Without a schema, the type is
85
+ # auto-detected from the Ruby value.
86
+ #
87
+ # @param field_name [String, Symbol] the field name (must be non-empty)
88
+ # @param value [Object] the value to set (nil sets the field to null)
89
+ # @return [void]
90
+ # @raise [ArgumentError] if field_name is blank or value type is unsupported
91
+ # @raise [Zvec::DimensionError] if vector dimension doesn't match schema
92
+ #
93
+ # @example
94
+ # doc.set("title", "Hello")
95
+ # doc.set(:count, 42)
96
+ # doc.set("embedding", [0.1, 0.2, 0.3])
32
97
  def set(field_name, value)
33
98
  field_name = field_name.to_s
34
99
  raise ArgumentError, "Field name must be a non-empty string" if field_name.strip.empty?
@@ -77,6 +142,16 @@ module Zvec
77
142
  end
78
143
  end
79
144
 
145
+ # Get a field value by name. Uses the schema getter if available,
146
+ # otherwise tries common types in order.
147
+ #
148
+ # @param field_name [String, Symbol] the field name
149
+ # @return [Object, nil] the value, or nil if not found or null
150
+ #
151
+ # @example
152
+ # doc.get("title") #=> "Hello"
153
+ # doc.get(:embedding) #=> [0.1, 0.2, 0.3]
154
+ # doc.get("missing") #=> nil
80
155
  def get(field_name)
81
156
  field_name = field_name.to_s
82
157
  return nil unless @ext_doc.has?(field_name)
@@ -99,25 +174,38 @@ module Zvec
99
174
  nil
100
175
  end
101
176
 
177
+ # @return [Array<String>] names of all fields set on this document
102
178
  def field_names
103
179
  @ext_doc.field_names
104
180
  end
105
181
 
182
+ # @return [Boolean] true if no fields have been set
106
183
  def empty?
107
184
  @ext_doc.empty?
108
185
  end
109
186
 
187
+ # Convert the document to a plain Ruby Hash.
188
+ #
189
+ # @return [Hash{String => Object}] includes "pk", "score", and all fields
190
+ #
191
+ # @example
192
+ # doc.to_h #=> {"pk" => "doc-1", "score" => 0.95, "title" => "Hello"}
110
193
  def to_h
111
194
  h = { "pk" => pk, "score" => score }
112
195
  field_names.each { |f| h[f] = get(f) }
113
196
  h
114
197
  end
115
198
 
199
+ # @return [String] human-readable representation
116
200
  def to_s
117
201
  @ext_doc.to_s
118
202
  end
119
203
 
120
- # Wrap a C++ Doc::Ptr into a Ruby Doc
204
+ # Wrap a C++ Doc::Ptr into a Ruby Doc.
205
+ #
206
+ # @param ext_doc [Ext::Doc] the C++ document to wrap
207
+ # @param schema [Zvec::Schema, nil] optional schema for type-aware access
208
+ # @return [Zvec::Doc]
121
209
  def self.from_ext(ext_doc, schema: nil)
122
210
  doc = allocate
123
211
  doc.instance_variable_set(:@ext_doc, ext_doc)
data/lib/zvec/query.rb CHANGED
@@ -1,17 +1,114 @@
1
1
  module Zvec
2
+ # Represents a vector similarity search query.
3
+ #
4
+ # == Filter Expression Syntax
5
+ #
6
+ # Filters narrow search results using scalar field conditions. The syntax
7
+ # supports the following operators and combinators:
8
+ #
9
+ # === Comparison Operators
10
+ #
11
+ # field == value # equality
12
+ # field != value # inequality
13
+ # field > value # greater than
14
+ # field >= value # greater than or equal
15
+ # field < value # less than
16
+ # field <= value # less than or equal
17
+ #
18
+ # === Logical Operators
19
+ #
20
+ # expr AND expr # both conditions must match
21
+ # expr OR expr # either condition matches
22
+ # NOT expr # negation
23
+ # (expr) # grouping
24
+ #
25
+ # === Set / Range Operators
26
+ #
27
+ # field IN [v1, v2] # field equals any value in the list
28
+ # field NOT IN [v1] # field equals none of the values in the list
29
+ #
30
+ # === String Operators
31
+ #
32
+ # field LIKE "pattern" # SQL-style LIKE with % and _ wildcards
33
+ #
34
+ # === Examples
35
+ #
36
+ # "year > 2024"
37
+ # "year >= 2020 AND year <= 2025"
38
+ # "category IN ['science', 'tech']"
39
+ # "title LIKE '%Ruby%'"
40
+ # "active == true AND rating > 4.0"
41
+ # "(year > 2020 OR featured == true) AND active == true"
42
+ #
43
+ # @example Basic query
44
+ # query = Zvec::VectorQuery.new(
45
+ # field_name: "embedding",
46
+ # vector: [0.1, 0.2, 0.3, 0.4],
47
+ # topk: 10
48
+ # )
49
+ #
50
+ # @example Query with filter
51
+ # query = Zvec::VectorQuery.new(
52
+ # field_name: "embedding",
53
+ # vector: [0.1, 0.2, 0.3, 0.4],
54
+ # topk: 5,
55
+ # filter: "year > 2024 AND category == 'science'"
56
+ # )
57
+ #
58
+ # @example Query with HNSW search params
59
+ # query = Zvec::VectorQuery.new(
60
+ # field_name: "embedding",
61
+ # vector: [0.1, 0.2, 0.3, 0.4],
62
+ # topk: 10,
63
+ # query_params: Zvec::Ext::HnswQueryParams.new(ef: 300)
64
+ # )
65
+ #
2
66
  class VectorQuery
67
+ # @return [Ext::VectorQuery] the underlying C++ query object
3
68
  attr_reader :ext_query
4
69
 
70
+ # Create a new vector similarity query.
71
+ #
72
+ # @param field_name [String, Symbol] the vector field to search
73
+ # (must be non-empty)
74
+ # @param vector [Array<Numeric>] the query vector (must be non-empty,
75
+ # all elements must be Numeric)
76
+ # @param topk [Integer] number of nearest results to return (must be > 0)
77
+ # @param filter [String, nil] optional filter expression
78
+ # (see class-level docs for syntax)
79
+ # @param include_vector [Boolean] whether to include the stored vectors
80
+ # in results
81
+ # @param output_fields [Array<String>, nil] specific fields to return
82
+ # (nil returns all)
83
+ # @param query_params [Ext::HnswQueryParams, Ext::IVFQueryParams,
84
+ # Ext::FlatQueryParams, nil] optional search-time tuning params
85
+ # @return [VectorQuery]
86
+ # @raise [Zvec::QueryError] if field_name, vector, topk, or query_params is invalid
87
+ #
88
+ # @example
89
+ # vq = Zvec::VectorQuery.new(
90
+ # field_name: "embedding",
91
+ # vector: [0.1, 0.2, 0.3],
92
+ # topk: 5,
93
+ # filter: "year > 2024",
94
+ # output_fields: ["title", "year"]
95
+ # )
5
96
  def initialize(field_name:, vector:, topk: 10, filter: nil,
6
97
  include_vector: false, output_fields: nil, query_params: nil)
7
- raise ArgumentError, "field_name must be a non-empty string" if field_name.nil? || field_name.to_s.strip.empty?
8
- raise ArgumentError, "vector must be a non-empty Array" unless vector.is_a?(Array) && !vector.empty?
9
- raise ArgumentError, "topk must be a positive integer" unless topk.is_a?(Integer) && topk > 0
98
+ if field_name.nil? || field_name.to_s.strip.empty?
99
+ raise QueryError, "field_name must be a non-empty string"
100
+ end
101
+ unless vector.is_a?(Array) && !vector.empty?
102
+ raise QueryError, "vector must be a non-empty Array"
103
+ end
104
+ unless topk.is_a?(Integer) && topk > 0
105
+ raise QueryError, "topk must be a positive integer"
106
+ end
10
107
 
11
108
  # Validate all vector elements are numeric
12
109
  vector.each_with_index do |v, i|
13
110
  unless v.is_a?(Numeric)
14
- raise ArgumentError,
111
+ raise QueryError,
15
112
  "Query vector contains non-numeric element at index #{i}: #{v.inspect}"
16
113
  end
17
114
  end
@@ -32,7 +129,7 @@ module Zvec
32
129
  when Ext::FlatQueryParams
33
130
  @ext_query.set_flat_query_params(query_params)
34
131
  else
35
- raise ArgumentError, "Unknown query_params type: #{query_params.class}"
132
+ raise QueryError, "Unknown query_params type: #{query_params.class}"
36
133
  end
37
134
  end
38
135
  end
data/lib/zvec/ruby_llm.rb CHANGED
@@ -4,17 +4,40 @@ module Zvec
4
4
  module RubyLLM
5
5
  # A vector store backend for the ruby_llm gem.
6
6
  #
7
- # Usage with ruby_llm:
7
+ # Provides a simple add/search/delete interface on top of a {Zvec::Collection}.
8
+ # Compatible with the ruby_llm vector store protocol.
9
+ #
10
+ # @example Basic usage
8
11
  # store = Zvec::RubyLLM::Store.new("/path/to/db", dimension: 1536)
9
- # store.add("doc-1", embedding: [...], metadata: { title: "Hello" })
12
+ # store.add("doc-1", embedding: [...], content: "Hello world")
10
13
  # results = store.search([0.1, 0.2, ...], top_k: 5)
14
+ # results.first #=> { id: "doc-1", score: 0.98, content: "Hello world", metadata: {} }
15
+ #
16
+ # @example With metadata
17
+ # store.add("doc-2", embedding: [...], content: "Ruby", metadata: { category: "lang" })
11
18
  #
12
19
  class Store
20
+ # @return [String] default vector field name
13
21
  DEFAULT_VECTOR_FIELD = "embedding"
22
+ # @return [String] default content field name
14
23
  DEFAULT_CONTENT_FIELD = "content"
15
24
 
16
- attr_reader :collection, :dimension
25
+ # @return [Zvec::Collection] the underlying collection
26
+ attr_reader :collection
27
+ # @return [Integer] the vector dimension
28
+ attr_reader :dimension
17
29
 
30
+ # Create a new store, opening an existing collection or creating one.
31
+ #
32
+ # @param path [String] directory path for the collection data
33
+ # @param dimension [Integer] the vector dimension (must be > 0)
34
+ # @param metric [Symbol] similarity metric (+:cosine+, +:l2+, or +:ip+)
35
+ # @param vector_field [String] name of the vector field (default: "embedding")
36
+ # @param content_field [String] name of the content field (default: "content")
37
+ # @raise [ArgumentError] if metric is not one of +:cosine+, +:l2+, +:ip+
38
+ #
39
+ # @example
40
+ # store = Zvec::RubyLLM::Store.new("/tmp/store", dimension: 384, metric: :l2)
18
41
  def initialize(path, dimension:, metric: :cosine, vector_field: DEFAULT_VECTOR_FIELD,
19
42
  content_field: DEFAULT_CONTENT_FIELD)
20
43
  @vector_field = vector_field.to_s
@@ -47,6 +70,15 @@ module Zvec
47
70
  end
48
71
 
49
72
  # Add a document with its embedding and optional metadata.
73
+ #
74
+ # @param id [String, Integer] the document's primary key
75
+ # @param embedding [Array<Numeric>] the vector embedding
76
+ # @param content [String, nil] optional text content
77
+ # @param metadata [Hash{String, Symbol => Object}] additional fields to store
78
+ # @return [Array] write results from the collection
79
+ #
80
+ # @example
81
+ # store.add("doc-1", embedding: [0.1, 0.2, 0.3], content: "Hello")
50
82
  def add(id, embedding:, content: nil, metadata: {})
51
83
  doc = Zvec::Doc.new(pk: id, schema: @schema)
52
84
  doc[@vector_field] = embedding
@@ -55,8 +87,20 @@ module Zvec
55
87
  @collection.insert(doc)
56
88
  end
57
89
 
58
- # Batch-add documents.
59
- # docs: array of { id:, embedding:, content:, metadata: {} }
90
+ # Batch-add multiple documents at once.
91
+ #
92
+ # @param docs [Array<Hash>] documents, each containing:
93
+ # * +:id+ [String, Integer] -- primary key (required)
94
+ # * +:embedding+ [Array<Numeric>] -- the vector (required)
95
+ # * +:content+ [String, nil] -- optional text content
96
+ # * +:metadata+ [Hash, nil] -- optional additional fields
97
+ # @return [Array] write results from the collection
98
+ #
99
+ # @example
100
+ # store.add_many([
101
+ # { id: "a", embedding: [0.1, 0.2], content: "Hello" },
102
+ # { id: "b", embedding: [0.3, 0.4], content: "World" },
103
+ # ])
60
104
  def add_many(docs)
61
105
  zvec_docs = docs.map do |d|
62
106
  doc = Zvec::Doc.new(pk: d[:id], schema: @schema)
@@ -69,6 +113,22 @@ module Zvec
69
113
  end
70
114
 
71
115
  # Search for similar vectors.
116
+ #
117
+ # @param query_vector [Array<Numeric>] the query vector
118
+ # @param top_k [Integer] maximum number of results (default: 10)
119
+ # @param filter [String, nil] optional filter expression
120
+ # (see {Zvec::VectorQuery} for filter syntax)
121
+ # @return [Array<Hash>] results, each containing:
122
+ # * +:id+ [String] -- document primary key
123
+ # * +:score+ [Float] -- similarity score
124
+ # * +:content+ [String, nil] -- the content field value
125
+ # * +:metadata+ [Hash] -- all other stored fields
126
+ #
127
+ # @example
128
+ # results = store.search([0.1, 0.2, 0.3], top_k: 5)
129
+ # results.first[:id] #=> "doc-1"
130
+ # results.first[:score] #=> 0.95
131
+ # results.first[:content] #=> "Hello"
72
132
  def search(query_vector, top_k: 10, filter: nil)
73
133
  results = @collection.query(
74
134
  field_name: @vector_field,
@@ -86,20 +146,32 @@ module Zvec
86
146
  end
87
147
  end
88
148
 
89
- # Delete documents by IDs.
149
+ # Delete documents by primary key(s).
150
+ #
151
+ # @param ids [Array<String, Integer>] one or more primary keys (nested arrays are flattened)
152
+ # @return [Array] write results from the collection
90
153
  def delete(*ids)
91
154
  @collection.delete(*ids.flatten)
92
155
  end
93
156
 
94
- # Fetch documents by IDs.
157
+ # Fetch documents by primary key(s).
158
+ #
159
+ # @param ids [Array<String, Integer>] one or more primary keys (nested arrays are flattened)
160
+ # @return [Hash{String => Zvec::Doc}] mapping of pk to document
95
161
  def fetch(*ids)
96
162
  @collection.fetch(*ids.flatten)
97
163
  end
98
164
 
165
+ # Flush pending writes to disk.
166
+ #
167
+ # @return [Object] the result of the underlying collection's +flush+ call
99
168
  def flush
100
169
  @collection.flush
101
170
  end
102
171
 
172
+ # Return the number of documents in the store.
173
+ #
174
+ # @return [Integer]
103
175
  def count
104
176
  @collection.doc_count
105
177
  end