zvec-ruby 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/zvec/ruby_llm.rb CHANGED
@@ -4,17 +4,40 @@ module Zvec
4
4
  module RubyLLM
5
5
  # A vector store backend for the ruby_llm gem.
6
6
  #
7
- # Usage with ruby_llm:
7
+ # Provides a simple add/search/delete interface on top of a {Zvec::Collection}.
8
+ # Compatible with the ruby_llm vector store protocol.
9
+ #
10
+ # @example Basic usage
8
11
  # store = Zvec::RubyLLM::Store.new("/path/to/db", dimension: 1536)
9
- # store.add("doc-1", embedding: [...], metadata: { title: "Hello" })
12
+ # store.add("doc-1", embedding: [...], content: "Hello world")
10
13
  # results = store.search([0.1, 0.2, ...], top_k: 5)
14
+ # results.first #=> { id: "doc-1", score: 0.98, content: "Hello world", metadata: {} }
15
+ #
16
+ # @example With metadata
17
+ # store.add("doc-2", embedding: [...], content: "Ruby", metadata: { category: "lang" })
11
18
  #
12
19
  class Store
20
+ # @return [String] default vector field name
13
21
  DEFAULT_VECTOR_FIELD = "embedding"
22
+ # @return [String] default content field name
14
23
  DEFAULT_CONTENT_FIELD = "content"
15
24
 
16
- attr_reader :collection, :dimension
25
+ # @return [Zvec::Collection] the underlying collection
26
+ attr_reader :collection
27
+ # @return [Integer] the vector dimension
28
+ attr_reader :dimension
17
29
 
30
+ # Create a new store, opening an existing collection or creating one.
31
+ #
32
+ # @param path [String] directory path for the collection data
33
+ # @param dimension [Integer] the vector dimension (must be > 0)
34
+ # @param metric [Symbol] similarity metric (+:cosine+, +:l2+, or +:ip+)
35
+ # @param vector_field [String] name of the vector field (default: "embedding")
36
+ # @param content_field [String] name of the content field (default: "content")
37
+ # @raise [ArgumentError] if metric is not one of +:cosine+, +:l2+, +:ip+
38
+ #
39
+ # @example
40
+ # store = Zvec::RubyLLM::Store.new("/tmp/store", dimension: 384, metric: :l2)
18
41
  def initialize(path, dimension:, metric: :cosine, vector_field: DEFAULT_VECTOR_FIELD,
19
42
  content_field: DEFAULT_CONTENT_FIELD)
20
43
  @vector_field = vector_field.to_s
@@ -47,6 +70,15 @@ module Zvec
47
70
  end
48
71
 
49
72
  # Add a document with its embedding and optional metadata.
73
+ #
74
+ # @param id [String, Integer] the document's primary key
75
+ # @param embedding [Array<Numeric>] the vector embedding
76
+ # @param content [String, nil] optional text content
77
+ # @param metadata [Hash{String, Symbol => Object}] additional fields to store
78
+ # @return [Array] write results from the collection
79
+ #
80
+ # @example
81
+ # store.add("doc-1", embedding: [0.1, 0.2, 0.3], content: "Hello")
50
82
  def add(id, embedding:, content: nil, metadata: {})
51
83
  doc = Zvec::Doc.new(pk: id, schema: @schema)
52
84
  doc[@vector_field] = embedding
@@ -55,8 +87,20 @@ module Zvec
55
87
  @collection.insert(doc)
56
88
  end
57
89
 
58
- # Batch-add documents.
59
- # docs: array of { id:, embedding:, content:, metadata: {} }
90
+ # Batch-add multiple documents at once.
91
+ #
92
+ # @param docs [Array<Hash>] documents, each containing:
93
+ # * +:id+ [String, Integer] -- primary key (required)
94
+ # * +:embedding+ [Array<Numeric>] -- the vector (required)
95
+ # * +:content+ [String, nil] -- optional text content
96
+ # * +:metadata+ [Hash, nil] -- optional additional fields
97
+ # @return [Array] write results from the collection
98
+ #
99
+ # @example
100
+ # store.add_many([
101
+ # { id: "a", embedding: [0.1, 0.2], content: "Hello" },
102
+ # { id: "b", embedding: [0.3, 0.4], content: "World" },
103
+ # ])
60
104
  def add_many(docs)
61
105
  zvec_docs = docs.map do |d|
62
106
  doc = Zvec::Doc.new(pk: d[:id], schema: @schema)
@@ -69,6 +113,22 @@ module Zvec
69
113
  end
70
114
 
71
115
  # Search for similar vectors.
116
+ #
117
+ # @param query_vector [Array<Numeric>] the query vector
118
+ # @param top_k [Integer] maximum number of results (default: 10)
119
+ # @param filter [String, nil] optional filter expression
120
+ # (see {Zvec::VectorQuery} for filter syntax)
121
+ # @return [Array<Hash>] results, each containing:
122
+ # * +:id+ [String] -- document primary key
123
+ # * +:score+ [Float] -- similarity score
124
+ # * +:content+ [String, nil] -- the content field value
125
+ # * +:metadata+ [Hash] -- all other stored fields
126
+ #
127
+ # @example
128
+ # results = store.search([0.1, 0.2, 0.3], top_k: 5)
129
+ # results.first[:id] #=> "doc-1"
130
+ # results.first[:score] #=> 0.95
131
+ # results.first[:content] #=> "Hello"
72
132
  def search(query_vector, top_k: 10, filter: nil)
73
133
  results = @collection.query(
74
134
  field_name: @vector_field,
@@ -86,20 +146,32 @@ module Zvec
86
146
  end
87
147
  end
88
148
 
89
- # Delete documents by IDs.
149
+ # Delete documents by primary key(s).
150
+ #
151
+ # @param ids [Array<String, Integer>] one or more primary keys
152
+ # @return [Array] write results from the collection
90
153
  def delete(*ids)
91
154
  @collection.delete(*ids.flatten)
92
155
  end
93
156
 
94
- # Fetch documents by IDs.
157
+ # Fetch documents by primary key(s).
158
+ #
159
+ # @param ids [Array<String, Integer>] one or more primary keys
160
+ # @return [Hash{String => Zvec::Doc}] mapping of pk to document
95
161
  def fetch(*ids)
96
162
  @collection.fetch(*ids.flatten)
97
163
  end
98
164
 
165
+ # Flush pending writes to disk.
166
+ #
167
+ # @return [self]
99
168
  def flush
100
169
  @collection.flush
101
170
  end
102
171
 
172
+ # Return the number of documents in the store.
173
+ #
174
+ # @return [Integer]
103
175
  def count
104
176
  @collection.doc_count
105
177
  end
data/lib/zvec/schema.rb CHANGED
@@ -1,68 +1,212 @@
1
1
  module Zvec
2
+ # Defines the structure of a collection: its name, fields, types, and
3
+ # vector dimensions.
4
+ #
5
+ # Schemas are immutable once created -- fields can be added during
6
+ # initialization but not removed afterward.
7
+ #
8
+ # @example Creating a schema with a DSL block
9
+ # schema = Zvec::Schema.new("articles") do
10
+ # string "title"
11
+ # string "body", nullable: true
12
+ # int32 "year"
13
+ # float "rating"
14
+ # bool "published"
15
+ # vector "embedding", dimension: 384,
16
+ # index: Zvec::Ext::HnswIndexParams.new(Zvec::COSINE)
17
+ # end
18
+ #
19
+ # @example Binary vector field
20
+ # schema = Zvec::Schema.new("hashes") do
21
+ # field "hash_vec", DataTypes::BINARY, dimension: 128
22
+ # end
23
+ #
24
+ # @example Sparse vector field
25
+ # schema = Zvec::Schema.new("sparse_docs") do
26
+ # field "tfidf", DataTypes::SPARSE_VECTOR_FP32, dimension: 30000
27
+ # end
28
+ #
2
29
  class Schema
30
+ # @return [Ext::CollectionSchema] the underlying C++ schema object
3
31
  attr_reader :ext_schema
4
32
 
33
+ # Create a new schema.
34
+ #
35
+ # @param name [String, Symbol] the collection name (must be non-empty)
36
+ # @yield optional DSL block evaluated in the schema's context
37
+ # @raise [Zvec::SchemaError] if name is nil or blank
38
+ #
39
+ # @example
40
+ # schema = Zvec::Schema.new("my_collection") do
41
+ # string "title"
42
+ # vector "embedding", dimension: 128
43
+ # end
5
44
  def initialize(name, &block)
6
- @ext_schema = Ext::CollectionSchema.new(name)
45
+ if name.nil? || name.to_s.strip.empty?
46
+ raise SchemaError, "Schema name must be a non-empty string"
47
+ end
48
+
49
+ @ext_schema = Ext::CollectionSchema.new(name.to_s)
7
50
  @field_types = {}
51
+ @field_dimensions = {}
8
52
  instance_eval(&block) if block
9
53
  end
10
54
 
55
+ # Add a field with an explicit data type.
56
+ #
57
+ # @param name [String, Symbol] the field name (must be non-empty)
58
+ # @param type [Symbol] a DataTypes constant (e.g., +DataTypes::STRING+)
59
+ # @param dimension [Integer, nil] required for vector fields
60
+ # @param nullable [Boolean] whether the field allows null values
61
+ # @param index [Ext::HnswIndexParams, Ext::FlatIndexParams, Ext::IVFIndexParams, nil]
62
+ # optional index parameters for this field
63
+ # @return [self] for method chaining
64
+ # @raise [Zvec::SchemaError] if field name is blank
65
+ #
66
+ # @example
67
+ # schema.field("tags", DataTypes::ARRAY_STRING)
68
+ # schema.field("embedding", DataTypes::VECTOR_FP32, dimension: 128)
11
69
  def field(name, type, dimension: nil, nullable: false, index: nil)
12
70
  name = name.to_s
71
+ if name.strip.empty?
72
+ raise SchemaError, "Field name must be a non-empty string"
73
+ end
74
+
13
75
  fs = Ext::FieldSchema.new(name, type)
14
76
  fs.dimension = dimension if dimension
15
77
  fs.nullable = nullable
16
78
  fs.set_index_params(index) if index
17
79
  @ext_schema.add_field(fs)
18
80
  @field_types[name] = type
81
+ @field_dimensions[name] = dimension if dimension
19
82
  self
20
83
  end
21
84
 
85
+ # Add a dense vector field. Defaults to FP32 precision.
86
+ #
87
+ # @param name [String, Symbol] the field name
88
+ # @param dimension [Integer] the vector dimension (must be > 0)
89
+ # @param type [Symbol] vector data type (default: {DataTypes::VECTOR_FP32}).
90
+ # Also accepts {DataTypes::VECTOR_FP64}, {DataTypes::VECTOR_FP16},
91
+ # or {DataTypes::VECTOR_INT8}.
92
+ # @param index [Ext::HnswIndexParams, Ext::FlatIndexParams, Ext::IVFIndexParams, nil]
93
+ # optional index parameters
94
+ # @return [self]
95
+ # @raise [ArgumentError] if dimension is not a positive integer
96
+ #
97
+ # @example Standard FP32 vector with HNSW index
98
+ # schema.vector "embedding", dimension: 384,
99
+ # index: Ext::HnswIndexParams.new(Zvec::COSINE)
100
+ #
101
+ # @example FP16 vector (half memory)
102
+ # schema.vector "embedding", dimension: 384,
103
+ # type: DataTypes::VECTOR_FP16
104
+ #
105
+ # @example INT8 quantized vector (minimal memory)
106
+ # schema.vector "embedding", dimension: 384,
107
+ # type: DataTypes::VECTOR_INT8
22
108
  def vector(name, dimension:, type: DataTypes::VECTOR_FP32, index: nil)
109
+ raise ArgumentError, "Vector dimension must be a positive integer, got #{dimension.inspect}" unless dimension.is_a?(Integer) && dimension > 0
110
+
23
111
  field(name, type, dimension: dimension, index: index)
24
112
  end
25
113
 
114
+ # Add a string field.
115
+ #
116
+ # @param name [String, Symbol] the field name
117
+ # @param opts [Hash] options passed to {#field} (+nullable:+, +index:+)
118
+ # @return [self]
26
119
  def string(name, **opts)
27
120
  field(name, DataTypes::STRING, **opts)
28
121
  end
29
122
 
123
+ # Add a 32-bit integer field.
124
+ #
125
+ # @param name [String, Symbol] the field name
126
+ # @param opts [Hash] options passed to {#field}
127
+ # @return [self]
30
128
  def int32(name, **opts)
31
129
  field(name, DataTypes::INT32, **opts)
32
130
  end
33
131
 
132
+ # Add a 64-bit integer field.
133
+ #
134
+ # @param name [String, Symbol] the field name
135
+ # @param opts [Hash] options passed to {#field}
136
+ # @return [self]
34
137
  def int64(name, **opts)
35
138
  field(name, DataTypes::INT64, **opts)
36
139
  end
37
140
 
141
+ # Add a 32-bit float field.
142
+ #
143
+ # @param name [String, Symbol] the field name
144
+ # @param opts [Hash] options passed to {#field}
145
+ # @return [self]
38
146
  def float(name, **opts)
39
147
  field(name, DataTypes::FLOAT, **opts)
40
148
  end
41
149
 
150
+ # Add a 64-bit double field.
151
+ #
152
+ # @param name [String, Symbol] the field name
153
+ # @param opts [Hash] options passed to {#field}
154
+ # @return [self]
42
155
  def double(name, **opts)
43
156
  field(name, DataTypes::DOUBLE, **opts)
44
157
  end
45
158
 
159
+ # Add a boolean field.
160
+ #
161
+ # @param name [String, Symbol] the field name
162
+ # @param opts [Hash] options passed to {#field}
163
+ # @return [self]
46
164
  def bool(name, **opts)
47
165
  field(name, DataTypes::BOOL, **opts)
48
166
  end
49
167
 
168
+ # @return [String] the collection name
50
169
  def name
51
170
  @ext_schema.name
52
171
  end
53
172
 
173
+ # @return [Array<String>] all field names in this schema
54
174
  def field_names
55
175
  @ext_schema.field_names
56
176
  end
57
177
 
178
+ # Look up the data type of a field by name.
179
+ #
180
+ # @param name [String, Symbol] the field name
181
+ # @return [Symbol, nil] the data type constant, or nil if not found
58
182
  def field_type(name)
59
183
  @field_types[name.to_s]
60
184
  end
61
185
 
186
+ # Look up the dimension of a vector field.
187
+ #
188
+ # @param name [String, Symbol] the field name
189
+ # @return [Integer, nil] the dimension, or nil if the field is not a vector
190
+ def field_dimension(name)
191
+ @field_dimensions[name.to_s]
192
+ end
193
+
194
+ # Check whether a field exists in the schema.
195
+ #
196
+ # @param name [String, Symbol] the field name
197
+ # @return [Boolean]
62
198
  def has_field?(name)
63
199
  @ext_schema.has_field?(name.to_s)
64
200
  end
65
201
 
202
+ # Returns a hash of vector field names to their dimensions.
203
+ #
204
+ # @return [Hash{String => Integer}] e.g. +{"embedding" => 384}+
205
+ def vector_fields_with_dimensions
206
+ @field_dimensions.select { |name, _| DataTypes::VECTOR_TYPES.include?(@field_types[name]) }
207
+ end
208
+
209
+ # @return [String] human-readable representation of the schema
66
210
  def to_s
67
211
  @ext_schema.to_s
68
212
  end
data/lib/zvec/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Zvec
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/zvec.rb CHANGED
@@ -16,7 +16,20 @@ require_relative "zvec/query"
16
16
  require_relative "zvec/collection"
17
17
 
18
18
  module Zvec
19
+ # Base error class for all Zvec errors.
19
20
  class Error < StandardError; end
20
21
 
22
+ # Raised when vector dimensions do not match the expected schema dimension.
23
+ class DimensionError < Error; end
24
+
25
+ # Raised for schema definition errors (invalid field names, types, etc.).
26
+ class SchemaError < Error; end
27
+
28
+ # Raised for query construction or execution errors.
29
+ class QueryError < Error; end
30
+
31
+ # Raised for collection lifecycle errors (open/close/reopen issues).
32
+ class CollectionError < Error; end
33
+
21
34
  include DataTypes
22
35
  end