vectra-client 0.3.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,298 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Providers
5
+ # In-memory vector database provider for testing
6
+ #
7
+ # This provider stores all vectors in memory using Ruby hashes.
8
+ # Perfect for testing without external dependencies.
9
+ #
10
+ # @example Usage in tests
11
+ # Vectra.configure do |config|
12
+ # config.provider = :memory if Rails.env.test?
13
+ # end
14
+ #
15
+ # client = Vectra::Client.new
16
+ # client.upsert(index: 'test', vectors: [...])
17
+ #
18
class Memory < Base
  # Set up empty in-memory storage.
  #
  # @param config [Object] provider configuration, forwarded unchanged to Base
  def initialize(config)
    super
    # Storage structure: @storage[index][namespace][id] = Vector
    # The default procs create missing levels on WRITE only. Read and delete
    # paths go through #stored_vectors / Hash#fetch so that looking something
    # up never leaves an empty index or namespace behind.
    @storage = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }
    @index_configs = {} # index name => { dimension:, metric: }
  end

  # @see Base#provider_name
  # @return [Symbol] always :memory
  def provider_name
    :memory
  end

  # Insert or overwrite vectors. The first vector ever written to an index
  # fixes that index's recorded dimension; the metric is recorded as "cosine".
  #
  # @see Base#upsert
  # @param index [String] index name
  # @param vectors [Array] vectors, passed through normalize_vectors
  # @param namespace [String, nil] namespace; nil is stored under ""
  # @return [Hash] { upserted_count: Integer }
  def upsert(index:, vectors:, namespace: nil)
    normalized = normalize_vectors(vectors)
    ns = namespace || ""

    first = normalized.first
    @index_configs[index] ||= { dimension: first[:values].length, metric: "cosine" } if first

    normalized.each do |vec|
      attrs = {
        id: vec[:id],
        values: vec[:values],
        metadata: vec[:metadata] || {}
      }
      # Keep sparse values when the normalized hash carries them, so that
      # #update and #build_match (which both read sparse_values) see them.
      # NOTE(review): assumes normalize_vectors exposes them under
      # :sparse_values - confirm against Base.
      attrs[:sparse_values] = vec[:sparse_values] if vec[:sparse_values]

      @storage[index][ns][vec[:id]] = Vector.new(**attrs)
    end

    log_debug("Upserted #{normalized.size} vectors to #{index}")
    { upserted_count: normalized.size }
  end

  # Score every stored vector in the namespace against the query vector and
  # return the best top_k, highest score first.
  #
  # @see Base#query
  # @param index [String] index name
  # @param vector [Array<Numeric>] query vector
  # @param top_k [Integer] maximum number of matches
  # @param namespace [String, nil] namespace; nil reads ""
  # @param filter [Hash, nil] metadata filter (see #matches_filter_value?)
  # @param include_values [Boolean] include vector values in each match
  # @param include_metadata [Boolean] include metadata in each match
  # @return [QueryResult]
  # @raise [ArgumentError] if the query and a stored vector differ in length
  def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
            include_values: false, include_metadata: true)
    candidates = (stored_vectors(index, namespace || "") || {}).values
    candidates = candidates.select { |v| matches_filter?(v, filter) } if filter

    matches = candidates.map do |vec|
      score = calculate_similarity(vector, vec.values, index)
      build_match(vec, score, include_values, include_metadata)
    end
    matches = matches.sort_by { |m| -m[:score] }.first(top_k)

    log_debug("Query returned #{matches.size} results")
    QueryResult.from_response(matches: matches, namespace: namespace)
  end

  # Look up vectors by id. Ids that are not stored are omitted.
  #
  # @see Base#fetch
  # @return [Hash] id => Vector for every id that exists
  def fetch(index:, ids:, namespace: nil)
    stored = stored_vectors(index, namespace || "") || {}

    ids.each_with_object({}) do |id, found|
      vec = stored[id]
      found[id] = vec if vec
    end
  end

  # Merge new metadata into an existing vector (keys are stringified).
  #
  # @see Base#update
  # @return [Hash] { updated: true }
  # @raise [NotFoundError] if the id is not stored in the namespace
  def update(index:, id:, metadata:, namespace: nil)
    stored = stored_vectors(index, namespace || "")
    vec = stored && stored[id]

    raise NotFoundError, "Vector '#{id}' not found in index '#{index}'" unless vec

    merged = (vec.metadata || {}).merge(metadata.transform_keys(&:to_s))
    stored[id] = Vector.new(
      id: vec.id,
      values: vec.values,
      metadata: merged,
      sparse_values: vec.sparse_values
    )

    log_debug("Updated vector #{id}")
    { updated: true }
  end

  # Delete vectors. Exactly one strategy is applied, checked in this order:
  # delete_all (clears every namespace of the index), ids, whole namespace
  # (namespace given without filter), then filter.
  #
  # @see Base#delete
  # @return [Hash] { deleted: true }
  # @raise [ValidationError] if none of ids, filter, namespace, delete_all is given
  def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
    ns = namespace || ""
    # fetch (unlike []) does not trigger the default proc, so deleting from
    # an unknown index does not create it.
    namespaces = @storage.fetch(index, nil)

    if delete_all
      namespaces&.clear
    elsif ids
      stored = stored_vectors(index, ns)
      ids.each { |id| stored.delete(id) } if stored
    elsif namespace && !filter
      namespaces&.delete(ns)
    elsif filter
      stored_vectors(index, ns)&.delete_if { |_id, vec| matches_filter?(vec, filter) }
    else
      raise ValidationError, "Must specify ids, filter, namespace, or delete_all"
    end

    log_debug("Deleted vectors from #{index}")
    { deleted: true }
  end

  # @see Base#list_indexes
  # @return [Array<Hash>] one #describe_index hash per index that has been written to
  def list_indexes
    @index_configs.keys.map { |name| describe_index(index: name) }
  end

  # @see Base#describe_index
  # @return [Hash] { name:, dimension:, metric:, status: "ready" }
  # @raise [NotFoundError] if nothing was ever upserted to the index
  def describe_index(index:)
    config = @index_configs[index]
    raise NotFoundError, "Index '#{index}' not found" unless config

    {
      name: index,
      dimension: config[:dimension],
      metric: config[:metric],
      status: "ready"
    }
  end

  # Vector counts for one namespace, or for every namespace of the index.
  #
  # @see Base#stats
  # @return [Hash] { total_vector_count:, dimension:, namespaces: }
  # @raise [NotFoundError] if nothing was ever upserted to the index
  def stats(index:, namespace: nil)
    config = @index_configs[index]
    raise NotFoundError, "Index '#{index}' not found" unless config

    if namespace
      count = (stored_vectors(index, namespace) || {}).size
      namespaces = { namespace => { vector_count: count } }
    else
      namespaces = (@storage.fetch(index, nil) || {}).each_with_object({}) do |(ns, vectors), acc|
        acc[ns] = { vector_count: vectors.size }
      end
      count = namespaces.values.sum { |entry| entry[:vector_count] }
    end

    {
      total_vector_count: count,
      dimension: config[:dimension],
      namespaces: namespaces
    }
  end

  # Clear all stored data (useful for tests)
  #
  # @return [void]
  def clear!
    @storage.clear
    @index_configs.clear
  end

  private

  # Namespace hash (id => Vector) if it exists, else nil. Uses Hash#fetch so
  # the auto-creating default procs on @storage are NOT triggered.
  def stored_vectors(index, namespace)
    namespaces = @storage.fetch(index, nil)
    namespaces&.fetch(namespace, nil)
  end

  # Calculate similarity score based on the index metric (cosine by default).
  # Euclidean distance is mapped to a similarity via 1 / (1 + distance).
  def calculate_similarity(query_vector, candidate_vector, index)
    config = @index_configs[index] || { metric: "cosine" }
    metric = config[:metric] || "cosine"

    case metric.to_s.downcase
    when "euclidean", "l2"
      1.0 / (1.0 + euclidean_distance(query_vector, candidate_vector))
    when "dot_product", "inner_product", "dot"
      dot_product(query_vector, candidate_vector)
    else # cosine (default)
      cosine_similarity(query_vector, candidate_vector)
    end
  end

  # Cosine similarity; 0.0 when either vector has zero magnitude.
  def cosine_similarity(vec_a, vec_b)
    dot = dot_product(vec_a, vec_b)
    mag_a = Math.sqrt(vec_a.sum { |v| v**2 })
    mag_b = Math.sqrt(vec_b.sum { |v| v**2 })

    return 0.0 if mag_a.zero? || mag_b.zero?

    dot / (mag_a * mag_b)
  end

  # Euclidean (L2) distance between two equal-length vectors.
  def euclidean_distance(vec_a, vec_b)
    ensure_same_dimension!(vec_a, vec_b)

    Math.sqrt(vec_a.zip(vec_b).sum { |a, b| (a - b)**2 })
  end

  # Dot product of two equal-length vectors.
  def dot_product(vec_a, vec_b)
    ensure_same_dimension!(vec_a, vec_b)

    vec_a.zip(vec_b).sum { |a, b| a * b }
  end

  # Shared guard for the similarity helpers.
  def ensure_same_dimension!(vec_a, vec_b)
    raise ArgumentError, "Vectors must have same dimension" if vec_a.length != vec_b.length
  end

  # True when every filter entry matches the vector's metadata. Filter keys
  # are stringified before the metadata lookup.
  def matches_filter?(vector, filter)
    filter.all? do |key, value|
      matches_filter_value?(vector.metadata[key.to_s], value)
    end
  end

  # Check a single metadata value against a filter criterion.
  #
  # - Hash: operator form, e.g. { "$gt" => 5, "$lt" => 10 }; all operators
  #   must hold. Supported: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin.
  #   Comparison operators only match when both sides are Numeric. An
  #   unrecognised key falls back to comparing actual with the whole hash.
  # - Array: matches when the array includes the value.
  # - anything else: plain equality.
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  def matches_filter_value?(actual, expected)
    case expected
    when Hash
      expected.all? do |op, val|
        case op.to_s
        when "$eq"
          actual == val
        when "$ne"
          actual != val
        when "$gt"
          actual.is_a?(Numeric) && val.is_a?(Numeric) && actual > val
        when "$gte"
          actual.is_a?(Numeric) && val.is_a?(Numeric) && actual >= val
        when "$lt"
          actual.is_a?(Numeric) && val.is_a?(Numeric) && actual < val
        when "$lte"
          actual.is_a?(Numeric) && val.is_a?(Numeric) && actual <= val
        when "$in"
          val.is_a?(Array) && val.include?(actual)
        when "$nin"
          # Previously fell through to the equality fallback and never matched.
          val.is_a?(Array) && !val.include?(actual)
        else
          actual == expected
        end
      end
    when Array
      expected.include?(actual)
    else
      actual == expected
    end
  end
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

  # Build the match hash for one vector: always id and score, plus values /
  # metadata when requested, plus sparse_values when the vector is sparse.
  def build_match(vector, score, include_values, include_metadata)
    match = { id: vector.id, score: score }
    match[:values] = vector.values if include_values
    match[:metadata] = vector.metadata if include_metadata
    match[:sparse_values] = vector.sparse_values if vector.sparse?
    match
  end

  # Override validate_config! - Memory provider doesn't need host or API key
  # rubocop:disable Naming/PredicateMethod
  def validate_config!
    true
  end
  # rubocop:enable Naming/PredicateMethod
end
297
+ end
298
+ end
@@ -299,6 +299,37 @@ module Vectra
299
299
  handle_retriable_response(e)
300
300
  end
301
301
 
302
# Extract a human-readable error message from a Qdrant response body.
#
# Qdrant wraps errors in a "status" object ({ "status" => { "error" => ... } }).
# When "status" is missing or is not a Hash (e.g. a plain String or numeric
# code), the top-level body is searched instead. "details"/"error_details"
# and an "errors" array are appended when they add information.
#
# @param body [Hash, String, Object] parsed response body
# @return [String] the message, or "Unknown error" for unsupported body types
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
def extract_error_message(body)
  case body
  when Hash
    # Only treat "status" as the error wrapper when it is a Hash. Indexing a
    # String with "error" is a substring search and indexing an Integer with
    # a String raises TypeError, so fall back to the body in those cases.
    wrapper = body["status"]
    status = wrapper.is_a?(Hash) ? wrapper : body

    # to_s guards the concatenations below against a non-String error value.
    msg = (status["error"] || body["message"] || body["error_message"] || body).to_s

    # Add details
    details = status["details"] || status["error_details"]
    if details
      details_str = details.is_a?(Hash) ? details.to_json : details.to_s
      msg = "#{msg} (#{details_str})" unless msg.include?(details_str)
    end

    # Add field-specific errors
    errors = status["errors"]
    if errors.is_a?(Array)
      # compact drops entries with neither "field" nor "message" so the
      # joined list has no dangling separators.
      field_errors = errors.map { |e| e.is_a?(Hash) ? e["field"] || e["message"] : e }.compact.join(", ")
      msg = "#{msg} [Fields: #{field_errors}]" unless field_errors.empty? || msg.include?(field_errors)
    end

    msg
  when String
    body
  else
    "Unknown error"
  end
end
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
332
+
302
333
  def auth_headers
303
334
  headers = {}
304
335
  headers["api-key"] = config.api_key if config.api_key && !config.api_key.empty?
data/lib/vectra/vector.rb CHANGED
@@ -106,6 +106,62 @@ module Vectra
106
106
  Math.sqrt(values.zip(other_values).sum { |a, b| (a - b)**2 })
107
107
  end
108
108
 
109
# Normalize the vector in place (replaces the stored values).
#
# A zero vector (norm of 0) cannot be normalized and is left unchanged.
# The divisor is always a Float, so Integer components are scaled correctly
# instead of being truncated by integer division.
#
# @param type [Symbol] normalization type: :l2 (default) or :l1
# @return [Vector] self (for method chaining)
# @raise [ArgumentError] if type is neither :l2 nor :l1
#
# @example L2 normalization (unit vector)
#   vector = Vectra::Vector.new(id: 'v1', values: [3.0, 4.0])
#   vector.normalize!
#   vector.values # => [0.6, 0.8] (magnitude = 1.0)
#
# @example L1 normalization (sum = 1)
#   vector.normalize!(type: :l1)
#   vector.values.sum(&:abs) # => 1.0
def normalize!(type: :l2)
  norm =
    case type
    when :l2
      Math.sqrt(values.sum { |v| v**2 })
    when :l1
      # to_f: an all-Integer vector would otherwise hit Integer#/ below
      # and collapse to zeros.
      values.sum(&:abs).to_f
    else
      raise ArgumentError, "Unknown normalization type: #{type}. Use :l2 or :l1"
    end

  # Zero vector - cannot normalize, return as-is
  @values = values.map { |v| v / norm } unless norm.zero?
  self
end
145
+
146
# Normalize a plain array of numbers and return the result as an array.
#
# The input array is not modified. Components are converted to Float before
# normalizing so the result is always Array<Float>, even for Integer input
# (Integer components would otherwise be subject to integer division in L1
# normalization).
#
# Internally a throwaway Vector with id "temp" is built and #normalize! is
# applied to it.
#
# @param vector [Array<Numeric>] vector values to normalize
# @param type [Symbol] normalization type: :l2 (default) or :l1
# @return [Array<Float>] normalized vector values
#
# @example Normalize OpenAI embedding
#   embedding = openai_response['data'][0]['embedding']
#   normalized = Vectra::Vector.normalize(embedding)
#   client.upsert(vectors: [{ id: '1', values: normalized }])
#
# @example L1 normalization
#   normalized = Vectra::Vector.normalize([1.0, 2.0, 3.0], type: :l1)
def self.normalize(vector, type: :l2)
  # map builds a new array, so the caller's array is never touched.
  new(id: "temp", values: vector.map(&:to_f)).normalize!(type: type).values
end
164
+
109
165
  # Check equality with another vector
110
166
  #
111
167
  # @param other [Vector] the other vector
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vectra
4
- VERSION = "0.3.4"
4
+ VERSION = "0.4.0"
5
5
  end
data/lib/vectra.rb CHANGED
@@ -23,6 +23,7 @@ require_relative "vectra/providers/pinecone"
23
23
  require_relative "vectra/providers/qdrant"
24
24
  require_relative "vectra/providers/weaviate"
25
25
  require_relative "vectra/providers/pgvector"
26
+ require_relative "vectra/providers/memory"
26
27
  require_relative "vectra/client"
27
28
 
28
29
  # Vectra - Unified Ruby client for vector databases
@@ -157,5 +158,24 @@ module Vectra
157
158
  **options
158
159
  )
159
160
  end
161
+
162
# Build a Client backed by the in-memory provider (intended for tests).
#
# Any extra keyword options are forwarded to Client.new unchanged, after
# the provider key.
#
# @param options [Hash] additional options
# @return [Client]
#
# @example In test environment
#   Vectra.configure do |config|
#     config.provider = :memory if Rails.env.test?
#   end
#
#   client = Vectra::Client.new
#
def memory(**options)
  Client.new(provider: :memory, **options)
end
160
180
  end
161
181
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vectra-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mijo Kristo
@@ -269,6 +269,7 @@ files:
269
269
  - docs/guides/security.md
270
270
  - docs/index.md
271
271
  - docs/providers/index.md
272
+ - docs/providers/memory.md
272
273
  - docs/providers/pgvector.md
273
274
  - docs/providers/pinecone.md
274
275
  - docs/providers/qdrant.md
@@ -303,6 +304,7 @@ files:
303
304
  - lib/vectra/logging.rb
304
305
  - lib/vectra/pool.rb
305
306
  - lib/vectra/providers/base.rb
307
+ - lib/vectra/providers/memory.rb
306
308
  - lib/vectra/providers/pgvector.rb
307
309
  - lib/vectra/providers/pgvector/connection.rb
308
310
  - lib/vectra/providers/pgvector/index_management.rb