vectra-client 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/vectra/client.rb CHANGED
@@ -98,8 +98,26 @@ module Vectra
98
98
  # filter: { category: 'programming' }
99
99
  # )
100
100
  #
101
- def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
101
+ # @example Chainable query builder
102
+ # results = client.query("my-index")
103
+ # .vector([0.1, 0.2, 0.3])
104
+ # .top_k(10)
105
+ # .filter(category: "programming")
106
+ # .with_metadata
107
+ # .execute
108
+ #
109
+ def query(index_arg = nil, index: nil, vector: nil, top_k: 10, namespace: nil, filter: nil,
102
110
  include_values: false, include_metadata: true)
111
+ # If called with a positional index string only, return a query builder:
112
+ # client.query("docs").vector(vec).top_k(10).filter(...).execute
113
+ if index_arg && index.nil? && vector.nil? && !block_given?
114
+ return QueryBuilder.new(self, index_arg)
115
+ end
116
+
117
+ # Handle positional argument for index in non-builder case
118
+ index = index_arg if index_arg && index.nil?
119
+
120
+ # Backwards-compatible path: perform query immediately
103
121
  validate_index!(index)
104
122
  validate_query_vector!(vector)
105
123
 
@@ -276,6 +294,137 @@ module Vectra
276
294
  provider.provider_name
277
295
  end
278
296
 
297
# Quick health check - reports whether the provider connection is usable.
#
# Issues a single +list_indexes+ call against the provider. Any
# StandardError raised by that call is logged and reported as unhealthy.
# The method takes no arguments. The elapsed time is written to the debug
# log whether the call succeeds or fails.
#
# @return [Boolean] true if the provider answered, false if the call raised
#
# @example
#   if client.healthy?
#     client.upsert(...)
#   else
#     handle_unhealthy_connection
#   end
def healthy?
  # Monotonic clock: wall-clock adjustments cannot skew the measurement.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  provider.list_indexes
  true
rescue StandardError => e
  log_error("Health check failed", e)
  false
ensure
  # started_at is only nil when reading the clock itself failed.
  if started_at
    elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round(2)
    log_debug("Health check completed in #{elapsed_ms}ms")
  end
end
319
+
320
# Ping provider and get connection health status with latency
#
# Issues a single +list_indexes+ call and measures how long it took. The
# method takes no arguments. A StandardError raised by the provider is
# logged and reported in the result instead of propagating.
#
# @return [Hash] health status with :healthy, :provider and :latency_ms;
#   when the provider call raised, also :error (exception class name) and
#   :error_message
#
# @example
#   status = client.ping
#   puts "Provider: #{status[:provider]}, Healthy: #{status[:healthy]}, Latency: #{status[:latency_ms]}ms"
def ping
  # Monotonic clock: wall-clock adjustments cannot skew the latency figure.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  failure = nil

  begin
    provider.list_indexes
  rescue StandardError => e
    failure = { error: e.class.name, error_message: e.message }
    log_error("Health check failed", e)
  end

  latency_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round(2)

  status = {
    healthy: failure.nil?,
    provider: provider_name,
    latency_ms: latency_ms
  }

  failure ? status.merge(failure) : status
end
352
+
353
# Chainable query builder
#
# Collects query options one call at a time and hands them to
# Client#query as keyword arguments when #execute is called. Every setter
# returns the builder itself so calls can be chained.
#
# Defaults: top_k 10, no namespace, no filter, values excluded,
# metadata included.
#
# @api public
# @example
#   results = client.query("docs")
#     .vector(embedding)
#     .top_k(20)
#     .namespace("prod")
#     .filter(category: "ruby")
#     .with_metadata
#     .execute
#
class QueryBuilder
  # @return [Object] the index this builder was created for
  attr_reader :index

  # @param client [#query] object that receives the final keyword-argument query
  # @param index [Object] index identifier, passed through unchanged
  def initialize(client, index)
    @client = client
    @index = index
    @vector = nil
    @top_k = 10
    @namespace = nil
    @filter = nil
    @include_values = false
    @include_metadata = true
  end

  # Set the query vector.
  #
  # @return [QueryBuilder] self
  def vector(value)
    @vector = value
    self
  end

  # Set the number of results to request; the value is coerced with #to_i.
  #
  # @return [QueryBuilder] self
  def top_k(value)
    @top_k = value.to_i
    self
  end

  # Set the namespace to query.
  #
  # @return [QueryBuilder] self
  def namespace(value)
    @namespace = value
    self
  end

  # Set the metadata filter, either as a positional value or as keywords.
  #
  # When both a positional Hash and keywords are given they are merged
  # (keywords win on key clashes). Calling with no conditions at all
  # resets the filter to nil - the same "no filter" value the builder
  # starts with - rather than storing an empty Hash.
  #
  # @param value [Hash, Object, nil] filter conditions
  # @param kwargs [Hash] filter conditions given as keywords
  # @return [QueryBuilder] self
  def filter(value = nil, **kwargs)
    @filter =
      if value.is_a?(Hash) && !kwargs.empty?
        value.merge(kwargs)
      elsif value
        value
      elsif kwargs.empty?
        nil
      else
        kwargs
      end
    self
  end

  # Request vector values in the results.
  #
  # @return [QueryBuilder] self
  def with_values
    @include_values = true
    self
  end

  # Request metadata in the results (this is the default).
  #
  # @return [QueryBuilder] self
  def with_metadata
    @include_metadata = true
    self
  end

  # Exclude metadata from the results.
  #
  # @return [QueryBuilder] self
  def without_metadata
    @include_metadata = false
    self
  end

  # Execute the built query.
  #
  # @return [Object] whatever the client's #query returns
  def execute
    @client.query(
      index: @index,
      vector: @vector,
      top_k: @top_k,
      namespace: @namespace,
      filter: @filter,
      include_values: @include_values,
      include_metadata: @include_metadata
    )
  end
end
427
+
279
428
  private
280
429
 
281
430
  def build_config(provider_name, api_key, environment, host, options)
@@ -306,6 +455,8 @@ module Vectra
306
455
  Providers::Weaviate.new(config)
307
456
  when :pgvector
308
457
  Providers::Pgvector.new(config)
458
+ when :memory
459
+ Providers::Memory.new(config)
309
460
  else
310
461
  raise UnsupportedProviderError, "Provider '#{config.provider}' is not supported"
311
462
  end
@@ -317,11 +468,32 @@ module Vectra
317
468
  raise ValidationError, "Index name cannot be empty" if index.empty?
318
469
  end
319
470
 
471
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
# Validate a batch of vectors before it is handed to the provider.
#
# The batch must be a non-empty Array. Every element whose dimension can
# be determined must have the same dimension as the first such element.
# Elements without readable values are skipped by this check.
#
# @param vectors [Array<Vector, Hash>] batch to validate
# @raise [ValidationError] if the batch is nil, not an Array, empty, or
#   contains vectors of differing dimensions
# @return [void]
def validate_vectors!(vectors)
  raise ValidationError, "Vectors cannot be nil" if vectors.nil?
  raise ValidationError, "Vectors must be an array" unless vectors.is_a?(Array)
  raise ValidationError, "Vectors cannot be empty" if vectors.empty?

  # Check dimension consistency against the first vector that has values,
  # so a value-less leading element does not disable the whole check.
  expected_dim = nil

  vectors.each_with_index do |vec, position|
    dim = batch_vector_dimension(vec)
    next unless dim

    expected_dim ||= dim
    next if dim == expected_dim

    raise ValidationError,
          "Inconsistent vector dimensions at index #{position}: " \
          "expected #{expected_dim}, got #{dim}. " \
          "All vectors in a batch must have the same dimension."
  end
end
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

# Dimension of one batch element, or nil when it cannot be determined.
#
# Reads #values from a Vector and the :values / "values" key from a Hash.
# Any other element yields nil instead of raising.
def batch_vector_dimension(vec)
  values =
    if vec.is_a?(Vector)
      vec.values
    elsif vec.is_a?(Hash)
      vec[:values] || vec["values"]
    end

  values.size if values.respond_to?(:size)
end
325
497
 
326
498
  def validate_query_vector!(vector)
327
499
  raise ValidationError, "Query vector cannot be nil" if vector.nil?
@@ -340,5 +512,20 @@ module Vectra
340
512
  raise ValidationError, "ID must be a string" unless id.is_a?(String)
341
513
  raise ValidationError, "ID cannot be empty" if id.empty?
342
514
  end
515
+
516
# Write an error to the configured logger, if there is one.
#
# Emits the message, then (when an error is given) its class and message,
# then up to the first three backtrace lines when a backtrace is present.
#
# @param message [String] description of what failed
# @param error [Exception, nil] the exception to describe
def log_error(message, error = nil)
  logger = config.logger
  return unless logger

  logger.error("[Vectra] #{message}")
  logger.error("[Vectra] #{error.class}: #{error.message}") if error

  trace = error&.backtrace
  logger.error("[Vectra] #{trace.first(3).join("\n")}") if trace
end
523
+
524
# Write a debug line to the configured logger, if there is one.
#
# When extra data is supplied, its #inspect output is logged on a
# second line.
#
# @param message [String] text to log
# @param data [Object, nil] optional payload to dump after the message
def log_debug(message, data = nil)
  logger = config.logger
  return unless logger

  logger.debug("[Vectra] #{message}")
  logger.debug("[Vectra] #{data.inspect}") if data
end
343
530
  end
344
531
  end
@@ -11,7 +11,7 @@ module Vectra
11
11
  # end
12
12
  #
13
13
  class Configuration
14
- SUPPORTED_PROVIDERS = %i[pinecone qdrant weaviate pgvector].freeze
14
+ SUPPORTED_PROVIDERS = %i[pinecone qdrant weaviate pgvector memory].freeze
15
15
 
16
16
  attr_accessor :api_key, :environment, :host, :timeout, :open_timeout,
17
17
  :max_retries, :retry_delay, :logger, :pool_size, :pool_timeout,
@@ -117,7 +117,7 @@ module Vectra
117
117
 
118
118
  # Providers that don't require API key (local instances)
119
119
  def api_key_optional_provider?
120
- %i[qdrant pgvector].include?(provider)
120
+ %i[qdrant pgvector memory].include?(provider)
121
121
  end
122
122
 
123
123
  def validate_provider_specific!
@@ -130,6 +130,8 @@ module Vectra
130
130
  validate_weaviate!
131
131
  when :pgvector
132
132
  validate_pgvector!
133
+ when :memory
134
+ # Memory provider has no special requirements
133
135
  end
134
136
  end
135
137
 
@@ -37,13 +37,12 @@ module Vectra
37
37
 
38
38
  # Test if secondary key is valid
39
39
  #
40
- # @param timeout [Float] Test timeout in seconds
41
40
  # @return [Boolean] true if secondary key works
42
- def test_secondary(timeout: 5)
41
+ def test_secondary
43
42
  return false if secondary_key.nil? || secondary_key.empty?
44
43
 
45
44
  client = build_test_client(secondary_key)
46
- client.healthy?(timeout: timeout)
45
+ client.healthy?
47
46
  rescue StandardError
48
47
  false
49
48
  end
@@ -232,16 +232,34 @@ module Vectra
232
232
  #
233
233
  # @param body [Hash, String, nil] response body
234
234
  # @return [String]
235
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
def extract_error_message(body)
  case body
  when Hash
    # Primary error message. The value found under these keys is not
    # guaranteed to be a String (it may itself be a Hash), so unwrap a
    # nested "message" if there is one and coerce the result to a String
    # before any String-only operation below.
    primary = body["message"] || body["error"] || body["error_message"] || body.to_s
    primary = primary["message"] || primary if primary.is_a?(Hash)
    msg = primary.to_s

    # Add context from details, unless the message already contains it
    details = body["details"] || body["error_details"] || body["detail"]
    if details
      details_str = details.is_a?(Hash) ? details.to_json : details.to_s
      msg += " (#{details_str})" unless msg.include?(details_str)
    end

    # Add field-specific errors if available; entries that yield nothing
    # are dropped so the list never contains empty slots
    if body["errors"].is_a?(Array)
      field_errors = body["errors"]
                     .map { |e| e.is_a?(Hash) ? e["field"] || e["message"] : e }
                     .compact
                     .join(", ")
      msg += " [Fields: #{field_errors}]" unless field_errors.empty? || msg.include?(field_errors)
    end

    msg
  when String
    body
  else
    "Unknown error"
  end
end
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
245
263
 
246
264
  # Log debug information
247
265
  #
@@ -0,0 +1,298 @@
1
+ # frozen_string_literal: true
2
+
3
module Vectra
  module Providers
    # In-memory vector database provider for testing
    #
    # This provider stores all vectors in memory using Ruby hashes.
    # Perfect for testing without external dependencies.
    #
    # Read operations (query, fetch, stats, and lookups done by update and
    # delete) never create storage entries, so probing an unknown index or
    # namespace does not make it appear in later #stats output.
    #
    # @example Usage in tests
    #   Vectra.configure do |config|
    #     config.provider = :memory if Rails.env.test?
    #   end
    #
    #   client = Vectra::Client.new
    #   client.upsert(index: 'test', vectors: [...])
    #
    class Memory < Base
      def initialize(config)
        super
        # Storage structure: @storage[index][namespace][id] = Vector
        # The default procs create missing levels on write; read paths use
        # #namespace_bucket / #stored_vectors so they never trigger them.
        @storage = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }
        @index_configs = {} # Store index configurations (dimension, metric)
      end

      # @see Base#provider_name
      def provider_name
        :memory
      end

      # Store vectors, replacing any existing vector with the same id.
      #
      # The first vector written to an unknown index fixes that index's
      # dimension and sets its metric to "cosine".
      #
      # @see Base#upsert
      # @return [Hash] { upserted_count: Integer }
      def upsert(index:, vectors:, namespace: nil)
        normalized = normalize_vectors(vectors)
        ns = namespace || ""

        normalized.each do |vec|
          # Infer dimension from first vector if not set
          @index_configs[index] ||= {
            dimension: vec[:values].length,
            metric: "cosine"
          }

          # Sparse values are carried over when the normalized hash has
          # them (nil otherwise), so #update and #query can return them.
          @storage[index][ns][vec[:id]] = Vector.new(
            id: vec[:id],
            values: vec[:values],
            metadata: vec[:metadata] || {},
            sparse_values: vec[:sparse_values]
          )
        end

        log_debug("Upserted #{normalized.size} vectors to #{index}")
        { upserted_count: normalized.size }
      end

      # Score every stored vector in the namespace against +vector+ and
      # return the +top_k+ best matches, highest score first.
      #
      # @see Base#query
      def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
                include_values: false, include_metadata: true)
        candidates = stored_vectors(index, namespace).values

        # Apply metadata filter
        candidates = candidates.select { |v| matches_filter?(v, filter) } if filter

        # Calculate similarity scores
        matches = candidates.map do |vec|
          score = calculate_similarity(vector, vec.values, index)
          build_match(vec, score, include_values, include_metadata)
        end

        # Sort by score (descending) and take top_k
        matches.sort_by! { |m| -m[:score] }
        matches = matches.first(top_k)

        log_debug("Query returned #{matches.size} results")
        QueryResult.from_response(matches: matches, namespace: namespace)
      end

      # Look up vectors by id; ids that are not stored are left out.
      #
      # @see Base#fetch
      # @return [Hash] id => Vector for every id that was found
      def fetch(index:, ids:, namespace: nil)
        bucket = stored_vectors(index, namespace)

        ids.each_with_object({}) do |id, found|
          vec = bucket[id]
          found[id] = vec if vec
        end
      end

      # Merge +metadata+ (keys converted to strings) into a stored vector.
      #
      # @see Base#update
      # @raise [NotFoundError] if the vector does not exist
      # @return [Hash] { updated: true }
      def update(index:, id:, metadata:, namespace: nil)
        ns = namespace || ""
        vec = stored_vectors(index, namespace)[id]

        raise NotFoundError, "Vector '#{id}' not found in index '#{index}'" unless vec

        # Merge metadata
        new_metadata = (vec.metadata || {}).merge(metadata.transform_keys(&:to_s))
        @storage[index][ns][id] = Vector.new(
          id: vec.id,
          values: vec.values,
          metadata: new_metadata,
          sparse_values: vec.sparse_values
        )

        log_debug("Updated vector #{id}")
        { updated: true }
      end

      # Delete vectors. The first applicable rule wins:
      # +delete_all+ clears every namespace of the index; +ids+ removes
      # those ids from the namespace; a namespace without a filter drops
      # the whole namespace; a filter removes matching vectors.
      #
      # @see Base#delete
      # @raise [ValidationError] if none of the selectors is given
      # @return [Hash] { deleted: true }
      def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
        ns = namespace || ""

        if delete_all
          @storage.fetch(index, nil)&.clear
        elsif ids
          bucket = namespace_bucket(index, namespace)
          ids.each { |id| bucket.delete(id) } if bucket
        elsif namespace && !filter
          @storage.fetch(index, nil)&.delete(ns)
        elsif filter
          # Delete vectors matching filter
          namespace_bucket(index, namespace)&.delete_if { |_id, vec| matches_filter?(vec, filter) }
        else
          raise ValidationError, "Must specify ids, filter, namespace, or delete_all"
        end

        log_debug("Deleted vectors from #{index}")
        { deleted: true }
      end

      # @see Base#list_indexes
      def list_indexes
        @index_configs.keys.map { |name| describe_index(index: name) }
      end

      # @see Base#describe_index
      # @raise [NotFoundError] if nothing was ever upserted to the index
      def describe_index(index:)
        config = @index_configs[index]
        raise NotFoundError, "Index '#{index}' not found" unless config

        {
          name: index,
          dimension: config[:dimension],
          metric: config[:metric],
          status: "ready"
        }
      end

      # Vector counts for one namespace, or for every namespace of the
      # index when no namespace is given.
      #
      # @see Base#stats
      # @raise [NotFoundError] if nothing was ever upserted to the index
      def stats(index:, namespace: nil)
        config = @index_configs[index]
        raise NotFoundError, "Index '#{index}' not found" unless config

        if namespace
          count = stored_vectors(index, namespace).size
          namespaces = { namespace => { vector_count: count } }
        else
          # Count all namespaces
          per_namespace = @storage.fetch(index, {})
          namespaces = per_namespace.transform_values { |vectors| { vector_count: vectors.size } }
          count = per_namespace.values.sum(&:size)
        end

        {
          total_vector_count: count,
          dimension: config[:dimension],
          namespaces: namespaces
        }
      end

      # Clear all stored data (useful for tests)
      #
      # @return [void]
      def clear!
        @storage.clear
        @index_configs.clear
      end

      private

      # The id => Vector hash for a namespace, or nil if it was never
      # written. Uses Hash#fetch so the auto-creating default procs of
      # @storage are not invoked.
      def namespace_bucket(index, namespace)
        @storage.fetch(index, nil)&.fetch(namespace || "", nil)
      end

      # Read-only view of a namespace: its id => Vector hash, or an empty
      # hash when the index or namespace does not exist.
      def stored_vectors(index, namespace)
        namespace_bucket(index, namespace) || {}
      end

      # Calculate similarity score based on index metric
      def calculate_similarity(query_vector, candidate_vector, index)
        config = @index_configs[index] || { metric: "cosine" }
        metric = config[:metric] || "cosine"

        case metric.to_s.downcase
        when "euclidean", "l2"
          # Convert distance to similarity (1 / (1 + distance))
          distance = euclidean_distance(query_vector, candidate_vector)
          1.0 / (1.0 + distance)
        when "dot_product", "inner_product", "dot"
          dot_product(query_vector, candidate_vector)
        else # cosine (default)
          cosine_similarity(query_vector, candidate_vector)
        end
      end

      # Calculate cosine similarity; 0.0 when either vector has zero magnitude
      def cosine_similarity(vec_a, vec_b)
        raise ArgumentError, "Vectors must have same dimension" if vec_a.length != vec_b.length

        dot = vec_a.zip(vec_b).sum { |a, b| a * b }
        mag_a = Math.sqrt(vec_a.sum { |v| v**2 })
        mag_b = Math.sqrt(vec_b.sum { |v| v**2 })

        return 0.0 if mag_a.zero? || mag_b.zero?

        dot / (mag_a * mag_b)
      end

      # Calculate Euclidean distance
      def euclidean_distance(vec_a, vec_b)
        raise ArgumentError, "Vectors must have same dimension" if vec_a.length != vec_b.length

        Math.sqrt(vec_a.zip(vec_b).sum { |a, b| (a - b)**2 })
      end

      # Calculate dot product
      def dot_product(vec_a, vec_b)
        raise ArgumentError, "Vectors must have same dimension" if vec_a.length != vec_b.length

        vec_a.zip(vec_b).sum { |a, b| a * b }
      end

      # Check if vector matches filter: every filter key (looked up in the
      # metadata by its string form) must satisfy its condition
      def matches_filter?(vector, filter)
        filter.all? do |key, value|
          vec_value = vector.metadata[key.to_s]
          matches_filter_value?(vec_value, value)
        end
      end

      # Check if a value matches filter criteria
      #
      # A Hash is read as a set of operators that must all hold, an Array
      # as a list of accepted values, anything else as an exact match.
      # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
      def matches_filter_value?(actual, expected)
        case expected
        when Hash
          # Support operators like { "$gt" => 5, "$lt" => 10 }
          expected.all? do |op, val|
            case op.to_s
            when "$eq"
              actual == val
            when "$ne"
              actual != val
            when "$gt"
              actual.is_a?(Numeric) && val.is_a?(Numeric) && actual > val
            when "$gte"
              actual.is_a?(Numeric) && val.is_a?(Numeric) && actual >= val
            when "$lt"
              actual.is_a?(Numeric) && val.is_a?(Numeric) && actual < val
            when "$lte"
              actual.is_a?(Numeric) && val.is_a?(Numeric) && actual <= val
            when "$in"
              val.is_a?(Array) && val.include?(actual)
            when "$nin"
              # Counterpart of "$in": value must not be in the list
              val.is_a?(Array) && !val.include?(actual)
            else
              # Not a known operator: compare against the whole Hash
              actual == expected
            end
          end
        when Array
          expected.include?(actual)
        else
          actual == expected
        end
      end
      # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

      # Build match hash from vector
      def build_match(vector, score, include_values, include_metadata)
        match = {
          id: vector.id,
          score: score
        }
        match[:values] = vector.values if include_values
        match[:metadata] = vector.metadata if include_metadata
        match[:sparse_values] = vector.sparse_values if vector.sparse?
        match
      end

      # Override validate_config! - Memory provider doesn't need host or API key
      # rubocop:disable Naming/PredicateMethod
      def validate_config!
        # Memory provider has no special requirements
        true
      end
      # rubocop:enable Naming/PredicateMethod
    end
  end
end
@@ -299,6 +299,37 @@ module Vectra
299
299
  handle_retriable_response(e)
300
300
  end
301
301
 
302
# Extract error message from Qdrant response format
#
# Looks for the error under the "status" key when that key holds a Hash,
# otherwise at the top level of the body. Details and field errors are
# appended unless the message already contains them.
#
# @param body [Hash, String, nil] response body
# @return [String]
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
def extract_error_message(body)
  case body
  when Hash
    # Qdrant wraps errors in "status" key. Only treat it as the error
    # container when it is a Hash: a String or Integer status must not be
    # indexed with string keys.
    status = body["status"].is_a?(Hash) ? body["status"] : body
    primary = status["error"] || body["message"] || body["error_message"] || body.to_s
    # Coerce so the String-only operations below are safe for any value
    msg = primary.to_s

    # Add details
    details = status["details"] || status["error_details"]
    if details
      details_str = details.is_a?(Hash) ? details.to_json : details.to_s
      msg += " (#{details_str})" unless msg.include?(details_str)
    end

    # Add field-specific errors; entries that yield nothing are dropped
    # so the list never contains empty slots
    if status["errors"].is_a?(Array)
      field_errors = status["errors"]
                     .map { |e| e.is_a?(Hash) ? e["field"] || e["message"] : e }
                     .compact
                     .join(", ")
      msg += " [Fields: #{field_errors}]" unless field_errors.empty? || msg.include?(field_errors)
    end

    msg
  when String
    body
  else
    "Unknown error"
  end
end
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
332
+
302
333
  def auth_headers
303
334
  headers = {}
304
335
  headers["api-key"] = config.api_key if config.api_key && !config.api_key.empty?