vectra-client 0.3.4 → 0.4.0

This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
data/lib/vectra/batch.rb CHANGED
@@ -17,6 +17,17 @@ module Vectra
   #   )
   #   puts "Upserted: #{result[:upserted_count]}"
   #
+  # @example With progress tracking
+  #   batch.upsert_async(
+  #     index: 'docs',
+  #     vectors: large_array,
+  #     on_progress: ->(stats) {
+  #       puts "Progress: #{stats[:percentage]}% (#{stats[:processed]}/#{stats[:total]})"
+  #       puts "  Chunk #{stats[:current_chunk] + 1}/#{stats[:total_chunks]}"
+  #       puts "  Success: #{stats[:success_count]}, Failed: #{stats[:failed_count]}"
+  #     }
+  #   )
+  #
   class Batch
     DEFAULT_CONCURRENCY = 4
     DEFAULT_CHUNK_SIZE = 100
@@ -38,12 +49,23 @@ module Vectra
     # @param vectors [Array<Hash>] vectors to upsert
     # @param namespace [String, nil] optional namespace
     # @param chunk_size [Integer] vectors per chunk (default: 100)
+    # @param on_progress [Proc, nil] optional callback called after each chunk completes
+    #   Callback receives hash with: processed, total, percentage, current_chunk, total_chunks, success_count, failed_count
     # @return [Hash] aggregated result with :upserted_count, :chunks, :errors
-    def upsert_async(index:, vectors:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE)
+    #
+    # @example With progress callback
+    #   batch.upsert_async(
+    #     index: 'docs',
+    #     vectors: large_array,
+    #     on_progress: ->(stats) {
+    #       puts "Progress: #{stats[:percentage]}% (#{stats[:processed]}/#{stats[:total]})"
+    #     }
+    #   )
+    def upsert_async(index:, vectors:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE, on_progress: nil)
       chunks = vectors.each_slice(chunk_size).to_a
       return { upserted_count: 0, chunks: 0, errors: [] } if chunks.empty?

-      results = process_chunks_concurrently(chunks) do |chunk|
+      results = process_chunks_concurrently(chunks, total_items: vectors.size, on_progress: on_progress) do |chunk|
         client.upsert(index: index, vectors: chunk, namespace: namespace)
       end

@@ -56,12 +78,14 @@ module Vectra
     # @param ids [Array<String>] IDs to delete
     # @param namespace [String, nil] optional namespace
     # @param chunk_size [Integer] IDs per chunk (default: 100)
+    # @param on_progress [Proc, nil] optional callback called after each chunk completes
+    #   Callback receives hash with: processed, total, percentage, current_chunk, total_chunks, success_count, failed_count
     # @return [Hash] aggregated result
-    def delete_async(index:, ids:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE)
+    def delete_async(index:, ids:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE, on_progress: nil)
       chunks = ids.each_slice(chunk_size).to_a
       return { deleted_count: 0, chunks: 0, errors: [] } if chunks.empty?

-      results = process_chunks_concurrently(chunks) do |chunk|
+      results = process_chunks_concurrently(chunks, total_items: ids.size, on_progress: on_progress) do |chunk|
         client.delete(index: index, ids: chunk, namespace: namespace)
       end

@@ -74,12 +98,14 @@ module Vectra
     # @param ids [Array<String>] IDs to fetch
     # @param namespace [String, nil] optional namespace
     # @param chunk_size [Integer] IDs per chunk (default: 100)
+    # @param on_progress [Proc, nil] optional callback called after each chunk completes
+    #   Callback receives hash with: processed, total, percentage, current_chunk, total_chunks, success_count, failed_count
     # @return [Hash<String, Vector>] merged results
-    def fetch_async(index:, ids:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE)
+    def fetch_async(index:, ids:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE, on_progress: nil)
      chunks = ids.each_slice(chunk_size).to_a
      return {} if chunks.empty?

-      results = process_chunks_concurrently(chunks) do |chunk|
+      results = process_chunks_concurrently(chunks, total_items: ids.size, on_progress: on_progress) do |chunk|
         client.fetch(index: index, ids: chunk, namespace: namespace)
       end

@@ -88,15 +114,43 @@ module Vectra

     private

-    def process_chunks_concurrently(chunks)
+    # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockLength
+    def process_chunks_concurrently(chunks, total_items: nil, on_progress: nil)
       pool = Concurrent::FixedThreadPool.new(concurrency)
       futures = []
+      progress_mutex = Mutex.new
+      completed_count = Concurrent::AtomicFixnum.new(0)
+      success_count = Concurrent::AtomicFixnum.new(0)
+      failed_count = Concurrent::AtomicFixnum.new(0)

       chunks.each_with_index do |chunk, index|
         futures << Concurrent::Future.execute(executor: pool) do
-          { index: index, result: yield(chunk), error: nil }
+          result = yield(chunk)
+          success_count.increment
+          { index: index, result: result, error: nil }
         rescue StandardError => e
+          failed_count.increment
           { index: index, result: nil, error: e }
+        ensure
+          # Call progress callback when chunk completes
+          if on_progress
+            completed = completed_count.increment
+            total_size = chunks.size * chunks.first.size
+            processed = [completed * chunks.first.size, total_items || total_size].min
+            percentage = total_items ? (processed.to_f / total_items * 100).round(2) : (completed.to_f / chunks.size * 100).round(2)
+
+            progress_mutex.synchronize do
+              on_progress.call(
+                processed: processed,
+                total: total_items || total_size,
+                percentage: percentage,
+                current_chunk: completed - 1,
+                total_chunks: chunks.size,
+                success_count: success_count.value,
+                failed_count: failed_count.value
+              )
+            end
+          end
         end
       end

@@ -107,6 +161,7 @@ module Vectra

       results.sort_by { |r| r[:index] }
     end
+    # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockLength

     def aggregate_results(results, total_vectors)
       errors = results.select { |r| r[:error] }.map { |r| r[:error] }
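
The on_progress hooks added above are plain Ruby procs invoked once per completed chunk. A minimal usage sketch, assuming an already-configured client and an existing 'docs' index; the Batch constructor and the index name are illustrative, not taken from this diff:

    batch = Vectra::Batch.new(client)   # construction assumed; not shown in this diff

    result = batch.upsert_async(
      index: 'docs',
      vectors: vectors,                 # array of vector hashes, each with a :values array
      on_progress: ->(stats) {
        # stats keys per the diff: :processed, :total, :percentage,
        # :current_chunk, :total_chunks, :success_count, :failed_count
        puts "#{stats[:percentage]}% (#{stats[:processed]}/#{stats[:total]})"
      }
    )

    puts "Upserted: #{result[:upserted_count]}, errors: #{result[:errors].size}"

The callback fires from worker threads but is wrapped in a mutex, so a simple puts or progress-bar update needs no extra locking of its own.
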
data/lib/vectra/client.rb CHANGED
@@ -98,8 +98,26 @@ module Vectra
     #   filter: { category: 'programming' }
     # )
     #
-    def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
+    # @example Chainable query builder
+    #   results = client.query("my-index")
+    #     .vector([0.1, 0.2, 0.3])
+    #     .top_k(10)
+    #     .filter(category: "programming")
+    #     .with_metadata
+    #     .execute
+    #
+    def query(index_arg = nil, index: nil, vector: nil, top_k: 10, namespace: nil, filter: nil,
               include_values: false, include_metadata: true)
+      # If called with a positional index string only, return a query builder:
+      #   client.query("docs").vector(vec).top_k(10).filter(...).execute
+      if index_arg && index.nil? && vector.nil? && !block_given?
+        return QueryBuilder.new(self, index_arg)
+      end
+
+      # Handle positional argument for index in non-builder case
+      index = index_arg if index_arg && index.nil?
+
+      # Backwards-compatible path: perform query immediately
       validate_index!(index)
       validate_query_vector!(vector)

@@ -276,6 +294,137 @@ module Vectra
       provider.provider_name
     end

+    # Quick health check - tests if provider connection is healthy
+    #
+    # @param timeout [Float] timeout in seconds (default: 5)
+    # @return [Boolean] true if connection is healthy
+    #
+    # @example
+    #   if client.healthy?
+    #     client.upsert(...)
+    #   else
+    #     handle_unhealthy_connection
+    #   end
+    def healthy?
+      start = Time.now
+      provider.list_indexes
+      true
+    rescue StandardError => e
+      log_error("Health check failed", e)
+      false
+    ensure
+      duration = ((Time.now - start) * 1000).round(2) if defined?(start)
+      log_debug("Health check completed in #{duration}ms") if duration
+    end
+
+    # Ping provider and get connection health status with latency
+    #
+    # @param timeout [Float] timeout in seconds (default: 5)
+    # @return [Hash] health status with :healthy, :provider, :latency_ms
+    #
+    # @example
+    #   status = client.ping
+    #   puts "Provider: #{status[:provider]}, Healthy: #{status[:healthy]}, Latency: #{status[:latency_ms]}ms"
+    def ping
+      start = Time.now
+      healthy = true
+      error_info = nil
+
+      begin
+        provider.list_indexes
+      rescue StandardError => e
+        healthy = false
+        error_info = { error: e.class.name, error_message: e.message }
+        log_error("Health check failed", e)
+      end
+
+      duration = ((Time.now - start) * 1000).round(2)
+
+      result = {
+        healthy: healthy,
+        provider: provider_name,
+        latency_ms: duration
+      }
+
+      result.merge!(error_info) if error_info
+      result
+    end
+
+    # Chainable query builder
+    #
+    # @api public
+    # @example
+    #   results = client.query("docs")
+    #     .vector(embedding)
+    #     .top_k(20)
+    #     .namespace("prod")
+    #     .filter(category: "ruby")
+    #     .with_metadata
+    #     .execute
+    #
+    class QueryBuilder
+      def initialize(client, index)
+        @client = client
+        @index = index
+        @vector = nil
+        @top_k = 10
+        @namespace = nil
+        @filter = nil
+        @include_values = false
+        @include_metadata = true
+      end
+
+      attr_reader :index
+
+      def vector(value)
+        @vector = value
+        self
+      end
+
+      def top_k(value)
+        @top_k = value.to_i
+        self
+      end
+
+      def namespace(value)
+        @namespace = value
+        self
+      end
+
+      def filter(value = nil, **kwargs)
+        @filter = value || kwargs
+        self
+      end
+
+      def with_values
+        @include_values = true
+        self
+      end
+
+      def with_metadata
+        @include_metadata = true
+        self
+      end
+
+      def without_metadata
+        @include_metadata = false
+        self
+      end
+
+      # Execute the built query and return a QueryResult
+      def execute
+        @client.query(
+          index: @index,
+          vector: @vector,
+          top_k: @top_k,
+          namespace: @namespace,
+          filter: @filter,
+          include_values: @include_values,
+          include_metadata: @include_metadata
+        )
+      end
+    end
+
     private

     def build_config(provider_name, api_key, environment, host, options)
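
With the dispatch above, query keeps its keyword form and gains a chainable form, and the health helpers sit alongside it. A short sketch; the index name and embedding are placeholders:

    embedding = [0.1, 0.2, 0.3]

    # Keyword style, unchanged
    client.query(index: 'docs', vector: embedding, top_k: 5)

    # Chainable style: a bare positional index returns a QueryBuilder
    client.query('docs')
          .vector(embedding)
          .top_k(5)
          .filter(category: 'ruby')
          .without_metadata
          .execute

    # Connection health
    client.healthy?   # => true or false
    client.ping       # => { healthy: true, provider: ..., latency_ms: 12.3 }
                      #    plus :error / :error_message when the check fails
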
@@ -306,6 +455,8 @@ module Vectra
         Providers::Weaviate.new(config)
       when :pgvector
         Providers::Pgvector.new(config)
+      when :memory
+        Providers::Memory.new(config)
       else
         raise UnsupportedProviderError, "Provider '#{config.provider}' is not supported"
       end
@@ -317,11 +468,32 @@ module Vectra
       raise ValidationError, "Index name cannot be empty" if index.empty?
     end

+    # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
     def validate_vectors!(vectors)
       raise ValidationError, "Vectors cannot be nil" if vectors.nil?
       raise ValidationError, "Vectors must be an array" unless vectors.is_a?(Array)
       raise ValidationError, "Vectors cannot be empty" if vectors.empty?
+
+      # Check dimension consistency
+      first_vector = vectors.first
+      first_values = first_vector.is_a?(Vector) ? first_vector.values : first_vector[:values]
+      first_dim = first_values&.size
+
+      return unless first_dim
+
+      vectors.each_with_index do |vec, index|
+        values = vec.is_a?(Vector) ? vec.values : vec[:values]
+        dim = values&.size
+
+        next unless dim && dim != first_dim
+
+        raise ValidationError,
+              "Inconsistent vector dimensions at index #{index}: " \
+              "expected #{first_dim}, got #{dim}. " \
+              "All vectors in a batch must have the same dimension."
+      end
     end
+    # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

     def validate_query_vector!(vector)
       raise ValidationError, "Query vector cannot be nil" if vector.nil?
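
The dimension check above rejects mixed-size batches up front. An illustrative failure, assuming upsert still runs validate_vectors! on its input as in earlier versions; the index name and values are made up:

    client.upsert(
      index: 'docs',
      vectors: [
        { id: 'a', values: [0.1, 0.2, 0.3] },
        { id: 'b', values: [0.4, 0.5] }   # only 2 dimensions
      ]
    )
    # => raises Vectra::ValidationError:
    #    "Inconsistent vector dimensions at index 1: expected 3, got 2.
    #     All vectors in a batch must have the same dimension."
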
@@ -340,5 +512,20 @@ module Vectra
       raise ValidationError, "ID must be a string" unless id.is_a?(String)
       raise ValidationError, "ID cannot be empty" if id.empty?
     end
+
+    def log_error(message, error = nil)
+      return unless config.logger
+
+      config.logger.error("[Vectra] #{message}")
+      config.logger.error("[Vectra] #{error.class}: #{error.message}") if error
+      config.logger.error("[Vectra] #{error.backtrace&.first(3)&.join("\n")}") if error&.backtrace
+    end
+
+    def log_debug(message, data = nil)
+      return unless config.logger
+
+      config.logger.debug("[Vectra] #{message}")
+      config.logger.debug("[Vectra] #{data.inspect}") if data
+    end
   end
 end
@@ -11,7 +11,7 @@ module Vectra
   #   end
   #
   class Configuration
-    SUPPORTED_PROVIDERS = %i[pinecone qdrant weaviate pgvector].freeze
+    SUPPORTED_PROVIDERS = %i[pinecone qdrant weaviate pgvector memory].freeze

     attr_accessor :api_key, :environment, :host, :timeout, :open_timeout,
                   :max_retries, :retry_delay, :logger, :pool_size, :pool_timeout,
@@ -117,7 +117,7 @@ module Vectra

     # Providers that don't require API key (local instances)
     def api_key_optional_provider?
-      %i[qdrant pgvector].include?(provider)
+      %i[qdrant pgvector memory].include?(provider)
     end

     def validate_provider_specific!
@@ -130,6 +130,8 @@ module Vectra
         validate_weaviate!
       when :pgvector
         validate_pgvector!
+      when :memory
+        # Memory provider has no special requirements
       end
     end

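The new :memory provider registered above needs neither an API key nor provider-specific validation, which makes it convenient for tests. A sketch of how it might be wired up; the constructor keywords are an assumption based on build_config(provider_name, api_key, environment, host, options) in client.rb, not confirmed by this diff:

    # Hypothetical setup for an in-memory test double
    client = Vectra::Client.new(provider: :memory)

    client.upsert(index: 'scratch', vectors: [{ id: '1', values: [0.1, 0.2, 0.3] }])
    client.query(index: 'scratch', vector: [0.1, 0.2, 0.3], top_k: 1)
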
@@ -37,13 +37,12 @@ module Vectra

     # Test if secondary key is valid
     #
-    # @param timeout [Float] Test timeout in seconds
     # @return [Boolean] true if secondary key works
-    def test_secondary(timeout: 5)
+    def test_secondary
       return false if secondary_key.nil? || secondary_key.empty?

       client = build_test_client(secondary_key)
-      client.healthy?(timeout: timeout)
+      client.healthy?
     rescue StandardError
       false
     end
@@ -232,16 +232,34 @@ module Vectra
     #
     # @param body [Hash, String, nil] response body
     # @return [String]
+    # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
     def extract_error_message(body)
       case body
       when Hash
-        body["message"] || body["error"] || body.to_s
+        # Primary error message
+        msg = body["message"] || body["error"] || body["error_message"] || body.to_s
+
+        # Add context from details
+        details = body["details"] || body["error_details"] || body["detail"]
+        if details
+          details_str = details.is_a?(Hash) ? details.to_json : details.to_s
+          msg += " (#{details_str})" unless msg.include?(details_str)
+        end
+
+        # Add field-specific errors if available
+        if body["errors"].is_a?(Array)
+          field_errors = body["errors"].map { |e| e.is_a?(Hash) ? e["field"] || e["message"] : e }.join(", ")
+          msg += " [Fields: #{field_errors}]" if field_errors && !msg.include?(field_errors)
+        end
+
+        msg
       when String
         body
       else
         "Unknown error"
       end
     end
+    # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

     # Log debug information
     #
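
To make the richer extraction above concrete, a hypothetical provider error body and the message it now yields, traced through the code shown; the body itself is made up:

    body = {
      "message" => "Upsert rejected",
      "details" => { "index" => "docs" },
      "errors"  => [{ "field" => "values" }, "dimension mismatch"]
    }

    extract_error_message(body)
    # => 'Upsert rejected ({"index":"docs"}) [Fields: values, dimension mismatch]'

Previously the same body would have produced only "Upsert rejected".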