vectra-client 1.0.8 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/vectra/client.rb CHANGED
@@ -42,6 +42,37 @@ module Vectra
42
42
 
43
43
  attr_reader :config, :provider, :default_index, :default_namespace
44
44
 
45
class << self
  # Get the global middleware stack.
  #
  # The returned array is the live registry (lazily created on first
  # access); entries appended to it are seen by later readers.
  #
  # @return [Array<Array>] Array of [middleware_class, options] pairs
  def middleware
    @middleware ||= []
  end

  # Add middleware to the global stack.
  #
  # Entries are stored as [middleware_class, options] pairs. Anything that
  # cannot be instantiated is rejected here, at registration time, rather
  # than surfacing later as an opaque NoMethodError.
  #
  # @param middleware_class [Class] Middleware class (must respond to +new+)
  # @param options [Hash] Options to pass to middleware constructor
  # @return [Array<Array>] the updated global stack
  # @raise [ArgumentError] if +middleware_class+ does not respond to +new+
  #
  # @example Add global logging middleware
  #   Vectra::Client.use Vectra::Middleware::Logging
  #
  # @example Add middleware with options
  #   Vectra::Client.use Vectra::Middleware::Retry, max_attempts: 5
  #
  def use(middleware_class, **options)
    unless middleware_class.respond_to?(:new)
      raise ArgumentError,
            "middleware must respond to .new, got #{middleware_class.inspect}"
    end

    middleware << [middleware_class, options]
  end

  # Clear all global middleware.
  #
  # Replaces the registry with a fresh array; arrays previously obtained
  # from +middleware+ are left untouched.
  #
  # @return [void]
  def clear_middleware!
    @middleware = []
  end
end
75
+
45
76
  # Initialize a new Client
46
77
  #
47
78
  # @param provider [Symbol, nil] provider name (:pinecone, :qdrant, :weaviate)
@@ -51,12 +82,14 @@ module Vectra
51
82
  # @param options [Hash] additional options
52
83
  # @option options [String] :index default index name
53
84
  # @option options [String] :namespace default namespace
85
+ # @option options [Array<Class, Object>] :middleware instance-level middleware
54
86
  def initialize(provider: nil, api_key: nil, environment: nil, host: nil, **options)
55
87
  @config = build_config(provider, api_key, environment, host, options)
56
88
  @config.validate!
57
89
  @provider = build_provider
58
90
  @default_index = options[:index]
59
91
  @default_namespace = options[:namespace]
92
+ @middleware = build_middleware_stack(options[:middleware])
60
93
  end
61
94
 
62
95
  # Upsert vectors into an index
@@ -87,7 +120,7 @@ module Vectra
87
120
  index: index,
88
121
  metadata: { vector_count: vectors.size }
89
122
  ) do
90
- provider.upsert(index: index, vectors: vectors, namespace: namespace)
123
+ @middleware.call(:upsert, index: index, vectors: vectors, namespace: namespace, provider: provider_name)
91
124
  end
92
125
  end
93
126
 
@@ -151,14 +184,16 @@ module Vectra
151
184
  index: index,
152
185
  metadata: { top_k: top_k }
153
186
  ) do
154
- result = provider.query(
187
+ result = @middleware.call(
188
+ :query,
155
189
  index: index,
156
190
  vector: vector,
157
191
  top_k: top_k,
158
192
  namespace: namespace,
159
193
  filter: filter,
160
194
  include_values: include_values,
161
- include_metadata: include_metadata
195
+ include_metadata: include_metadata,
196
+ provider: provider_name
162
197
  )
163
198
  end
164
199
 
@@ -188,7 +223,7 @@ module Vectra
188
223
  index: index,
189
224
  metadata: { id_count: ids.size }
190
225
  ) do
191
- provider.fetch(index: index, ids: ids, namespace: namespace)
226
+ @middleware.call(:fetch, index: index, ids: ids, namespace: namespace, provider: provider_name)
192
227
  end
193
228
  end
194
229
 
@@ -222,12 +257,14 @@ module Vectra
222
257
  index: index,
223
258
  metadata: { has_metadata: !metadata.nil?, has_values: !values.nil? }
224
259
  ) do
225
- provider.update(
260
+ @middleware.call(
261
+ :update,
226
262
  index: index,
227
263
  id: id,
228
264
  metadata: metadata,
229
265
  values: values,
230
- namespace: namespace
266
+ namespace: namespace,
267
+ provider: provider_name
231
268
  )
232
269
  end
233
270
  end
@@ -265,12 +302,14 @@ module Vectra
265
302
  index: index,
266
303
  metadata: { id_count: ids&.size, delete_all: delete_all, has_filter: !filter.nil? }
267
304
  ) do
268
- provider.delete(
305
+ @middleware.call(
306
+ :delete,
269
307
  index: index,
270
308
  ids: ids,
271
309
  namespace: namespace,
272
310
  filter: filter,
273
- delete_all: delete_all
311
+ delete_all: delete_all,
312
+ provider: provider_name
274
313
  )
275
314
  end
276
315
  end
@@ -284,7 +323,7 @@ module Vectra
284
323
  # indexes.each { |idx| puts idx[:name] }
285
324
  #
286
325
  def list_indexes
287
- provider.list_indexes
326
+ @middleware.call(:list_indexes, provider: provider_name)
288
327
  end
289
328
 
290
329
  # Describe an index
@@ -299,7 +338,7 @@ module Vectra
299
338
  def describe_index(index: nil)
300
339
  index ||= default_index
301
340
  validate_index!(index)
302
- provider.describe_index(index: index)
341
+ @middleware.call(:describe_index, index: index, provider: provider_name)
303
342
  end
304
343
 
305
344
  # Get index statistics
@@ -316,7 +355,7 @@ module Vectra
316
355
  index ||= default_index
317
356
  namespace ||= default_namespace
318
357
  validate_index!(index)
319
- provider.stats(index: index, namespace: namespace)
358
+ @middleware.call(:stats, index: index, namespace: namespace, provider: provider_name)
320
359
  end
321
360
 
322
361
  # Create a new index
@@ -342,7 +381,7 @@ module Vectra
342
381
  index: name,
343
382
  metadata: { dimension: dimension, metric: metric }
344
383
  ) do
345
- provider.create_index(name: name, dimension: dimension, metric: metric, **options)
384
+ @middleware.call(:create_index, name: name, dimension: dimension, metric: metric, provider: provider_name, **options)
346
385
  end
347
386
  end
348
387
 
@@ -365,7 +404,7 @@ module Vectra
365
404
  provider: provider_name,
366
405
  index: name
367
406
  ) do
368
- provider.delete_index(name: name)
407
+ @middleware.call(:delete_index, name: name, provider: provider_name)
369
408
  end
370
409
  end
371
410
 
@@ -440,7 +479,8 @@ module Vectra
440
479
  "Hybrid search is not supported by #{provider_name} provider"
441
480
  end
442
481
 
443
- provider.hybrid_search(
482
+ @middleware.call(
483
+ :hybrid_search,
444
484
  index: index,
445
485
  vector: vector,
446
486
  text: text,
@@ -449,7 +489,8 @@ module Vectra
449
489
  namespace: namespace,
450
490
  filter: filter,
451
491
  include_values: include_values,
452
- include_metadata: include_metadata
492
+ include_metadata: include_metadata,
493
+ provider: provider_name
453
494
  )
454
495
  end
455
496
 
@@ -628,6 +669,21 @@ module Vectra
628
669
  end
629
670
  end
630
671
 
672
# Build the middleware stack for this client.
#
# Global entries registered on the client class come first, each
# instantiated as +klass.new(**opts)+; instance-level entries follow in
# the order given. The stack is created around the current +@provider+.
#
# @param instance_middleware [Array, Class, Object, nil] instance-level
#   middleware. Each entry may be:
#   - a Class, instantiated with no arguments;
#   - a +[Class, Hash]+ pair, instantiated with the hash as keyword
#     options (same shape as the global registry);
#   - any other object, used as-is.
#   A single non-Array value is treated as a one-element list.
# @return [Middleware::Stack]
def build_middleware_stack(instance_middleware = nil)
  global = self.class.middleware.map { |klass, opts| klass.new(**opts) }

  # Wrap by hand instead of Kernel#Array: Array() calls #to_a, which would
  # explode a lone Hash- or Struct-based middleware object into its parts.
  entries =
    case instance_middleware
    when nil, false then []
    when Array then instance_middleware
    else [instance_middleware]
    end

  local = entries.map do |entry|
    if entry.is_a?(Class)
      entry.new
    elsif entry.is_a?(Array) && entry.size == 2 &&
          entry[0].is_a?(Class) && entry[1].is_a?(Hash)
      entry[0].new(**entry[1])
    else
      entry
    end
  end

  Middleware::Stack.new(@provider, global + local)
end
686
+
631
687
  def validate_index!(index)
632
688
  raise ValidationError, "Index name cannot be nil" if index.nil?
633
689
  raise ValidationError, "Index name must be a string" unless index.is_a?(String)
@@ -29,7 +29,9 @@ module Vectra
29
29
  def health_check(index: nil, include_stats: false, timeout: 5)
30
30
  start_time = Time.now
31
31
 
32
- indexes = with_timeout(timeout) { list_indexes }
32
+ # For health checks we bypass client middleware and call the provider
33
+ # directly to avoid interference from custom stacks.
34
+ indexes = with_timeout(timeout) { provider.list_indexes }
33
35
  index_name = index || indexes.first&.dig(:name)
34
36
 
35
37
  result = base_result(start_time, indexes)
@@ -70,7 +72,7 @@ module Vectra
70
72
  def add_index_stats(result, index_name, include_stats, timeout)
71
73
  return unless include_stats && index_name
72
74
 
73
- stats = with_timeout(timeout) { stats(index: index_name) }
75
+ stats = with_timeout(timeout) { provider.stats(index: index_name) }
74
76
  result[:index] = index_name
75
77
  result[:stats] = {
76
78
  vector_count: stats[:total_vector_count],
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Middleware
5
+ # Base class for all middleware
6
+ #
7
+ # Middleware can hook into three lifecycle events:
8
+ # - before(request): Called before the next middleware/provider
9
+ # - after(request, response): Called after the next middleware/provider returns a response (including responses that carry an error)
10
+ # - on_error(request, error): Called when an error occurs
11
+ #
12
+ # @example Simple logging middleware
13
+ # class LoggingMiddleware < Vectra::Middleware::Base
14
+ # def before(request)
15
+ # puts "Starting #{request.operation}"
16
+ # end
17
+ #
18
+ # def after(request, response)
19
+ # puts "Completed #{request.operation}"
20
+ # end
21
+ # end
22
+ #
23
+ # @example Error handling middleware
24
+ # class ErrorHandlerMiddleware < Vectra::Middleware::Base
25
+ # def on_error(request, error)
26
+ # ErrorTracker.notify(error, context: { operation: request.operation })
27
+ # end
28
+ # end
29
+ #
30
class Base
  # Execute the middleware.
  #
  # Entry point invoked by the middleware stack. Runs +before+, hands the
  # request to +app+, then runs +on_error+ (only when the response carries
  # an error) followed by +after+. A StandardError raised along the way is
  # handed to +on_error+ and then re-raised.
  #
  # @param request [Request] The request object
  # @param app [#call] The next middleware in the chain
  # @return [Response] The response returned by +app+
  # @raise [StandardError] whatever +before+, +app+, +on_error+ or +after+ raised
  def call(request, app)
    # True only while on_error is handling an error *response*. If the hook
    # itself raises at that point, the rescue below must not feed the hook's
    # own exception back into on_error a second time.
    in_error_hook = false

    before(request)

    response = app.call(request)

    if response.error
      in_error_hook = true
      on_error(request, response.error)
      in_error_hook = false
    end

    # Runs for every returned response, including ones carrying an error.
    after(request, response)

    response
  rescue StandardError => e
    # Exceptions raised directly (by a hook or further down the chain).
    on_error(request, e) unless in_error_hook
    raise
  end

  protected

  # Hook called before the next middleware.
  #
  # Override this method to add logic before the operation executes.
  #
  # @param request [Request] The request object
  # @return [void]
  def before(request)
    # Override in subclass
  end

  # Hook called after the next middleware returned a response.
  #
  # Invoked for every returned response, whether or not it carries an
  # error; it is not invoked when an exception was raised instead.
  #
  # @param request [Request] The request object
  # @param response [Response] The response object
  # @return [void]
  def after(request, response)
    # Override in subclass
  end

  # Hook called when an error occurs.
  #
  # Receives either the error attached to a returned response or an
  # exception raised during the call; raised exceptions are re-raised after
  # this hook executes.
  #
  # @param request [Request] The request object
  # @param error [Exception] The error that occurred
  # @return [void]
  def on_error(request, error)
    # Override in subclass
  end
end
96
+ end
97
+ end
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Middleware
5
+ # Cost tracking middleware for monitoring API usage costs
6
+ #
7
+ # Tracks estimated costs per operation based on provider pricing.
8
+ # Costs are stored in response metadata and can be aggregated.
9
+ #
10
+ # @example With default pricing
11
+ # Vectra::Client.use Vectra::Middleware::CostTracker
12
+ #
13
+ # @example With custom pricing
14
+ # custom_pricing = {
15
+ # pinecone: { read: 0.0001, write: 0.0002 },
16
+ # qdrant: { read: 0.00005, write: 0.0001 }
17
+ # }
18
+ # Vectra::Client.use Vectra::Middleware::CostTracker, pricing: custom_pricing
19
+ #
20
+ # @example With cost callback
21
+ # Vectra::Client.use Vectra::Middleware::CostTracker, on_cost: ->(event) {
22
+ # puts "Cost: $#{event[:cost_usd]} for #{event[:operation]}"
23
+ # }
24
+ #
25
class CostTracker < Base
  # Estimated USD price of a single read/write, keyed by provider.
  # Rough figures only; consult the provider's pricing for real numbers.
  DEFAULT_PRICING = {
    pinecone: { read: 0.0001, write: 0.0002 },
    qdrant: { read: 0.00005, write: 0.0001 },
    weaviate: { read: 0.00008, write: 0.00015 },
    pgvector: { read: 0.0, write: 0.0 }, # Self-hosted, no API costs
    memory: { read: 0.0, write: 0.0 } # In-memory, no costs
  }.freeze

  # @param pricing [Hash] provider => { read:, write: } rate table
  # @param on_cost [Proc, nil] invoked with a cost event after each priced operation
  def initialize(pricing: DEFAULT_PRICING, on_cost: nil)
    super()
    @on_cost = on_cost
    @pricing = pricing
  end

  # Price a successful operation, record it under +:cost_usd+ in the
  # response metadata and, when a callback was given, report the event.
  #
  # @param request [Request] The request object
  # @param response [Response] The response object
  def after(request, response)
    return unless response.success?

    provider_name = request.provider || :unknown
    kind = if write_operation?(request.operation)
             :write
           else
             :read
           end

    amount = calculate_cost(provider_name, kind, request)
    response.metadata[:cost_usd] = amount

    return unless @on_cost

    event = {
      operation: request.operation,
      provider: provider_name,
      index: request.index,
      cost_usd: amount,
      timestamp: Time.now
    }
    @on_cost.call(**event)
  end

  private

  # @param operation [Symbol] The operation type
  # @return [Boolean] whether the operation is billed at the write rate
  def write_operation?(operation)
    %i[upsert delete update create_index delete_index].include?(operation)
  end

  # Rate for the provider/type (0.0 when not in the table) scaled by the
  # operation's multiplier.
  #
  # @param provider [Symbol] Provider name
  # @param operation_type [Symbol] :read or :write
  # @param request [Request] The request object
  # @return [Float] Cost in USD
  def calculate_cost(provider, operation_type, request)
    unit_price = @pricing.dig(provider, operation_type) || 0.0
    unit_price * operation_multiplier(request)
  end

  # How many billable units the request represents: a flat 100 for
  # delete-all, the batch size for upsert/fetch/delete, otherwise 1.
  #
  # @param request [Request] The request object
  # @return [Integer, Float] Multiplier for the base rate
  def operation_multiplier(request)
    return 100 if delete_all?(request)

    case request.operation
    when :upsert then collection_size(request.params[:vectors])
    when :fetch, :delete then collection_size(request.params[:ids])
    else 1 # Includes :query and all other operations
    end
  end

  # @param request [Request] The request object
  # @return [Boolean] truthy for a delete carrying the delete_all param
  def delete_all?(request)
    request.operation == :delete && request.params[:delete_all]
  end

  # @param collection [Enumerable, nil]
  # @return [Integer] the collection's size, or 1 when it is nil
  def collection_size(collection)
    collection&.size || 1
  end
end
120
+ end
121
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Middleware
5
+ # Instrumentation middleware for metrics and monitoring
6
+ #
7
+ # Emits instrumentation events for all operations, compatible with
8
+ # Vectra's existing instrumentation system.
9
+ #
10
+ # @example Enable instrumentation middleware
11
+ # Vectra::Client.use Vectra::Middleware::Instrumentation
12
+ #
13
+ # @example With custom event handler
14
+ # Vectra.on_operation do |event|
15
+ # puts "Operation: #{event[:operation]}, Duration: #{event[:duration_ms]}ms"
16
+ # end
17
+ #
18
+ # Vectra::Client.use Vectra::Middleware::Instrumentation
19
+ #
20
class Instrumentation < Base
  # Time the downstream call and emit one instrumentation event for it.
  #
  # An event is emitted both when the chain returns a response and when it
  # raises; in the latter case the event is marked unsuccessful and the
  # exception is re-raised unchanged.
  #
  # @param request [Request] The request object
  # @param app [#call] The next middleware in the chain
  # @return [Response] The response returned by +app+
  def call(request, app)
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    begin
      response = app.call(request)
    rescue StandardError => e
      # Without this, operations that raise would never reach the metrics.
      emit_event(request, started_at, success: false, error: e.class.name, metadata: {})
      raise
    end

    emit_event(
      request,
      started_at,
      success: response.success?,
      error: response.error&.class&.name,
      metadata: response.metadata
    )

    response
  end

  private

  # Emit the instrumentation event for one finished call.
  #
  # @param request [Request] The request object
  # @param started_at [Float] monotonic clock reading taken before the call
  # @param success [Boolean] whether the call succeeded
  # @param error [String, nil] error class name, if any
  # @param metadata [Hash] metadata to attach to the event
  def emit_event(request, started_at, success:, error:, metadata:)
    # Monotonic clock: immune to wall-clock adjustments during the call.
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

    Vectra::Instrumentation.instrument(
      operation: request.operation,
      provider: request.provider,
      index: request.index,
      namespace: request.namespace,
      duration_ms: (elapsed * 1000).round(2),
      success: success,
      error: error,
      metadata: metadata
    )
  end
end
43
+ end
44
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Middleware
5
+ # Logging middleware for tracking operations
6
+ #
7
+ # Logs before and after each operation, including timing information.
8
+ #
9
+ # @example With default logger
10
+ # Vectra::Client.use Vectra::Middleware::Logging
11
+ #
12
+ # @example With custom logger
13
+ # logger = Logger.new($stdout)
14
+ # Vectra::Client.use Vectra::Middleware::Logging, logger: logger
15
+ #
16
+ # @example Per-client logging
17
+ # client = Vectra::Client.new(
18
+ # provider: :qdrant,
19
+ # middleware: [Vectra::Middleware::Logging]
20
+ # )
21
+ #
22
class Logging < Base
  # Request-metadata key holding the monotonic start reading. Timing lives
  # on the request rather than on this object, so overlapping requests
  # through one middleware instance cannot overwrite each other's start time.
  STARTED_AT_KEY = :vectra_logging_started_at

  # @param logger [#info, #error, nil] logger to use; falls back to
  #   +Vectra.configuration.logger+ when nil
  def initialize(logger: nil)
    super()
    @logger = logger || Vectra.configuration.logger
  end

  # Record the start time on the request and log the operation header.
  #
  # @param request [Request] The request object
  def before(request)
    return unless @logger

    request.metadata[STARTED_AT_KEY] = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    @logger.info(
      "[Vectra] #{request.operation.upcase} " \
      "index=#{request.index} " \
      "namespace=#{request.namespace || 'default'}"
    )
  end

  # Store the elapsed time under +:duration_ms+ in the response metadata
  # and log the outcome.
  #
  # @param request [Request] The request object
  # @param response [Response] The response object
  def after(request, response)
    return unless @logger

    # Remove the marker so it does not linger in the request metadata.
    started_at = request.metadata.delete(STARTED_AT_KEY)
    return unless started_at

    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    duration_ms = (elapsed * 1000).round(2)
    response.metadata[:duration_ms] = duration_ms

    if response.success?
      @logger.info("[Vectra] ✅ #{request.operation} completed in #{duration_ms}ms")
    else
      @logger.error("[Vectra] ❌ #{request.operation} failed: #{response.error.message}")
    end
  end

  # Log an error reported for the operation.
  #
  # @param request [Request] The request object
  # @param error [Exception] The error that occurred
  def on_error(request, error)
    return unless @logger

    @logger.error(
      "[Vectra] 💥 #{request.operation} exception: #{error.class} - #{error.message}"
    )
  end
end
61
+ end
62
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Middleware
5
+ # PII Redaction middleware for protecting sensitive data
6
+ #
7
+ # Automatically redacts Personally Identifiable Information (PII) from
8
+ # metadata before upserting to vector databases.
9
+ #
10
+ # @example With default patterns (email, phone, SSN, credit card)
11
+ # Vectra::Client.use Vectra::Middleware::PIIRedaction
12
+ #
13
+ # @example With custom patterns
14
+ # custom_patterns = {
15
+ # credit_card: /\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b/,
16
+ # api_key: /sk-[a-zA-Z0-9]{32}/
17
+ # }
18
+ # Vectra::Client.use Vectra::Middleware::PIIRedaction, patterns: custom_patterns
19
+ #
20
class PIIRedaction < Base
  # Default PII patterns, applied in this order.
  DEFAULT_PATTERNS = {
    # TLD class is [A-Za-z]; a "|" inside a character class is a literal
    # pipe, not alternation, and must not be accepted as part of an address.
    email: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/,
    phone: /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/,
    ssn: /\b\d{3}-\d{2}-\d{4}\b/,
    credit_card: /\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b/
  }.freeze

  # @param patterns [Hash{Symbol => Regexp}] label => pattern; every match
  #   is replaced with "[REDACTED_<LABEL>]"
  def initialize(patterns: DEFAULT_PATTERNS)
    super()
    @patterns = patterns
  end

  # Redact metadata of every vector in an upsert request.
  #
  # Other operations are left untouched. Each vector's +:metadata+ entry is
  # reassigned in place on the vector held in +request.params[:vectors]+.
  #
  # @param request [Request] The request object
  # @return [void]
  def before(request)
    return unless request.operation == :upsert

    vectors = request.params[:vectors]
    return unless vectors

    vectors.each do |vector|
      next unless vector[:metadata]

      vector[:metadata] = redact_metadata(vector[:metadata])
    end
  end

  private

  # Redact PII from a metadata hash, descending into nested values.
  #
  # @param metadata [Hash] Metadata to redact
  # @return [Hash] A new hash with redacted values (keys are unchanged)
  def redact_metadata(metadata)
    metadata.transform_values { |value| redact_value(value) }
  end

  # Redact a single metadata value. Strings are scrubbed; hashes and arrays
  # are walked so PII nested inside them is scrubbed too; anything else is
  # returned untouched.
  #
  # @param value [Object]
  # @return [Object]
  def redact_value(value)
    case value
    when String then redact_string(value)
    when Hash then redact_metadata(value)
    when Array then value.map { |item| redact_value(item) }
    else value
    end
  end

  # Apply every configured pattern to a copy of the string.
  #
  # @param text [String]
  # @return [String] a new string; the input is not modified
  def redact_string(text)
    @patterns.reduce(text) do |scrubbed, (type, pattern)|
      scrubbed.gsub(pattern, "[REDACTED_#{type.upcase}]")
    end
  end
end
64
+ end
65
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Middleware
5
+ # Request object passed through middleware chain
6
+ #
7
+ # @example Basic usage
8
+ # request = Request.new(
9
+ # operation: :upsert,
10
+ # index: 'products',
11
+ # namespace: 'prod',
12
+ # vectors: [{ id: 'doc-1', values: [0.1, 0.2, 0.3] }]
13
+ # )
14
+ #
15
+ # request.operation # => :upsert
16
+ # request.index # => 'products'
17
+ # request.namespace # => 'prod'
18
+ # request.metadata[:custom_key] = 'custom_value'
19
+ #
20
class Request
  # Operations that modify data or indexes.
  WRITE_OPERATIONS = %i[upsert delete update create_index delete_index].freeze

  # Operations that only read. :hybrid_search is a query variant and is
  # dispatched through the middleware stack like :query.
  READ_OPERATIONS = %i[query hybrid_search fetch list_indexes describe_index stats].freeze

  attr_accessor :operation, :index, :namespace, :params, :metadata

  # @param operation [Symbol] The operation type (:upsert, :query, :delete, etc.)
  # @param params [Hash] All parameters for the operation; +:index+ and
  #   +:namespace+ are additionally exposed through their own accessors
  def initialize(operation:, **params)
    @operation = operation
    @index = params[:index]
    @namespace = params[:namespace]
    @params = params
    # Free-form scratch space for middleware; starts empty.
    @metadata = {}
  end

  # Convert request back to hash for provider call.
  #
  # @return [Hash] The params hash itself (not a copy)
  def to_h
    params
  end

  # Get the provider from params.
  #
  # @return [Symbol, nil] Provider name, or nil when none was supplied
  def provider
    params[:provider]
  end

  # Check if this is a write operation.
  #
  # @return [Boolean]
  def write_operation?
    WRITE_OPERATIONS.include?(operation)
  end

  # Check if this is a read operation.
  #
  # @return [Boolean]
  def read_operation?
    READ_OPERATIONS.include?(operation)
  end
end
61
+ end
62
+ end