vectra-client 1.0.7 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,8 +2,9 @@
2
2
 
3
3
  require "active_support/concern"
4
4
 
5
- # Ensure Client and Providers are loaded (for Rails autoloading compatibility)
5
+ # Ensure Client and supporting classes are loaded (for Rails autoloading compatibility)
6
6
  require_relative "client" unless defined?(Vectra::Client)
7
+ require_relative "batch" unless defined?(Vectra::Batch)
7
8
 
8
9
  module Vectra
9
10
  # ActiveRecord integration for vector embeddings
@@ -26,6 +27,7 @@ module Vectra
26
27
  # # Search similar documents
27
28
  # results = Document.vector_search([0.1, 0.2, ...], limit: 10)
28
29
  #
30
+ # rubocop:disable Metrics/ModuleLength
29
31
  module ActiveRecord
30
32
  extend ActiveSupport::Concern
31
33
 
@@ -86,6 +88,54 @@ module Vectra
86
88
  end
87
89
  end
88
90
 
91
# Reindex all vectors for this model using the current Vectra configuration.
#
# Records are loaded batch by batch. A record whose embedding attribute is
# nil is skipped, and a batch left with nothing to send is not upserted.
#
# @param scope [ActiveRecord::Relation] records to reindex (default: all)
# @param batch_size [Integer] number of records loaded (and upserted) per batch
# @param on_progress [Proc, nil] optional callback forwarded unchanged to
#   Vectra::Batch#upsert_async for every batch
#   NOTE(review): the payload the callback receives is defined by
#   Vectra::Batch, not here - confirm its keys against that class.
#
# @return [Integer] number of vectors handed to the batch upserter
#   (records with a nil embedding are not counted)
def reindex_vectors(scope: all, batch_size: 1_000, on_progress: nil)
  config = _vectra_config
  batch = Vectra::Batch.new(vectra_client)
  processed = 0

  # find_in_batches yields records that are already loaded, so every batch
  # costs a single SELECT. in_batches followed by to_a first plucks the ids
  # and then reloads the same rows with a second query.
  scope.find_in_batches(batch_size: batch_size) do |records|
    vectors = records.map do |record|
      vector = record.send(config[:attribute])
      next if vector.nil?

      # Only copy metadata fields the record actually exposes.
      metadata = config[:metadata_fields].each_with_object({}) do |field, hash|
        hash[field.to_s] = record.send(field) if record.respond_to?(field)
      end

      {
        id: "#{config[:index]}_#{record.id}",
        values: vector,
        metadata: metadata
      }
    end.compact

    next if vectors.empty?

    batch.upsert_async(
      index: config[:index],
      vectors: vectors,
      namespace: nil,
      on_progress: on_progress
    )

    processed += vectors.size
  end

  processed
end
138
+
89
139
  # Search vectors
90
140
  #
91
141
  # @api private
@@ -195,4 +245,5 @@ module Vectra
195
245
  "#{self.class._vectra_config[:index]}_#{id}"
196
246
  end
197
247
  end
248
+ # rubocop:enable Metrics/ModuleLength
198
249
  end
data/lib/vectra/cache.rb CHANGED
@@ -258,4 +258,53 @@ module Vectra
258
258
  "#{index}:f:#{id}:#{namespace || 'default'}"
259
259
  end
260
260
  end
261
+
262
# Cache embeddings under keys derived from model name, field, record ID
# and the input text the embedding was computed from.
#
# @example
#   cache = Vectra::Cache.new(ttl: 600, max_size: 1000)
#
#   embedding = Vectra::Embeddings.fetch(
#     cache: cache,
#     model_name: "Product",
#     id: product.id,
#     input: product.description,
#     field: :description
#   ) do
#     EmbeddingService.generate(product.description)
#   end
#
module Embeddings
  module_function

  # Derive the cache key for one embedding.
  #
  # The key keeps the model name and field readable and appends the first
  # 32 hex characters of the SHA-256 of "model:field:id:input", so a change
  # to the record ID or the input text yields a different key.
  #
  # @param model_name [String] model class name (e.g. "Product")
  # @param id [Integer, String] record ID
  # @param input [String] raw input used for embedding
  # @param field [Symbol, String, nil] optional field name ("default" when omitted)
  #
  # @return [String] cache key of the form "emb:<model>:<field>:<digest>"
  def cache_key(model_name:, id:, input:, field: nil)
    field_part = (field || "default").to_s
    fingerprint_source = format("%s:%s:%s:%s", model_name, field_part, id, input)
    fingerprint = Digest::SHA256.hexdigest(fingerprint_source).slice(0, 32)
    format("emb:%s:%s:%s", model_name, field_part, fingerprint)
  end

  # Return the cached embedding for the given inputs, delegating to the
  # cache's own #fetch with the derived key and the supplied block.
  #
  # @param cache [Vectra::Cache] cache instance
  # @param model_name [String] model class name
  # @param id [Integer, String] record ID
  # @param input [String] input used for embedding
  # @param field [Symbol, String, nil] optional field name
  #
  # @yield block handed to cache.fetch (computes the embedding on a miss)
  # @return [Object] whatever cache.fetch returns
  def fetch(cache:, model_name:, id:, input:, field: nil, &block)
    cache.fetch(
      cache_key(model_name: model_name, id: id, input: input, field: field),
      &block
    )
  end
end
261
310
  end
data/lib/vectra/client.rb CHANGED
@@ -40,7 +40,38 @@ module Vectra
40
40
  class Client
41
41
  include Vectra::HealthCheck
42
42
 
43
- attr_reader :config, :provider
43
+ attr_reader :config, :provider, :default_index, :default_namespace
44
+
45
class << self
  # Global middleware stack, created empty on first access.
  #
  # @return [Array<Array>] list of [middleware_class, options] pairs
  def middleware
    return @middleware if @middleware

    @middleware = []
  end

  # Register a middleware class on the global stack.
  #
  # @param middleware_class [Class] middleware class
  # @param options [Hash] keyword options stored alongside the class and
  #   passed to its constructor when a client builds its stack
  # @return [Array<Array>] the global stack after the addition
  #
  # @example Add global logging middleware
  #   Vectra::Client.use Vectra::Middleware::Logging
  #
  # @example Add middleware with options
  #   Vectra::Client.use Vectra::Middleware::Retry, max_attempts: 5
  #
  def use(middleware_class, **options)
    middleware.push([middleware_class, options])
  end

  # Replace the global stack with a fresh, empty one.
  #
  # @return [void]
  def clear_middleware!
    @middleware = []
  end
end
44
75
 
45
76
  # Initialize a new Client
46
77
  #
@@ -49,17 +80,23 @@ module Vectra
49
80
  # @param environment [String, nil] environment/region
50
81
  # @param host [String, nil] custom host URL
51
82
  # @param options [Hash] additional options
83
+ # @option options [String] :index default index name
84
+ # @option options [String] :namespace default namespace
85
+ # @option options [Array<Class, Object>] :middleware instance-level middleware
52
86
  def initialize(provider: nil, api_key: nil, environment: nil, host: nil, **options)
53
87
  @config = build_config(provider, api_key, environment, host, options)
54
88
  @config.validate!
55
89
  @provider = build_provider
90
+ @default_index = options[:index]
91
+ @default_namespace = options[:namespace]
92
+ @middleware = build_middleware_stack(options[:middleware])
56
93
  end
57
94
 
58
95
  # Upsert vectors into an index
59
96
  #
60
- # @param index [String] the index/collection name
61
97
  # @param vectors [Array<Hash, Vector>] vectors to upsert
62
- # @param namespace [String, nil] optional namespace (provider-specific)
98
+ # @param index [String, nil] the index/collection name (falls back to client's default)
99
+ # @param namespace [String, nil] optional namespace (provider-specific, falls back to client's default)
63
100
  # @return [Hash] upsert response with :upserted_count
64
101
  #
65
102
  # @example Upsert vectors
@@ -71,7 +108,9 @@ module Vectra
71
108
  # ]
72
109
  # )
73
110
  #
74
- def upsert(index:, vectors:, namespace: nil)
111
+ def upsert(vectors:, index: nil, namespace: nil)
112
+ index ||= default_index
113
+ namespace ||= default_namespace
75
114
  validate_index!(index)
76
115
  validate_vectors!(vectors)
77
116
 
@@ -81,7 +120,7 @@ module Vectra
81
120
  index: index,
82
121
  metadata: { vector_count: vectors.size }
83
122
  ) do
84
- provider.upsert(index: index, vectors: vectors, namespace: namespace)
123
+ @middleware.call(:upsert, index: index, vectors: vectors, namespace: namespace, provider: provider_name)
85
124
  end
86
125
  end
87
126
 
@@ -130,6 +169,10 @@ module Vectra
130
169
  # Handle positional argument for index in non-builder case
131
170
  index = index_arg if index_arg && index.nil?
132
171
 
172
+ # Fall back to default index/namespace when not provided
173
+ index ||= default_index
174
+ namespace ||= default_namespace
175
+
133
176
  # Backwards-compatible path: perform query immediately
134
177
  validate_index!(index)
135
178
  validate_query_vector!(vector)
@@ -141,14 +184,16 @@ module Vectra
141
184
  index: index,
142
185
  metadata: { top_k: top_k }
143
186
  ) do
144
- result = provider.query(
187
+ result = @middleware.call(
188
+ :query,
145
189
  index: index,
146
190
  vector: vector,
147
191
  top_k: top_k,
148
192
  namespace: namespace,
149
193
  filter: filter,
150
194
  include_values: include_values,
151
- include_metadata: include_metadata
195
+ include_metadata: include_metadata,
196
+ provider: provider_name
152
197
  )
153
198
  end
154
199
 
@@ -157,16 +202,18 @@ module Vectra
157
202
 
158
203
  # Fetch vectors by IDs
159
204
  #
160
- # @param index [String] the index/collection name
161
205
  # @param ids [Array<String>] vector IDs to fetch
162
- # @param namespace [String, nil] optional namespace
206
+ # @param index [String, nil] the index/collection name (falls back to client's default)
207
+ # @param namespace [String, nil] optional namespace (falls back to client's default)
163
208
  # @return [Hash<String, Vector>] hash of ID to Vector
164
209
  #
165
210
  # @example Fetch vectors
166
211
  # vectors = client.fetch(index: 'my-index', ids: ['vec1', 'vec2'])
167
212
  # vectors['vec1'].values # => [0.1, 0.2, 0.3]
168
213
  #
169
- def fetch(index:, ids:, namespace: nil)
214
+ def fetch(ids:, index: nil, namespace: nil)
215
+ index ||= default_index
216
+ namespace ||= default_namespace
170
217
  validate_index!(index)
171
218
  validate_ids!(ids)
172
219
 
@@ -176,14 +223,14 @@ module Vectra
176
223
  index: index,
177
224
  metadata: { id_count: ids.size }
178
225
  ) do
179
- provider.fetch(index: index, ids: ids, namespace: namespace)
226
+ @middleware.call(:fetch, index: index, ids: ids, namespace: namespace, provider: provider_name)
180
227
  end
181
228
  end
182
229
 
183
230
  # Update a vector's metadata or values
184
231
  #
185
- # @param index [String] the index/collection name
186
232
  # @param id [String] vector ID
233
+ # @param index [String, nil] the index/collection name (falls back to client's default)
187
234
  # @param metadata [Hash, nil] new metadata (merged with existing)
188
235
  # @param values [Array<Float>, nil] new vector values
189
236
  # @param namespace [String, nil] optional namespace
@@ -196,7 +243,9 @@ module Vectra
196
243
  # metadata: { category: 'updated' }
197
244
  # )
198
245
  #
199
- def update(index:, id:, metadata: nil, values: nil, namespace: nil)
246
+ def update(id:, index: nil, metadata: nil, values: nil, namespace: nil)
247
+ index ||= default_index
248
+ namespace ||= default_namespace
200
249
  validate_index!(index)
201
250
  validate_id!(id)
202
251
 
@@ -208,12 +257,14 @@ module Vectra
208
257
  index: index,
209
258
  metadata: { has_metadata: !metadata.nil?, has_values: !values.nil? }
210
259
  ) do
211
- provider.update(
260
+ @middleware.call(
261
+ :update,
212
262
  index: index,
213
263
  id: id,
214
264
  metadata: metadata,
215
265
  values: values,
216
- namespace: namespace
266
+ namespace: namespace,
267
+ provider: provider_name
217
268
  )
218
269
  end
219
270
  end
@@ -236,7 +287,9 @@ module Vectra
236
287
  # @example Delete all
237
288
  # client.delete(index: 'my-index', delete_all: true)
238
289
  #
239
- def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
290
+ def delete(index: nil, ids: nil, namespace: nil, filter: nil, delete_all: false)
291
+ index ||= default_index
292
+ namespace ||= default_namespace
240
293
  validate_index!(index)
241
294
 
242
295
  if ids.nil? && filter.nil? && !delete_all
@@ -249,12 +302,14 @@ module Vectra
249
302
  index: index,
250
303
  metadata: { id_count: ids&.size, delete_all: delete_all, has_filter: !filter.nil? }
251
304
  ) do
252
- provider.delete(
305
+ @middleware.call(
306
+ :delete,
253
307
  index: index,
254
308
  ids: ids,
255
309
  namespace: namespace,
256
310
  filter: filter,
257
- delete_all: delete_all
311
+ delete_all: delete_all,
312
+ provider: provider_name
258
313
  )
259
314
  end
260
315
  end
@@ -268,7 +323,7 @@ module Vectra
268
323
  # indexes.each { |idx| puts idx[:name] }
269
324
  #
270
325
  def list_indexes
271
- provider.list_indexes
326
+ @middleware.call(:list_indexes, provider: provider_name)
272
327
  end
273
328
 
274
329
  # Describe an index
@@ -280,9 +335,10 @@ module Vectra
280
335
  # info = client.describe_index(index: 'my-index')
281
336
  # puts info[:dimension]
282
337
  #
283
- def describe_index(index:)
338
+ def describe_index(index: nil)
339
+ index ||= default_index
284
340
  validate_index!(index)
285
- provider.describe_index(index: index)
341
+ @middleware.call(:describe_index, index: index, provider: provider_name)
286
342
  end
287
343
 
288
344
  # Get index statistics
@@ -295,9 +351,11 @@ module Vectra
295
351
  # stats = client.stats(index: 'my-index')
296
352
  # puts "Total vectors: #{stats[:total_vector_count]}"
297
353
  #
298
- def stats(index:, namespace: nil)
354
+ def stats(index: nil, namespace: nil)
355
+ index ||= default_index
356
+ namespace ||= default_namespace
299
357
  validate_index!(index)
300
- provider.stats(index: index, namespace: namespace)
358
+ @middleware.call(:stats, index: index, namespace: namespace, provider: provider_name)
301
359
  end
302
360
 
303
361
  # Create a new index
@@ -323,7 +381,7 @@ module Vectra
323
381
  index: name,
324
382
  metadata: { dimension: dimension, metric: metric }
325
383
  ) do
326
- provider.create_index(name: name, dimension: dimension, metric: metric, **options)
384
+ @middleware.call(:create_index, name: name, dimension: dimension, metric: metric, provider: provider_name, **options)
327
385
  end
328
386
  end
329
387
 
@@ -346,7 +404,7 @@ module Vectra
346
404
  provider: provider_name,
347
405
  index: name
348
406
  ) do
349
- provider.delete_index(name: name)
407
+ @middleware.call(:delete_index, name: name, provider: provider_name)
350
408
  end
351
409
  end
352
410
 
@@ -359,7 +417,8 @@ module Vectra
359
417
  # namespaces = client.list_namespaces(index: 'documents')
360
418
  # namespaces.each { |ns| puts "Namespace: #{ns}" }
361
419
  #
362
- def list_namespaces(index:)
420
+ def list_namespaces(index: nil)
421
+ index ||= default_index
363
422
  validate_index!(index)
364
423
  stats_data = provider.stats(index: index)
365
424
  namespaces = stats_data[:namespaces] || {}
@@ -408,6 +467,8 @@ module Vectra
408
467
  #
409
468
  def hybrid_search(index:, vector:, text:, alpha: 0.5, top_k: 10, namespace: nil,
410
469
  filter: nil, include_values: false, include_metadata: true)
470
+ index ||= default_index
471
+ namespace ||= default_namespace
411
472
  validate_index!(index)
412
473
  validate_query_vector!(vector)
413
474
  raise ValidationError, "Text query cannot be nil or empty" if text.nil? || text.empty?
@@ -418,7 +479,8 @@ module Vectra
418
479
  "Hybrid search is not supported by #{provider_name} provider"
419
480
  end
420
481
 
421
- provider.hybrid_search(
482
+ @middleware.call(
483
+ :hybrid_search,
422
484
  index: index,
423
485
  vector: vector,
424
486
  text: text,
@@ -427,7 +489,8 @@ module Vectra
427
489
  namespace: namespace,
428
490
  filter: filter,
429
491
  include_values: include_values,
430
- include_metadata: include_metadata
492
+ include_metadata: include_metadata,
493
+ provider: provider_name
431
494
  )
432
495
  end
433
496
 
@@ -606,6 +669,21 @@ module Vectra
606
669
  end
607
670
  end
608
671
 
672
# Build the middleware stack for this client.
#
# Global (class-level) middleware is instantiated first from its stored
# [class, options] pairs; instance-level entries follow, where a Class is
# instantiated without arguments and any other object is used as given.
#
# @param instance_middleware [Array<Class, Object>, Class, Object, nil]
#   middleware supplied for this client only
# @return [Middleware::Stack] stack wrapping @provider
def build_middleware_stack(instance_middleware = nil)
  global_layers = self.class.middleware.map { |klass, opts| klass.new(**opts) }

  instance_entries = instance_middleware ? Array(instance_middleware) : []
  instance_layers = instance_entries.map do |entry|
    if entry.is_a?(Class)
      entry.new
    else
      entry
    end
  end

  Middleware::Stack.new(@provider, global_layers + instance_layers)
end
686
+
609
687
  def validate_index!(index)
610
688
  raise ValidationError, "Index name cannot be nil" if index.nil?
611
689
  raise ValidationError, "Index name must be a string" unless index.is_a?(String)
@@ -671,6 +749,48 @@ module Vectra
671
749
  config.logger.debug("[Vectra] #{message}")
672
750
  config.logger.debug("[Vectra] #{data.inspect}") if data
673
751
  end
752
+
753
# Run a block with a different default index, restoring the previous
# default afterwards even when the block raises.
#
# @param index [String] temporary index name
# @yield [Client] yields self while the override is active
# @return [Object] block result
def with_index(index)
  previous, @default_index = @default_index, index
  yield self
ensure
  @default_index = previous
end
765
+
766
# Run a block with a different default namespace, restoring the previous
# default afterwards even when the block raises.
#
# @param namespace [String] temporary namespace
# @yield [Client] yields self while the override is active
# @return [Object] block result
def with_namespace(namespace)
  previous, @default_namespace = @default_namespace, namespace
  yield self
ensure
  @default_namespace = previous
end
778
+
779
# Run a block with both the default index and the default namespace
# overridden, by nesting with_index and with_namespace.
#
# @param index [String] temporary index name
# @param namespace [String] temporary namespace
# @yield [Client] yields self while both overrides are active
# @return [Object] block result
def with_index_and_namespace(index, namespace)
  with_index(index) { with_namespace(namespace) { yield self } }
end
792
+
793
+ public :with_index, :with_namespace, :with_index_and_namespace
674
794
  end
675
795
  # rubocop:enable Metrics/ClassLength
676
796
  end
@@ -29,7 +29,9 @@ module Vectra
29
29
  def health_check(index: nil, include_stats: false, timeout: 5)
30
30
  start_time = Time.now
31
31
 
32
- indexes = with_timeout(timeout) { list_indexes }
32
+ # For health checks we bypass client middleware and call the provider
33
+ # directly to avoid interference from custom stacks.
34
+ indexes = with_timeout(timeout) { provider.list_indexes }
33
35
  index_name = index || indexes.first&.dig(:name)
34
36
 
35
37
  result = base_result(start_time, indexes)
@@ -70,7 +72,7 @@ module Vectra
70
72
  def add_index_stats(result, index_name, include_stats, timeout)
71
73
  return unless include_stats && index_name
72
74
 
73
- stats = with_timeout(timeout) { stats(index: index_name) }
75
+ stats = with_timeout(timeout) { provider.stats(index: index_name) }
74
76
  result[:index] = index_name
75
77
  result[:stats] = {
76
78
  vector_count: stats[:total_vector_count],
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
module Vectra
  module Middleware
    # Base class for all middleware.
    #
    # Subclasses override any of three hooks:
    # - before(request): runs before the next middleware/provider is called
    # - after(request, response): runs once the next link has returned a
    #   response, including a response that carries an error
    # - on_error(request, error): runs when the response carries an error
    #   or when an exception is raised while this middleware executes
    #
    # @example Simple logging middleware
    #   class LoggingMiddleware < Vectra::Middleware::Base
    #     def before(request)
    #       puts "Starting #{request.operation}"
    #     end
    #
    #     def after(request, response)
    #       puts "Completed #{request.operation}"
    #     end
    #   end
    #
    # @example Error handling middleware
    #   class ErrorHandlerMiddleware < Vectra::Middleware::Base
    #     def on_error(request, error)
    #       ErrorTracker.notify(error, context: { operation: request.operation })
    #     end
    #   end
    #
    class Base
      # Run this middleware around the rest of the chain.
      #
      # Order of events: before, the next link, on_error (only if the
      # response reports an error), after. A StandardError raised at any of
      # those steps is passed to on_error and then re-raised.
      #
      # @param request [Request] The request object
      # @param app [Proc] The next middleware in the chain
      # @return [Response] The response returned by the next link
      def call(request, app)
        before(request)

        app.call(request).tap do |response|
          # Errors reported on the response (not raised) still reach on_error.
          on_error(request, response.error) if response.error
          after(request, response)
        end
      rescue StandardError => e
        # Exceptions raised directly are reported, then propagated unchanged.
        on_error(request, e)
        raise
      end

      protected

      # Hook invoked before the next middleware is called.
      #
      # No-op by default; override to act before the operation executes.
      #
      # @param request [Request] The request object
      # @return [void]
      def before(request)
        # Override in subclass
      end

      # Hook invoked after the next middleware has returned a response.
      #
      # No-op by default; override to act once the operation has completed.
      #
      # @param request [Request] The request object
      # @param response [Response] The response object
      # @return [void]
      def after(request, response)
        # Override in subclass
      end

      # Hook invoked when an error is reported or raised.
      #
      # No-op by default; override to add error handling logic.
      # An exception raised during #call is re-raised after this hook runs.
      #
      # @param request [Request] The request object
      # @param error [Exception] The error that occurred
      # @return [void]
      def on_error(request, error)
        # Override in subclass
      end
    end
  end
end