vectra-client 1.0.8 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Middleware
5
+ # Logging middleware for tracking operations
6
+ #
7
+ # Logs before and after each operation, including timing information.
8
+ #
9
+ # @example With default logger
10
+ # Vectra::Client.use Vectra::Middleware::Logging
11
+ #
12
+ # @example With custom logger
13
+ # logger = Logger.new($stdout)
14
+ # Vectra::Client.use Vectra::Middleware::Logging, logger: logger
15
+ #
16
+ # @example Per-client logging
17
+ # client = Vectra::Client.new(
18
+ # provider: :qdrant,
19
+ # middleware: [Vectra::Middleware::Logging]
20
+ # )
21
+ #
22
class Logging < Base
  # Key under which the per-request start timestamp is kept on
  # +request.metadata+. Keeping it on the request (instead of on the
  # middleware instance) stops overlapping requests that share this
  # middleware object from overwriting each other's timer.
  START_KEY = :logging_started_at

  # @param logger [#info, #error, nil] logger to write to; falls back to
  #   +Vectra.configuration.logger+ when nil. If both are nil, every hook
  #   is a no-op.
  def initialize(logger: nil)
    super()
    @logger = logger || Vectra.configuration.logger
  end

  # Record the start time and log the operation about to run.
  #
  # @param request [Request] the request entering the chain
  # @return [void]
  def before(request)
    return unless @logger

    request.metadata[START_KEY] = monotonic_now
    @logger.info(
      "[Vectra] #{request.operation.upcase} " \
      "index=#{request.index} " \
      "namespace=#{request.namespace || 'default'}"
    )
  end

  # Log the outcome and store the elapsed time in
  # +response.metadata[:duration_ms]+.
  #
  # @param request [Request] the request that was executed
  # @param response [Response] the response travelling back up the chain
  # @return [void]
  def after(request, response)
    return unless @logger

    started_at = request.metadata[START_KEY]
    # No start timestamp means #before never ran for this request.
    return unless started_at

    duration_ms = ((monotonic_now - started_at) * 1000).round(2)
    response.metadata[:duration_ms] = duration_ms

    if response.success?
      @logger.info("[Vectra] ✅ #{request.operation} completed in #{duration_ms}ms")
    else
      @logger.error("[Vectra] ❌ #{request.operation} failed: #{response.error.message}")
    end
  end

  # Log an exception raised while the request was being processed.
  #
  # @param request [Request] the request that failed
  # @param error [Exception] the raised exception
  # @return [void]
  def on_error(request, error)
    return unless @logger

    @logger.error(
      "[Vectra] 💥 #{request.operation} exception: #{error.class} - #{error.message}"
    )
  end

  private

  # Monotonic clock reading in seconds; unaffected by wall-clock
  # adjustments, so elapsed times can never come out negative.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
61
+ end
62
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Middleware
5
+ # PII Redaction middleware for protecting sensitive data
6
+ #
7
+ # Automatically redacts Personally Identifiable Information (PII) from
8
+ # metadata before upserting to vector databases.
9
+ #
10
+ # @example With default patterns (email, phone, SSN, credit card)
11
+ # Vectra::Client.use Vectra::Middleware::PIIRedaction
12
+ #
13
+ # @example With custom patterns
14
+ # custom_patterns = {
15
+ # credit_card: /\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b/,
16
+ # api_key: /sk-[a-zA-Z0-9]{32}/
17
+ # }
18
+ # Vectra::Client.use Vectra::Middleware::PIIRedaction, patterns: custom_patterns
19
+ #
20
class PIIRedaction < Base
  # Default PII patterns, keyed by the label used in the replacement
  # marker (e.g. +:email+ becomes "[REDACTED_EMAIL]").
  DEFAULT_PATTERNS = {
    # The TLD class is [A-Za-z]; a "|" inside a character class is a
    # literal pipe, not alternation, so it must not appear here.
    email: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/,
    phone: /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/,
    ssn: /\b\d{3}-\d{2}-\d{4}\b/,
    credit_card: /\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b/
  }.freeze

  # @param patterns [Hash{Symbol,String => Regexp}] label => pattern pairs
  #   to redact; defaults to DEFAULT_PATTERNS
  def initialize(patterns: DEFAULT_PATTERNS)
    super()
    @patterns = patterns
  end

  # Redact PII in the metadata of every vector of an upsert request.
  # Other operations, and upserts without +:vectors+, pass through
  # untouched. Each vector's +:metadata+ entry is replaced in place.
  #
  # @param request [Request] the request entering the chain
  # @return [void]
  def before(request)
    return unless request.operation == :upsert
    return unless request.params[:vectors]

    request.params[:vectors].each do |vector|
      next unless vector[:metadata]

      vector[:metadata] = redact_metadata(vector[:metadata])
    end
  end

  private

  # Redact PII from a metadata hash, including nested hashes and arrays.
  #
  # @param metadata [Hash] Metadata to redact
  # @return [Hash] Redacted copy of the metadata
  def redact_metadata(metadata)
    metadata.transform_values { |value| redact_value(value) }
  end

  # Redact a single metadata value. Strings are scrubbed, containers are
  # walked recursively so PII in nested structures is not missed, and any
  # other value (numbers, booleans, nil, ...) is returned unchanged.
  #
  # @param value [Object] the value to inspect
  # @return [Object] the redacted value
  def redact_value(value)
    case value
    when String then redact_string(value)
    when Hash then redact_metadata(value)
    when Array then value.map { |item| redact_value(item) }
    else value
    end
  end

  # Apply every configured pattern to a copy of +text+.
  #
  # @param text [String] the string to scrub
  # @return [String] a new string with matches replaced by markers
  def redact_string(text)
    @patterns.each_with_object(text.dup) do |(type, pattern), redacted|
      redacted.gsub!(pattern, "[REDACTED_#{type.upcase}]")
    end
  end
end
64
+ end
65
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Middleware
5
+ # Request object passed through middleware chain
6
+ #
7
+ # @example Basic usage
8
+ # request = Request.new(
9
+ # operation: :upsert,
10
+ # index: 'products',
11
+ # namespace: 'prod',
12
+ # vectors: [{ id: 'doc-1', values: [0.1, 0.2, 0.3] }]
13
+ # )
14
+ #
15
+ # request.operation # => :upsert
16
+ # request.index # => 'products'
17
+ # request.namespace # => 'prod'
18
+ # request.metadata[:custom_key] = 'custom_value'
19
+ #
20
class Request
  # Operations classified as writes by #write_operation?.
  WRITE_OPERATIONS = %i[upsert delete update create_index delete_index].freeze

  # Operations classified as reads by #read_operation?.
  READ_OPERATIONS = %i[
    query text_search hybrid_search fetch list_indexes describe_index stats
  ].freeze

  attr_accessor :operation, :params, :metadata
  attr_reader :index, :namespace

  # @param operation [Symbol] The operation type (:upsert, :query, :delete, etc.)
  # @param params [Hash] All parameters for the operation
  def initialize(operation:, **params)
    @operation = operation
    @index = params[:index]
    @namespace = params[:namespace]
    @params = params
    @metadata = {}
  end

  # Override the target index. The value is mirrored into #params so the
  # change is visible in #to_h instead of being silently dropped.
  #
  # @param value [String, nil] new index name
  def index=(value)
    @index = value
    sync_param(:index, value)
  end

  # Override the namespace. The value is mirrored into #params so the
  # change is visible in #to_h instead of being silently dropped.
  #
  # @param value [String, nil] new namespace
  def namespace=(value)
    @namespace = value
    sync_param(:namespace, value)
  end

  # Convert request back to hash for provider call
  #
  # @return [Hash] Parameters hash (the live hash, not a copy)
  def to_h
    params
  end

  # Get the provider from params
  #
  # @return [Symbol, nil] Provider name
  def provider
    params[:provider]
  end

  # Check if this is a write operation
  #
  # @return [Boolean]
  def write_operation?
    WRITE_OPERATIONS.include?(operation)
  end

  # Check if this is a read operation
  #
  # @return [Boolean]
  def read_operation?
    READ_OPERATIONS.include?(operation)
  end

  private

  # Mirror an accessor write into the params hash. Assigning nil to a key
  # that was never supplied is skipped, so no new nil-valued key is
  # introduced into the params.
  def sync_param(key, value)
    return if value.nil? && !@params.key?(key)

    @params[key] = value
  end
end
61
+ end
62
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Middleware
5
+ # Response object returned through middleware chain
6
+ #
7
+ # @example Success response
8
+ # response = Response.new(result: { success: true })
9
+ # response.success? # => true
10
+ # response.result # => { success: true }
11
+ #
12
+ # @example Error response
13
+ # response = Response.new(error: StandardError.new('Failed'))
14
+ # response.failure? # => true
15
+ # response.error # => #<StandardError: Failed>
16
+ #
17
+ # @example With metadata
18
+ # response = Response.new(result: [])
19
+ # response.metadata[:duration_ms] = 45
20
+ # response.metadata[:cache_hit] = true
21
+ #
22
class Response
  attr_accessor :result, :error, :metadata

  # Build a response carrying either a result or an error.
  #
  # @param result [Object] payload of a successful call
  # @param error [Exception, nil] failure captured during the call
  def initialize(result: nil, error: nil)
    @metadata = {}
    @error = error
    @result = result
  end

  # @return [Boolean] true when no error is stored
  def success?
    error.nil?
  end

  # @return [Boolean] true when an error is stored
  def failure?
    success? ? false : true
  end

  # Re-raise the stored error, if there is one.
  #
  # @raise [Exception] the stored error
  # @return [void]
  def raise_if_error!
    return unless error

    raise error
  end

  # Unwrap the response: the result on success, the stored error raised
  # otherwise.
  #
  # @return [Object] the result
  # @raise [Exception] the stored error
  def value!
    raise_if_error!
    result
  end
end
64
+ end
65
+ end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Middleware
5
+ # Retry middleware for handling transient failures
6
+ #
7
+ # Automatically retries failed requests with configurable backoff strategy.
8
+ #
9
+ # @example With default settings (3 attempts, exponential backoff)
10
+ # Vectra::Client.use Vectra::Middleware::Retry
11
+ #
12
+ # @example With custom settings
13
+ # Vectra::Client.use Vectra::Middleware::Retry, max_attempts: 5, backoff: :linear
14
+ #
15
+ # @example Per-client retry
16
+ # client = Vectra::Client.new(
17
+ # provider: :pinecone,
18
+ # middleware: [[Vectra::Middleware::Retry, { max_attempts: 3 }]]
19
+ # )
20
+ #
21
class Retry < Base
  # @param max_attempts [Integer] Maximum number of attempts (default: 3)
  # @param backoff [Symbol, Numeric] Backoff strategy (:exponential, :linear) or fixed delay
  def initialize(max_attempts: 3, backoff: :exponential)
    super()
    @max_attempts = max_attempts
    @backoff = backoff
  end

  # Run the rest of the chain, retrying retryable failures until
  # +max_attempts+ is reached. A failure may arrive either as a raised
  # exception or as a response carrying an error; both are handled alike.
  #
  # @param request [Request] the request to execute
  # @param app [#call] the next element of the middleware chain
  # @return [Response] the final response, with
  #   +metadata[:retry_count]+ set to the number of retries performed
  # @raise [StandardError] the last raised error when it is not retryable
  #   or attempts are exhausted
  def call(request, app)
    attempt = 0

    loop do
      attempt += 1

      begin
        response = app.call(request)
      rescue StandardError => e
        # Bare raise re-raises the exception currently being handled.
        raise unless retry_allowed?(e, attempt)

        sleep(backoff_delay(attempt))
        next
      end

      if !response.success? && retry_allowed?(response.error, attempt)
        sleep(backoff_delay(attempt))
        next
      end

      # Success, a non-retryable error, or attempts exhausted: hand the
      # response back as-is.
      response.metadata[:retry_count] = attempt - 1
      return response
    end
  end

  private

  # Decide whether another attempt should be made for +error+.
  #
  # @param error [Exception, nil] the failure seen on this attempt
  # @param attempt [Integer] number of attempts made so far
  # @return [Boolean]
  def retry_allowed?(error, attempt)
    retryable?(error) && attempt < @max_attempts
  end

  # Check if error is retryable
  #
  # @param error [Exception] The error to check
  # @return [Boolean] true if error is retryable
  def retryable?(error)
    error.is_a?(Vectra::RateLimitError) ||
      error.is_a?(Vectra::ConnectionError) ||
      error.is_a?(Vectra::TimeoutError) ||
      error.is_a?(Vectra::ServerError)
  end

  # Calculate backoff delay
  #
  # @param attempt [Integer] Current attempt number
  # @return [Numeric] Delay in seconds (never negative)
  def backoff_delay(attempt)
    case @backoff
    when :exponential
      # 0.2s, 0.4s, 0.8s, 1.6s, ...
      (2**(attempt - 1)) * 0.2
    when :linear
      # 0.5s, 1.0s, 1.5s, 2.0s, ...
      attempt * 0.5
    when Numeric
      # sleep raises on a negative interval, which would replace the
      # real failure with an unrelated ArgumentError.
      [@backoff, 0].max
    else
      1.0
    end
  end
end
102
+ end
103
+ end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Middleware
5
+ # Middleware stack executor
6
+ #
7
+ # Builds and executes a chain of middleware around provider calls.
8
+ # Similar to Rack middleware, each middleware wraps the next one
9
+ # in the chain until reaching the actual provider.
10
+ #
11
+ # @example Basic usage
12
+ # provider = Vectra::Providers::Memory.new
13
+ # middlewares = [Logging.new, Retry.new]
14
+ # stack = Stack.new(provider, middlewares)
15
+ #
16
+ # result = stack.call(:upsert, index: 'test', vectors: [...])
17
+ #
18
class Stack
  # @param provider [Vectra::Providers::Base] The actual provider
  # @param middlewares [Array<Base>, nil] Array of middleware instances;
  #   nil is treated as "no middleware"
  def initialize(provider, middlewares = [])
    @provider = provider
    @middlewares = middlewares || []
  end

  # Execute the middleware stack for an operation
  #
  # @param operation [Symbol] The operation to perform (:upsert, :query, etc.)
  # @param params [Hash] The operation parameters
  # @return [Object] The result from the provider
  # @raise [Exception] Any error from middleware or provider
  def call(operation, **params)
    request = Request.new(operation: operation, **params)
    response = build_chain(request).call(request)

    # Provider errors are captured into the response by the innermost
    # app; surface them to the caller here.
    raise response.error if response.error

    response.result
  end

  private

  # Build the middleware chain
  #
  # @param _request [Request, nil] unused; accepted for compatibility
  # @return [Proc] The complete middleware chain
  def build_chain(_request = nil)
    # Innermost app: the actual provider call. Any StandardError is
    # converted into an error response so middleware can inspect it.
    provider_app = lambda do |req|
      # :provider is routing information, not a provider argument.
      provider_params = req.to_h.except(:provider)
      Response.new(result: @provider.public_send(req.operation, **provider_params))
    rescue StandardError => e
      Response.new(error: e)
    end

    # Wrap from the inside out: iterating in reverse makes the FIRST
    # middleware in the array the outermost wrapper, so it is the first
    # to see the request and the last to see the response.
    @middlewares.reverse_each.inject(provider_app) do |inner, middleware|
      ->(req) { middleware.call(req, inner) }
    end
  end
end
73
+ end
74
+ end
@@ -80,6 +80,32 @@ module Vectra
80
80
  QueryResult.from_response(matches: matches, namespace: namespace)
81
81
  end
82
82
 
83
+ # Text-only search using simple keyword matching in metadata
84
+ #
85
+ # For testing purposes only. Performs case-insensitive keyword matching
86
+ # in metadata values. Not a real BM25/full-text search implementation.
87
+ #
88
+ # @param index [String] index name
89
+ # @param text [String] text query for keyword search
90
+ # @param top_k [Integer] number of results
91
+ # @param namespace [String, nil] optional namespace
92
+ # @param filter [Hash, nil] metadata filter
93
+ # @param include_values [Boolean] include vector values
94
+ # @param include_metadata [Boolean] include metadata
95
+ # @return [QueryResult] search results
96
def text_search(index:, text:, top_k:, namespace: nil, filter: nil,
                include_values: false, include_metadata: true)
  # A missing namespace is looked up under the "" key.
  stored = @storage[index][namespace || ""].values
  eligible = filter_candidates(stored, filter)
  needle = text.to_s.downcase

  # Highest score first, trimmed to the requested size.
  ranked = find_text_matches(eligible, needle, include_values, include_metadata)
           .sort_by { |match| -match[:score] }
           .first(top_k)

  log_debug("Text search returned #{ranked.size} results")
  QueryResult.from_response(matches: ranked, namespace: namespace)
end
108
+
83
109
  # @see Base#fetch
84
110
  def fetch(index:, ids:, namespace: nil)
85
111
  ns = namespace || ""
@@ -293,6 +319,36 @@ module Vectra
293
319
  true
294
320
  end
295
321
  # rubocop:enable Naming/PredicateMethod
322
+
323
+ # Filter candidates by metadata filter
324
def filter_candidates(candidates, filter)
  # Without a filter every candidate qualifies and the input is returned as-is.
  if filter
    candidates.select { |candidate| matches_filter?(candidate, filter) }
  else
    candidates
  end
end
329
+
330
+ # Find text matches in candidates
331
def find_text_matches(candidates, text_lower, include_values, include_metadata)
  candidates.each_with_object([]) do |candidate, hits|
    haystack = build_metadata_text(candidate)
    # Only candidates whose metadata text contains the whole query are kept.
    next unless haystack.include?(text_lower)

    relevance = calculate_text_score(text_lower, haystack)
    match = build_match(candidate, relevance, include_values, include_metadata)
    hits << match unless match.nil?
  end
end
340
+
341
+ # Build metadata text string for searching
342
def build_metadata_text(vector)
  # Missing metadata is treated as an empty hash, giving an empty string.
  fields = vector.metadata || {}
  fields.values.map { |value| value.to_s }.join(" ").downcase
end
345
+
346
+ # Calculate text match score based on word matches
347
# Score a query as the fraction of its words that occur in the metadata text.
#
# @param query_text [String] query string
# @param metadata_text [String] text to look the query words up in
# @return [Float] value between 0.0 and 1.0; 0.0 for a blank query
def calculate_text_score(query_text, metadata_text)
  # Argument-less split ignores leading/trailing whitespace, so no empty
  # "" token (which every text "includes") can inflate the score.
  query_words = query_text.split
  # A blank query has no words; return 0.0 rather than dividing by zero,
  # which would yield NaN and make scores unsortable.
  return 0.0 if query_words.empty?

  matched_words = query_words.count { |word| metadata_text.include?(word) }
  matched_words.to_f / query_words.size
end
296
352
  end
297
353
  end
298
354
  end
@@ -28,6 +28,7 @@ module Vectra
28
28
  # )
29
29
  # client.upsert(index: 'documents', vectors: [...])
30
30
  #
31
+ # rubocop:disable Metrics/ClassLength
31
32
  class Pgvector < Base
32
33
  include Connection
33
34
  include SqlHelpers
@@ -162,6 +163,54 @@ module Vectra
162
163
  )
163
164
  end
164
165
 
166
+ # Text-only search using PostgreSQL full-text search
167
+ #
168
+ # @param index [String] table name
169
+ # @param text [String] text query for full-text search
170
+ # @param top_k [Integer] number of results
171
+ # @param namespace [String, nil] optional namespace
172
+ # @param filter [Hash, nil] metadata filter
173
+ # @param include_values [Boolean] include vector values
174
+ # @param include_metadata [Boolean] include metadata
175
+ # @param text_column [String] column name for full-text search (default: 'content')
176
+ # @return [QueryResult] search results
177
+ #
178
+ # @note Your table should have a text column with a tsvector index:
179
+ # CREATE INDEX idx_content_fts ON my_index USING gin(to_tsvector('english', content));
180
def text_search(index:, text:, top_k:, namespace: nil, filter: nil,
                include_values: false, include_metadata: true,
                text_column: "content")
  ensure_table_exists!(index)

  # Both SQL fragments are used twice: once for ranking, once for matching.
  document = "to_tsvector('english', COALESCE(#{quote_ident(text_column)}, ''))"
  ts_query = "plainto_tsquery('english', #{escape_literal(text)})"

  columns = [
    "id",
    ("embedding" if include_values),
    ("metadata" if include_metadata),
    "ts_rank(#{document}, #{ts_query}) AS score"
  ].compact

  # The full-text predicate is always present, so WHERE is never empty.
  conditions = build_where_clauses(namespace, filter) + ["#{document} @@ #{ts_query}"]

  sql = [
    "SELECT #{columns.join(', ')} FROM #{quote_ident(index)}",
    "WHERE #{conditions.join(' AND ')}",
    "ORDER BY score DESC",
    "LIMIT #{top_k.to_i}"
  ].join(" ")

  rows = execute(sql)
  matches = rows.map { |row| build_match_from_row(row, include_values, include_metadata) }

  log_debug("Text search returned #{matches.size} results")

  QueryResult.from_response(matches: matches, namespace: namespace)
end
213
+
165
214
  # @see Base#fetch
166
215
  def fetch(index:, ids:, namespace: nil)
167
216
  ensure_table_exists!(index)
@@ -361,5 +410,6 @@ module Vectra
361
410
  raise ConfigurationError, "Host (connection URL or hostname) must be configured for pgvector"
362
411
  end
363
412
  end
413
+ # rubocop:enable Metrics/ClassLength
364
414
  end
365
415
  end
@@ -110,6 +110,45 @@ module Vectra
110
110
  handle_hybrid_search_response(response, alpha, namespace)
111
111
  end
112
112
 
113
+ # Text-only search using Qdrant's BM25 text search
114
+ #
115
+ # @param index [String] collection name
116
+ # @param text [String] text query for keyword search
117
+ # @param top_k [Integer] number of results
118
+ # @param namespace [String, nil] optional namespace
119
+ # @param filter [Hash, nil] metadata filter
120
+ # @param include_values [Boolean] include vector values
121
+ # @param include_metadata [Boolean] include metadata
122
+ # @return [QueryResult] search results
123
def text_search(index:, text:, top_k:, namespace: nil, filter: nil,
                include_values: false, include_metadata: true)
  scope = build_filter(filter, namespace)

  # NOTE(review): confirm against the Qdrant Query API that a bare
  # { text: ... } query is accepted and that "result" is a list of
  # points for this endpoint — neither is verifiable from this method.
  payload = {
    query: { text: text },
    limit: top_k,
    with_vector: include_values,
    with_payload: include_metadata
  }
  # The filter key is only sent when a filter was actually built.
  payload[:filter] = scope if scope

  response = with_error_handling do
    connection.post("/collections/#{index}/points/query", payload)
  end
  return handle_error(response) unless response.success?

  hits = transform_search_results(response.body["result"] || [])
  log_debug("Text search returned #{hits.size} results")

  QueryResult.from_response(matches: hits, namespace: namespace)
end
151
+
113
152
  # @see Base#fetch
114
153
  def fetch(index:, ids:, namespace: nil) # rubocop:disable Lint/UnusedMethodArgument
115
154
  point_ids = ids.map { |id| generate_point_id(id) }