vectra-client 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Migration template: enables the PostgreSQL 'vector' extension when
4
+ # migrating up and disables it again when rolling back.
5
+ class EnablePgvectorExtension < ActiveRecord::Migration<%= migration_version %>
6
+   # Extension name handed to enable_extension / disable_extension.
7
+   PGVECTOR_EXTENSION = 'vector'
8
+
9
+   # Turn the extension on.
10
+   def up
11
+     enable_extension(PGVECTOR_EXTENSION)
12
+   end
13
+
14
+   # Turn the extension off.
15
+   def down
16
+     disable_extension(PGVECTOR_EXTENSION)
17
+   end
18
+ end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Vectra configuration
4
+ #
5
+ # For more information see: https://github.com/stokry/vectra
6
+
7
+ Vectra.configure do |config|
8
+   # Provider configuration
9
+   config.provider = :<%= options[:provider] %>
10
+
11
+ <%- if options[:provider] == 'pinecone' -%>
12
+   # Pinecone credentials
13
+   config.api_key = Rails.application.credentials.dig(:pinecone, :api_key)
14
+   config.environment = Rails.application.credentials.dig(:pinecone, :environment) || 'us-east-1'
15
+   # Or use direct host:
16
+   # config.host = 'your-index-host.pinecone.io'
17
+
18
+ <%- elsif options[:provider] == 'pgvector' -%>
19
+   # PostgreSQL with pgvector extension
20
+ <%- if options[:database_url] -%>
21
+   config.host = '<%= options[:database_url] %>'
22
+ <%- else -%>
23
+   # Prefer DATABASE_URL; otherwise use the `url` entry of the current environment
24
+   # in database.yml. `dig` yields nil instead of raising when that entry is absent.
25
+   config.host = ENV['DATABASE_URL'] || Rails.configuration.database_configuration.dig(Rails.env, 'url')
26
+ <%- end -%>
27
+   config.api_key = nil # pgvector uses connection URL for auth
28
+
29
+   # Connection pooling (recommended for production)
30
+   config.pool_size = ENV.fetch('VECTRA_POOL_SIZE', 10).to_i
31
+   config.pool_timeout = 5
32
+
33
+   # Batch operations
34
+   config.batch_size = ENV.fetch('VECTRA_BATCH_SIZE', 100).to_i
35
+
36
+ <%- elsif options[:provider] == 'qdrant' -%>
37
+   # Qdrant credentials
38
+   config.api_key = Rails.application.credentials.dig(:qdrant, :api_key)
39
+   config.host = Rails.application.credentials.dig(:qdrant, :host)
40
+
41
+ <%- elsif options[:provider] == 'weaviate' -%>
42
+   # Weaviate credentials
43
+   config.api_key = Rails.application.credentials.dig(:weaviate, :api_key)
44
+   config.host = Rails.application.credentials.dig(:weaviate, :host)
45
+
46
+ <%- end -%>
47
+   # Timeouts
48
+   config.timeout = 30
49
+   config.open_timeout = 10
50
+
51
+   # Retry configuration
52
+   config.max_retries = 3
53
+   config.retry_delay = 1
54
+
55
+   # Logging
56
+   config.logger = Rails.logger
57
+
58
+ <%- if options[:instrumentation] -%>
59
+   # Instrumentation (metrics and monitoring)
60
+   config.instrumentation = true
61
+
62
+   # Uncomment for New Relic:
63
+   # require 'vectra/instrumentation/new_relic'
64
+   # Vectra::Instrumentation::NewRelic.setup!
65
+
66
+   # Uncomment for Datadog:
67
+   # require 'vectra/instrumentation/datadog'
68
+   # Vectra::Instrumentation::Datadog.setup!(
69
+   #   host: ENV['DD_AGENT_HOST'] || 'localhost',
70
+   #   port: ENV['DD_DOGSTATSD_PORT']&.to_i || 8125
71
+   # )
72
+
73
+   # Custom instrumentation:
74
+   # Vectra.on_operation do |event|
75
+   #   Rails.logger.info "Vectra: #{event.operation} on #{event.provider} took #{event.duration}ms"
76
+   #   if event.failure?
77
+   #     Rails.logger.error "Vectra error: #{event.error.message}"
78
+   #   end
79
+   # end
80
+ <%- end -%>
81
+ end
@@ -0,0 +1,195 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_support/concern"
4
+
5
+ module Vectra
6
+ # ActiveRecord integration for vector embeddings
7
+ #
8
+ # Provides ActiveRecord models with vector search capabilities.
9
+ #
10
+ # @example Basic usage
11
+ # class Document < ApplicationRecord
12
+ # include Vectra::ActiveRecord
13
+ #
14
+ # has_vector :embedding,
15
+ # dimension: 384,
16
+ # provider: :pgvector,
17
+ # index: 'documents'
18
+ # end
19
+ #
20
+ # # Auto-index on create/update
21
+ # doc = Document.create!(title: 'Hello', embedding: [0.1, 0.2, ...])
22
+ #
23
+ # # Search similar documents
24
+ # results = Document.vector_search([0.1, 0.2, ...], limit: 10)
25
+ #
26
+ module ActiveRecord
27
+ extend ActiveSupport::Concern
28
+
29
+ included do
30
+ class_attribute :_vectra_config, default: {}
31
+ class_attribute :_vectra_client
32
+ end
33
+
34
+ class_methods do
35
+ # Define a vector attribute
36
+ #
37
+ # @param attribute [Symbol] The attribute name (e.g., :embedding)
38
+ # @param dimension [Integer] Vector dimension
39
+ # @param provider [Symbol] Provider name (:pinecone, :pgvector, etc.)
40
+ # @param index [String] Index/collection name
41
+ # @param auto_index [Boolean] Automatically index on save
42
+ # @param metadata_fields [Array<Symbol>] Fields to include in metadata
43
+ #
44
+ # @example
45
+ # has_vector :embedding,
46
+ # dimension: 384,
47
+ # provider: :pgvector,
48
+ # index: 'documents',
49
+ # auto_index: true,
50
+ # metadata_fields: [:title, :category, :status]
51
+ #
52
+ def has_vector(attribute, dimension:, provider: nil, index: nil, auto_index: true, metadata_fields: [])
53
+ self._vectra_config = {
54
+ attribute: attribute,
55
+ dimension: dimension,
56
+ provider: provider || Vectra.configuration.provider,
57
+ index: index || table_name,
58
+ auto_index: auto_index,
59
+ metadata_fields: metadata_fields
60
+ }
61
+
62
+ # Initialize client lazily
63
+ define_singleton_method(:vectra_client) do
64
+ @_vectra_client ||= Vectra::Client.new(provider: _vectra_config[:provider])
65
+ end
66
+
67
+ # Callbacks for auto-indexing
68
+ if auto_index
69
+ after_save :_vectra_index_vector
70
+ after_destroy :_vectra_delete_vector
71
+ end
72
+
73
+ # Class methods for search
74
+ define_singleton_method(:vector_search) do |query_vector, limit: 10, **options|
75
+ _vectra_search(query_vector, limit: limit, **options)
76
+ end
77
+
78
+ define_singleton_method(:similar_to) do |record, limit: 10, **options|
79
+ vector = record.send(_vectra_config[:attribute])
80
+ raise ArgumentError, "Record has no vector" if vector.nil?
81
+
82
+ _vectra_search(vector, limit: limit, **options)
83
+ end
84
+ end
85
+
86
+ # Search vectors
87
+ #
88
+ # @api private
89
+ def _vectra_search(query_vector, limit: 10, filter: {}, score_threshold: nil, load_records: true)
90
+ config = _vectra_config
91
+ results = vectra_client.query(
92
+ index: config[:index],
93
+ vector: query_vector,
94
+ top_k: limit,
95
+ filter: filter
96
+ )
97
+
98
+ # Filter by score if threshold provided
99
+ results = results.above_score(score_threshold) if score_threshold
100
+
101
+ return results unless load_records
102
+
103
+ # Load ActiveRecord objects
104
+ ids = results.map { |match| match.id.gsub("#{config[:index]}_", "").to_i }
105
+ records = where(id: ids).index_by(&:id)
106
+
107
+ results.map do |match|
108
+ id = match.id.gsub("#{config[:index]}_", "").to_i
109
+ record = records[id]
110
+ next unless record
111
+
112
+ record.instance_variable_set(:@_vectra_score, match.score)
113
+ record.define_singleton_method(:vector_score) { @_vectra_score }
114
+ record
115
+ end.compact
116
+ end
117
+ end
118
+
119
+ # Instance methods
120
+
121
+ # Index this record's vector
122
+ #
123
+ # @return [void]
124
+ def index_vector!
125
+ config = self.class._vectra_config
126
+ vector_data = send(config[:attribute])
127
+
128
+ raise ArgumentError, "#{config[:attribute]} is nil" if vector_data.nil?
129
+
130
+ metadata = config[:metadata_fields].each_with_object({}) do |field, hash|
131
+ hash[field.to_s] = send(field) if respond_to?(field)
132
+ end
133
+
134
+ self.class.vectra_client.upsert(
135
+ index: config[:index],
136
+ vectors: [{
137
+ id: _vectra_vector_id,
138
+ values: vector_data,
139
+ metadata: metadata
140
+ }]
141
+ )
142
+ end
143
+
144
+ # Delete this record's vector from index
145
+ #
146
+ # @return [void]
147
+ def delete_vector!
148
+ config = self.class._vectra_config
149
+
150
+ self.class.vectra_client.delete(
151
+ index: config[:index],
152
+ ids: [_vectra_vector_id]
153
+ )
154
+ end
155
+
156
+ # Find similar records
157
+ #
158
+ # @param limit [Integer] Number of results
159
+ # @param filter [Hash] Metadata filter
160
+ # @return [Array<ActiveRecord::Base>]
161
+ def similar(limit: 10, filter: {})
162
+ config = self.class._vectra_config
163
+ vector_data = send(config[:attribute])
164
+
165
+ raise ArgumentError, "#{config[:attribute]} is nil" if vector_data.nil?
166
+
167
+ self.class._vectra_search(vector_data, limit: limit + 1, filter: filter)
168
+ .reject { |record| record.id == id } # Exclude self
169
+ .first(limit)
170
+ end
171
+
172
+ private
173
+
174
+ # Auto-index callback
175
+ def _vectra_index_vector
176
+ return unless saved_change_to_attribute?(self.class._vectra_config[:attribute])
177
+
178
+ index_vector!
179
+ rescue StandardError => e
180
+ Rails.logger.error("Vectra auto-index failed: #{e.message}") if defined?(Rails)
181
+ end
182
+
183
+ # Auto-delete callback
184
+ def _vectra_delete_vector
185
+ delete_vector!
186
+ rescue StandardError => e
187
+ Rails.logger.error("Vectra auto-delete failed: #{e.message}") if defined?(Rails)
188
+ end
189
+
190
+ # Generate vector ID
191
+ def _vectra_vector_id
192
+ "#{self.class._vectra_config[:index]}_#{id}"
193
+ end
194
+ end
195
+ end
data/lib/vectra/client.rb CHANGED
@@ -60,7 +60,14 @@ module Vectra
60
60
  validate_index!(index)
61
61
  validate_vectors!(vectors)
62
62
 
63
- provider.upsert(index: index, vectors: vectors, namespace: namespace)
63
+ Instrumentation.instrument(
64
+ operation: :upsert,
65
+ provider: provider_name,
66
+ index: index,
67
+ metadata: { vector_count: vectors.size }
68
+ ) do
69
+ provider.upsert(index: index, vectors: vectors, namespace: namespace)
70
+ end
64
71
  end
65
72
 
66
73
  # Query vectors by similarity
@@ -94,15 +101,25 @@ module Vectra
94
101
  validate_index!(index)
95
102
  validate_query_vector!(vector)
96
103
 
97
- provider.query(
104
+ result = nil
105
+ Instrumentation.instrument(
106
+ operation: :query,
107
+ provider: provider_name,
98
108
  index: index,
99
- vector: vector,
100
- top_k: top_k,
101
- namespace: namespace,
102
- filter: filter,
103
- include_values: include_values,
104
- include_metadata: include_metadata
105
- )
109
+ metadata: { top_k: top_k }
110
+ ) do
111
+ result = provider.query(
112
+ index: index,
113
+ vector: vector,
114
+ top_k: top_k,
115
+ namespace: namespace,
116
+ filter: filter,
117
+ include_values: include_values,
118
+ include_metadata: include_metadata
119
+ )
120
+ end
121
+
122
+ result
106
123
  end
107
124
 
108
125
  # Fetch vectors by IDs
@@ -120,7 +137,14 @@ module Vectra
120
137
  validate_index!(index)
121
138
  validate_ids!(ids)
122
139
 
123
- provider.fetch(index: index, ids: ids, namespace: namespace)
140
+ Instrumentation.instrument(
141
+ operation: :fetch,
142
+ provider: provider_name,
143
+ index: index,
144
+ metadata: { id_count: ids.size }
145
+ ) do
146
+ provider.fetch(index: index, ids: ids, namespace: namespace)
147
+ end
124
148
  end
125
149
 
126
150
  # Update a vector's metadata or values
@@ -145,13 +169,20 @@ module Vectra
145
169
 
146
170
  raise ValidationError, "Must provide metadata or values to update" if metadata.nil? && values.nil?
147
171
 
148
- provider.update(
172
+ Instrumentation.instrument(
173
+ operation: :update,
174
+ provider: provider_name,
149
175
  index: index,
150
- id: id,
151
- metadata: metadata,
152
- values: values,
153
- namespace: namespace
154
- )
176
+ metadata: { has_metadata: !metadata.nil?, has_values: !values.nil? }
177
+ ) do
178
+ provider.update(
179
+ index: index,
180
+ id: id,
181
+ metadata: metadata,
182
+ values: values,
183
+ namespace: namespace
184
+ )
185
+ end
155
186
  end
156
187
 
157
188
  # Delete vectors
@@ -179,13 +210,20 @@ module Vectra
179
210
  raise ValidationError, "Must provide ids, filter, or delete_all"
180
211
  end
181
212
 
182
- provider.delete(
213
+ Instrumentation.instrument(
214
+ operation: :delete,
215
+ provider: provider_name,
183
216
  index: index,
184
- ids: ids,
185
- namespace: namespace,
186
- filter: filter,
187
- delete_all: delete_all
188
- )
217
+ metadata: { id_count: ids&.size, delete_all: delete_all, has_filter: !filter.nil? }
218
+ ) do
219
+ provider.delete(
220
+ index: index,
221
+ ids: ids,
222
+ namespace: namespace,
223
+ filter: filter,
224
+ delete_all: delete_all
225
+ )
226
+ end
189
227
  end
190
228
 
191
229
  # List all indexes
@@ -14,7 +14,8 @@ module Vectra
14
14
  SUPPORTED_PROVIDERS = %i[pinecone qdrant weaviate pgvector].freeze
15
15
 
16
16
  attr_accessor :api_key, :environment, :host, :timeout, :open_timeout,
17
- :max_retries, :retry_delay, :logger
17
+ :max_retries, :retry_delay, :logger, :pool_size, :pool_timeout,
18
+ :batch_size, :instrumentation
18
19
 
19
20
  attr_reader :provider
20
21
 
@@ -28,6 +29,10 @@ module Vectra
28
29
  @max_retries = 3
29
30
  @retry_delay = 1
30
31
  @logger = nil
32
+ @pool_size = 5
33
+ @pool_timeout = 5
34
+ @batch_size = 100
35
+ @instrumentation = false
31
36
  end
32
37
 
33
38
  # Set the provider
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Instrumentation
5
+ # Datadog instrumentation adapter
6
+ #
7
+ # Automatically reports Vectra metrics to Datadog using DogStatsD.
8
+ #
9
+ # @example Enable Datadog instrumentation
10
+ # # config/initializers/vectra.rb
11
+ # require 'vectra/instrumentation/datadog'
12
+ #
13
+ # Vectra.configure do |config|
14
+ # config.instrumentation = true
15
+ # end
16
+ #
17
+ # Vectra::Instrumentation::Datadog.setup!(
18
+ # host: ENV['DD_AGENT_HOST'] || 'localhost',
19
+ # port: ENV['DD_DOGSTATSD_PORT']&.to_i || 8125
20
+ # )
21
+ #
22
+ module Datadog
23
+ class << self
24
+ attr_reader :statsd
25
+
26
+ # Setup Datadog instrumentation
27
+ #
28
+ # @param host [String] DogStatsD host
29
+ # @param port [Integer] DogStatsD port
30
+ # @param namespace [String] Metric namespace
31
+ # @return [void]
32
+ def setup!(host: "localhost", port: 8125, namespace: "vectra")
33
+ require "datadog/statsd"
34
+
35
+ @statsd = ::Datadog::Statsd.new(host, port, namespace: namespace)
36
+
37
+ Vectra::Instrumentation.on_operation do |event|
38
+ record_metrics(event)
39
+ end
40
+ rescue LoadError
41
+ warn "Datadog StatsD gem not found. Install with: gem 'dogstatsd-ruby'"
42
+ end
43
+
44
+ private
45
+
46
+ # Record metrics to Datadog
47
+ def record_metrics(event)
48
+ return unless statsd
49
+
50
+ tags = [
51
+ "provider:#{event.provider}",
52
+ "operation:#{event.operation}",
53
+ "index:#{event.index}",
54
+ "status:#{event.success? ? 'success' : 'error'}"
55
+ ]
56
+
57
+ # Record timing
58
+ statsd.timing("operation.duration", event.duration, tags: tags)
59
+
60
+ # Record count
61
+ statsd.increment("operation.count", tags: tags)
62
+
63
+ # Record result count if available
64
+ if event.metadata[:result_count]
65
+ statsd.gauge("operation.results", event.metadata[:result_count], tags: tags)
66
+ end
67
+
68
+ # Record vector count if available
69
+ if event.metadata[:vector_count]
70
+ statsd.gauge("operation.vectors", event.metadata[:vector_count], tags: tags)
71
+ end
72
+
73
+ # Record errors
74
+ return unless event.failure?
75
+
76
+ error_tags = tags + ["error_type:#{event.error.class.name}"]
77
+ statsd.increment("operation.error", tags: error_tags)
78
+ end
79
+ end
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Instrumentation
5
+ # New Relic instrumentation adapter
6
+ #
7
+ # Automatically reports Vectra metrics to New Relic APM.
8
+ #
9
+ # @example Enable New Relic instrumentation
10
+ # # config/initializers/vectra.rb
11
+ # require 'vectra/instrumentation/new_relic'
12
+ #
13
+ # Vectra.configure do |config|
14
+ # config.instrumentation = true
15
+ # end
16
+ #
17
+ # Vectra::Instrumentation::NewRelic.setup!
18
+ #
19
+ module NewRelic
20
+ class << self
21
+ # Setup New Relic instrumentation
22
+ #
23
+ # @return [void]
24
+ def setup!
25
+ return unless defined?(::NewRelic::Agent)
26
+
27
+ Vectra::Instrumentation.on_operation do |event|
28
+ record_metrics(event)
29
+ record_transaction(event)
30
+ end
31
+ end
32
+
33
+ private
34
+
35
+ # Record custom metrics
36
+ def record_metrics(event)
37
+ prefix = "Custom/Vectra/#{event.provider}/#{event.operation}"
38
+
39
+ ::NewRelic::Agent.record_metric("#{prefix}/duration", event.duration)
40
+ ::NewRelic::Agent.record_metric("#{prefix}/calls", 1)
41
+
42
+ if event.success?
43
+ ::NewRelic::Agent.record_metric("#{prefix}/success", 1)
44
+
45
+ # Record result count if available
46
+ if event.metadata[:result_count]
47
+ ::NewRelic::Agent.record_metric("#{prefix}/results", event.metadata[:result_count])
48
+ end
49
+ else
50
+ ::NewRelic::Agent.record_metric("#{prefix}/error", 1)
51
+ end
52
+ end
53
+
54
+ # Add to transaction trace
55
+ def record_transaction(event)
56
+ ::NewRelic::Agent.add_custom_attributes(
57
+ vectra_operation: event.operation,
58
+ vectra_provider: event.provider,
59
+ vectra_index: event.index,
60
+ vectra_duration: event.duration
61
+ )
62
+
63
+ return unless event.failure?
64
+
65
+ ::NewRelic::Agent.notice_error(event.error)
66
+ end
67
+ end
68
+ end
69
+ end
70
+ end