vectra-client 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,297 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "pgvector/connection"
4
+ require_relative "pgvector/sql_helpers"
5
+ require_relative "pgvector/index_management"
6
+
7
+ module Vectra
8
+ module Providers
9
+ # PostgreSQL with pgvector extension provider
10
+ #
11
+ # This provider uses PostgreSQL with the pgvector extension for vector
12
+ # similarity search. Each "index" maps to a PostgreSQL table.
13
+ #
14
+ # @example Table structure
15
+ # CREATE EXTENSION IF NOT EXISTS vector;
16
+ # CREATE TABLE my_index (
17
+ # id TEXT PRIMARY KEY,
18
+ # embedding vector(384),
19
+ # metadata JSONB DEFAULT '{}',
20
+ # namespace TEXT DEFAULT '',
21
+ # created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
22
+ # );
23
+ # CREATE INDEX ON my_index USING ivfflat (embedding vector_cosine_ops);
24
+ #
25
+ # @example Usage
26
+ # client = Vectra.pgvector(
27
+ # connection_url: "postgres://user:pass@localhost/mydb"
28
+ # )
29
+ # client.upsert(index: 'documents', vectors: [...])
30
+ #
31
+ class Pgvector < Base
32
+ include Connection
33
+ include SqlHelpers
34
+ include IndexManagement
35
+
36
# SQL operator used in queries for each supported similarity metric name.
DISTANCE_FUNCTIONS = { "cosine" => "<=>", "euclidean" => "<->", "inner_product" => "<#>" }.freeze

# Metric name treated as the default.
# NOTE(review): not referenced by the methods visible in this file; presumably
# used by the included helper modules — confirm.
DEFAULT_METRIC = "cosine"
43
+
44
# Build the provider with no open connection and an empty table cache.
#
# @param config [Object] configuration, forwarded unchanged to the superclass
def initialize(config)
  super(config)
  @table_cache = {}
  @connection = nil
end
49
+
50
# Identifier of this provider.
#
# @return [Symbol] always +:pgvector+
# @see Base#provider_name
def provider_name
  :pgvector
end
54
+
55
# Insert or overwrite vectors in the table backing +index+, issuing one
# statement per vector.
#
# @param index [String] table name
# @param vectors [Array] vectors; passed through +normalize_vectors+ first
# @param namespace [String, nil] namespace stored with each row; nil is stored as ""
# @return [Hash] +{ upserted_count: Integer }+
# @see Base#upsert
def upsert(index:, vectors:, namespace: nil)
  ensure_table_exists!(index)
  entries = normalize_vectors(vectors)
  target_namespace = namespace || ""

  # Each successful write contributes 1, so the sum is the number written.
  written = entries.sum do |entry|
    upsert_single_vector(index, entry, target_namespace)
    1
  end

  log_debug("Upserted #{written} vectors to #{index}")
  { upserted_count: written }
end
70
+
71
# Run a similarity search against the table backing +index+.
#
# The distance operator is looked up in DISTANCE_FUNCTIONS using the value
# returned by +table_metric(index)+; the SQL itself is assembled by
# +build_query_sql+.
#
# @param index [String] table name
# @param vector [Array] query vector, rendered with +format_vector+
# @param top_k [Integer] forwarded to +build_query_sql+
# @param namespace [String, nil] forwarded to +build_query_sql+
# @param filter [Hash, nil] forwarded to +build_query_sql+
# @param include_values [Boolean] forwarded to the SQL builder and row mapper
# @param include_metadata [Boolean] forwarded to the SQL builder and row mapper
# @return [Object] whatever +QueryResult.from_response+ returns
# @see Base#query
def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
          include_values: false, include_metadata: true)
  ensure_table_exists!(index)

  operator = DISTANCE_FUNCTIONS[table_metric(index)]
  literal = format_vector(vector)
  statement = build_query_sql(
    index: index, vector_literal: literal, distance_op: operator, top_k: top_k,
    namespace: namespace, filter: filter,
    include_values: include_values, include_metadata: include_metadata
  )

  matches = execute(statement).map do |row|
    build_match_from_row(row, include_values, include_metadata)
  end

  log_debug("Query returned #{matches.size} results")
  QueryResult.from_response(matches: matches, namespace: namespace)
end
96
+
97
# Fetch vectors by ID from the table backing +index+.
#
# @param index [String] table name
# @param ids [Array<String>, String] IDs to look up; a single ID is wrapped in an array
# @param namespace [String, nil] when given, only rows in this namespace are returned
# @return [Hash{String => Vector}] found vectors keyed by ID; empty when +ids+ is empty
# @see Base#fetch
def fetch(index:, ids:, namespace: nil)
  ensure_table_exists!(index)

  ids = Array(ids)
  # An empty list would render "IN ()", which PostgreSQL rejects as a syntax
  # error, so answer directly without touching the database.
  return {} if ids.empty?

  placeholders = (1..ids.size).map { |position| "$#{position}" }.join(", ")
  sql = "SELECT id, embedding, metadata FROM #{quote_ident(index)} WHERE id IN (#{placeholders})"
  params = ids

  if namespace
    # The namespace takes the placeholder right after the last ID.
    sql += " AND namespace = $#{ids.size + 1}"
    params = ids + [namespace]
  end

  execute(sql, params).each_with_object({}) do |row, found|
    found[row["id"]] = Vector.new(
      id: row["id"],
      values: parse_vector(row["embedding"]),
      metadata: parse_json(row["metadata"])
    )
  end
end
118
+
119
# Update the embedding and/or metadata of one vector.
#
# The SET clauses and their bind values come from +build_update_params+; the
# ID (and namespace, when given) take the placeholders that follow.
#
# @param index [String] table name
# @param id [String] ID of the row to update
# @param metadata [Hash, nil] metadata forwarded to +build_update_params+
# @param values [Array, nil] new embedding forwarded to +build_update_params+
# @param namespace [String, nil] when given, restricts the update to this namespace
# @return [Hash] +{ updated: false }+ when there is nothing to set, otherwise
#   +{ updated: true }+ (returned regardless of whether a row matched)
# @see Base#update
def update(index:, id:, metadata: nil, values: nil, namespace: nil)
  ensure_table_exists!(index)
  assignments, bind_values, next_placeholder = build_update_params(metadata, values)
  return { updated: false } if assignments.empty?

  conditions = ["id = $#{next_placeholder}"]
  bind_values << id
  if namespace
    conditions << "namespace = $#{next_placeholder + 1}"
    bind_values << namespace
  end

  statement = "UPDATE #{quote_ident(index)} SET #{assignments.join(', ')} WHERE #{conditions.join(' AND ')}"
  execute(statement, bind_values)
  log_debug("Updated vector #{id}")
  { updated: true }
end
138
+
139
# Delete vectors from the table backing +index+.
#
# Dispatch order: everything (optionally scoped to +namespace+) when
# +delete_all+ is set or only a namespace is given; otherwise by +ids+;
# otherwise by +filter+. When none of these apply no statement is run, yet
# +{ deleted: true }+ is still returned.
#
# @param index [String] table name
# @param ids [Array<String>, nil] IDs to delete
# @param namespace [String, nil] namespace restriction
# @param filter [Hash, nil] filter handed to +build_filter_delete_sql+
# @param delete_all [Boolean] delete every vector (within +namespace+ if given)
# @return [Hash] always +{ deleted: true }+
# @see Base#delete
def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
  ensure_table_exists!(index)

  namespace_only = namespace && ids.nil? && filter.nil?
  if delete_all || namespace_only
    delete_all_vectors(index, namespace)
  elsif ids
    delete_by_ids(index, ids, namespace)
  elsif filter
    statement, bind_values = build_filter_delete_sql(index, filter, namespace)
    execute(statement, bind_values)
  end

  log_debug("Deleted vectors from #{index}")
  { deleted: true }
end
155
+
156
# List every table in the +public+ schema that has an +embedding+ column of
# the +vector+ type, described via {#describe_index} (one extra lookup per table).
#
# @return [Array<Hash>] one +describe_index+ result per matching table
# @see Base#list_indexes
def list_indexes
  catalog_query = <<~SQL
    SELECT table_name
    FROM information_schema.columns
    WHERE column_name = 'embedding'
    AND data_type = 'USER-DEFINED'
    AND table_schema = 'public'
    AND udt_name = 'vector'
  SQL

  execute(catalog_query).map do |row|
    describe_index(index: row["table_name"])
  end
end
170
+
171
# Describe the table backing +index+.
#
# Looks up the declared type of the +embedding+ column in the PostgreSQL
# catalog (matching on relation name only) and derives the dimension from it.
#
# @param index [String] table name
# @return [Hash] +{ name:, dimension:, metric:, status: "ready" }+
# @raise [NotFoundError] when no +embedding+ column is found for +index+
# @see Base#describe_index
def describe_index(index:)
  column_type_query = <<~SQL
    SELECT format_type(a.atttypid, a.atttypmod) as data_type
    FROM pg_attribute a
    JOIN pg_class c ON a.attrelid = c.oid
    WHERE c.relname = $1 AND a.attname = 'embedding' AND a.attnum > 0
  SQL

  rows = execute(column_type_query, [index])
  raise NotFoundError, "Index '#{index}' not found" if rows.empty?

  {
    name: index,
    dimension: resolve_index_dimension(index, rows),
    metric: table_metric(index),
    status: "ready"
  }
end
187
+
188
# Work out the vector dimension of +index+, trying in order:
# 1. the type string in the first row of +pg_attribute_result+,
# 2. the +:dimension+ entry of a Hash cached in +@table_cache+,
# 3. the column type reported by +information_schema.columns+.
#
# @param index [String] table name
# @param pg_attribute_result [#first] rows from the catalog lookup; the first
#   row is read by its "data_type" / "udt_name" keys
# @return [Object, nil] whatever +extract_dimension_from_type+ or the cache
#   yields; nil when no source provides a type
def resolve_index_dimension(index, pg_attribute_result)
  head = pg_attribute_result.first
  declared_type = head["data_type"] || head["udt_name"]
  if declared_type
    from_declared = extract_dimension_from_type(declared_type)
    return from_declared if from_declared
  end

  cached = @table_cache[index]
  return cached[:dimension] if cached.is_a?(Hash)

  fallback_query = <<~SQL
    SELECT udt_name, data_type FROM information_schema.columns
    WHERE table_schema = 'public' AND table_name = $1 AND column_name = 'embedding'
  SQL
  fallback_row = execute(fallback_query, [index]).first
  return nil unless fallback_row

  fallback_type = fallback_row["udt_name"] || fallback_row["data_type"]
  fallback_type ? extract_dimension_from_type(fallback_type) : nil
end
207
+
208
# Report vector counts for the table backing +index+.
#
# +namespace+ only narrows +total_vector_count+; the +namespaces+ breakdown
# always covers every namespace in the table.
#
# @param index [String] table name
# @param namespace [String, nil] when given, count only rows in this namespace
# @return [Hash] +{ total_vector_count:, dimension:, namespaces: }+ where
#   +namespaces+ maps each namespace (nil mapped to "") to +{ vector_count: }+
# @see Base#stats
def stats(index:, namespace: nil)
  ensure_table_exists!(index)

  count_query = "SELECT COUNT(*) as count FROM #{quote_ident(index)}"
  count_binds = []
  if namespace
    count_query += " WHERE namespace = $1"
    count_binds = [namespace]
  end
  total = execute(count_query, count_binds).first["count"].to_i

  breakdown_query = "SELECT namespace, COUNT(*) as count FROM #{quote_ident(index)} GROUP BY namespace"
  per_namespace = execute(breakdown_query).to_h do |row|
    [row["namespace"] || "", { vector_count: row["count"].to_i }]
  end

  dimension = describe_index(index: index)[:dimension]
  { total_vector_count: total, dimension: dimension, namespaces: per_namespace }
end
227
+
228
+ private
229
+
230
# Build the SET clauses and bind values for an UPDATE statement.
#
# The embedding clause, when present, always takes placeholder $1; metadata is
# merged into the existing JSONB value rather than replacing it.
#
# @param metadata [Hash, nil] metadata to merge; serialised with +to_json+
# @param values [Array, nil] new embedding; rendered with +format_vector+
# @return [Array(Array<String>, Array, Integer)] clauses, bind values, and the
#   next unused placeholder number
def build_update_params(metadata, values)
  assignments = []
  bindings = []

  if values
    bindings << format_vector(values)
    assignments << "embedding = $#{bindings.size}::vector"
  end

  if metadata
    bindings << metadata.to_json
    assignments << "metadata = metadata || $#{bindings.size}::jsonb"
  end

  # Placeholders are numbered from 1, so the next free one is count + 1.
  [assignments, bindings, bindings.size + 1]
end
251
+
252
# Insert one vector, or overwrite the embedding, metadata and namespace of the
# existing row with the same ID.
#
# @param index [String] table name
# @param vec [Hash] vector read by its +:id+, +:values+ and +:metadata+ keys;
#   missing metadata is stored as an empty JSON object
# @param namespace [String] namespace stored with the row
# @return [Object] whatever +execute+ returns
def upsert_single_vector(index, vec, namespace)
  statement = <<~SQL
    INSERT INTO #{quote_ident(index)} (id, embedding, metadata, namespace)
    VALUES ($1, $2::vector, $3::jsonb, $4)
    ON CONFLICT (id) DO UPDATE SET
    embedding = EXCLUDED.embedding,
    metadata = EXCLUDED.metadata,
    namespace = EXCLUDED.namespace
  SQL

  row_id = vec[:id]
  embedding_literal = format_vector(vec[:values])
  metadata_json = (vec[:metadata] || {}).to_json

  execute(statement, [row_id, embedding_literal, metadata_json, namespace])
end
266
+
267
# Delete every row of the table backing +index+, or only the rows in
# +namespace+ when one is given.
#
# @param index [String] table name
# @param namespace [String, nil] optional namespace restriction
# @return [Object] whatever +execute+ returns
def delete_all_vectors(index, namespace)
  statement = "DELETE FROM #{quote_ident(index)}"

  if namespace
    execute("#{statement} WHERE namespace = $1", [namespace])
  else
    execute(statement, [])
  end
end
273
+
274
# Delete the rows whose ID is in +ids+, optionally restricted to +namespace+.
#
# @param index [String] table name
# @param ids [Array<String>, String] IDs to delete; a single ID is wrapped in an array
# @param namespace [String, nil] optional namespace restriction
# @return [Object, nil] whatever +execute+ returns; nil when +ids+ is empty
def delete_by_ids(index, ids, namespace)
  ids = Array(ids)
  # An empty list would render "IN ()", which PostgreSQL rejects as a syntax
  # error; there is nothing to delete, so skip the statement entirely.
  return if ids.empty?

  placeholders = (1..ids.size).map { |position| "$#{position}" }.join(", ")
  sql = "DELETE FROM #{quote_ident(index)} WHERE id IN (#{placeholders})"
  params = ids.dup

  if namespace
    # The namespace takes the placeholder right after the last ID.
    sql += " AND namespace = $#{ids.size + 1}"
    params << namespace
  end

  execute(sql, params)
end
287
+
288
# pgvector-specific configuration check: a provider and a host must be set.
#
# @return [nil]
# @raise [ConfigurationError] when +config.provider+ is nil or +config.host+ is unset
def validate_config!
  if config.provider.nil?
    raise ConfigurationError, "Provider must be configured"
  end

  unless config.host
    raise ConfigurationError, "Host (connection URL or hostname) must be configured for pgvector"
  end
end
295
+ end
296
+ end
297
+ end
@@ -0,0 +1,308 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Providers
5
+ # Pinecone vector database provider
6
+ #
7
+ # @example
8
+ # provider = Vectra::Providers::Pinecone.new(config)
9
+ # provider.upsert(index: 'my-index', vectors: [...])
10
+ #
11
+ class Pinecone < Base
12
# Value sent in the "X-Pinecone-API-Version" header of every request.
API_VERSION = "2024-07"
13
+
14
# Build the provider with no control-plane connection yet and an empty cache
# of per-index data-plane connections.
#
# @param config [Object] configuration, forwarded unchanged to the superclass
def initialize(config)
  super(config)
  @data_plane_connections = {}
  @control_plane_connection = nil
end
19
+
20
# Identifier of this provider.
#
# @return [Symbol] always +:pinecone+
# @see Base#provider_name
def provider_name
  :pinecone
end
24
+
25
# Upsert vectors into +index+ through the data-plane "/vectors/upsert" endpoint.
#
# @param index [String] index name
# @param vectors [Array] vectors; passed through +normalize_vectors+ first
# @param namespace [String, nil] sent in the body only when given
# @return [Hash] +{ upserted_count: }+ using the response's "upsertedCount",
#   or the number of vectors sent when the response omits it; on an
#   unsuccessful response, the result of +handle_error+
# @see Base#upsert
def upsert(index:, vectors:, namespace: nil)
  normalized = normalize_vectors(vectors)
  payload = namespace ? { vectors: normalized, namespace: namespace } : { vectors: normalized }

  response = data_connection(index).post("/vectors/upsert", payload)
  return handle_error(response) unless response.success?

  log_debug("Upserted #{normalized.size} vectors to #{index}")
  { upserted_count: response.body["upsertedCount"] || normalized.size }
end
43
+
44
# Run a similarity search through the data-plane "/query" endpoint.
#
# @param index [String] index name
# @param vector [Array<Numeric>] query vector; each element is converted with +to_f+
# @param top_k [Integer] sent as "topK"
# @param namespace [String, nil] sent only when given
# @param filter [Hash, nil] sent, after +transform_filter+, only when given
# @param include_values [Boolean] sent as "includeValues"
# @param include_metadata [Boolean] sent as "includeMetadata"
# @return [Object] result of +QueryResult.from_response+; on an unsuccessful
#   response, the result of +handle_error+
# @see Base#query
def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
          include_values: false, include_metadata: true)
  payload = {
    vector: vector.map(&:to_f),
    topK: top_k,
    includeValues: include_values,
    includeMetadata: include_metadata
  }
  payload[:namespace] = namespace if namespace
  payload[:filter] = transform_filter(filter) if filter

  response = data_connection(index).post("/query", payload)
  return handle_error(response) unless response.success?

  answer = response.body
  log_debug("Query returned #{answer['matches']&.size || 0} results")
  QueryResult.from_response(
    matches: transform_matches(answer["matches"] || []),
    namespace: answer["namespace"],
    usage: answer["usage"]
  )
end
69
+
70
# Fetch vectors by ID through the data-plane "/vectors/fetch" endpoint.
#
# Each ID is added as its own "ids" query parameter.
# NOTE(review): assumes the request's params object supports +add+ for
# repeated keys; that object comes from the connection built elsewhere — confirm.
#
# @param index [String] index name
# @param ids [Array<String>] IDs to fetch
# @param namespace [String, nil] sent as a query parameter only when given
# @return [Hash{String => Vector}] vectors keyed by ID; on an unsuccessful
#   response, the result of +handle_error+
# @see Base#fetch
def fetch(index:, ids:, namespace: nil)
  response = data_connection(index).get("/vectors/fetch") do |req|
    ids.each { |id| req.params.add("ids", id) }
    req.params["namespace"] = namespace if namespace
  end
  return handle_error(response) unless response.success?

  (response.body["vectors"] || {}).each_with_object({}) do |(id, data), found|
    found[id] = Vector.new(
      id: id,
      values: data["values"],
      metadata: data["metadata"],
      sparse_values: data["sparseValues"]
    )
  end
end
95
+
96
# Update one vector through the data-plane "/vectors/update" endpoint.
#
# @param index [String] index name
# @param id [String] ID of the vector to update
# @param metadata [Hash, nil] sent as "setMetadata" only when given
# @param values [Array<Numeric>, nil] new values, each converted with +to_f+; sent only when given
# @param namespace [String, nil] sent only when given
# @return [Hash] +{ updated: true }+; on an unsuccessful response, the result
#   of +handle_error+
# @see Base#update
def update(index:, id:, metadata: nil, values: nil, namespace: nil)
  payload = { id: id }.tap do |fields|
    fields[:setMetadata] = metadata if metadata
    fields[:values] = values.map(&:to_f) if values
    fields[:namespace] = namespace if namespace
  end

  response = data_connection(index).post("/vectors/update", payload)
  return handle_error(response) unless response.success?

  log_debug("Updated vector #{id}")
  { updated: true }
end
112
+
113
# Delete vectors through the data-plane "/vectors/delete" endpoint.
#
# Only the options that are set are put into the request body.
#
# @param index [String] index name
# @param ids [Array<String>, nil] sent as "ids" when given
# @param namespace [String, nil] sent as "namespace" when given
# @param filter [Hash, nil] sent as "filter", after +transform_filter+, when given
# @param delete_all [Boolean] sends "deleteAll": true when set
# @return [Hash] +{ deleted: true }+; on an unsuccessful response, the result
#   of +handle_error+
# @see Base#delete
def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
  payload = {}.tap do |fields|
    fields[:ids] = ids if ids
    fields[:namespace] = namespace if namespace
    fields[:filter] = transform_filter(filter) if filter
    fields[:deleteAll] = true if delete_all
  end

  response = data_connection(index).post("/vectors/delete", payload)
  return handle_error(response) unless response.success?

  log_debug("Deleted vectors from #{index}")
  { deleted: true }
end
130
+
131
# List indexes through the control-plane "/indexes" endpoint.
#
# @return [Array<Hash>] one +{ name:, dimension:, metric:, host:, status: }+
#   per index, where +status+ is "ready" when the response's status.ready is
#   truthy and "initializing" otherwise; on an unsuccessful response, the
#   result of +handle_error+
# @see Base#list_indexes
def list_indexes
  response = control_connection.get("/indexes")
  return handle_error(response) unless response.success?

  entries = response.body["indexes"] || []
  entries.map do |entry|
    readiness = entry.dig("status", "ready") ? "ready" : "initializing"
    {
      name: entry["name"],
      dimension: entry["dimension"],
      metric: entry["metric"],
      host: entry["host"],
      status: readiness
    }
  end
end
149
+
150
# Describe one index through the control-plane "/indexes/<name>" endpoint.
#
# @param index [String] index name, interpolated into the request path as-is
# @return [Hash] +{ name:, dimension:, metric:, host:, spec:, status: }+ copied
#   from the response body; on an unsuccessful response, the result of
#   +handle_error+
# @see Base#describe_index
def describe_index(index:)
  response = control_connection.get("/indexes/#{index}")
  return handle_error(response) unless response.success?

  details = response.body
  field_map = {
    name: "name", dimension: "dimension", metric: "metric",
    host: "host", spec: "spec", status: "status"
  }
  field_map.transform_values { |api_key| details[api_key] }
end
168
+
169
# Report index statistics through the data-plane "/describe_index_stats" endpoint.
#
# When +namespace+ is given, +total_vector_count+ is that namespace's
# "vectorCount" from the response (0 when the namespace is absent) instead of
# the index-wide total. +namespaces+ is always the full breakdown.
#
# @param index [String] index name
# @param namespace [String, nil] namespace to scope +total_vector_count+ to
# @return [Hash] +{ total_vector_count:, dimension:, index_fullness:, namespaces: }+;
#   on an unsuccessful response, the result of +handle_error+
# @see Base#stats
def stats(index:, namespace: nil)
  body = {}
  # NOTE(review): an empty filter is sent only when no namespace is given.
  # Kept unchanged so the request stays identical; confirm whether the API
  # needs it at all.
  body[:filter] = {} if namespace.nil?

  response = data_connection(index).post("/describe_index_stats", body)
  return handle_error(response) unless response.success?

  payload = response.body
  namespaces = payload["namespaces"]
  total = payload["totalVectorCount"]
  # The endpoint takes no namespace argument, so scope the total client-side
  # from the per-namespace breakdown.
  total = (namespaces || {}).dig(namespace.to_s, "vectorCount").to_i if namespace

  {
    total_vector_count: total,
    dimension: payload["dimension"],
    index_fullness: payload["indexFullness"],
    namespaces: namespaces
  }
end
187
+
188
# Create an index through the control plane, then describe it.
#
# When +spec+ is omitted a serverless spec on cloud "aws" is used, with the
# region taken from +config.environment+ and falling back to "us-east-1".
#
# @param name [String] index name
# @param dimension [Integer] vector dimension
# @param metric [String] similarity metric (cosine, euclidean, dotproduct)
# @param spec [Hash, nil] index spec (serverless or pod configuration)
# @return [Hash] result of {#describe_index} for the new index; on an
#   unsuccessful response, the result of +handle_error+
def create_index(name:, dimension:, metric: "cosine", spec: nil)
  index_spec = spec || {
    serverless: { cloud: "aws", region: config.environment || "us-east-1" }
  }
  payload = { name: name, dimension: dimension, metric: metric, spec: index_spec }

  response = control_connection.post("/indexes", payload)
  return handle_error(response) unless response.success?

  log_debug("Created index #{name}")
  describe_index(index: name)
end
219
+
220
# Delete an index through the control plane.
#
# On success the cached data-plane connection for +name+ is dropped, so a
# later index created under the same name resolves its host afresh instead of
# reusing a connection to the deleted index.
#
# @param name [String] index name, interpolated into the request path as-is
# @return [Hash] +{ deleted: true }+; on an unsuccessful response, the result
#   of +handle_error+
def delete_index(name:)
  response = control_connection.delete("/indexes/#{name}")
  return handle_error(response) unless response.success?

  @data_plane_connections.delete(name)
  log_debug("Deleted index #{name}")
  { deleted: true }
end
234
+
235
+ private
236
+
237
# Connection to the control plane at https://api.pinecone.io (used for index
# management). Built on first use and reused afterwards.
#
# @return [Object] whatever +build_connection+ returns
def control_connection
  return @control_plane_connection if @control_plane_connection

  headers = {
    "Api-Key" => config.api_key,
    "X-Pinecone-API-Version" => API_VERSION
  }
  @control_plane_connection = build_connection("https://api.pinecone.io", headers)
end
247
+
248
# Connection to the data plane of +index+ (used for vector operations).
# Each index has its own host, so connections are cached per index name.
#
# @param index [String] index name
# @return [Object] whatever +build_connection+ returns
def data_connection(index)
  cached = @data_plane_connections[index]
  return cached if cached

  headers = {
    "Api-Key" => config.api_key,
    "X-Pinecone-API-Version" => API_VERSION
  }
  @data_plane_connections[index] = build_connection("https://#{resolve_index_host(index)}", headers)
end
262
+
263
# Resolve the bare host name (no URL scheme) for an index.
#
# A host set directly in the configuration wins; otherwise the host is taken
# from {#describe_index}. The caller prepends "https://", so a leading
# "http://" or "https://" is stripped here to avoid building a URL such as
# "https://https://...".
#
# @param index [String] index name
# @return [String] host name without scheme
# @raise [ConfigurationError] when no host (or an empty one) can be determined
def resolve_index_host(index)
  raw_host = config.host || describe_index(index: index)[:host]
  host = raw_host.to_s.sub(%r{\Ahttps?://}i, "")

  raise ConfigurationError, "Could not resolve host for index '#{index}'" if host.empty?

  host
end
276
+
277
# Convert a metadata filter into operator form: Hash values are kept as they
# are, Array values become "$in" conditions, anything else becomes "$eq".
#
# @param filter [Hash, nil] filter keyed by metadata field
# @return [Hash, nil] filter with every value in operator form; nil when no
#   filter is given
def transform_filter(filter)
  return nil unless filter

  filter.transform_values do |condition|
    if condition.is_a?(Hash)
      condition # already an operator expression
    elsif condition.is_a?(Array)
      { "$in" => condition }
    else
      { "$eq" => condition }
    end
  end
end
293
+
294
# Convert the API's match entries into hashes with symbol keys.
#
# @param matches [Array<Hash>] entries read by their "id", "score", "values",
#   "metadata" and "sparseValues" keys
# @return [Array<Hash>] one +{ id:, score:, values:, metadata:, sparse_values: }+ per entry
def transform_matches(matches)
  field_map = {
    id: "id", score: "score", values: "values",
    metadata: "metadata", sparse_values: "sparseValues"
  }

  matches.map do |hit|
    field_map.transform_values { |api_key| hit[api_key] }
  end
end
306
+ end
307
+ end
308
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Providers
5
# Qdrant vector database provider (planned for v0.2.0)
#
# Placeholder only: every operation except {#provider_name} raises
# NotImplementedError. Method signatures mirror the implemented providers so
# callers can switch providers without changing call sites.
#
# Note that Ruby's NotImplementedError descends from ScriptError, not
# StandardError, so a bare +rescue+ does not catch it.
#
# @note This provider is not yet implemented
#
class Qdrant < Base
  # Message raised by every unimplemented operation.
  NOT_IMPLEMENTED_MESSAGE = "Qdrant provider is planned for v0.2.0"
  private_constant :NOT_IMPLEMENTED_MESSAGE

  # @return [Symbol] always +:qdrant+
  def provider_name
    :qdrant
  end

  # @raise [NotImplementedError] always
  def upsert(index:, vectors:, namespace: nil)
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end

  # @raise [NotImplementedError] always
  def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
            include_values: false, include_metadata: true)
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end

  # @raise [NotImplementedError] always
  def fetch(index:, ids:, namespace: nil)
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end

  # Accepts the same keywords as the implemented providers (+metadata+ and
  # +values+ both optional) so a values-only update reaches the intended
  # NotImplementedError instead of failing with ArgumentError.
  #
  # @raise [NotImplementedError] always
  def update(index:, id:, metadata: nil, values: nil, namespace: nil)
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end

  # @raise [NotImplementedError] always
  def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end

  # @raise [NotImplementedError] always
  def list_indexes
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end

  # @raise [NotImplementedError] always
  def describe_index(index:)
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end

  # @raise [NotImplementedError] always
  def stats(index:, namespace: nil)
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end
end
47
+ end
48
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
+ module Providers
5
# Weaviate vector database provider (planned for v0.3.0)
#
# Placeholder only: every operation except {#provider_name} raises
# NotImplementedError. Method signatures mirror the implemented providers so
# callers can switch providers without changing call sites.
#
# Note that Ruby's NotImplementedError descends from ScriptError, not
# StandardError, so a bare +rescue+ does not catch it.
#
# @note This provider is not yet implemented
#
class Weaviate < Base
  # Message raised by every unimplemented operation.
  NOT_IMPLEMENTED_MESSAGE = "Weaviate provider is planned for v0.3.0"
  private_constant :NOT_IMPLEMENTED_MESSAGE

  # @return [Symbol] always +:weaviate+
  def provider_name
    :weaviate
  end

  # @raise [NotImplementedError] always
  def upsert(index:, vectors:, namespace: nil)
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end

  # @raise [NotImplementedError] always
  def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
            include_values: false, include_metadata: true)
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end

  # @raise [NotImplementedError] always
  def fetch(index:, ids:, namespace: nil)
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end

  # Accepts the same keywords as the implemented providers (+metadata+ and
  # +values+ both optional) so a values-only update reaches the intended
  # NotImplementedError instead of failing with ArgumentError.
  #
  # @raise [NotImplementedError] always
  def update(index:, id:, metadata: nil, values: nil, namespace: nil)
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end

  # @raise [NotImplementedError] always
  def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end

  # @raise [NotImplementedError] always
  def list_indexes
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end

  # @raise [NotImplementedError] always
  def describe_index(index:)
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end

  # @raise [NotImplementedError] always
  def stats(index:, namespace: nil)
    raise NotImplementedError, NOT_IMPLEMENTED_MESSAGE
  end
end
47
+ end
48
+ end