vectra-client 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.codecov.yml +31 -0
- data/.rspec +4 -0
- data/.rubocop.yml +183 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +88 -0
- data/CODE_OF_CONDUCT.md +127 -0
- data/CONTRIBUTING.md +239 -0
- data/LICENSE +21 -0
- data/README.md +456 -0
- data/Rakefile +34 -0
- data/SECURITY.md +196 -0
- data/lib/vectra/client.rb +304 -0
- data/lib/vectra/configuration.rb +169 -0
- data/lib/vectra/errors.rb +73 -0
- data/lib/vectra/providers/base.rb +265 -0
- data/lib/vectra/providers/pgvector/connection.rb +75 -0
- data/lib/vectra/providers/pgvector/index_management.rb +122 -0
- data/lib/vectra/providers/pgvector/sql_helpers.rb +115 -0
- data/lib/vectra/providers/pgvector.rb +297 -0
- data/lib/vectra/providers/pinecone.rb +308 -0
- data/lib/vectra/providers/qdrant.rb +48 -0
- data/lib/vectra/providers/weaviate.rb +48 -0
- data/lib/vectra/query_result.rb +257 -0
- data/lib/vectra/vector.rb +155 -0
- data/lib/vectra/version.rb +5 -0
- data/lib/vectra.rb +133 -0
- metadata +226 -0
data/lib/vectra/providers/pgvector.rb
@@ -0,0 +1,297 @@
# frozen_string_literal: true

require_relative "pgvector/connection"
require_relative "pgvector/sql_helpers"
require_relative "pgvector/index_management"

module Vectra
  module Providers
    # PostgreSQL with pgvector extension provider
    #
    # This provider uses PostgreSQL with the pgvector extension for vector
    # similarity search. Each "index" maps to a PostgreSQL table.
    #
    # @example Table structure
    #   CREATE EXTENSION IF NOT EXISTS vector;
    #   CREATE TABLE my_index (
    #     id TEXT PRIMARY KEY,
    #     embedding vector(384),
    #     metadata JSONB DEFAULT '{}',
    #     namespace TEXT DEFAULT '',
    #     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    #   );
    #   CREATE INDEX ON my_index USING ivfflat (embedding vector_cosine_ops);
    #
    # @example Usage
    #   client = Vectra.pgvector(
    #     connection_url: "postgres://user:pass@localhost/mydb"
    #   )
    #   client.upsert(index: 'documents', vectors: [...])
    #
    class Pgvector < Base
      include Connection
      include SqlHelpers
      include IndexManagement

      DISTANCE_FUNCTIONS = {
        "cosine" => "<=>",
        "euclidean" => "<->",
        "inner_product" => "<#>"
      }.freeze

      DEFAULT_METRIC = "cosine"

      def initialize(config)
        super
        @connection = nil
        @table_cache = {}
      end

      # @see Base#provider_name
      def provider_name
        :pgvector
      end

      # @see Base#upsert
      def upsert(index:, vectors:, namespace: nil)
        ensure_table_exists!(index)
        normalized = normalize_vectors(vectors)
        ns = namespace || ""

        upserted = 0
        normalized.each do |vec|
          upsert_single_vector(index, vec, ns)
          upserted += 1
        end

        log_debug("Upserted #{upserted} vectors to #{index}")
        { upserted_count: upserted }
      end

      # @see Base#query
      def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
                include_values: false, include_metadata: true)
        ensure_table_exists!(index)

        distance_op = DISTANCE_FUNCTIONS[table_metric(index)]
        vector_literal = format_vector(vector)

        sql = build_query_sql(
          index: index,
          vector_literal: vector_literal,
          distance_op: distance_op,
          top_k: top_k,
          namespace: namespace,
          filter: filter,
          include_values: include_values,
          include_metadata: include_metadata
        )

        result = execute(sql)
        matches = result.map { |row| build_match_from_row(row, include_values, include_metadata) }

        log_debug("Query returned #{matches.size} results")
        QueryResult.from_response(matches: matches, namespace: namespace)
      end

      # @see Base#fetch
      def fetch(index:, ids:, namespace: nil)
        ensure_table_exists!(index)

        placeholders = ids.map.with_index { |_, i| "$#{i + 1}" }.join(", ")
        sql = "SELECT id, embedding, metadata FROM #{quote_ident(index)} WHERE id IN (#{placeholders})"
        sql += " AND namespace = $#{ids.size + 1}" if namespace

        params = namespace ? ids + [namespace] : ids
        result = execute(sql, params)

        vectors = {}
        result.each do |row|
          vectors[row["id"]] = Vector.new(
            id: row["id"],
            values: parse_vector(row["embedding"]),
            metadata: parse_json(row["metadata"])
          )
        end
        vectors
      end

      # @see Base#update
      def update(index:, id:, metadata: nil, values: nil, namespace: nil)
        ensure_table_exists!(index)
        updates, params, param_idx = build_update_params(metadata, values)

        return { updated: false } if updates.empty?

        sql = "UPDATE #{quote_ident(index)} SET #{updates.join(', ')} WHERE id = $#{param_idx}"
        params << id

        if namespace
          sql += " AND namespace = $#{param_idx + 1}"
          params << namespace
        end

        execute(sql, params)
        log_debug("Updated vector #{id}")
        { updated: true }
      end

      # @see Base#delete
      def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
        ensure_table_exists!(index)

        if delete_all || (namespace && ids.nil? && filter.nil?)
          delete_all_vectors(index, namespace)
        elsif ids
          delete_by_ids(index, ids, namespace)
        elsif filter
          sql, params = build_filter_delete_sql(index, filter, namespace)
          execute(sql, params)
        end

        log_debug("Deleted vectors from #{index}")
        { deleted: true }
      end

      # @see Base#list_indexes
      def list_indexes
        sql = <<~SQL
          SELECT table_name
          FROM information_schema.columns
          WHERE column_name = 'embedding'
            AND data_type = 'USER-DEFINED'
            AND table_schema = 'public'
            AND udt_name = 'vector'
        SQL

        result = execute(sql)
        result.map { |row| describe_index(index: row["table_name"]) }
      end

      # @see Base#describe_index
      def describe_index(index:)
        sql = <<~SQL
          SELECT format_type(a.atttypid, a.atttypmod) as data_type
          FROM pg_attribute a
          JOIN pg_class c ON a.attrelid = c.oid
          WHERE c.relname = $1 AND a.attname = 'embedding' AND a.attnum > 0
        SQL

        result = execute(sql, [index])
        raise NotFoundError, "Index '#{index}' not found" if result.empty?

        dimension = resolve_index_dimension(index, result)

        { name: index, dimension: dimension, metric: table_metric(index), status: "ready" }
      end

      # Resolve vector dimension for an index from various sources
      def resolve_index_dimension(index, pg_attribute_result)
        type_info = pg_attribute_result.first["data_type"] || pg_attribute_result.first["udt_name"]
        dim = extract_dimension_from_type(type_info) if type_info

        return dim if dim

        if @table_cache[index].is_a?(Hash)
          return @table_cache[index][:dimension]
        end

        alt_sql = <<~SQL
          SELECT udt_name, data_type FROM information_schema.columns
          WHERE table_schema = 'public' AND table_name = $1 AND column_name = 'embedding'
        SQL
        alt_result = execute(alt_sql, [index])
        udt = alt_result.first && (alt_result.first["udt_name"] || alt_result.first["data_type"])
        extract_dimension_from_type(udt) if udt
      end

      # @see Base#stats
      def stats(index:, namespace: nil)
        ensure_table_exists!(index)

        count_sql = "SELECT COUNT(*) as count FROM #{quote_ident(index)}"
        count_sql += " WHERE namespace = $1" if namespace

        count_result = execute(count_sql, namespace ? [namespace] : [])
        total_count = count_result.first["count"].to_i

        ns_sql = "SELECT namespace, COUNT(*) as count FROM #{quote_ident(index)} GROUP BY namespace"
        ns_result = execute(ns_sql)
        namespaces = ns_result.each_with_object({}) do |row, hash|
          hash[row["namespace"] || ""] = { vector_count: row["count"].to_i }
        end

        info = describe_index(index: index)
        { total_vector_count: total_count, dimension: info[:dimension], namespaces: namespaces }
      end

      private

      # Build update parameters
      def build_update_params(metadata, values)
        updates = []
        params = []
        param_idx = 1

        # Put embedding param first so tests expect embedding = $1::vector when provided
        if values
          updates << "embedding = $#{param_idx}::vector"
          params << format_vector(values)
          param_idx += 1
        end

        if metadata
          updates << "metadata = metadata || $#{param_idx}::jsonb"
          params << metadata.to_json
          param_idx += 1
        end

        [updates, params, param_idx]
      end

      # Upsert a single vector
      def upsert_single_vector(index, vec, namespace)
        sql = <<~SQL
          INSERT INTO #{quote_ident(index)} (id, embedding, metadata, namespace)
          VALUES ($1, $2::vector, $3::jsonb, $4)
          ON CONFLICT (id) DO UPDATE SET
            embedding = EXCLUDED.embedding,
            metadata = EXCLUDED.metadata,
            namespace = EXCLUDED.namespace
        SQL

        params = [vec[:id], format_vector(vec[:values]), (vec[:metadata] || {}).to_json, namespace]
        execute(sql, params)
      end

      # Delete all vectors from index
      def delete_all_vectors(index, namespace)
        sql = "DELETE FROM #{quote_ident(index)}"
        sql += " WHERE namespace = $1" if namespace
        execute(sql, namespace ? [namespace] : [])
      end

      # Delete vectors by IDs
      def delete_by_ids(index, ids, namespace)
        placeholders = ids.map.with_index { |_, i| "$#{i + 1}" }.join(", ")
        sql = "DELETE FROM #{quote_ident(index)} WHERE id IN (#{placeholders})"
        params = ids.dup

        if namespace
          sql += " AND namespace = $#{ids.size + 1}"
          params << namespace
        end

        execute(sql, params)
      end

      # Override validate_config! for pgvector-specific validation
      def validate_config!
        raise ConfigurationError, "Provider must be configured" if config.provider.nil?
        return if config.host

        raise ConfigurationError, "Host (connection URL or hostname) must be configured for pgvector"
      end
    end
  end
end
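Editorial note (not part of the packaged diff): the following is a minimal usage sketch assembled from the @example block and the method signatures above. The hash-style vectors (:id, :values, :metadata) match what upsert_single_vector reads; the client-level query call, the "documents" table, and the 384-dimension embeddings are illustrative assumptions.

# Hypothetical walkthrough, assuming the documented Vectra.pgvector helper
# and a table shaped like the @example "Table structure" above.
require "vectra"

client = Vectra.pgvector(
  connection_url: "postgres://user:pass@localhost/mydb"
)

# Upsert hash-style vectors; each needs :id and :values, with optional :metadata.
client.upsert(
  index: "documents",
  vectors: [
    { id: "doc-1", values: Array.new(384) { rand }, metadata: { "title" => "Intro" } },
    { id: "doc-2", values: Array.new(384) { rand }, metadata: { "title" => "Guide" } }
  ]
)

# Nearest-neighbour search; the provider returns a Vectra::QueryResult.
result = client.query(index: "documents", vector: Array.new(384) { rand }, top_k: 5)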
data/lib/vectra/providers/pinecone.rb
@@ -0,0 +1,308 @@
# frozen_string_literal: true

module Vectra
  module Providers
    # Pinecone vector database provider
    #
    # @example
    #   provider = Vectra::Providers::Pinecone.new(config)
    #   provider.upsert(index: 'my-index', vectors: [...])
    #
    class Pinecone < Base
      API_VERSION = "2024-07"

      def initialize(config)
        super
        @control_plane_connection = nil
        @data_plane_connections = {}
      end

      # @see Base#provider_name
      def provider_name
        :pinecone
      end

      # @see Base#upsert
      def upsert(index:, vectors:, namespace: nil)
        normalized = normalize_vectors(vectors)

        body = { vectors: normalized }
        body[:namespace] = namespace if namespace

        response = data_connection(index).post("/vectors/upsert", body)

        if response.success?
          log_debug("Upserted #{normalized.size} vectors to #{index}")
          {
            upserted_count: response.body["upsertedCount"] || normalized.size
          }
        else
          handle_error(response)
        end
      end

      # @see Base#query
      def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
                include_values: false, include_metadata: true)
        body = {
          vector: vector.map(&:to_f),
          topK: top_k,
          includeValues: include_values,
          includeMetadata: include_metadata
        }
        body[:namespace] = namespace if namespace
        body[:filter] = transform_filter(filter) if filter

        response = data_connection(index).post("/query", body)

        if response.success?
          log_debug("Query returned #{response.body['matches']&.size || 0} results")
          QueryResult.from_response(
            matches: transform_matches(response.body["matches"] || []),
            namespace: response.body["namespace"],
            usage: response.body["usage"]
          )
        else
          handle_error(response)
        end
      end

      # @see Base#fetch
      def fetch(index:, ids:, namespace: nil)
        params = { ids: ids }
        params[:namespace] = namespace if namespace

        response = data_connection(index).get("/vectors/fetch") do |req|
          ids.each { |id| req.params.add("ids", id) }
          req.params["namespace"] = namespace if namespace
        end

        if response.success?
          vectors = {}
          (response.body["vectors"] || {}).each do |id, data|
            vectors[id] = Vector.new(
              id: id,
              values: data["values"],
              metadata: data["metadata"],
              sparse_values: data["sparseValues"]
            )
          end
          vectors
        else
          handle_error(response)
        end
      end

      # @see Base#update
      def update(index:, id:, metadata: nil, values: nil, namespace: nil)
        body = { id: id }
        body[:setMetadata] = metadata if metadata
        body[:values] = values.map(&:to_f) if values
        body[:namespace] = namespace if namespace

        response = data_connection(index).post("/vectors/update", body)

        if response.success?
          log_debug("Updated vector #{id}")
          { updated: true }
        else
          handle_error(response)
        end
      end

      # @see Base#delete
      def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
        body = {}
        body[:ids] = ids if ids
        body[:namespace] = namespace if namespace
        body[:filter] = transform_filter(filter) if filter
        body[:deleteAll] = true if delete_all

        response = data_connection(index).post("/vectors/delete", body)

        if response.success?
          log_debug("Deleted vectors from #{index}")
          { deleted: true }
        else
          handle_error(response)
        end
      end

      # @see Base#list_indexes
      def list_indexes
        response = control_connection.get("/indexes")

        if response.success?
          (response.body["indexes"] || []).map do |idx|
            {
              name: idx["name"],
              dimension: idx["dimension"],
              metric: idx["metric"],
              host: idx["host"],
              status: idx.dig("status", "ready") ? "ready" : "initializing"
            }
          end
        else
          handle_error(response)
        end
      end

      # @see Base#describe_index
      def describe_index(index:)
        response = control_connection.get("/indexes/#{index}")

        if response.success?
          body = response.body
          {
            name: body["name"],
            dimension: body["dimension"],
            metric: body["metric"],
            host: body["host"],
            spec: body["spec"],
            status: body["status"]
          }
        else
          handle_error(response)
        end
      end

      # @see Base#stats
      def stats(index:, namespace: nil)
        body = {}
        body[:filter] = {} if namespace.nil?

        response = data_connection(index).post("/describe_index_stats", body)

        if response.success?
          {
            total_vector_count: response.body["totalVectorCount"],
            dimension: response.body["dimension"],
            index_fullness: response.body["indexFullness"],
            namespaces: response.body["namespaces"]
          }
        else
          handle_error(response)
        end
      end

      # Create a new index
      #
      # @param name [String] index name
      # @param dimension [Integer] vector dimension
      # @param metric [String] similarity metric (cosine, euclidean, dotproduct)
      # @param spec [Hash] index spec (serverless or pod configuration)
      # @return [Hash] created index info
      def create_index(name:, dimension:, metric: "cosine", spec: nil)
        body = {
          name: name,
          dimension: dimension,
          metric: metric
        }

        # Default to serverless spec if not provided
        body[:spec] = spec || {
          serverless: {
            cloud: "aws",
            region: config.environment || "us-east-1"
          }
        }

        response = control_connection.post("/indexes", body)

        if response.success?
          log_debug("Created index #{name}")
          describe_index(index: name)
        else
          handle_error(response)
        end
      end

      # Delete an index
      #
      # @param name [String] index name
      # @return [Hash] deletion result
      def delete_index(name:)
        response = control_connection.delete("/indexes/#{name}")

        if response.success?
          log_debug("Deleted index #{name}")
          { deleted: true }
        else
          handle_error(response)
        end
      end

      private

      # Control plane connection (for index management)
      def control_connection
        @control_plane_connection ||= build_connection(
          "https://api.pinecone.io",
          {
            "Api-Key" => config.api_key,
            "X-Pinecone-API-Version" => API_VERSION
          }
        )
      end

      # Data plane connection (for vector operations)
      # Each index has its own host
      def data_connection(index)
        @data_plane_connections[index] ||= begin
          host = resolve_index_host(index)
          build_connection(
            "https://#{host}",
            {
              "Api-Key" => config.api_key,
              "X-Pinecone-API-Version" => API_VERSION
            }
          )
        end
      end

      # Resolve the host for an index
      def resolve_index_host(index)
        # If a direct host is configured, use that
        return config.host if config.host

        # Otherwise, fetch from the API
        info = describe_index(index: index)
        host = info[:host]

        raise ConfigurationError, "Could not resolve host for index '#{index}'" unless host

        host
      end

      # Transform metadata filter to Pinecone format
      def transform_filter(filter)
        return nil unless filter

        # Simple key-value filters are wrapped in $eq
        filter.transform_values do |value|
          case value
          when Hash
            value # Already a filter operator
          when Array
            { "$in" => value }
          else
            { "$eq" => value }
          end
        end
      end

      # Transform matches from API response
      def transform_matches(matches)
        matches.map do |match|
          {
            id: match["id"],
            score: match["score"],
            values: match["values"],
            metadata: match["metadata"],
            sparse_values: match["sparseValues"]
          }
        end
      end
    end
  end
end
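Editorial note (not part of the packaged diff): transform_filter above passes operator hashes through unchanged, wraps arrays in "$in", and wraps scalars in "$eq". A small sketch of the resulting mapping, with an illustrative filter:

# Filter as a caller might pass it to #query or #delete:
filter = {
  "genre" => ["drama", "comedy"],   # Array  -> wrapped in { "$in" => [...] }
  "year"  => 2020,                  # scalar -> wrapped in { "$eq" => 2020 }
  "score" => { "$gte" => 0.8 }      # Hash   -> passed through unchanged
}

# body[:filter] sent to Pinecone after transform_filter:
# {
#   "genre" => { "$in" => ["drama", "comedy"] },
#   "year"  => { "$eq" => 2020 },
#   "score" => { "$gte" => 0.8 }
# }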
data/lib/vectra/providers/qdrant.rb
@@ -0,0 +1,48 @@
# frozen_string_literal: true

module Vectra
  module Providers
    # Qdrant vector database provider (planned for v0.2.0)
    #
    # @note This provider is not yet implemented
    #
    class Qdrant < Base
      def provider_name
        :qdrant
      end

      def upsert(index:, vectors:, namespace: nil)
        raise NotImplementedError, "Qdrant provider is planned for v0.2.0"
      end

      def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
                include_values: false, include_metadata: true)
        raise NotImplementedError, "Qdrant provider is planned for v0.2.0"
      end

      def fetch(index:, ids:, namespace: nil)
        raise NotImplementedError, "Qdrant provider is planned for v0.2.0"
      end

      def update(index:, id:, metadata:, namespace: nil)
        raise NotImplementedError, "Qdrant provider is planned for v0.2.0"
      end

      def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
        raise NotImplementedError, "Qdrant provider is planned for v0.2.0"
      end

      def list_indexes
        raise NotImplementedError, "Qdrant provider is planned for v0.2.0"
      end

      def describe_index(index:)
        raise NotImplementedError, "Qdrant provider is planned for v0.2.0"
      end

      def stats(index:, namespace: nil)
        raise NotImplementedError, "Qdrant provider is planned for v0.2.0"
      end
    end
  end
end
data/lib/vectra/providers/weaviate.rb
@@ -0,0 +1,48 @@
# frozen_string_literal: true

module Vectra
  module Providers
    # Weaviate vector database provider (planned for v0.3.0)
    #
    # @note This provider is not yet implemented
    #
    class Weaviate < Base
      def provider_name
        :weaviate
      end

      def upsert(index:, vectors:, namespace: nil)
        raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
      end

      def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
                include_values: false, include_metadata: true)
        raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
      end

      def fetch(index:, ids:, namespace: nil)
        raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
      end

      def update(index:, id:, metadata:, namespace: nil)
        raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
      end

      def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
        raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
      end

      def list_indexes
        raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
      end

      def describe_index(index:)
        raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
      end

      def stats(index:, namespace: nil)
        raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
      end
    end
  end
end