vectra-client 0.3.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -4
- data/README.md +35 -2
- data/docs/examples/real-world.md +62 -22
- data/docs/guides/getting-started.md +70 -2
- data/docs/providers/index.md +12 -0
- data/docs/providers/memory.md +145 -0
- data/docs/providers/weaviate.md +84 -25
- data/examples/README.md +12 -0
- data/lib/vectra/batch.rb +63 -8
- data/lib/vectra/client.rb +188 -1
- data/lib/vectra/configuration.rb +4 -2
- data/lib/vectra/credential_rotation.rb +2 -3
- data/lib/vectra/providers/base.rb +19 -1
- data/lib/vectra/providers/memory.rb +298 -0
- data/lib/vectra/providers/qdrant.rb +31 -0
- data/lib/vectra/vector.rb +56 -0
- data/lib/vectra/version.rb +1 -1
- data/lib/vectra.rb +20 -0
- metadata +3 -1
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Vectra
|
|
4
|
+
module Providers
|
|
5
|
+
# In-memory vector database provider for testing
|
|
6
|
+
#
|
|
7
|
+
# This provider stores all vectors in memory using Ruby hashes.
|
|
8
|
+
# Perfect for testing without external dependencies.
|
|
9
|
+
#
|
|
10
|
+
# @example Usage in tests
|
|
11
|
+
# Vectra.configure do |config|
|
|
12
|
+
# config.provider = :memory if Rails.env.test?
|
|
13
|
+
# end
|
|
14
|
+
#
|
|
15
|
+
# client = Vectra::Client.new
|
|
16
|
+
# client.upsert(index: 'test', vectors: [...])
|
|
17
|
+
#
|
|
18
|
+
class Memory < Base
|
|
19
|
+
def initialize(config)
  super
  # Per-index settings (dimension, metric), keyed by index name.
  @index_configs = {}
  # Nested store addressed as @storage[index][namespace][id] = Vector.
  # The two outer levels create their child hash on first access.
  @storage = Hash.new do |indexes, index_name|
    indexes[index_name] = Hash.new { |namespaces, ns| namespaces[ns] = {} }
  end
end
|
|
25
|
+
|
|
26
|
+
# Identifier of this provider.
#
# @see Base#provider_name
# @return [Symbol] always :memory
def provider_name
  :memory
end
|
|
30
|
+
|
|
31
|
+
# Stores (or overwrites) vectors in the given index and namespace.
#
# @see Base#upsert
# @param index [String] index name; its config is created on the first upsert
# @param vectors [Array] input passed through normalize_vectors (defined
#   outside this file); each normalized entry is read via [:id], [:values],
#   [:metadata] and [:sparse_values]
# @param namespace [String, nil] target namespace; nil maps to ""
# @return [Hash] { upserted_count: Integer }
def upsert(index:, vectors:, namespace: nil)
  normalized = normalize_vectors(vectors)
  ns = namespace || ""

  normalized.each do |vec|
    # Infer dimension from the first vector written to this index
    @index_configs[index] ||= { dimension: vec[:values].length, metric: "cosine" }

    @storage[index][ns][vec[:id]] = Vector.new(
      id: vec[:id],
      values: vec[:values],
      metadata: vec[:metadata] || {},
      # Keep sparse values so that update/query can hand them back later
      sparse_values: vec[:sparse_values]
    )
  end

  log_debug("Upserted #{normalized.size} vectors to #{index}")
  { upserted_count: normalized.size }
end
|
|
57
|
+
|
|
58
|
+
# Scores the stored vectors of a namespace against +vector+ and returns the
# best matches.
#
# @see Base#query
# @param index [String] index name
# @param vector [Array<Numeric>] query vector
# @param top_k [Integer] maximum number of matches to return
# @param namespace [String, nil] namespace to search; nil maps to ""
# @param filter [Hash, nil] optional metadata filter (see matches_filter?)
# @param include_values [Boolean] include vector values in each match
# @param include_metadata [Boolean] include metadata in each match
# @return [QueryResult] built via QueryResult.from_response
def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
          include_values: false, include_metadata: true)
  ns = namespace || ""
  # Hash#fetch bypasses the auto-vivifying default procs of @storage, so
  # querying an unknown index/namespace no longer creates empty entries.
  candidates = @storage.fetch(index, {}).fetch(ns, {}).values

  # Apply metadata filter
  candidates = candidates.select { |v| matches_filter?(v, filter) } if filter

  # Calculate similarity scores
  matches = candidates.map do |vec|
    score = calculate_similarity(vector, vec.values, index)
    build_match(vec, score, include_values, include_metadata)
  end

  # Highest score first, then keep the best top_k
  matches = matches.sort_by { |m| -m[:score] }.first(top_k)

  log_debug("Query returned #{matches.size} results")
  QueryResult.from_response(matches: matches, namespace: namespace)
end
|
|
82
|
+
|
|
83
|
+
# Looks up vectors by id within one namespace.
#
# @see Base#fetch
# @param index [String] index name
# @param ids [Array] ids to look up
# @param namespace [String, nil] namespace to read; nil maps to ""
# @return [Hash] id => stored vector, containing only the ids that exist
def fetch(index:, ids:, namespace: nil)
  ns = namespace || ""
  # Hash#fetch avoids the auto-vivifying default procs of @storage, so a
  # lookup in an unknown index/namespace leaves the store untouched.
  stored = @storage.fetch(index, {}).fetch(ns, {})

  ids.each_with_object({}) do |id, found|
    vec = stored[id]
    found[id] = vec if vec
  end
end
|
|
95
|
+
|
|
96
|
+
# Merges new metadata into an existing vector.
#
# @see Base#update
# @param index [String] index name
# @param id [String] id of the vector to update
# @param metadata [Hash, nil] metadata to merge; keys are stringified, and
#   nil is treated as "no changes"
# @param namespace [String, nil] namespace of the vector; nil maps to ""
# @return [Hash] { updated: true }
# @raise [NotFoundError] when the vector does not exist
def update(index:, id:, metadata:, namespace: nil)
  ns = namespace || ""
  # Hash#fetch avoids creating empty index/namespace entries when the
  # lookup misses and NotFoundError is raised.
  vec = @storage.fetch(index, {}).fetch(ns, {})[id]

  raise NotFoundError, "Vector '#{id}' not found in index '#{index}'" unless vec

  # Merge metadata
  new_metadata = (vec.metadata || {}).merge((metadata || {}).transform_keys(&:to_s))
  # A new Vector is built and stored in place of the existing one
  @storage[index][ns][id] = Vector.new(
    id: vec.id,
    values: vec.values,
    metadata: new_metadata,
    sparse_values: vec.sparse_values
  )

  log_debug("Updated vector #{id}")
  { updated: true }
end
|
|
116
|
+
|
|
117
|
+
# Removes vectors by id, by metadata filter, by namespace, or all at once.
#
# Precedence of the selectors: delete_all, then ids, then filter, then
# namespace alone.
# NOTE(review): delete_all clears every namespace of the index even when a
# namespace is given - confirm this is the intended contract.
#
# @see Base#delete
# @param index [String] index name
# @param ids [Array, Object, nil] id or ids to delete
# @param namespace [String, nil] namespace to operate on; nil maps to ""
# @param filter [Hash, nil] metadata filter (see matches_filter?)
# @param delete_all [Boolean] clear the whole index
# @return [Hash] { deleted: true }
# @raise [ValidationError] when no selector is given
def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
  ns = namespace || ""
  # Hash#fetch avoids the auto-vivifying default procs of @storage, so
  # deleting from an unknown index/namespace does not create it.
  namespaces = @storage.fetch(index, {})

  if delete_all
    namespaces.clear
  elsif ids
    vectors = namespaces.fetch(ns, {})
    # Array() also accepts a single id
    Array(ids).each { |id| vectors.delete(id) }
  elsif filter
    # Delete vectors matching filter
    namespaces.fetch(ns, {}).delete_if { |_id, vec| matches_filter?(vec, filter) }
  elsif namespace
    namespaces.delete(ns)
  else
    raise ValidationError, "Must specify ids, filter, namespace, or delete_all"
  end

  log_debug("Deleted vectors from #{index}")
  { deleted: true }
end
|
|
137
|
+
|
|
138
|
+
# Describes every index that has a stored configuration.
#
# @see Base#list_indexes
# @return [Array<Hash>] one describe_index result per known index
def list_indexes
  @index_configs.each_key.map { |index_name| describe_index(index: index_name) }
end
|
|
142
|
+
|
|
143
|
+
# Returns the stored settings of one index.
#
# @see Base#describe_index
# @param index [String] index name
# @return [Hash] name, dimension, metric and a constant "ready" status
# @raise [NotFoundError] when the index has no stored configuration
def describe_index(index:)
  settings = @index_configs[index] || raise(NotFoundError, "Index '#{index}' not found")

  { name: index, dimension: settings[:dimension], metric: settings[:metric], status: "ready" }
end
|
|
155
|
+
|
|
156
|
+
# Reports vector counts for an index, overall or for a single namespace.
#
# @see Base#stats
# @param index [String] index name
# @param namespace [String, nil] restrict the report to this namespace
# @return [Hash] total_vector_count, dimension and per-namespace counts
# @raise [NotFoundError] when the index has no stored configuration
def stats(index:, namespace: nil)
  config = @index_configs[index]
  raise NotFoundError, "Index '#{index}' not found" unless config

  # Hash#fetch avoids the auto-vivifying default procs of @storage, so asking
  # for an unknown namespace does not create it as a side effect.
  stored = @storage.fetch(index, {})

  namespaces =
    if namespace
      { namespace => { vector_count: stored.fetch(namespace, {}).size } }
    else
      # Count all namespaces
      stored.each_with_object({}) do |(ns, vectors), counts|
        counts[ns] = { vector_count: vectors.size }
      end
    end

  {
    total_vector_count: namespaces.each_value.sum { |entry| entry[:vector_count] },
    dimension: config[:dimension],
    namespaces: namespaces
  }
end
|
|
180
|
+
|
|
181
|
+
# Wipe every stored vector and every index configuration (handy between tests)
#
# @return [void]
def clear!
  @storage.clear
  @index_configs.tap(&:clear)
end
|
|
188
|
+
|
|
189
|
+
private
|
|
190
|
+
|
|
191
|
+
# Score a candidate against the query using the metric stored for the index.
# The metric is read from @index_configs[index][:metric] and falls back to
# cosine when the index or the metric is missing or unrecognised.
def calculate_similarity(query_vector, candidate_vector, index)
  metric = (@index_configs[index] || {})[:metric] || "cosine"
  metric_name = metric.to_s.downcase

  if %w[euclidean l2].include?(metric_name)
    # Convert distance to similarity (1 / (1 + distance))
    1.0 / (1.0 + euclidean_distance(query_vector, candidate_vector))
  elsif %w[dot_product inner_product dot].include?(metric_name)
    dot_product(query_vector, candidate_vector)
  else
    # cosine (default)
    cosine_similarity(query_vector, candidate_vector)
  end
end
|
|
207
|
+
|
|
208
|
+
# Cosine of the angle between two equally sized vectors.
# Returns 0.0 when either vector has zero magnitude.
def cosine_similarity(vec_a, vec_b)
  raise ArgumentError, "Vectors must have same dimension" unless vec_a.length == vec_b.length

  inner = vec_a.zip(vec_b).sum { |x, y| x * y }
  norm_a = Math.sqrt(vec_a.sum { |x| x**2 })
  norm_b = Math.sqrt(vec_b.sum { |y| y**2 })

  if norm_a.zero? || norm_b.zero?
    0.0
  else
    inner / (norm_a * norm_b)
  end
end
|
|
220
|
+
|
|
221
|
+
# Straight-line (L2) distance between two equally sized vectors.
def euclidean_distance(vec_a, vec_b)
  raise ArgumentError, "Vectors must have same dimension" unless vec_a.length == vec_b.length

  squared_sum = vec_a.zip(vec_b).sum { |x, y| (x - y)**2 }
  Math.sqrt(squared_sum)
end
|
|
227
|
+
|
|
228
|
+
# Sum of the element-wise products of two equally sized vectors.
def dot_product(vec_a, vec_b)
  raise ArgumentError, "Vectors must have same dimension" unless vec_a.length == vec_b.length

  pairs = vec_a.zip(vec_b)
  pairs.sum { |x, y| x * y }
end
|
|
234
|
+
|
|
235
|
+
# True when every filter entry is satisfied by the vector's metadata.
# Filter keys are stringified before the metadata lookup.
def matches_filter?(vector, filter)
  filter.none? do |field, expected|
    !matches_filter_value?(vector.metadata[field.to_s], expected)
  end
end
|
|
242
|
+
|
|
243
|
+
# Check if a metadata value satisfies one filter criterion.
#
# +expected+ may be:
# - a Hash of operators ("$eq", "$ne", "$gt", "$gte", "$lt", "$lte", "$in",
#   "$nin"); every operator must hold. A key that is not a known operator
#   falls back to comparing +actual+ with the whole hash.
# - an Array: +actual+ must be one of its elements
# - anything else: plain equality
#
# The ordering operators only match when both sides are Numeric; "$in" and
# "$nin" only match when their operand is an Array.
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
def matches_filter_value?(actual, expected)
  case expected
  when Hash
    # Support operators like { "$gt" => 5, "$lt" => 10 }
    expected.all? do |op, val|
      case op.to_s
      when "$eq"
        actual == val
      when "$ne"
        actual != val
      when "$gt"
        actual.is_a?(Numeric) && val.is_a?(Numeric) && actual > val
      when "$gte"
        actual.is_a?(Numeric) && val.is_a?(Numeric) && actual >= val
      when "$lt"
        actual.is_a?(Numeric) && val.is_a?(Numeric) && actual < val
      when "$lte"
        actual.is_a?(Numeric) && val.is_a?(Numeric) && actual <= val
      when "$in"
        val.is_a?(Array) && val.include?(actual)
      when "$nin"
        # Counterpart of "$in"; previously fell through to the literal
        # comparison below and therefore never matched.
        val.is_a?(Array) && !val.include?(actual)
      else
        actual == expected
      end
    end
  when Array
    expected.include?(actual)
  else
    actual == expected
  end
end
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
276
|
+
|
|
277
|
+
# Assemble the result hash for one scored vector.
# :id and :score are always present; :values and :metadata follow the
# include_* flags; :sparse_values is added whenever vector.sparse? is truthy.
def build_match(vector, score, include_values, include_metadata)
  { id: vector.id, score: score }.tap do |result|
    result[:values] = vector.values if include_values
    result[:metadata] = vector.metadata if include_metadata
    result[:sparse_values] = vector.sparse_values if vector.sparse?
  end
end
|
|
288
|
+
|
|
289
|
+
# Overrides validate_config!: the in-memory provider needs neither a host
# nor an API key, so there is nothing to check.
#
# @return [true]
# rubocop:disable Naming/PredicateMethod
def validate_config!
  true
end
# rubocop:enable Naming/PredicateMethod
|
|
296
|
+
end
|
|
297
|
+
end
|
|
298
|
+
end
|
|
@@ -299,6 +299,37 @@ module Vectra
|
|
|
299
299
|
handle_retriable_response(e)
|
|
300
300
|
end
|
|
301
301
|
|
|
302
|
+
# Extract a human-readable error message from a Qdrant response body.
#
# @param body [Hash, String, Object] parsed response body
# @return [String] the message, optionally followed by " (details)" and
#   " [Fields: ...]"; the String body itself; or "Unknown error" for any
#   other input
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
def extract_error_message(body)
  case body
  when Hash
    # Qdrant wraps errors in a "status" hash. When "status" is not a Hash
    # (e.g. a plain string), read from the top-level body instead: indexing
    # a String with "error" would do a substring lookup, not a key lookup.
    status = body["status"].is_a?(Hash) ? body["status"] : body
    # to_s guards the String operations below against non-String messages
    msg = (status["error"] || body["message"] || body["error_message"] || body).to_s

    # Add details
    details = status["details"] || status["error_details"]
    if details
      details_str = details.is_a?(Hash) ? details.to_json : details.to_s
      msg += " (#{details_str})" unless msg.include?(details_str)
    end

    # Add field-specific errors (entries without a usable label are skipped)
    if status["errors"].is_a?(Array)
      field_errors = status["errors"]
                     .map { |e| e.is_a?(Hash) ? e["field"] || e["message"] : e }
                     .compact
                     .join(", ")
      msg += " [Fields: #{field_errors}]" unless field_errors.empty? || msg.include?(field_errors)
    end

    msg
  when String
    body
  else
    "Unknown error"
  end
end
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
332
|
+
|
|
302
333
|
def auth_headers
|
|
303
334
|
headers = {}
|
|
304
335
|
headers["api-key"] = config.api_key if config.api_key && !config.api_key.empty?
|
data/lib/vectra/vector.rb
CHANGED
|
@@ -106,6 +106,62 @@ module Vectra
|
|
|
106
106
|
Math.sqrt(values.zip(other_values).sum { |a, b| (a - b)**2 })
|
|
107
107
|
end
|
|
108
108
|
|
|
109
|
+
# Normalize the vector in-place (replaces the stored values)
#
# A zero vector (norm of 0) cannot be normalized and is left unchanged.
# Division is always floating-point, so Integer components are not
# truncated (e.g. [1, 2, 3] with :l1 becomes [1/6.0, 2/6.0, 0.5]).
#
# @param type [Symbol] normalization type: :l2 (default) or :l1
# @return [Vector] self (for method chaining)
# @raise [ArgumentError] when type is neither :l2 nor :l1
#
# @example L2 normalization (unit vector)
#   vector = Vectra::Vector.new(id: 'v1', values: [3.0, 4.0])
#   vector.normalize!
#   vector.values # => [0.6, 0.8] (magnitude = 1.0)
#
# @example L1 normalization (sum = 1)
#   vector.normalize!(type: :l1)
#   vector.values.sum(&:abs) # => 1.0
def normalize!(type: :l2)
  norm =
    case type
    when :l2
      Math.sqrt(values.sum { |v| v**2 })
    when :l1
      values.sum(&:abs)
    else
      raise ArgumentError, "Unknown normalization type: #{type}. Use :l2 or :l1"
    end

  # Zero vector - cannot normalize, return as-is
  return self if norm.zero?

  # The L1 norm of Integer components is an Integer; convert it so that
  # the division below is never integer division.
  divisor = norm.to_f
  @values = values.map { |v| v / divisor }
  self
end
|
|
145
|
+
|
|
146
|
+
# Normalize a plain array of values and return the result as an array.
# A throwaway Vector (id "temp") is built from a copy of the input and
# normalized with normalize!.
#
# @param vector [Array<Float>] vector values to normalize
# @param type [Symbol] normalization type: :l2 (default) or :l1
# @return [Array<Float>] normalized vector values
#
# @example Normalize OpenAI embedding
#   embedding = openai_response['data'][0]['embedding']
#   normalized = Vectra::Vector.normalize(embedding)
#   client.upsert(vectors: [{ id: '1', values: normalized }])
#
# @example L1 normalization
#   normalized = Vectra::Vector.normalize([1.0, 2.0, 3.0], type: :l1)
def self.normalize(vector, type: :l2)
  new(id: "temp", values: vector.dup)
    .tap { |scratch| scratch.normalize!(type: type) }
    .values
end
|
|
164
|
+
|
|
109
165
|
# Check equality with another vector
|
|
110
166
|
#
|
|
111
167
|
# @param other [Vector] the other vector
|
data/lib/vectra/version.rb
CHANGED
data/lib/vectra.rb
CHANGED
|
@@ -23,6 +23,7 @@ require_relative "vectra/providers/pinecone"
|
|
|
23
23
|
require_relative "vectra/providers/qdrant"
|
|
24
24
|
require_relative "vectra/providers/weaviate"
|
|
25
25
|
require_relative "vectra/providers/pgvector"
|
|
26
|
+
require_relative "vectra/providers/memory"
|
|
26
27
|
require_relative "vectra/client"
|
|
27
28
|
|
|
28
29
|
# Vectra - Unified Ruby client for vector databases
|
|
@@ -157,5 +158,24 @@ module Vectra
|
|
|
157
158
|
**options
|
|
158
159
|
)
|
|
159
160
|
end
|
|
161
|
+
|
|
162
|
+
# Shortcut that builds a Client backed by the in-memory provider (for testing)
#
# @param options [Hash] additional options, forwarded to Client.new
# @return [Client]
#
# @example In test environment
#   Vectra.configure do |config|
#     config.provider = :memory if Rails.env.test?
#   end
#
#   client = Vectra::Client.new
#
def memory(**options)
  Client.new(provider: :memory, **options)
end
|
|
160
180
|
end
|
|
161
181
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: vectra-client
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.4
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Mijo Kristo
|
|
@@ -269,6 +269,7 @@ files:
|
|
|
269
269
|
- docs/guides/security.md
|
|
270
270
|
- docs/index.md
|
|
271
271
|
- docs/providers/index.md
|
|
272
|
+
- docs/providers/memory.md
|
|
272
273
|
- docs/providers/pgvector.md
|
|
273
274
|
- docs/providers/pinecone.md
|
|
274
275
|
- docs/providers/qdrant.md
|
|
@@ -303,6 +304,7 @@ files:
|
|
|
303
304
|
- lib/vectra/logging.rb
|
|
304
305
|
- lib/vectra/pool.rb
|
|
305
306
|
- lib/vectra/providers/base.rb
|
|
307
|
+
- lib/vectra/providers/memory.rb
|
|
306
308
|
- lib/vectra/providers/pgvector.rb
|
|
307
309
|
- lib/vectra/providers/pgvector/connection.rb
|
|
308
310
|
- lib/vectra/providers/pgvector/index_management.rb
|