vectra-client 0.3.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +10 -0
- data/CHANGELOG.md +26 -8
- data/README.md +35 -2
- data/docs/_layouts/default.html +1 -0
- data/docs/_layouts/home.html +44 -3
- data/docs/_layouts/page.html +42 -9
- data/docs/assets/style.css +226 -1
- data/docs/examples/index.md +9 -0
- data/docs/examples/real-world.md +576 -0
- data/docs/grafana_final.png +0 -0
- data/docs/guides/getting-started.md +70 -2
- data/docs/guides/monitoring.md +50 -0
- data/docs/providers/index.md +12 -0
- data/docs/providers/memory.md +145 -0
- data/docs/providers/weaviate.md +84 -25
- data/examples/GRAFANA_QUICKSTART.md +158 -0
- data/examples/README.md +332 -0
- data/examples/comprehensive_demo.rb +1116 -0
- data/examples/grafana-dashboard.json +878 -0
- data/examples/grafana-setup.md +340 -0
- data/examples/prometheus-exporter.rb +229 -0
- data/lib/vectra/batch.rb +63 -8
- data/lib/vectra/client.rb +188 -1
- data/lib/vectra/configuration.rb +4 -2
- data/lib/vectra/credential_rotation.rb +2 -3
- data/lib/vectra/providers/base.rb +19 -1
- data/lib/vectra/providers/memory.rb +298 -0
- data/lib/vectra/providers/qdrant.rb +31 -0
- data/lib/vectra/providers/weaviate.rb +454 -10
- data/lib/vectra/vector.rb +56 -0
- data/lib/vectra/version.rb +1 -1
- data/lib/vectra.rb +20 -0
- data/vectra.gemspec +56 -0
- metadata +12 -1
data/lib/vectra/client.rb
CHANGED
|
@@ -98,8 +98,26 @@ module Vectra
|
|
|
98
98
|
# filter: { category: 'programming' }
|
|
99
99
|
# )
|
|
100
100
|
#
|
|
101
|
-
|
|
101
|
+
# @example Chainable query builder
|
|
102
|
+
# results = client.query("my-index")
|
|
103
|
+
# .vector([0.1, 0.2, 0.3])
|
|
104
|
+
# .top_k(10)
|
|
105
|
+
# .filter(category: "programming")
|
|
106
|
+
# .with_metadata
|
|
107
|
+
# .execute
|
|
108
|
+
#
|
|
109
|
+
def query(index_arg = nil, index: nil, vector: nil, top_k: 10, namespace: nil, filter: nil,
|
|
102
110
|
include_values: false, include_metadata: true)
|
|
111
|
+
# If called with a positional index string only, return a query builder:
|
|
112
|
+
# client.query("docs").vector(vec).top_k(10).filter(...).execute
|
|
113
|
+
if index_arg && index.nil? && vector.nil? && !block_given?
|
|
114
|
+
return QueryBuilder.new(self, index_arg)
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
# Handle positional argument for index in non-builder case
|
|
118
|
+
index = index_arg if index_arg && index.nil?
|
|
119
|
+
|
|
120
|
+
# Backwards-compatible path: perform query immediately
|
|
103
121
|
validate_index!(index)
|
|
104
122
|
validate_query_vector!(vector)
|
|
105
123
|
|
|
@@ -276,6 +294,137 @@ module Vectra
|
|
|
276
294
|
provider.provider_name
|
|
277
295
|
end
|
|
278
296
|
|
|
297
|
+
# Quick health check - tests if provider connection is healthy
|
|
298
|
+
#
|
|
299
|
+
# @note Takes no arguments; a per-call timeout is not accepted by this method.
|
|
300
|
+
# @return [Boolean] true if connection is healthy
|
|
301
|
+
#
|
|
302
|
+
# @example
|
|
303
|
+
# if client.healthy?
|
|
304
|
+
# client.upsert(...)
|
|
305
|
+
# else
|
|
306
|
+
# handle_unhealthy_connection
|
|
307
|
+
# end
|
|
308
|
+
def healthy?
|
|
309
|
+
start = Time.now
|
|
310
|
+
provider.list_indexes
|
|
311
|
+
true
|
|
312
|
+
rescue StandardError => e
|
|
313
|
+
log_error("Health check failed", e)
|
|
314
|
+
false
|
|
315
|
+
ensure
|
|
316
|
+
duration = ((Time.now - start) * 1000).round(2) if defined?(start)
|
|
317
|
+
log_debug("Health check completed in #{duration}ms") if duration
|
|
318
|
+
end
|
|
319
|
+
|
|
320
|
+
# Ping provider and get connection health status with latency
|
|
321
|
+
#
|
|
322
|
+
# @note Takes no arguments; a per-call timeout is not accepted by this method.
|
|
323
|
+
# @return [Hash] health status with :healthy, :provider, :latency_ms
|
|
324
|
+
#
|
|
325
|
+
# @example
|
|
326
|
+
# status = client.ping
|
|
327
|
+
# puts "Provider: #{status[:provider]}, Healthy: #{status[:healthy]}, Latency: #{status[:latency_ms]}ms"
|
|
328
|
+
def ping
|
|
329
|
+
start = Time.now
|
|
330
|
+
healthy = true
|
|
331
|
+
error_info = nil
|
|
332
|
+
|
|
333
|
+
begin
|
|
334
|
+
provider.list_indexes
|
|
335
|
+
rescue StandardError => e
|
|
336
|
+
healthy = false
|
|
337
|
+
error_info = { error: e.class.name, error_message: e.message }
|
|
338
|
+
log_error("Health check failed", e)
|
|
339
|
+
end
|
|
340
|
+
|
|
341
|
+
duration = ((Time.now - start) * 1000).round(2)
|
|
342
|
+
|
|
343
|
+
result = {
|
|
344
|
+
healthy: healthy,
|
|
345
|
+
provider: provider_name,
|
|
346
|
+
latency_ms: duration
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
result.merge!(error_info) if error_info
|
|
350
|
+
result
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
# Chainable query builder
|
|
354
|
+
#
|
|
355
|
+
# @api public
|
|
356
|
+
# @example
|
|
357
|
+
# results = client.query("docs")
|
|
358
|
+
# .vector(embedding)
|
|
359
|
+
# .top_k(20)
|
|
360
|
+
# .namespace("prod")
|
|
361
|
+
# .filter(category: "ruby")
|
|
362
|
+
# .with_metadata
|
|
363
|
+
# .execute
|
|
364
|
+
#
|
|
365
|
+
class QueryBuilder
|
|
366
|
+
def initialize(client, index)
|
|
367
|
+
@client = client
|
|
368
|
+
@index = index
|
|
369
|
+
@vector = nil
|
|
370
|
+
@top_k = 10
|
|
371
|
+
@namespace = nil
|
|
372
|
+
@filter = nil
|
|
373
|
+
@include_values = false
|
|
374
|
+
@include_metadata = true
|
|
375
|
+
end
|
|
376
|
+
|
|
377
|
+
attr_reader :index
|
|
378
|
+
|
|
379
|
+
def vector(value)
|
|
380
|
+
@vector = value
|
|
381
|
+
self
|
|
382
|
+
end
|
|
383
|
+
|
|
384
|
+
def top_k(value)
|
|
385
|
+
@top_k = value.to_i
|
|
386
|
+
self
|
|
387
|
+
end
|
|
388
|
+
|
|
389
|
+
def namespace(value)
|
|
390
|
+
@namespace = value
|
|
391
|
+
self
|
|
392
|
+
end
|
|
393
|
+
|
|
394
|
+
def filter(value = nil, **kwargs)
|
|
395
|
+
@filter = value || kwargs
|
|
396
|
+
self
|
|
397
|
+
end
|
|
398
|
+
|
|
399
|
+
def with_values
|
|
400
|
+
@include_values = true
|
|
401
|
+
self
|
|
402
|
+
end
|
|
403
|
+
|
|
404
|
+
def with_metadata
|
|
405
|
+
@include_metadata = true
|
|
406
|
+
self
|
|
407
|
+
end
|
|
408
|
+
|
|
409
|
+
def without_metadata
|
|
410
|
+
@include_metadata = false
|
|
411
|
+
self
|
|
412
|
+
end
|
|
413
|
+
|
|
414
|
+
# Execute the built query and return a QueryResult
|
|
415
|
+
def execute
|
|
416
|
+
@client.query(
|
|
417
|
+
index: @index,
|
|
418
|
+
vector: @vector,
|
|
419
|
+
top_k: @top_k,
|
|
420
|
+
namespace: @namespace,
|
|
421
|
+
filter: @filter,
|
|
422
|
+
include_values: @include_values,
|
|
423
|
+
include_metadata: @include_metadata
|
|
424
|
+
)
|
|
425
|
+
end
|
|
426
|
+
end
|
|
427
|
+
|
|
279
428
|
private
|
|
280
429
|
|
|
281
430
|
def build_config(provider_name, api_key, environment, host, options)
|
|
@@ -306,6 +455,8 @@ module Vectra
|
|
|
306
455
|
Providers::Weaviate.new(config)
|
|
307
456
|
when :pgvector
|
|
308
457
|
Providers::Pgvector.new(config)
|
|
458
|
+
when :memory
|
|
459
|
+
Providers::Memory.new(config)
|
|
309
460
|
else
|
|
310
461
|
raise UnsupportedProviderError, "Provider '#{config.provider}' is not supported"
|
|
311
462
|
end
|
|
@@ -317,11 +468,32 @@ module Vectra
|
|
|
317
468
|
raise ValidationError, "Index name cannot be empty" if index.empty?
|
|
318
469
|
end
|
|
319
470
|
|
|
471
|
+
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
320
472
|
def validate_vectors!(vectors)
|
|
321
473
|
raise ValidationError, "Vectors cannot be nil" if vectors.nil?
|
|
322
474
|
raise ValidationError, "Vectors must be an array" unless vectors.is_a?(Array)
|
|
323
475
|
raise ValidationError, "Vectors cannot be empty" if vectors.empty?
|
|
476
|
+
|
|
477
|
+
# Check dimension consistency
|
|
478
|
+
first_vector = vectors.first
|
|
479
|
+
first_values = first_vector.is_a?(Vector) ? first_vector.values : first_vector[:values]
|
|
480
|
+
first_dim = first_values&.size
|
|
481
|
+
|
|
482
|
+
return unless first_dim
|
|
483
|
+
|
|
484
|
+
vectors.each_with_index do |vec, index|
|
|
485
|
+
values = vec.is_a?(Vector) ? vec.values : vec[:values]
|
|
486
|
+
dim = values&.size
|
|
487
|
+
|
|
488
|
+
next unless dim && dim != first_dim
|
|
489
|
+
|
|
490
|
+
raise ValidationError,
|
|
491
|
+
"Inconsistent vector dimensions at index #{index}: " \
|
|
492
|
+
"expected #{first_dim}, got #{dim}. " \
|
|
493
|
+
"All vectors in a batch must have the same dimension."
|
|
494
|
+
end
|
|
324
495
|
end
|
|
496
|
+
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
325
497
|
|
|
326
498
|
def validate_query_vector!(vector)
|
|
327
499
|
raise ValidationError, "Query vector cannot be nil" if vector.nil?
|
|
@@ -340,5 +512,20 @@ module Vectra
|
|
|
340
512
|
raise ValidationError, "ID must be a string" unless id.is_a?(String)
|
|
341
513
|
raise ValidationError, "ID cannot be empty" if id.empty?
|
|
342
514
|
end
|
|
515
|
+
|
|
516
|
+
def log_error(message, error = nil)
|
|
517
|
+
return unless config.logger
|
|
518
|
+
|
|
519
|
+
config.logger.error("[Vectra] #{message}")
|
|
520
|
+
config.logger.error("[Vectra] #{error.class}: #{error.message}") if error
|
|
521
|
+
config.logger.error("[Vectra] #{error.backtrace&.first(3)&.join("\n")}") if error&.backtrace
|
|
522
|
+
end
|
|
523
|
+
|
|
524
|
+
def log_debug(message, data = nil)
|
|
525
|
+
return unless config.logger
|
|
526
|
+
|
|
527
|
+
config.logger.debug("[Vectra] #{message}")
|
|
528
|
+
config.logger.debug("[Vectra] #{data.inspect}") if data
|
|
529
|
+
end
|
|
343
530
|
end
|
|
344
531
|
end
|
data/lib/vectra/configuration.rb
CHANGED
|
@@ -11,7 +11,7 @@ module Vectra
|
|
|
11
11
|
# end
|
|
12
12
|
#
|
|
13
13
|
class Configuration
|
|
14
|
-
SUPPORTED_PROVIDERS = %i[pinecone qdrant weaviate pgvector].freeze
|
|
14
|
+
SUPPORTED_PROVIDERS = %i[pinecone qdrant weaviate pgvector memory].freeze
|
|
15
15
|
|
|
16
16
|
attr_accessor :api_key, :environment, :host, :timeout, :open_timeout,
|
|
17
17
|
:max_retries, :retry_delay, :logger, :pool_size, :pool_timeout,
|
|
@@ -117,7 +117,7 @@ module Vectra
|
|
|
117
117
|
|
|
118
118
|
# Providers that don't require API key (local instances)
|
|
119
119
|
def api_key_optional_provider?
|
|
120
|
-
%i[qdrant pgvector].include?(provider)
|
|
120
|
+
%i[qdrant pgvector memory].include?(provider)
|
|
121
121
|
end
|
|
122
122
|
|
|
123
123
|
def validate_provider_specific!
|
|
@@ -130,6 +130,8 @@ module Vectra
|
|
|
130
130
|
validate_weaviate!
|
|
131
131
|
when :pgvector
|
|
132
132
|
validate_pgvector!
|
|
133
|
+
when :memory
|
|
134
|
+
# Memory provider has no special requirements
|
|
133
135
|
end
|
|
134
136
|
end
|
|
135
137
|
|
|
data/lib/vectra/credential_rotation.rb
CHANGED
|
@@ -37,13 +37,12 @@ module Vectra
|
|
|
37
37
|
|
|
38
38
|
# Test if secondary key is valid
|
|
39
39
|
#
|
|
40
|
-
# @param timeout [Float] Test timeout in seconds
|
|
41
40
|
# @return [Boolean] true if secondary key works
|
|
42
|
-
def test_secondary
|
|
41
|
+
def test_secondary
|
|
43
42
|
return false if secondary_key.nil? || secondary_key.empty?
|
|
44
43
|
|
|
45
44
|
client = build_test_client(secondary_key)
|
|
46
|
-
client.healthy?
|
|
45
|
+
client.healthy?
|
|
47
46
|
rescue StandardError
|
|
48
47
|
false
|
|
49
48
|
end
|
|
data/lib/vectra/providers/base.rb
CHANGED
|
@@ -232,16 +232,34 @@ module Vectra
|
|
|
232
232
|
#
|
|
233
233
|
# @param body [Hash, String, nil] response body
|
|
234
234
|
# @return [String]
|
|
235
|
+
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
235
236
|
def extract_error_message(body)
|
|
236
237
|
case body
|
|
237
238
|
when Hash
|
|
238
|
-
|
|
239
|
+
# Primary error message
|
|
240
|
+
msg = body["message"] || body["error"] || body["error_message"] || body.to_s
|
|
241
|
+
|
|
242
|
+
# Add context from details
|
|
243
|
+
details = body["details"] || body["error_details"] || body["detail"]
|
|
244
|
+
if details
|
|
245
|
+
details_str = details.is_a?(Hash) ? details.to_json : details.to_s
|
|
246
|
+
msg += " (#{details_str})" unless msg.include?(details_str)
|
|
247
|
+
end
|
|
248
|
+
|
|
249
|
+
# Add field-specific errors if available
|
|
250
|
+
if body["errors"].is_a?(Array)
|
|
251
|
+
field_errors = body["errors"].map { |e| e.is_a?(Hash) ? e["field"] || e["message"] : e }.join(", ")
|
|
252
|
+
msg += " [Fields: #{field_errors}]" if field_errors && !msg.include?(field_errors)
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
msg
|
|
239
256
|
when String
|
|
240
257
|
body
|
|
241
258
|
else
|
|
242
259
|
"Unknown error"
|
|
243
260
|
end
|
|
244
261
|
end
|
|
262
|
+
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
245
263
|
|
|
246
264
|
# Log debug information
|
|
247
265
|
#
|
|
data/lib/vectra/providers/memory.rb
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Vectra
|
|
4
|
+
module Providers
|
|
5
|
+
# In-memory vector database provider for testing
|
|
6
|
+
#
|
|
7
|
+
# This provider stores all vectors in memory using Ruby hashes.
|
|
8
|
+
# Perfect for testing without external dependencies.
|
|
9
|
+
#
|
|
10
|
+
# @example Usage in tests
|
|
11
|
+
# Vectra.configure do |config|
|
|
12
|
+
# config.provider = :memory if Rails.env.test?
|
|
13
|
+
# end
|
|
14
|
+
#
|
|
15
|
+
# client = Vectra::Client.new
|
|
16
|
+
# client.upsert(index: 'test', vectors: [...])
|
|
17
|
+
#
|
|
18
|
+
class Memory < Base
|
|
19
|
+
def initialize(config)
|
|
20
|
+
super
|
|
21
|
+
# Storage structure: @storage[index][namespace][id] = Vector
|
|
22
|
+
@storage = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }
|
|
23
|
+
@index_configs = {} # Store index configurations (dimension, metric)
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# @see Base#provider_name
|
|
27
|
+
def provider_name
|
|
28
|
+
:memory
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# @see Base#upsert
|
|
32
|
+
def upsert(index:, vectors:, namespace: nil)
|
|
33
|
+
normalized = normalize_vectors(vectors)
|
|
34
|
+
ns = namespace || ""
|
|
35
|
+
|
|
36
|
+
normalized.each do |vec|
|
|
37
|
+
# Infer dimension from first vector if not set
|
|
38
|
+
if @index_configs[index].nil?
|
|
39
|
+
@index_configs[index] = {
|
|
40
|
+
dimension: vec[:values].length,
|
|
41
|
+
metric: "cosine"
|
|
42
|
+
}
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Store vector
|
|
46
|
+
vector_obj = Vector.new(
|
|
47
|
+
id: vec[:id],
|
|
48
|
+
values: vec[:values],
|
|
49
|
+
metadata: vec[:metadata] || {}
|
|
50
|
+
)
|
|
51
|
+
@storage[index][ns][vec[:id]] = vector_obj
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
log_debug("Upserted #{normalized.size} vectors to #{index}")
|
|
55
|
+
{ upserted_count: normalized.size }
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# @see Base#query
|
|
59
|
+
def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
|
|
60
|
+
include_values: false, include_metadata: true)
|
|
61
|
+
ns = namespace || ""
|
|
62
|
+
candidates = @storage[index][ns].values
|
|
63
|
+
|
|
64
|
+
# Apply metadata filter
|
|
65
|
+
if filter
|
|
66
|
+
candidates = candidates.select { |v| matches_filter?(v, filter) }
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# Calculate similarity scores
|
|
70
|
+
matches = candidates.map do |vec|
|
|
71
|
+
score = calculate_similarity(vector, vec.values, index)
|
|
72
|
+
build_match(vec, score, include_values, include_metadata)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# Sort by score (descending) and take top_k
|
|
76
|
+
matches.sort_by! { |m| -m[:score] }
|
|
77
|
+
matches = matches.first(top_k)
|
|
78
|
+
|
|
79
|
+
log_debug("Query returned #{matches.size} results")
|
|
80
|
+
QueryResult.from_response(matches: matches, namespace: namespace)
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
# @see Base#fetch
|
|
84
|
+
def fetch(index:, ids:, namespace: nil)
|
|
85
|
+
ns = namespace || ""
|
|
86
|
+
vectors = {}
|
|
87
|
+
|
|
88
|
+
ids.each do |id|
|
|
89
|
+
vec = @storage[index][ns][id]
|
|
90
|
+
vectors[id] = vec if vec
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
vectors
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
# @see Base#update
|
|
97
|
+
def update(index:, id:, metadata:, namespace: nil)
|
|
98
|
+
ns = namespace || ""
|
|
99
|
+
vec = @storage[index][ns][id]
|
|
100
|
+
|
|
101
|
+
raise NotFoundError, "Vector '#{id}' not found in index '#{index}'" unless vec
|
|
102
|
+
|
|
103
|
+
# Merge metadata
|
|
104
|
+
new_metadata = (vec.metadata || {}).merge(metadata.transform_keys(&:to_s))
|
|
105
|
+
updated_vec = Vector.new(
|
|
106
|
+
id: vec.id,
|
|
107
|
+
values: vec.values,
|
|
108
|
+
metadata: new_metadata,
|
|
109
|
+
sparse_values: vec.sparse_values
|
|
110
|
+
)
|
|
111
|
+
@storage[index][ns][id] = updated_vec
|
|
112
|
+
|
|
113
|
+
log_debug("Updated vector #{id}")
|
|
114
|
+
{ updated: true }
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
# @see Base#delete
|
|
118
|
+
def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
|
|
119
|
+
ns = namespace || ""
|
|
120
|
+
|
|
121
|
+
if delete_all
|
|
122
|
+
@storage[index].clear
|
|
123
|
+
elsif ids
|
|
124
|
+
ids.each { |id| @storage[index][ns].delete(id) }
|
|
125
|
+
elsif namespace && !filter
|
|
126
|
+
@storage[index].delete(ns)
|
|
127
|
+
elsif filter
|
|
128
|
+
# Delete vectors matching filter
|
|
129
|
+
@storage[index][ns].delete_if { |_id, vec| matches_filter?(vec, filter) }
|
|
130
|
+
else
|
|
131
|
+
raise ValidationError, "Must specify ids, filter, namespace, or delete_all"
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
log_debug("Deleted vectors from #{index}")
|
|
135
|
+
{ deleted: true }
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
# @see Base#list_indexes
|
|
139
|
+
def list_indexes
|
|
140
|
+
@index_configs.keys.map { |name| describe_index(index: name) }
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
# @see Base#describe_index
|
|
144
|
+
def describe_index(index:)
|
|
145
|
+
config = @index_configs[index]
|
|
146
|
+
raise NotFoundError, "Index '#{index}' not found" unless config
|
|
147
|
+
|
|
148
|
+
{
|
|
149
|
+
name: index,
|
|
150
|
+
dimension: config[:dimension],
|
|
151
|
+
metric: config[:metric],
|
|
152
|
+
status: "ready"
|
|
153
|
+
}
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
# @see Base#stats
|
|
157
|
+
def stats(index:, namespace: nil)
|
|
158
|
+
config = @index_configs[index]
|
|
159
|
+
raise NotFoundError, "Index '#{index}' not found" unless config
|
|
160
|
+
|
|
161
|
+
if namespace
|
|
162
|
+
ns = namespace
|
|
163
|
+
count = @storage[index][ns].size
|
|
164
|
+
namespaces = { ns => { vector_count: count } }
|
|
165
|
+
else
|
|
166
|
+
# Count all namespaces
|
|
167
|
+
namespaces = {}
|
|
168
|
+
@storage[index].each do |ns, vectors|
|
|
169
|
+
namespaces[ns] = { vector_count: vectors.size }
|
|
170
|
+
end
|
|
171
|
+
count = @storage[index].values.sum(&:size)
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
{
|
|
175
|
+
total_vector_count: count,
|
|
176
|
+
dimension: config[:dimension],
|
|
177
|
+
namespaces: namespaces
|
|
178
|
+
}
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
# Clear all stored data (useful for tests)
|
|
182
|
+
#
|
|
183
|
+
# @return [void]
|
|
184
|
+
def clear!
|
|
185
|
+
@storage.clear
|
|
186
|
+
@index_configs.clear
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
private
|
|
190
|
+
|
|
191
|
+
# Calculate similarity score based on index metric
|
|
192
|
+
def calculate_similarity(query_vector, candidate_vector, index)
|
|
193
|
+
config = @index_configs[index] || { metric: "cosine" }
|
|
194
|
+
metric = config[:metric] || "cosine"
|
|
195
|
+
|
|
196
|
+
case metric.to_s.downcase
|
|
197
|
+
when "euclidean", "l2"
|
|
198
|
+
# Convert distance to similarity (1 / (1 + distance))
|
|
199
|
+
distance = euclidean_distance(query_vector, candidate_vector)
|
|
200
|
+
1.0 / (1.0 + distance)
|
|
201
|
+
when "dot_product", "inner_product", "dot"
|
|
202
|
+
dot_product(query_vector, candidate_vector)
|
|
203
|
+
else # cosine (default)
|
|
204
|
+
cosine_similarity(query_vector, candidate_vector)
|
|
205
|
+
end
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
# Calculate cosine similarity
|
|
209
|
+
def cosine_similarity(vec_a, vec_b)
|
|
210
|
+
raise ArgumentError, "Vectors must have same dimension" if vec_a.length != vec_b.length
|
|
211
|
+
|
|
212
|
+
dot = vec_a.zip(vec_b).sum { |a, b| a * b }
|
|
213
|
+
mag_a = Math.sqrt(vec_a.sum { |v| v**2 })
|
|
214
|
+
mag_b = Math.sqrt(vec_b.sum { |v| v**2 })
|
|
215
|
+
|
|
216
|
+
return 0.0 if mag_a.zero? || mag_b.zero?
|
|
217
|
+
|
|
218
|
+
dot / (mag_a * mag_b)
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
# Calculate Euclidean distance
|
|
222
|
+
def euclidean_distance(vec_a, vec_b)
|
|
223
|
+
raise ArgumentError, "Vectors must have same dimension" if vec_a.length != vec_b.length
|
|
224
|
+
|
|
225
|
+
Math.sqrt(vec_a.zip(vec_b).sum { |a, b| (a - b)**2 })
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
# Calculate dot product
|
|
229
|
+
def dot_product(vec_a, vec_b)
|
|
230
|
+
raise ArgumentError, "Vectors must have same dimension" if vec_a.length != vec_b.length
|
|
231
|
+
|
|
232
|
+
vec_a.zip(vec_b).sum { |a, b| a * b }
|
|
233
|
+
end
|
|
234
|
+
|
|
235
|
+
# Check if vector matches filter
|
|
236
|
+
def matches_filter?(vector, filter)
|
|
237
|
+
filter.all? do |key, value|
|
|
238
|
+
vec_value = vector.metadata[key.to_s]
|
|
239
|
+
matches_filter_value?(vec_value, value)
|
|
240
|
+
end
|
|
241
|
+
end
|
|
242
|
+
|
|
243
|
+
# Check if a value matches filter criteria
|
|
244
|
+
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
245
|
+
def matches_filter_value?(actual, expected)
|
|
246
|
+
case expected
|
|
247
|
+
when Hash
|
|
248
|
+
# Support operators like { "$gt" => 5, "$lt" => 10 }
|
|
249
|
+
expected.all? do |op, val|
|
|
250
|
+
case op.to_s
|
|
251
|
+
when "$eq"
|
|
252
|
+
actual == val
|
|
253
|
+
when "$ne"
|
|
254
|
+
actual != val
|
|
255
|
+
when "$gt"
|
|
256
|
+
actual.is_a?(Numeric) && val.is_a?(Numeric) && actual > val
|
|
257
|
+
when "$gte"
|
|
258
|
+
actual.is_a?(Numeric) && val.is_a?(Numeric) && actual >= val
|
|
259
|
+
when "$lt"
|
|
260
|
+
actual.is_a?(Numeric) && val.is_a?(Numeric) && actual < val
|
|
261
|
+
when "$lte"
|
|
262
|
+
actual.is_a?(Numeric) && val.is_a?(Numeric) && actual <= val
|
|
263
|
+
when "$in"
|
|
264
|
+
val.is_a?(Array) && val.include?(actual)
|
|
265
|
+
else
|
|
266
|
+
actual == expected
|
|
267
|
+
end
|
|
268
|
+
end
|
|
269
|
+
when Array
|
|
270
|
+
expected.include?(actual)
|
|
271
|
+
else
|
|
272
|
+
actual == expected
|
|
273
|
+
end
|
|
274
|
+
end
|
|
275
|
+
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
276
|
+
|
|
277
|
+
# Build match hash from vector
|
|
278
|
+
def build_match(vector, score, include_values, include_metadata)
|
|
279
|
+
match = {
|
|
280
|
+
id: vector.id,
|
|
281
|
+
score: score
|
|
282
|
+
}
|
|
283
|
+
match[:values] = vector.values if include_values
|
|
284
|
+
match[:metadata] = vector.metadata if include_metadata
|
|
285
|
+
match[:sparse_values] = vector.sparse_values if vector.sparse?
|
|
286
|
+
match
|
|
287
|
+
end
|
|
288
|
+
|
|
289
|
+
# Override validate_config! - Memory provider doesn't need host or API key
|
|
290
|
+
# rubocop:disable Naming/PredicateMethod
|
|
291
|
+
def validate_config!
|
|
292
|
+
# Memory provider has no special requirements
|
|
293
|
+
true
|
|
294
|
+
end
|
|
295
|
+
# rubocop:enable Naming/PredicateMethod
|
|
296
|
+
end
|
|
297
|
+
end
|
|
298
|
+
end
|
|
data/lib/vectra/providers/qdrant.rb
CHANGED
|
@@ -299,6 +299,37 @@ module Vectra
|
|
|
299
299
|
handle_retriable_response(e)
|
|
300
300
|
end
|
|
301
301
|
|
|
302
|
+
# Extract error message from Qdrant response format
|
|
303
|
+
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
304
|
+
def extract_error_message(body)
|
|
305
|
+
case body
|
|
306
|
+
when Hash
|
|
307
|
+
# Qdrant wraps errors in "status" key
|
|
308
|
+
status = body["status"] || body
|
|
309
|
+
msg = status["error"] || body["message"] || body["error_message"] || body.to_s
|
|
310
|
+
|
|
311
|
+
# Add details
|
|
312
|
+
details = status["details"] || status["error_details"]
|
|
313
|
+
if details
|
|
314
|
+
details_str = details.is_a?(Hash) ? details.to_json : details.to_s
|
|
315
|
+
msg += " (#{details_str})" unless msg.include?(details_str)
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
# Add field-specific errors
|
|
319
|
+
if status["errors"].is_a?(Array)
|
|
320
|
+
field_errors = status["errors"].map { |e| e.is_a?(Hash) ? e["field"] || e["message"] : e }.join(", ")
|
|
321
|
+
msg += " [Fields: #{field_errors}]" if field_errors && !msg.include?(field_errors)
|
|
322
|
+
end
|
|
323
|
+
|
|
324
|
+
msg
|
|
325
|
+
when String
|
|
326
|
+
body
|
|
327
|
+
else
|
|
328
|
+
"Unknown error"
|
|
329
|
+
end
|
|
330
|
+
end
|
|
331
|
+
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
332
|
+
|
|
302
333
|
def auth_headers
|
|
303
334
|
headers = {}
|
|
304
335
|
headers["api-key"] = config.api_key if config.api_key && !config.api_key.empty?
|