vectra-client 0.3.4 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +54 -4
- data/README.md +44 -2
- data/docs/api/overview.md +92 -0
- data/docs/examples/real-world.md +62 -22
- data/docs/guides/getting-started.md +116 -2
- data/docs/guides/performance.md +35 -1
- data/docs/providers/index.md +12 -0
- data/docs/providers/memory.md +145 -0
- data/docs/providers/pgvector.md +12 -0
- data/docs/providers/pinecone.md +10 -0
- data/docs/providers/qdrant.md +8 -0
- data/docs/providers/weaviate.md +94 -25
- data/examples/README.md +12 -0
- data/lib/vectra/batch.rb +63 -8
- data/lib/vectra/client.rb +253 -1
- data/lib/vectra/configuration.rb +4 -2
- data/lib/vectra/credential_rotation.rb +2 -3
- data/lib/vectra/errors.rb +3 -0
- data/lib/vectra/providers/base.rb +19 -1
- data/lib/vectra/providers/memory.rb +298 -0
- data/lib/vectra/providers/pgvector.rb +68 -0
- data/lib/vectra/providers/pinecone.rb +57 -0
- data/lib/vectra/providers/qdrant.rb +90 -0
- data/lib/vectra/providers/weaviate.rb +85 -0
- data/lib/vectra/vector.rb +56 -0
- data/lib/vectra/version.rb +1 -1
- data/lib/vectra.rb +20 -0
- metadata +3 -1
data/lib/vectra/client.rb
CHANGED
|
@@ -98,8 +98,26 @@ module Vectra
|
|
|
98
98
|
# filter: { category: 'programming' }
|
|
99
99
|
# )
|
|
100
100
|
#
|
|
101
|
-
|
|
101
|
+
# @example Chainable query builder
|
|
102
|
+
# results = client.query("my-index")
|
|
103
|
+
# .vector([0.1, 0.2, 0.3])
|
|
104
|
+
# .top_k(10)
|
|
105
|
+
# .filter(category: "programming")
|
|
106
|
+
# .with_metadata
|
|
107
|
+
# .execute
|
|
108
|
+
#
|
|
109
|
+
def query(index_arg = nil, index: nil, vector: nil, top_k: 10, namespace: nil, filter: nil,
|
|
102
110
|
include_values: false, include_metadata: true)
|
|
111
|
+
# If called with a positional index string only, return a query builder:
|
|
112
|
+
# client.query("docs").vector(vec).top_k(10).filter(...).execute
|
|
113
|
+
if index_arg && index.nil? && vector.nil? && !block_given?
|
|
114
|
+
return QueryBuilder.new(self, index_arg)
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
# Handle positional argument for index in non-builder case
|
|
118
|
+
index = index_arg if index_arg && index.nil?
|
|
119
|
+
|
|
120
|
+
# Backwards-compatible path: perform query immediately
|
|
103
121
|
validate_index!(index)
|
|
104
122
|
validate_query_vector!(vector)
|
|
105
123
|
|
|
@@ -269,6 +287,71 @@ module Vectra
|
|
|
269
287
|
provider.stats(index: index, namespace: namespace)
|
|
270
288
|
end
|
|
271
289
|
|
|
290
|
+
# Hybrid search combining semantic (vector) and keyword (text) search
|
|
291
|
+
#
|
|
292
|
+
# Combines the best of both worlds: semantic understanding from vectors
|
|
293
|
+
# and exact keyword matching from text search.
|
|
294
|
+
#
|
|
295
|
+
# @param index [String] the index/collection name
|
|
296
|
+
# @param vector [Array<Float>] query vector for semantic search
|
|
297
|
+
# @param text [String] text query for keyword search
|
|
298
|
+
# @param alpha [Float] balance between semantic and keyword (0.0 = pure keyword, 1.0 = pure semantic)
|
|
299
|
+
# @param top_k [Integer] number of results to return
|
|
300
|
+
# @param namespace [String, nil] optional namespace
|
|
301
|
+
# @param filter [Hash, nil] metadata filter
|
|
302
|
+
# @param include_values [Boolean] include vector values in results
|
|
303
|
+
# @param include_metadata [Boolean] include metadata in results
|
|
304
|
+
# @return [QueryResult] search results
|
|
305
|
+
#
|
|
306
|
+
# @example Basic hybrid search
|
|
307
|
+
# results = client.hybrid_search(
|
|
308
|
+
# index: 'docs',
|
|
309
|
+
# vector: embedding,
|
|
310
|
+
# text: 'ruby programming',
|
|
311
|
+
# alpha: 0.7 # 70% semantic, 30% keyword
|
|
312
|
+
# )
|
|
313
|
+
#
|
|
314
|
+
# @example Pure semantic (alpha = 1.0)
|
|
315
|
+
# results = client.hybrid_search(
|
|
316
|
+
# index: 'docs',
|
|
317
|
+
# vector: embedding,
|
|
318
|
+
# text: 'ruby',
|
|
319
|
+
# alpha: 1.0
|
|
320
|
+
# )
|
|
321
|
+
#
|
|
322
|
+
# @example Pure keyword (alpha = 0.0)
|
|
323
|
+
# results = client.hybrid_search(
|
|
324
|
+
# index: 'docs',
|
|
325
|
+
# vector: embedding,
|
|
326
|
+
# text: 'ruby programming',
|
|
327
|
+
# alpha: 0.0
|
|
328
|
+
# )
|
|
329
|
+
#
|
|
330
|
+
def hybrid_search(index:, vector:, text:, alpha: 0.5, top_k: 10, namespace: nil,
                  filter: nil, include_values: false, include_metadata: true)
  validate_index!(index)
  validate_query_vector!(vector)

  # `to_s.strip` makes the emptiness check total: nil, "" and whitespace-only
  # text are all rejected with ValidationError, and an argument that has no
  # `empty?` method can no longer escape as a NoMethodError.
  raise ValidationError, "Text query cannot be nil or empty" if text.to_s.strip.empty?
  raise ValidationError, "Alpha must be between 0.0 and 1.0" unless (0.0..1.0).include?(alpha)

  # Hybrid search is an optional provider capability, so probe for it and fail
  # with a descriptive error instead of calling a method that may not exist.
  unless provider.respond_to?(:hybrid_search)
    raise UnsupportedFeatureError,
          "Hybrid search is not supported by #{provider_name} provider"
  end

  provider.hybrid_search(
    index: index,
    vector: vector,
    text: text,
    alpha: alpha,
    top_k: top_k,
    namespace: namespace,
    filter: filter,
    include_values: include_values,
    include_metadata: include_metadata
  )
end
|
|
354
|
+
|
|
272
355
|
# Get the provider name
|
|
273
356
|
#
|
|
274
357
|
# @return [Symbol]
|
|
@@ -276,6 +359,137 @@ module Vectra
|
|
|
276
359
|
provider.provider_name
|
|
277
360
|
end
|
|
278
361
|
|
|
362
|
+
# Quick health check - tests if provider connection is healthy
|
|
363
|
+
#
|
|
364
|
+
# @note Takes no arguments; this method does not apply a per-call timeout.
|
|
365
|
+
# @return [Boolean] true if connection is healthy
|
|
366
|
+
#
|
|
367
|
+
# @example
|
|
368
|
+
# if client.healthy?
|
|
369
|
+
# client.upsert(...)
|
|
370
|
+
# else
|
|
371
|
+
# handle_unhealthy_connection
|
|
372
|
+
# end
|
|
373
|
+
def healthy?
  # Use the monotonic clock for the elapsed time: unlike Time.now it cannot
  # jump backwards or forwards when the system clock is adjusted.
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  provider.list_indexes
  true
rescue StandardError => e
  # Any provider failure means "unhealthy"; it is logged, never re-raised.
  log_error("Health check failed", e)
  false
ensure
  # Guard on the value itself. `defined?(started)` would always be truthy
  # here because the local is assigned lexically above.
  if started
    elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started) * 1000).round(2)
    log_debug("Health check completed in #{elapsed_ms}ms")
  end
end
|
|
384
|
+
|
|
385
|
+
# Ping provider and get connection health status with latency
|
|
386
|
+
#
|
|
387
|
+
# @note Takes no arguments; this method does not apply a per-call timeout.
|
|
388
|
+
# @return [Hash] health status with :healthy, :provider, :latency_ms
|
|
389
|
+
#
|
|
390
|
+
# @example
|
|
391
|
+
# status = client.ping
|
|
392
|
+
# puts "Provider: #{status[:provider]}, Healthy: #{status[:healthy]}, Latency: #{status[:latency_ms]}ms"
|
|
393
|
+
def ping
  # Latency is an elapsed-time measurement, so read the monotonic clock;
  # wall-clock time can be adjusted mid-call and yield skewed or negative values.
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  failure = nil

  begin
    provider.list_indexes
  rescue StandardError => e
    # A failing provider is reported in the result hash rather than raised.
    failure = { error: e.class.name, error_message: e.message }
    log_error("Health check failed", e)
  end

  latency_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started) * 1000).round(2)
  status = { healthy: failure.nil?, provider: provider_name, latency_ms: latency_ms }

  # :error and :error_message are only present when the check failed.
  failure ? status.merge(failure) : status
end
|
|
417
|
+
|
|
418
|
+
# Chainable query builder
|
|
419
|
+
#
|
|
420
|
+
# @api public
|
|
421
|
+
# @example
|
|
422
|
+
# results = client.query("docs")
|
|
423
|
+
# .vector(embedding)
|
|
424
|
+
# .top_k(20)
|
|
425
|
+
# .namespace("prod")
|
|
426
|
+
# .filter(category: "ruby")
|
|
427
|
+
# .with_metadata
|
|
428
|
+
# .execute
|
|
429
|
+
#
|
|
430
|
+
class QueryBuilder
  # @return [String] name of the index this builder will query
  attr_reader :index

  # @param client [#query] object that runs the query when #execute is called
  # @param index [String] index/collection name
  def initialize(client, index)
    @client = client
    @index = index
    # The defaults below mirror the keyword defaults of Client#query.
    @vector = nil
    @top_k = 10
    @namespace = nil
    @filter = nil
    @include_values = false
    @include_metadata = true
  end

  # Set the query vector.
  #
  # @param value [Array<Float>] query embedding
  # @return [QueryBuilder] self, for chaining
  def vector(value)
    @vector = value
    self
  end

  # Set the number of results to return.
  #
  # @param value [#to_i] result count; coerced with `to_i`
  # @return [QueryBuilder] self, for chaining
  def top_k(value)
    @top_k = value.to_i
    self
  end

  # Restrict the query to a namespace.
  #
  # @param value [String, nil] namespace name
  # @return [QueryBuilder] self, for chaining
  def namespace(value)
    @namespace = value
    self
  end

  # Set the metadata filter. Conditions may be given as a positional hash,
  # as keyword arguments, or both; when both are given they are merged and
  # the keyword conditions win on key clashes. An empty filter is stored as
  # nil so that "no filter" is forwarded to the client as `filter: nil`.
  #
  # @param value [Hash, nil] filter conditions
  # @param kwargs [Hash] filter conditions given as keywords
  # @return [QueryBuilder] self, for chaining
  def filter(value = nil, **kwargs)
    combined =
      if value.nil?
        kwargs
      elsif kwargs.empty?
        value
      else
        value.to_h.merge(kwargs)
      end

    @filter = combined.respond_to?(:empty?) && combined.empty? ? nil : combined
    self
  end

  # Include vector values in the results.
  #
  # @return [QueryBuilder] self, for chaining
  def with_values
    @include_values = true
    self
  end

  # Include metadata in the results (this is already the default).
  #
  # @return [QueryBuilder] self, for chaining
  def with_metadata
    @include_metadata = true
    self
  end

  # Leave metadata out of the results.
  #
  # @return [QueryBuilder] self, for chaining
  def without_metadata
    @include_metadata = false
    self
  end

  # Run the accumulated query through the client.
  #
  # @return [Object] whatever the client's `query` returns
  def execute
    @client.query(
      index: @index,
      vector: @vector,
      top_k: @top_k,
      namespace: @namespace,
      filter: @filter,
      include_values: @include_values,
      include_metadata: @include_metadata
    )
  end
end
|
|
492
|
+
|
|
279
493
|
private
|
|
280
494
|
|
|
281
495
|
def build_config(provider_name, api_key, environment, host, options)
|
|
@@ -306,6 +520,8 @@ module Vectra
|
|
|
306
520
|
Providers::Weaviate.new(config)
|
|
307
521
|
when :pgvector
|
|
308
522
|
Providers::Pgvector.new(config)
|
|
523
|
+
when :memory
|
|
524
|
+
Providers::Memory.new(config)
|
|
309
525
|
else
|
|
310
526
|
raise UnsupportedProviderError, "Provider '#{config.provider}' is not supported"
|
|
311
527
|
end
|
|
@@ -317,11 +533,32 @@ module Vectra
|
|
|
317
533
|
raise ValidationError, "Index name cannot be empty" if index.empty?
|
|
318
534
|
end
|
|
319
535
|
|
|
536
|
+
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
320
537
|
# Validate a batch of vectors before it is handed to a provider.
#
# The batch must be a non-empty Array. Every element that carries values
# (a Vector, or a Hash with a :values key) must have the same dimension as
# the first such element. Elements without values are ignored by the
# dimension check.
#
# @param vectors [Array<Vector, Hash>] batch to validate
# @return [Array<Vector, Hash>] the batch, unchanged
# @raise [ValidationError] if the batch is nil, not an Array, empty, or
#   contains vectors of differing dimensions
def validate_vectors!(vectors)
  raise ValidationError, "Vectors cannot be nil" if vectors.nil?
  raise ValidationError, "Vectors must be an array" unless vectors.is_a?(Array)
  raise ValidationError, "Vectors cannot be empty" if vectors.empty?

  # The reference dimension comes from the first element that has values,
  # not blindly from element 0, so a value-less leading element no longer
  # disables the consistency check for the rest of the batch.
  expected_dim = nil

  vectors.each_with_index do |vec, position|
    values = vec.is_a?(Vector) ? vec.values : vec[:values]
    dim = values&.size
    next unless dim

    expected_dim ||= dim
    next if dim == expected_dim

    raise ValidationError,
          "Inconsistent vector dimensions at index #{position}: " \
          "expected #{expected_dim}, got #{dim}. " \
          "All vectors in a batch must have the same dimension."
  end
end
|
|
561
|
+
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
325
562
|
|
|
326
563
|
def validate_query_vector!(vector)
|
|
327
564
|
raise ValidationError, "Query vector cannot be nil" if vector.nil?
|
|
@@ -340,5 +577,20 @@ module Vectra
|
|
|
340
577
|
raise ValidationError, "ID must be a string" unless id.is_a?(String)
|
|
341
578
|
raise ValidationError, "ID cannot be empty" if id.empty?
|
|
342
579
|
end
|
|
580
|
+
|
|
581
|
+
# Write an error report to the configured logger, if there is one.
#
# Emits the message, then (when an error is given) its class and message,
# then the first three backtrace frames when a backtrace is available.
#
# @param message [String] description of what failed
# @param error [Exception, nil] the exception that was rescued
def log_error(message, error = nil)
  logger = config.logger
  return unless logger

  logger.error("[Vectra] #{message}")
  logger.error("[Vectra] #{error.class}: #{error.message}") if error

  # Exceptions that were never raised have no backtrace.
  frames = error&.backtrace
  logger.error("[Vectra] #{frames.first(3).join("\n")}") if frames
end
|
|
588
|
+
|
|
589
|
+
# Write a debug message to the configured logger, if there is one.
#
# @param message [String] text to log
# @param data [Object, nil] optional payload, logged via `inspect` on its own line
def log_debug(message, data = nil)
  logger = config.logger
  return unless logger

  logger.debug("[Vectra] #{message}")
  logger.debug("[Vectra] #{data.inspect}") if data
end
|
|
343
595
|
end
|
|
344
596
|
end
|
data/lib/vectra/configuration.rb
CHANGED
|
@@ -11,7 +11,7 @@ module Vectra
|
|
|
11
11
|
# end
|
|
12
12
|
#
|
|
13
13
|
class Configuration
|
|
14
|
-
SUPPORTED_PROVIDERS = %i[pinecone qdrant weaviate pgvector].freeze
|
|
14
|
+
SUPPORTED_PROVIDERS = %i[pinecone qdrant weaviate pgvector memory].freeze
|
|
15
15
|
|
|
16
16
|
attr_accessor :api_key, :environment, :host, :timeout, :open_timeout,
|
|
17
17
|
:max_retries, :retry_delay, :logger, :pool_size, :pool_timeout,
|
|
@@ -117,7 +117,7 @@ module Vectra
|
|
|
117
117
|
|
|
118
118
|
# Providers that don't require API key (local instances)
|
|
119
119
|
# Whether the configured provider is one of those that can run without an
# API key (qdrant, pgvector, memory).
#
# @return [Boolean]
def api_key_optional_provider?
  case provider
  when :qdrant, :pgvector, :memory then true
  else false
  end
end
|
|
122
122
|
|
|
123
123
|
def validate_provider_specific!
|
|
@@ -130,6 +130,8 @@ module Vectra
|
|
|
130
130
|
validate_weaviate!
|
|
131
131
|
when :pgvector
|
|
132
132
|
validate_pgvector!
|
|
133
|
+
when :memory
|
|
134
|
+
# Memory provider has no special requirements
|
|
133
135
|
end
|
|
134
136
|
end
|
|
135
137
|
|
|
@@ -37,13 +37,12 @@ module Vectra
|
|
|
37
37
|
|
|
38
38
|
# Test if secondary key is valid
|
|
39
39
|
#
|
|
40
|
-
# @param timeout [Float] Test timeout in seconds
|
|
41
40
|
# @return [Boolean] true if secondary key works
|
|
42
|
-
def test_secondary
|
|
41
|
+
def test_secondary
  key = secondary_key
  return false if key.nil? || key.empty?

  # Any failure while building or probing the client counts as "key does not work".
  build_test_client(key).healthy?
rescue StandardError
  false
end
|
data/lib/vectra/errors.rb
CHANGED
|
@@ -57,6 +57,9 @@ module Vectra
|
|
|
57
57
|
# Raised when the provider is not supported
|
|
58
58
|
class UnsupportedProviderError < Error; end
|
|
59
59
|
|
|
60
|
+
# Raised when a feature is not supported by the provider
|
|
61
|
+
# Signals that the configured provider lacks an optional capability, e.g. a
# hybrid search requested from a provider that does not respond to `hybrid_search`.
class UnsupportedFeatureError < Error; end
|
|
62
|
+
|
|
60
63
|
# Raised when an operation times out
|
|
61
64
|
class TimeoutError < Error; end
|
|
62
65
|
|
|
@@ -232,16 +232,34 @@ module Vectra
|
|
|
232
232
|
#
|
|
233
233
|
# @param body [Hash, String, nil] response body
|
|
234
234
|
# @return [String]
|
|
235
|
+
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
235
236
|
def extract_error_message(body)
  case body
  when Hash
    # Providers may nest structured data under these keys, so every part is
    # coerced to a String before it is searched or concatenated. Without the
    # coercion a Hash or Integer under "error" raised NoMethodError below.
    stringify = ->(part) { part.is_a?(Hash) ? part.to_json : part.to_s }

    # Primary error message; fall back to the whole body when no key matches.
    primary = body["message"] || body["error"] || body["error_message"]
    msg = primary ? stringify.call(primary) : body.to_s

    # Add context from details, unless the message already contains it.
    details = body["details"] || body["error_details"] || body["detail"]
    if details
      details_str = stringify.call(details)
      msg = "#{msg} (#{details_str})" unless msg.include?(details_str)
    end

    # Add field-specific errors if available.
    if body["errors"].is_a?(Array)
      field_errors = body["errors"].map { |e| e.is_a?(Hash) ? e["field"] || e["message"] : e }.join(", ")
      msg = "#{msg} [Fields: #{field_errors}]" unless field_errors.empty? || msg.include?(field_errors)
    end

    msg
  when String
    body
  else
    "Unknown error"
  end
end
|
|
262
|
+
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
245
263
|
|
|
246
264
|
# Log debug information
|
|
247
265
|
#
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Vectra
|
|
4
|
+
module Providers
|
|
5
|
+
# In-memory vector database provider for testing
|
|
6
|
+
#
|
|
7
|
+
# This provider stores all vectors in memory using Ruby hashes.
|
|
8
|
+
# Perfect for testing without external dependencies.
|
|
9
|
+
#
|
|
10
|
+
# @example Usage in tests
|
|
11
|
+
# Vectra.configure do |config|
|
|
12
|
+
# config.provider = :memory if Rails.env.test?
|
|
13
|
+
# end
|
|
14
|
+
#
|
|
15
|
+
# client = Vectra::Client.new
|
|
16
|
+
# client.upsert(index: 'test', vectors: [...])
|
|
17
|
+
#
|
|
18
|
+
class Memory < Base
  # Metric recorded for an index whose configuration is inferred on first upsert.
  DEFAULT_METRIC = "cosine"

  # @param config [Object] client configuration, passed through to Base
  def initialize(config)
    super
    # Storage structure: @storage[index][namespace][id] = Vector.
    # Both levels are created on demand by the default procs, so only the
    # write path may index into @storage with `[]`. Reads go through
    # #stored_vectors so that looking something up never creates empty
    # index or namespace entries.
    @storage = Hash.new { |by_index, index| by_index[index] = Hash.new { |by_ns, ns| by_ns[ns] = {} } }
    @index_configs = {} # index name => { dimension:, metric: }
  end

  # @see Base#provider_name
  def provider_name
    :memory
  end

  # Store (insert or overwrite) vectors. The index configuration is inferred
  # from the first vector ever written to the index.
  #
  # @see Base#upsert
  # @return [Hash] `{ upserted_count: Integer }`
  def upsert(index:, vectors:, namespace: nil)
    normalized = normalize_vectors(vectors)
    ns = namespace || ""

    normalized.each do |vec|
      @index_configs[index] ||= { dimension: vec[:values].length, metric: DEFAULT_METRIC }

      attributes = { id: vec[:id], values: vec[:values], metadata: vec[:metadata] || {} }
      # Keep sparse values when the normalized vector carries them; otherwise
      # the sparse handling in #update and #build_match could never apply.
      # NOTE(review): assumes normalize_vectors exposes them under
      # :sparse_values - confirm against Base.
      attributes[:sparse_values] = vec[:sparse_values] if vec[:sparse_values]

      @storage[index][ns][vec[:id]] = Vector.new(**attributes)
    end

    log_debug("Upserted #{normalized.size} vectors to #{index}")
    { upserted_count: normalized.size }
  end

  # Score every stored vector in the namespace against the query vector and
  # return the best `top_k`, highest score first.
  #
  # @see Base#query
  def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
            include_values: false, include_metadata: true)
    candidates = (stored_vectors(index, namespace || "") || {}).values
    candidates = candidates.select { |vec| matches_filter?(vec, filter) } if filter

    matches = candidates.map do |vec|
      score = calculate_similarity(vector, vec.values, index)
      build_match(vec, score, include_values, include_metadata)
    end
    matches = matches.sort_by { |match| -match[:score] }.first(top_k)

    log_debug("Query returned #{matches.size} results")
    QueryResult.from_response(matches: matches, namespace: namespace)
  end

  # @see Base#fetch
  # @return [Hash] id => Vector for every requested id that exists
  def fetch(index:, ids:, namespace: nil)
    stored = stored_vectors(index, namespace || "") || {}

    ids.each_with_object({}) do |id, found|
      vec = stored[id]
      found[id] = vec if vec
    end
  end

  # Merge new metadata into an existing vector. Incoming keys are converted
  # to strings before merging.
  #
  # @see Base#update
  # @raise [NotFoundError] if the vector does not exist
  # @return [Hash] `{ updated: true }`
  def update(index:, id:, metadata:, namespace: nil)
    ns = namespace || ""
    vec = stored_vectors(index, ns)&.fetch(id, nil)

    raise NotFoundError, "Vector '#{id}' not found in index '#{index}'" unless vec

    merged_metadata = (vec.metadata || {}).merge(metadata.transform_keys(&:to_s))
    @storage[index][ns][id] = Vector.new(
      id: vec.id,
      values: vec.values,
      metadata: merged_metadata,
      sparse_values: vec.sparse_values
    )

    log_debug("Updated vector #{id}")
    { updated: true }
  end

  # Delete vectors. Selectors are checked in this order: `delete_all`
  # (every namespace of the index), `ids`, a bare `namespace` (the whole
  # namespace), then `filter`.
  #
  # @see Base#delete
  # @raise [ValidationError] if no selector is given
  # @return [Hash] `{ deleted: true }`
  def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
    ns = namespace || ""

    if delete_all
      @storage.fetch(index, nil)&.clear
    elsif ids
      stored = stored_vectors(index, ns)
      ids.each { |id| stored.delete(id) } if stored
    elsif namespace && !filter
      @storage.fetch(index, nil)&.delete(ns)
    elsif filter
      stored_vectors(index, ns)&.delete_if { |_id, vec| matches_filter?(vec, filter) }
    else
      raise ValidationError, "Must specify ids, filter, namespace, or delete_all"
    end

    log_debug("Deleted vectors from #{index}")
    { deleted: true }
  end

  # @see Base#list_indexes
  def list_indexes
    @index_configs.keys.map { |name| describe_index(index: name) }
  end

  # @see Base#describe_index
  # @raise [NotFoundError] if nothing has been upserted to the index
  def describe_index(index:)
    config = @index_configs[index]
    raise NotFoundError, "Index '#{index}' not found" unless config

    {
      name: index,
      dimension: config[:dimension],
      metric: config[:metric],
      status: "ready"
    }
  end

  # @see Base#stats
  # @raise [NotFoundError] if nothing has been upserted to the index
  def stats(index:, namespace: nil)
    config = @index_configs[index]
    raise NotFoundError, "Index '#{index}' not found" unless config

    namespaces =
      if namespace
        # Asking about an unknown namespace reports 0 without creating it.
        { namespace => { vector_count: (stored_vectors(index, namespace) || {}).size } }
      else
        @storage.fetch(index, {}).each_with_object({}) do |(ns, vectors), counts|
          counts[ns] = { vector_count: vectors.size }
        end
      end

    {
      total_vector_count: namespaces.values.sum { |entry| entry[:vector_count] },
      dimension: config[:dimension],
      namespaces: namespaces
    }
  end

  # Clear all stored data (useful for tests)
  #
  # @return [void]
  def clear!
    @storage.clear
    @index_configs.clear
  end

  private

  # Look up a namespace's id => Vector hash without triggering the
  # auto-creating default procs of @storage (Hash#fetch bypasses them).
  #
  # @return [Hash, nil] the stored hash, or nil if the index or namespace is absent
  def stored_vectors(index, namespace)
    @storage.fetch(index, nil)&.fetch(namespace, nil)
  end

  # Calculate similarity score based on index metric (cosine when unknown).
  def calculate_similarity(query_vector, candidate_vector, index)
    metric = @index_configs.dig(index, :metric) || DEFAULT_METRIC

    case metric.to_s.downcase
    when "euclidean", "l2"
      # Convert distance to similarity (1 / (1 + distance)) so larger is better.
      1.0 / (1.0 + euclidean_distance(query_vector, candidate_vector))
    when "dot_product", "inner_product", "dot"
      dot_product(query_vector, candidate_vector)
    else # cosine (default)
      cosine_similarity(query_vector, candidate_vector)
    end
  end

  # Calculate cosine similarity; 0.0 when either vector has zero magnitude.
  def cosine_similarity(vec_a, vec_b)
    ensure_same_dimension!(vec_a, vec_b)

    dot = vec_a.zip(vec_b).sum { |a, b| a * b }
    mag_a = Math.sqrt(vec_a.sum { |v| v**2 })
    mag_b = Math.sqrt(vec_b.sum { |v| v**2 })

    return 0.0 if mag_a.zero? || mag_b.zero?

    dot / (mag_a * mag_b)
  end

  # Calculate Euclidean distance
  def euclidean_distance(vec_a, vec_b)
    ensure_same_dimension!(vec_a, vec_b)

    Math.sqrt(vec_a.zip(vec_b).sum { |a, b| (a - b)**2 })
  end

  # Calculate dot product
  def dot_product(vec_a, vec_b)
    ensure_same_dimension!(vec_a, vec_b)

    vec_a.zip(vec_b).sum { |a, b| a * b }
  end

  # @raise [ArgumentError] if the two vectors differ in length
  def ensure_same_dimension!(vec_a, vec_b)
    raise ArgumentError, "Vectors must have same dimension" if vec_a.length != vec_b.length
  end

  # Check if vector matches filter: every condition must hold. Filter keys
  # are looked up as strings in the vector's metadata.
  def matches_filter?(vector, filter)
    metadata = vector.metadata || {}
    filter.all? { |key, value| matches_filter_value?(metadata[key.to_s], value) }
  end

  # Check if a value matches filter criteria
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  def matches_filter_value?(actual, expected)
    case expected
    when Hash
      # Support operators like { "$gt" => 5, "$lt" => 10 }; all must hold.
      expected.all? do |op, val|
        case op.to_s
        when "$eq"
          actual == val
        when "$ne"
          actual != val
        when "$gt"
          actual.is_a?(Numeric) && val.is_a?(Numeric) && actual > val
        when "$gte"
          actual.is_a?(Numeric) && val.is_a?(Numeric) && actual >= val
        when "$lt"
          actual.is_a?(Numeric) && val.is_a?(Numeric) && actual < val
        when "$lte"
          actual.is_a?(Numeric) && val.is_a?(Numeric) && actual <= val
        when "$in"
          val.is_a?(Array) && val.include?(actual)
        else
          # Not an operator: compare against the whole expected hash.
          actual == expected
        end
      end
    when Array
      expected.include?(actual)
    else
      actual == expected
    end
  end
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

  # Build match hash from vector
  def build_match(vector, score, include_values, include_metadata)
    match = { id: vector.id, score: score }
    match[:values] = vector.values if include_values
    match[:metadata] = vector.metadata if include_metadata
    match[:sparse_values] = vector.sparse_values if vector.sparse?
    match
  end

  # Override validate_config! - Memory provider doesn't need host or API key
  # rubocop:disable Naming/PredicateMethod
  def validate_config!
    # Memory provider has no special requirements
    true
  end
  # rubocop:enable Naming/PredicateMethod
end
|
|
297
|
+
end
|
|
298
|
+
end
|