vectra-client 0.3.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -4
- data/README.md +35 -2
- data/docs/examples/real-world.md +62 -22
- data/docs/guides/getting-started.md +70 -2
- data/docs/providers/index.md +12 -0
- data/docs/providers/memory.md +145 -0
- data/docs/providers/weaviate.md +84 -25
- data/examples/README.md +12 -0
- data/lib/vectra/batch.rb +63 -8
- data/lib/vectra/client.rb +188 -1
- data/lib/vectra/configuration.rb +4 -2
- data/lib/vectra/credential_rotation.rb +2 -3
- data/lib/vectra/providers/base.rb +19 -1
- data/lib/vectra/providers/memory.rb +298 -0
- data/lib/vectra/providers/qdrant.rb +31 -0
- data/lib/vectra/vector.rb +56 -0
- data/lib/vectra/version.rb +1 -1
- data/lib/vectra.rb +20 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b0d48b8a6205df9a0d6545e3772e26da09f826f03d56751a6fa997e1a73d89f1
|
|
4
|
+
data.tar.gz: 25dc65ad327e03e7912a0b938739b6acf1d6708efd8fa41773d9bbae1053e3dc
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 68fc7d7a2c941733bb8f42007dffa52989daedeb362da62c094d05dec7d02f8ca6d7ca8763194e9b51b4ecfe3f7a6e8db4418c7589caab31943f055f75f4c48a
|
|
7
|
+
data.tar.gz: f40ab28eb011943d4961e22c5d31b5a816a81cea1a2bf6afa238004adaa1cdd8558f10b0ee8d56376d6fc737788b16fc1acb7088778d8461ecb3a8db2ff85580
|
data/CHANGELOG.md
CHANGED
|
@@ -1,12 +1,25 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
-
## [v0.
|
|
3
|
+
## [v0.4.0](https://github.com/stokry/vectra/tree/v0.4.0) (2026-01-12)
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
### Added
|
|
6
|
+
- Memory provider for in-memory vector storage
|
|
7
|
+
- QueryBuilder for chainable query API
|
|
8
|
+
- Batch operations with concurrent processing
|
|
9
|
+
- Vector normalization methods (L2, L1)
|
|
10
|
+
- Enhanced error message extraction
|
|
6
11
|
|
|
7
12
|
### Fixed
|
|
8
|
-
-
|
|
9
|
-
-
|
|
13
|
+
- Weaviate DELETE request query parameters
|
|
14
|
+
- Qdrant error message extraction from nested status
|
|
15
|
+
- Client ping error info capture
|
|
16
|
+
- Credential rotation timeout parameter handling
|
|
17
|
+
|
|
18
|
+
[Full Changelog](https://github.com/stokry/vectra/compare/v0.3.4...v0.4.0)
|
|
19
|
+
|
|
20
|
+
## [v0.3.4](https://github.com/stokry/vectra/tree/v0.3.4) (2026-01-12)
|
|
21
|
+
|
|
22
|
+
[Full Changelog](https://github.com/stokry/vectra/compare/v0.3.3...v0.3.4)
|
|
10
23
|
|
|
11
24
|
## [v0.3.3](https://github.com/stokry/vectra/tree/v0.3.3) (2026-01-09)
|
|
12
25
|
|
data/README.md
CHANGED
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
| **Qdrant** | Open Source | ✅ Supported |
|
|
18
18
|
| **Weaviate** | Open Source | ✅ Supported |
|
|
19
19
|
| **pgvector** | PostgreSQL | ✅ Supported |
|
|
20
|
+
| **Memory** | In-Memory | ✅ Testing only |
|
|
20
21
|
|
|
21
22
|
## Installation
|
|
22
23
|
|
|
@@ -48,12 +49,38 @@ client.upsert(
|
|
|
48
49
|
]
|
|
49
50
|
)
|
|
50
51
|
|
|
51
|
-
# Search
|
|
52
|
+
# Search (classic API)
|
|
52
53
|
results = client.query(vector: [0.1, 0.2, 0.3], top_k: 5)
|
|
53
54
|
results.each { |match| puts "#{match.id}: #{match.score}" }
|
|
54
55
|
|
|
56
|
+
# Search (chainable Query Builder)
|
|
57
|
+
results = client
|
|
58
|
+
.query('docs')
|
|
59
|
+
.vector([0.1, 0.2, 0.3])
|
|
60
|
+
.top_k(5)
|
|
61
|
+
.with_metadata
|
|
62
|
+
.execute
|
|
63
|
+
|
|
64
|
+
results.each do |match|
|
|
65
|
+
puts "#{match.id}: #{match.score}"
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Normalize embeddings (for better cosine similarity)
|
|
69
|
+
embedding = openai_response['data'][0]['embedding']
|
|
70
|
+
normalized = Vectra::Vector.normalize(embedding)
|
|
71
|
+
client.upsert(vectors: [{ id: 'doc-1', values: normalized }])
|
|
72
|
+
|
|
55
73
|
# Delete
|
|
56
74
|
client.delete(ids: ['doc-1', 'doc-2'])
|
|
75
|
+
|
|
76
|
+
# Health check
|
|
77
|
+
if client.healthy?
|
|
78
|
+
puts "Connection is healthy"
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
# Ping with latency
|
|
82
|
+
status = client.ping
|
|
83
|
+
puts "Provider: #{status[:provider]}, Latency: #{status[:latency_ms]}ms"
|
|
57
84
|
```
|
|
58
85
|
|
|
59
86
|
## Provider Examples
|
|
@@ -69,10 +96,16 @@ client = Vectra.qdrant(host: 'http://localhost:6333')
|
|
|
69
96
|
client = Vectra.qdrant(host: 'https://your-cluster.qdrant.io', api_key: ENV['QDRANT_API_KEY'])
|
|
70
97
|
|
|
71
98
|
# Weaviate
|
|
72
|
-
client = Vectra.weaviate(
|
|
99
|
+
client = Vectra.weaviate(
|
|
100
|
+
api_key: ENV['WEAVIATE_API_KEY'],
|
|
101
|
+
host: 'https://your-weaviate-instance'
|
|
102
|
+
)
|
|
73
103
|
|
|
74
104
|
# pgvector (PostgreSQL)
|
|
75
105
|
client = Vectra.pgvector(connection_url: 'postgres://user:pass@localhost/mydb')
|
|
106
|
+
|
|
107
|
+
# Memory (in-memory, testing only)
|
|
108
|
+
client = Vectra.memory
|
|
76
109
|
```
|
|
77
110
|
|
|
78
111
|
## Features
|
data/docs/examples/real-world.md
CHANGED
|
@@ -37,6 +37,8 @@ class ProductSearchService
|
|
|
37
37
|
|
|
38
38
|
def search(query:, category: nil, price_range: nil, limit: 20)
|
|
39
39
|
query_embedding = generate_embedding(query)
|
|
40
|
+
# Normalize for better cosine similarity
|
|
41
|
+
query_embedding = Vectra::Vector.normalize(query_embedding)
|
|
40
42
|
|
|
41
43
|
filter = {}
|
|
42
44
|
filter[:category] = category if category
|
|
@@ -68,9 +70,12 @@ class ProductSearchService
|
|
|
68
70
|
|
|
69
71
|
def generate_embedding(text)
|
|
70
72
|
# Use your embedding model (OpenAI, sentence-transformers, etc.)
|
|
71
|
-
OpenAI::Client.new.embeddings(
|
|
73
|
+
embedding = OpenAI::Client.new.embeddings(
|
|
72
74
|
parameters: { model: "text-embedding-ada-002", input: text }
|
|
73
75
|
)["data"][0]["embedding"]
|
|
76
|
+
|
|
77
|
+
# Normalize embeddings before storing
|
|
78
|
+
Vectra::Vector.normalize(embedding)
|
|
74
79
|
end
|
|
75
80
|
|
|
76
81
|
def fallback_search(query, category)
|
|
@@ -257,14 +262,14 @@ class TenantDocumentService
|
|
|
257
262
|
query_embedding = generate_embedding(query)
|
|
258
263
|
|
|
259
264
|
# Ensure tenant isolation via namespace
|
|
260
|
-
results = @client
|
|
261
|
-
|
|
262
|
-
vector
|
|
263
|
-
top_k
|
|
264
|
-
namespace
|
|
265
|
-
filter
|
|
266
|
-
|
|
267
|
-
|
|
265
|
+
results = @client
|
|
266
|
+
.query("documents")
|
|
267
|
+
.vector(query_embedding)
|
|
268
|
+
.top_k(limit)
|
|
269
|
+
.namespace("tenant-#{@tenant_id}")
|
|
270
|
+
.filter(tenant_id: @tenant_id) # Double protection
|
|
271
|
+
.with_metadata
|
|
272
|
+
.execute
|
|
268
273
|
|
|
269
274
|
# Audit log
|
|
270
275
|
@audit.log_access(
|
|
@@ -325,9 +330,11 @@ class DocumentIndexer
|
|
|
325
330
|
|
|
326
331
|
def index_large_dataset(documents, concurrency: 4)
|
|
327
332
|
total = documents.size
|
|
328
|
-
processed = 0
|
|
329
333
|
errors = []
|
|
330
334
|
|
|
335
|
+
# Create batch client with specified concurrency
|
|
336
|
+
batch_client = Vectra::Batch.new(@client, concurrency: concurrency)
|
|
337
|
+
|
|
331
338
|
# Convert to vectors
|
|
332
339
|
vectors = documents.map do |doc|
|
|
333
340
|
{
|
|
@@ -337,25 +344,27 @@ class DocumentIndexer
|
|
|
337
344
|
}
|
|
338
345
|
end
|
|
339
346
|
|
|
340
|
-
# Process in async batches
|
|
341
|
-
result =
|
|
347
|
+
# Process in async batches with progress tracking
|
|
348
|
+
result = batch_client.upsert_async(
|
|
342
349
|
index: "documents",
|
|
343
350
|
vectors: vectors,
|
|
344
|
-
|
|
345
|
-
on_progress: proc { |
|
|
346
|
-
|
|
347
|
-
|
|
351
|
+
chunk_size: 100,
|
|
352
|
+
on_progress: proc { |stats|
|
|
353
|
+
progress = stats[:percentage]
|
|
354
|
+
processed = stats[:processed]
|
|
355
|
+
total = stats[:total]
|
|
356
|
+
chunk = stats[:current_chunk] + 1
|
|
357
|
+
total_chunks = stats[:total_chunks]
|
|
358
|
+
|
|
348
359
|
puts "Progress: #{progress}% (#{processed}/#{total})"
|
|
349
|
-
|
|
350
|
-
on_error: proc { |error, vector|
|
|
351
|
-
errors << { id: vector[:id], error: error.message }
|
|
360
|
+
puts " Chunk #{chunk}/#{total_chunks} | Success: #{stats[:success_count]}, Failed: #{stats[:failed_count]}"
|
|
352
361
|
}
|
|
353
362
|
)
|
|
354
363
|
|
|
355
364
|
{
|
|
356
|
-
success: result[:
|
|
357
|
-
failed: result[:
|
|
358
|
-
errors: errors,
|
|
365
|
+
success: result[:upserted_count],
|
|
366
|
+
failed: result[:errors].size,
|
|
367
|
+
errors: result[:errors],
|
|
359
368
|
total: total
|
|
360
369
|
}
|
|
361
370
|
end
|
|
@@ -475,6 +484,37 @@ module VectraHelper
|
|
|
475
484
|
end
|
|
476
485
|
```
|
|
477
486
|
|
|
487
|
+
## Testing with the Memory Provider
|
|
488
|
+
|
|
489
|
+
For fast, deterministic tests you can run Vectra entirely in memory without any external services:
|
|
490
|
+
|
|
491
|
+
```ruby
|
|
492
|
+
# config/initializers/vectra.rb (test environment)
|
|
493
|
+
Vectra.configure do |config|
|
|
494
|
+
config.provider = :memory if Rails.env.test?
|
|
495
|
+
end
|
|
496
|
+
|
|
497
|
+
RSpec.describe ProductSearchService do
|
|
498
|
+
let(:client) { Vectra::Client.new } # uses memory provider in test
|
|
499
|
+
|
|
500
|
+
before do
|
|
501
|
+
client.provider.clear! if client.provider.respond_to?(:clear!)
|
|
502
|
+
|
|
503
|
+
client.upsert(
|
|
504
|
+
index: "products",
|
|
505
|
+
vectors: [
|
|
506
|
+
{ id: "p1", values: [0.1, 0.2], metadata: { name: "Test Product" } }
|
|
507
|
+
]
|
|
508
|
+
)
|
|
509
|
+
end
|
|
510
|
+
|
|
511
|
+
it "returns relevant products" do
|
|
512
|
+
results = client.query(index: "products", vector: [0.1, 0.2], top_k: 5)
|
|
513
|
+
expect(results.ids).to include("p1")
|
|
514
|
+
end
|
|
515
|
+
end
|
|
516
|
+
```
|
|
517
|
+
|
|
478
518
|
## Best Practices
|
|
479
519
|
|
|
480
520
|
### 1. Always Use Caching for Frequent Queries
|
|
data/docs/guides/getting-started.md
CHANGED
|
@@ -43,17 +43,49 @@ client.upsert(
|
|
|
43
43
|
### Query (Search)
|
|
44
44
|
|
|
45
45
|
```ruby
|
|
46
|
+
# Classic API
|
|
46
47
|
results = client.query(
|
|
47
48
|
vector: [0.1, 0.2, 0.3],
|
|
48
49
|
top_k: 5,
|
|
49
50
|
include_metadata: true
|
|
50
51
|
)
|
|
51
52
|
|
|
52
|
-
results.
|
|
53
|
-
puts "ID: #{match
|
|
53
|
+
results.each do |match|
|
|
54
|
+
puts "ID: #{match.id}, Score: #{match.score}"
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# Chainable Query Builder
|
|
58
|
+
results = client
|
|
59
|
+
.query("my-index")
|
|
60
|
+
.vector([0.1, 0.2, 0.3])
|
|
61
|
+
.top_k(5)
|
|
62
|
+
.with_metadata
|
|
63
|
+
.execute
|
|
64
|
+
|
|
65
|
+
results.each do |match|
|
|
66
|
+
puts "ID: #{match.id}, Score: #{match.score}"
|
|
54
67
|
end
|
|
55
68
|
```
|
|
56
69
|
|
|
70
|
+
### Normalize Embeddings
|
|
71
|
+
|
|
72
|
+
For better cosine similarity results, normalize your embeddings before upserting:
|
|
73
|
+
|
|
74
|
+
```ruby
|
|
75
|
+
# Normalize OpenAI embeddings (recommended)
|
|
76
|
+
embedding = openai_response['data'][0]['embedding']
|
|
77
|
+
normalized = Vectra::Vector.normalize(embedding)
|
|
78
|
+
client.upsert(vectors: [{ id: 'doc-1', values: normalized }])
|
|
79
|
+
|
|
80
|
+
# Or normalize in-place
|
|
81
|
+
vector = Vectra::Vector.new(id: 'doc-1', values: embedding)
|
|
82
|
+
vector.normalize! # L2 normalization (default, unit vector)
|
|
83
|
+
client.upsert(vectors: [vector])
|
|
84
|
+
|
|
85
|
+
# L1 normalization (sum of absolute values = 1)
|
|
86
|
+
vector.normalize!(type: :l1)
|
|
87
|
+
```
|
|
88
|
+
|
|
57
89
|
### Delete Vectors
|
|
58
90
|
|
|
59
91
|
```ruby
|
|
@@ -68,6 +100,42 @@ puts "Index dimension: #{stats['dimension']}"
|
|
|
68
100
|
puts "Vector count: #{stats['vector_count']}"
|
|
69
101
|
```
|
|
70
102
|
|
|
103
|
+
### Health Check & Ping
|
|
104
|
+
|
|
105
|
+
```ruby
|
|
106
|
+
# Quick health check
|
|
107
|
+
if client.healthy?
|
|
108
|
+
client.upsert(...)
|
|
109
|
+
else
|
|
110
|
+
handle_unhealthy_connection
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# Ping with latency measurement
|
|
114
|
+
status = client.ping
|
|
115
|
+
puts "Provider: #{status[:provider]}"
|
|
116
|
+
puts "Healthy: #{status[:healthy]}"
|
|
117
|
+
puts "Latency: #{status[:latency_ms]}ms"
|
|
118
|
+
|
|
119
|
+
if status[:error]
|
|
120
|
+
puts "Error: #{status[:error_message]}"
|
|
121
|
+
end
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Dimension Validation
|
|
125
|
+
|
|
126
|
+
Vectra automatically validates that all vectors in a batch have the same dimension:
|
|
127
|
+
|
|
128
|
+
```ruby
|
|
129
|
+
# This will raise ValidationError
|
|
130
|
+
vectors = [
|
|
131
|
+
{ id: "vec1", values: [0.1, 0.2, 0.3] }, # 3 dimensions
|
|
132
|
+
{ id: "vec2", values: [0.4, 0.5] } # 2 dimensions - ERROR!
|
|
133
|
+
]
|
|
134
|
+
|
|
135
|
+
client.upsert(vectors: vectors)
|
|
136
|
+
# => ValidationError: Inconsistent vector dimensions at index 1: expected 3, got 2
|
|
137
|
+
```
|
|
138
|
+
|
|
71
139
|
## Configuration
|
|
72
140
|
|
|
73
141
|
Create a configuration file (Rails: `config/initializers/vectra.rb`):
|
data/docs/providers/index.md
CHANGED
|
@@ -16,6 +16,7 @@ Vectra supports multiple vector database providers. Choose the one that best fit
|
|
|
16
16
|
| [**Qdrant**]({{ site.baseurl }}/providers/qdrant) | Open Source | Self-hosted, Performance |
|
|
17
17
|
| [**Weaviate**]({{ site.baseurl }}/providers/weaviate) | Open Source | Semantic search, GraphQL |
|
|
18
18
|
| [**pgvector**]({{ site.baseurl }}/providers/pgvector) | PostgreSQL | SQL integration, ACID |
|
|
19
|
+
| [**Memory**]({{ site.baseurl }}/providers/memory) | In-Memory | Testing, CI, local dev |
|
|
19
20
|
|
|
20
21
|
## Quick Comparison
|
|
21
22
|
|
|
@@ -75,6 +76,17 @@ client.upsert(vectors: [...])
|
|
|
75
76
|
results = client.query(vector: [...], top_k: 5)
|
|
76
77
|
```
|
|
77
78
|
|
|
79
|
+
For **tests and CI** you can use the in-memory provider:
|
|
80
|
+
|
|
81
|
+
```ruby
|
|
82
|
+
# config/initializers/vectra.rb (test environment)
|
|
83
|
+
Vectra.configure do |config|
|
|
84
|
+
config.provider = :memory if Rails.env.test?
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
client = Vectra::Client.new
|
|
88
|
+
```
|
|
89
|
+
|
|
78
90
|
## Next Steps
|
|
79
91
|
|
|
80
92
|
- [Getting Started Guide]({{ site.baseurl }}/guides/getting-started)
|
|
data/docs/providers/memory.md
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
---
|
|
2
|
+
layout: page
|
|
3
|
+
title: Memory Provider
|
|
4
|
+
permalink: /providers/memory/
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Memory Provider
|
|
8
|
+
|
|
9
|
+
The **Memory provider** is an in-memory vector store built into Vectra.
|
|
10
|
+
It is designed **exclusively for testing, local development, and CI** – no external database required.
|
|
11
|
+
|
|
12
|
+
> Not for production use. All data lives in process memory and is lost when the process exits.
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- ✅ RSpec / Minitest suites (fast, isolated tests)
|
|
17
|
+
- ✅ Local development without provisioning Pinecone/Qdrant/Weaviate/pgvector
|
|
18
|
+
- ✅ CI pipelines where external services are not available
|
|
19
|
+
- ❌ Not suitable for production workloads
|
|
20
|
+
|
|
21
|
+
## Setup
|
|
22
|
+
|
|
23
|
+
### Global Configuration (Rails Example)
|
|
24
|
+
|
|
25
|
+
```ruby
|
|
26
|
+
# config/initializers/vectra.rb
|
|
27
|
+
Vectra.configure do |config|
|
|
28
|
+
config.provider = :memory if Rails.env.test?
|
|
29
|
+
end
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Then in your application code:
|
|
33
|
+
|
|
34
|
+
```ruby
|
|
35
|
+
client = Vectra::Client.new
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Direct Construction
|
|
39
|
+
|
|
40
|
+
```ruby
|
|
41
|
+
require "vectra"
|
|
42
|
+
|
|
43
|
+
client = Vectra.memory
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
No `host`, `api_key`, or environment configuration is required.
|
|
47
|
+
|
|
48
|
+
## Features
|
|
49
|
+
|
|
50
|
+
- ✅ `upsert` – store vectors in memory
|
|
51
|
+
- ✅ `query` – cosine similarity search (with optional metadata filtering)
|
|
52
|
+
- ✅ `fetch` – retrieve vectors by ID
|
|
53
|
+
- ✅ `update` – merge metadata for existing vectors
|
|
54
|
+
- ✅ `delete` – delete by IDs, namespace, filter, or `delete_all`
|
|
55
|
+
- ✅ `list_indexes` / `describe_index` – basic index metadata (dimension, metric)
|
|
56
|
+
- ✅ `stats` – vector counts per namespace
|
|
57
|
+
- ✅ `clear!` – wipe all data between tests
|
|
58
|
+
|
|
59
|
+
## Basic Usage
|
|
60
|
+
|
|
61
|
+
```ruby
|
|
62
|
+
client = Vectra.memory
|
|
63
|
+
|
|
64
|
+
# Upsert
|
|
65
|
+
client.upsert(
|
|
66
|
+
index: "documents",
|
|
67
|
+
vectors: [
|
|
68
|
+
{
|
|
69
|
+
id: "doc-1",
|
|
70
|
+
values: [0.1, 0.2, 0.3],
|
|
71
|
+
metadata: { title: "Hello", category: "docs" }
|
|
72
|
+
}
|
|
73
|
+
],
|
|
74
|
+
namespace: "test-suite"
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
# Query
|
|
78
|
+
results = client.query(
|
|
79
|
+
index: "documents",
|
|
80
|
+
vector: [0.1, 0.2, 0.3],
|
|
81
|
+
top_k: 5,
|
|
82
|
+
namespace: "test-suite",
|
|
83
|
+
filter: { category: "docs" },
|
|
84
|
+
include_metadata: true
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
results.each do |match|
|
|
88
|
+
puts "#{match.id}: #{match.metadata["title"]} (score=#{match.score.round(3)})"
|
|
89
|
+
end
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Metadata Filtering
|
|
93
|
+
|
|
94
|
+
The Memory provider supports a subset of the same filter operators as other providers:
|
|
95
|
+
|
|
96
|
+
```ruby
|
|
97
|
+
results = client.query(
|
|
98
|
+
index: "products",
|
|
99
|
+
vector: query_embedding,
|
|
100
|
+
top_k: 10,
|
|
101
|
+
filter: {
|
|
102
|
+
status: ["active", "preview"], # IN
|
|
103
|
+
price: { "$gte" => 10, "$lte" => 100 },# range
|
|
104
|
+
brand: { "$ne" => "Acme" } # not equal
|
|
105
|
+
},
|
|
106
|
+
include_metadata: true
|
|
107
|
+
)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Supported operators:
|
|
111
|
+
|
|
112
|
+
- Equality: `{ field: "value" }` or `{ field: { "$eq" => "value" } }`
|
|
113
|
+
- Inequality: `{ field: { "$ne" => "value" } }`
|
|
114
|
+
- Ranges: `{ field: { "$gt" => 10 } }`, `{ "$gte" => 10 }`, `{ "$lt" => 20 }`, `{ "$lte" => 20 }`
|
|
115
|
+
- Arrays / IN: `{ field: ["a", "b"] }` or `{ field: { "$in" => ["a", "b"] } }`
|
|
116
|
+
|
|
117
|
+
## Resetting State Between Tests
|
|
118
|
+
|
|
119
|
+
The Memory provider exposes a `clear!` method to reset all in-memory state:
|
|
120
|
+
|
|
121
|
+
```ruby
|
|
122
|
+
RSpec.describe "MyService" do
|
|
123
|
+
let(:client) { Vectra.memory }
|
|
124
|
+
|
|
125
|
+
before do
|
|
126
|
+
client.provider.clear! if client.provider.respond_to?(:clear!)
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
# your tests...
|
|
130
|
+
end
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
You can also call `clear!` on the provider obtained from a regular `Vectra::Client`:
|
|
134
|
+
|
|
135
|
+
```ruby
|
|
136
|
+
client = Vectra::Client.new(provider: :memory)
|
|
137
|
+
client.provider.clear!
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Limitations
|
|
141
|
+
|
|
142
|
+
- Not distributed – data lives only in the current Ruby process.
|
|
143
|
+
- No persistence – all vectors are lost when the process exits or `clear!` is called.
|
|
144
|
+
- Intended for **testing and development only**, not for production traffic.
|
|
145
|
+
|
data/docs/providers/weaviate.md
CHANGED
|
@@ -18,53 +18,112 @@ docker run -p 8080:8080 semitechnologies/weaviate:latest
|
|
|
18
18
|
|
|
19
19
|
### Connect with Vectra
|
|
20
20
|
|
|
21
|
+
You can either use the convenience constructor:
|
|
22
|
+
|
|
23
|
+
```ruby
|
|
24
|
+
client = Vectra.weaviate(
|
|
25
|
+
api_key: ENV["WEAVIATE_API_KEY"], # optional for local / required for cloud
|
|
26
|
+
host: "http://localhost:8080" # or your cloud endpoint
|
|
27
|
+
)
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
or configure via `Vectra::Client`:
|
|
31
|
+
|
|
21
32
|
```ruby
|
|
22
33
|
client = Vectra::Client.new(
|
|
23
34
|
provider: :weaviate,
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class_name: 'Document'
|
|
35
|
+
api_key: ENV["WEAVIATE_API_KEY"],
|
|
36
|
+
host: "https://your-weaviate-instance"
|
|
27
37
|
)
|
|
28
38
|
```
|
|
29
39
|
|
|
30
40
|
## Features
|
|
31
41
|
|
|
32
|
-
- ✅ Upsert vectors
|
|
33
|
-
- ✅
|
|
34
|
-
- ✅
|
|
35
|
-
- ✅
|
|
36
|
-
- ✅
|
|
37
|
-
- ✅
|
|
42
|
+
- ✅ Upsert vectors into Weaviate classes (per-index `class`)
|
|
43
|
+
- ✅ Vector similarity search via GraphQL `nearVector`
|
|
44
|
+
- ✅ Fetch by IDs
|
|
45
|
+
- ✅ Metadata filtering (exact match, ranges, arrays)
|
|
46
|
+
- ✅ Namespace support via `_namespace` property
|
|
47
|
+
- ✅ Delete by IDs or filter
|
|
48
|
+
- ✅ List and describe classes
|
|
49
|
+
- ✅ Basic stats via GraphQL `Aggregate`
|
|
38
50
|
|
|
39
|
-
## Example
|
|
51
|
+
## Basic Example
|
|
40
52
|
|
|
41
53
|
```ruby
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
54
|
+
require "vectra"
|
|
55
|
+
|
|
56
|
+
client = Vectra.weaviate(
|
|
57
|
+
api_key: ENV["WEAVIATE_API_KEY"],
|
|
58
|
+
host: "https://your-weaviate-instance"
|
|
47
59
|
)
|
|
48
60
|
|
|
61
|
+
index = "Document" # Weaviate class name
|
|
62
|
+
|
|
49
63
|
# Upsert vectors
|
|
50
64
|
client.upsert(
|
|
65
|
+
index: index,
|
|
51
66
|
vectors: [
|
|
52
|
-
{
|
|
53
|
-
|
|
67
|
+
{
|
|
68
|
+
id: "doc-1",
|
|
69
|
+
values: [0.1, 0.2, 0.3],
|
|
70
|
+
metadata: {
|
|
71
|
+
title: "Getting started with Vectra",
|
|
72
|
+
category: "docs"
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
],
|
|
76
|
+
namespace: "prod" # stored as _namespace property
|
|
54
77
|
)
|
|
55
78
|
|
|
56
|
-
#
|
|
57
|
-
results = client.query(
|
|
79
|
+
# Query with metadata filter
|
|
80
|
+
results = client.query(
|
|
81
|
+
index: index,
|
|
82
|
+
vector: [0.1, 0.2, 0.3],
|
|
83
|
+
top_k: 5,
|
|
84
|
+
namespace: "prod",
|
|
85
|
+
filter: { category: "docs" },
|
|
86
|
+
include_metadata: true
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
results.each do |match|
|
|
90
|
+
puts "#{match.id} (score=#{match.score.round(3)}): #{match.metadata["title"]}"
|
|
91
|
+
end
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Advanced Filtering
|
|
95
|
+
|
|
96
|
+
Vectra maps simple Ruby hashes to Weaviate `where` filters:
|
|
97
|
+
|
|
98
|
+
```ruby
|
|
99
|
+
results = client.query(
|
|
100
|
+
index: "Document",
|
|
101
|
+
vector: query_embedding,
|
|
102
|
+
top_k: 10,
|
|
103
|
+
filter: {
|
|
104
|
+
category: "blog",
|
|
105
|
+
views: { "$gt" => 1000 },
|
|
106
|
+
tags: ["ruby", "vectra"] # ContainsAny
|
|
107
|
+
},
|
|
108
|
+
include_metadata: true
|
|
109
|
+
)
|
|
58
110
|
```
|
|
59
111
|
|
|
112
|
+
Supported operators:
|
|
113
|
+
|
|
114
|
+
- Equality: `{ field: "value" }` or `{ field: { "$eq" => "value" } }`
|
|
115
|
+
- Inequality: `{ field: { "$ne" => "value" } }`
|
|
116
|
+
- Ranges: `{ field: { "$gt" => 10 } }`, `{ "$gte" => 10 }`, `{ "$lt" => 20 }`, `{ "$lte" => 20 }`
|
|
117
|
+
- Arrays: `{ field: ["a", "b"] }` (contains any), `{ field: { "$in" => ["a", "b"] } }`
|
|
118
|
+
|
|
60
119
|
## Configuration Options
|
|
61
120
|
|
|
62
|
-
| Option
|
|
63
|
-
|
|
64
|
-
| `host`
|
|
65
|
-
| `
|
|
66
|
-
|
|
67
|
-
|
|
121
|
+
| Option | Type | Required | Description |
|
|
122
|
+
|----------|--------|----------|-----------------------------------------------|
|
|
123
|
+
| `host` | String | Yes | Weaviate base URL (`http://` or `https://`) |
|
|
124
|
+
| `api_key`| String | No* | API key if auth is enabled / cloud instances |
|
|
125
|
+
|
|
126
|
+
> `api_key` is optional for local, unsecured Weaviate; required for managed/cloud deployments.
|
|
68
127
|
|
|
69
128
|
## Documentation
|
|
70
129
|
|
data/examples/README.md
CHANGED
|
@@ -261,6 +261,18 @@ The comprehensive demo now includes **4 additional sections** demonstrating prod
|
|
|
261
261
|
- **Error Tracking**: Sentry and Honeybadger integration
|
|
262
262
|
- Production monitoring setup
|
|
263
263
|
|
|
264
|
+
In addition to the classic API, the demo now also showcases the **chainable Query Builder** style:
|
|
265
|
+
|
|
266
|
+
```ruby
|
|
267
|
+
results = client
|
|
268
|
+
.query("documents")
|
|
269
|
+
.vector(embedding)
|
|
270
|
+
.top_k(10)
|
|
271
|
+
.filter(category: "ruby")
|
|
272
|
+
.with_metadata
|
|
273
|
+
.execute
|
|
274
|
+
```
|
|
275
|
+
|
|
264
276
|
## Tips for Production
|
|
265
277
|
|
|
266
278
|
### Performance
|