vectra-client 0.3.4 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +54 -4
- data/README.md +44 -2
- data/docs/api/overview.md +92 -0
- data/docs/examples/real-world.md +62 -22
- data/docs/guides/getting-started.md +116 -2
- data/docs/guides/performance.md +35 -1
- data/docs/providers/index.md +12 -0
- data/docs/providers/memory.md +145 -0
- data/docs/providers/pgvector.md +12 -0
- data/docs/providers/pinecone.md +10 -0
- data/docs/providers/qdrant.md +8 -0
- data/docs/providers/weaviate.md +94 -25
- data/examples/README.md +12 -0
- data/lib/vectra/batch.rb +63 -8
- data/lib/vectra/client.rb +253 -1
- data/lib/vectra/configuration.rb +4 -2
- data/lib/vectra/credential_rotation.rb +2 -3
- data/lib/vectra/errors.rb +3 -0
- data/lib/vectra/providers/base.rb +19 -1
- data/lib/vectra/providers/memory.rb +298 -0
- data/lib/vectra/providers/pgvector.rb +68 -0
- data/lib/vectra/providers/pinecone.rb +57 -0
- data/lib/vectra/providers/qdrant.rb +90 -0
- data/lib/vectra/providers/weaviate.rb +85 -0
- data/lib/vectra/vector.rb +56 -0
- data/lib/vectra/version.rb +1 -1
- data/lib/vectra.rb +20 -0
- metadata +3 -1
@@ -0,0 +1,145 @@
+---
+layout: page
+title: Memory Provider
+permalink: /providers/memory/
+---
+
+# Memory Provider
+
+The **Memory provider** is an in-memory vector store built into Vectra.
+It is designed **exclusively for testing, local development, and CI** – no external database required.
+
+> Not for production use. All data lives in process memory and is lost when the process exits.
+
+## When to Use
+
+- ✅ RSpec / Minitest suites (fast, isolated tests)
+- ✅ Local development without provisioning Pinecone/Qdrant/Weaviate/pgvector
+- ✅ CI pipelines where external services are not available
+- ❌ Not suitable for production workloads
+
+## Setup
+
+### Global Configuration (Rails Example)
+
+```ruby
+# config/initializers/vectra.rb
+Vectra.configure do |config|
+  config.provider = :memory if Rails.env.test?
+end
+```
+
+Then in your application code:
+
+```ruby
+client = Vectra::Client.new
+```
+
+### Direct Construction
+
+```ruby
+require "vectra"
+
+client = Vectra.memory
+```
+
+No `host`, `api_key`, or environment configuration is required.
+
+## Features
+
+- ✅ `upsert` – store vectors in memory
+- ✅ `query` – cosine similarity search (with optional metadata filtering)
+- ✅ `fetch` – retrieve vectors by ID
+- ✅ `update` – merge metadata for existing vectors
+- ✅ `delete` – delete by IDs, namespace, filter, or `delete_all`
+- ✅ `list_indexes` / `describe_index` – basic index metadata (dimension, metric)
+- ✅ `stats` – vector counts per namespace
+- ✅ `clear!` – wipe all data between tests
+
+## Basic Usage
+
+```ruby
+client = Vectra.memory
+
+# Upsert
+client.upsert(
+  index: "documents",
+  vectors: [
+    {
+      id: "doc-1",
+      values: [0.1, 0.2, 0.3],
+      metadata: { title: "Hello", category: "docs" }
+    }
+  ],
+  namespace: "test-suite"
+)
+
+# Query
+results = client.query(
+  index: "documents",
+  vector: [0.1, 0.2, 0.3],
+  top_k: 5,
+  namespace: "test-suite",
+  filter: { category: "docs" },
+  include_metadata: true
+)
+
+results.each do |match|
+  puts "#{match.id}: #{match.metadata["title"]} (score=#{match.score.round(3)})"
+end
+```
+
+## Metadata Filtering
+
+The Memory provider supports a subset of the same filter operators as other providers:
+
+```ruby
+results = client.query(
+  index: "products",
+  vector: query_embedding,
+  top_k: 10,
+  filter: {
+    status: ["active", "preview"],          # IN
+    price: { "$gte" => 10, "$lte" => 100 }, # range
+    brand: { "$ne" => "Acme" }              # not equal
+  },
+  include_metadata: true
+)
+```
+
+Supported operators:
+
+- Equality: `{ field: "value" }` or `{ field: { "$eq" => "value" } }`
+- Inequality: `{ field: { "$ne" => "value" } }`
+- Ranges: `{ field: { "$gt" => 10 } }`, `{ "$gte" => 10 }`, `{ "$lt" => 20 }`, `{ "$lte" => 20 }`
+- Arrays / IN: `{ field: ["a", "b"] }` or `{ field: { "$in" => ["a", "b"] } }`
+
+## Resetting State Between Tests
+
+The Memory provider exposes a `clear!` method to reset all in-memory state:
+
+```ruby
+RSpec.describe "MyService" do
+  let(:client) { Vectra.memory }
+
+  before do
+    client.provider.clear! if client.provider.respond_to?(:clear!)
+  end
+
+  # your tests...
+end
+```
+
+You can also call `clear!` on the provider obtained from a regular `Vectra::Client`:
+
+```ruby
+client = Vectra::Client.new(provider: :memory)
+client.provider.clear!
+```
+
+## Limitations
+
+- Not distributed – data lives only in the current Ruby process.
+- No persistence – all vectors are lost when the process exits or `clear!` is called.
+- Intended for **testing and development only**, not for production traffic.
+
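The new page demonstrates `upsert` and `query`; the remaining surface listed under Features (`fetch`, `update`, `stats`, `clear!`) is not shown. A minimal sketch of how those calls might look, assuming the same keyword style — the `fetch` arguments match the batch helpers in this release, while the `update` and `stats` argument names are assumptions:

```ruby
require "vectra"

client = Vectra.memory

client.upsert(
  index: "documents",
  vectors: [{ id: "doc-1", values: [0.1, 0.2, 0.3], metadata: { title: "Hello" } }],
  namespace: "test-suite"
)

# Retrieve stored vectors by ID (same keyword arguments the batch helpers use)
vectors = client.fetch(index: "documents", ids: ["doc-1"], namespace: "test-suite")

# Merge new metadata into an existing vector (argument names assumed)
client.update(index: "documents", id: "doc-1", metadata: { reviewed: true }, namespace: "test-suite")

# Per-namespace vector counts (argument names assumed)
stats = client.stats(index: "documents")

# Wipe everything between tests
client.provider.clear!
```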
data/docs/providers/pgvector.md
CHANGED

@@ -43,6 +43,7 @@ client = Vectra::Client.new(
 - ✅ ACID transactions
 - ✅ Complex queries
 - ✅ Rails ActiveRecord integration
+- ✅ Hybrid search (vector + full-text search)

 ## Example

@@ -63,6 +64,17 @@ client.upsert(

 # Search using cosine distance
 results = client.query(vector: [0.1, 0.2, 0.3], top_k: 5)
+
+# Hybrid search (requires text column with tsvector index)
+# First, create the index:
+# CREATE INDEX idx_content_fts ON my_index USING gin(to_tsvector('english', content));
+results = client.hybrid_search(
+  index: 'my_index',
+  vector: embedding,
+  text: 'ruby programming',
+  alpha: 0.7,
+  text_column: 'content' # default: 'content'
+)
 ```

 ## ActiveRecord Integration
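The hunk above notes that hybrid search needs a GIN index over a `tsvector` of the text column. For Rails users, a migration-style sketch of creating that index — table name `my_index` and column `content` are the hypothetical names from the example, not fixed by the gem:

```ruby
# db/migrate/XXXXXXXXXXXXXX_add_fts_index_for_hybrid_search.rb
# Sketch only: wraps the raw CREATE INDEX shown in the example above.
class AddFtsIndexForHybridSearch < ActiveRecord::Migration[7.1]
  def up
    execute <<~SQL
      CREATE INDEX idx_content_fts
      ON my_index
      USING gin(to_tsvector('english', content));
    SQL
  end

  def down
    execute "DROP INDEX IF EXISTS idx_content_fts;"
  end
end
```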
data/docs/providers/pinecone.md
CHANGED

@@ -32,6 +32,7 @@ client = Vectra::Client.new(
 - ✅ Index statistics
 - ✅ Metadata filtering
 - ✅ Namespace support
+- ⚠️ Hybrid search (partial - requires sparse vectors)

 ## Example

@@ -56,6 +57,15 @@ results = client.query(vector: [0.1, 0.2, 0.3], top_k: 5)
 results.matches.each do |match|
   puts "#{match['id']}: #{match['score']}"
 end
+
+# Hybrid search (note: requires sparse vectors for true hybrid search)
+# For now, this uses dense vector search only
+results = client.hybrid_search(
+  index: 'my-index',
+  vector: embedding,
+  text: 'ruby programming',
+  alpha: 0.7
+)
 ```

 ## Configuration Options
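Since the note above says Pinecone's `hybrid_search` currently falls back to dense-vector search, a call like the one shown should behave much like a plain `query` with the same embedding until sparse-vector support lands. A small sketch, assuming the `embedding` variable and client from the example:

```ruby
# With the dense-only fallback, these two calls should return comparable results;
# the text/alpha arguments only become meaningful once sparse vectors are supported.
via_hybrid = client.hybrid_search(index: 'my-index', vector: embedding, text: 'ruby programming', alpha: 0.7)
via_dense  = client.query(vector: embedding, top_k: 5)
```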
data/docs/providers/qdrant.md
CHANGED

@@ -56,6 +56,14 @@ client.upsert(

 # Search
 results = client.query(vector: [0.1, 0.2, 0.3], top_k: 10)
+
+# Hybrid search (semantic + keyword)
+results = client.hybrid_search(
+  index: 'my-collection',
+  vector: embedding,
+  text: 'ruby programming',
+  alpha: 0.7 # 70% semantic, 30% keyword
+)
 ```

 ## Configuration Options
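The comment above glosses `alpha: 0.7` as a 70% semantic / 30% keyword weighting. As a rough illustration of what such a blend means — not the provider's actual server-side fusion, which may differ — a combined relevance score can be thought of as a convex combination of the two normalized scores:

```ruby
# Illustrative only: alpha weights the vector-similarity score against the keyword score.
def blended_score(vector_score, keyword_score, alpha: 0.7)
  alpha * vector_score + (1 - alpha) * keyword_score
end

blended_score(0.92, 0.40)             # => 0.764 (leans on semantic similarity)
blended_score(0.92, 0.40, alpha: 0.3) # => 0.556 (leans on keyword match)
```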
data/docs/providers/weaviate.md
CHANGED

@@ -18,53 +18,122 @@ docker run -p 8080:8080 semitechnologies/weaviate:latest

 ### Connect with Vectra

+You can either use the convenience constructor:
+
+```ruby
+client = Vectra.weaviate(
+  api_key: ENV["WEAVIATE_API_KEY"], # optional for local / required for cloud
+  host: "http://localhost:8080"     # or your cloud endpoint
+)
+```
+
+or configure via `Vectra::Client`:
+
 ```ruby
 client = Vectra::Client.new(
   provider: :weaviate,
-
-
-  class_name: 'Document'
+  api_key: ENV["WEAVIATE_API_KEY"],
+  host: "https://your-weaviate-instance"
 )
 ```

 ## Features

-- ✅ Upsert vectors
-- ✅
-- ✅
-- ✅
-- ✅
-- ✅
+- ✅ Upsert vectors into Weaviate classes (per-index `class`)
+- ✅ Vector similarity search via GraphQL `nearVector`
+- ✅ Fetch by IDs
+- ✅ Metadata filtering (exact match, ranges, arrays)
+- ✅ Namespace support via `_namespace` property
+- ✅ Delete by IDs or filter
+- ✅ List and describe classes
+- ✅ Basic stats via GraphQL `Aggregate`
+- ✅ Hybrid search (BM25 + vector similarity)

-## Example
+## Basic Example

 ```ruby
-
-
-
-
-
+require "vectra"
+
+client = Vectra.weaviate(
+  api_key: ENV["WEAVIATE_API_KEY"],
+  host: "https://your-weaviate-instance"
 )

+index = "Document" # Weaviate class name
+
 # Upsert vectors
 client.upsert(
+  index: index,
   vectors: [
-    {
-
+    {
+      id: "doc-1",
+      values: [0.1, 0.2, 0.3],
+      metadata: {
+        title: "Getting started with Vectra",
+        category: "docs"
+      }
+    }
+  ],
+  namespace: "prod" # stored as _namespace property
+)
+
+# Query with metadata filter
+results = client.query(
+  index: index,
+  vector: [0.1, 0.2, 0.3],
+  top_k: 5,
+  namespace: "prod",
+  filter: { category: "docs" },
+  include_metadata: true
 )

-
-
+results.each do |match|
+  puts "#{match.id} (score=#{match.score.round(3)}): #{match.metadata["title"]}"
+end
+
+# Hybrid search (BM25 + vector)
+results = client.hybrid_search(
+  index: index,
+  vector: embedding,
+  text: 'ruby programming',
+  alpha: 0.7, # 70% semantic, 30% keyword
+  top_k: 10
+)
 ```

+## Advanced Filtering
+
+Vectra maps simple Ruby hashes to Weaviate `where` filters:
+
+```ruby
+results = client.query(
+  index: "Document",
+  vector: query_embedding,
+  top_k: 10,
+  filter: {
+    category: "blog",
+    views: { "$gt" => 1000 },
+    tags: ["ruby", "vectra"] # ContainsAny
+  },
+  include_metadata: true
+)
+```
+
+Supported operators:
+
+- Equality: `{ field: "value" }` or `{ field: { "$eq" => "value" } }`
+- Inequality: `{ field: { "$ne" => "value" } }`
+- Ranges: `{ field: { "$gt" => 10 } }`, `{ "$gte" => 10 }`, `{ "$lt" => 20 }`, `{ "$lte" => 20 }`
+- Arrays: `{ field: ["a", "b"] }` (contains any), `{ field: { "$in" => ["a", "b"] } }`
+
 ## Configuration Options

-| Option
-
-| `host`
-| `
-
-
+| Option    | Type   | Required | Description                                  |
+|-----------|--------|----------|----------------------------------------------|
+| `host`    | String | Yes      | Weaviate base URL (`http://` or `https://`)  |
+| `api_key` | String | No*      | API key if auth is enabled / cloud instances |
+
+> `api_key` is optional for local, unsecured Weaviate; required for managed/cloud deployments.

 ## Documentation

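The feature list above mentions deletion by IDs or filter and namespace support via `_namespace`, neither of which the example exercises. A sketch of what that could look like in the same keyword style — the `ids:` form matches the batch helpers in this release, while the `filter:` form for delete is an assumption based on the query filter syntax:

```ruby
# Delete specific objects by ID
client.delete(index: "Document", ids: ["doc-1"], namespace: "prod")

# Delete by metadata filter (argument shape assumed from the query filter syntax)
client.delete(index: "Document", filter: { category: "docs" }, namespace: "prod")
```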
data/examples/README.md
CHANGED

@@ -261,6 +261,18 @@ The comprehensive demo now includes **4 additional sections** demonstrating prod
 - **Error Tracking**: Sentry and Honeybadger integration
 - Production monitoring setup

+In addition to the classic API, the demo now also showcases the **chainable Query Builder** style:
+
+```ruby
+results = client
+  .query("documents")
+  .vector(embedding)
+  .top_k(10)
+  .filter(category: "ruby")
+  .with_metadata
+  .execute
+```
+
 ## Tips for Production

 ### Performance
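For readers comparing the two styles, the chained call above should correspond roughly to this classic-API invocation, with parameter names taken from the query examples elsewhere in this diff (exact defaults of the builder are not shown here):

```ruby
results = client.query(
  index: "documents",
  vector: embedding,
  top_k: 10,
  filter: { category: "ruby" },
  include_metadata: true
)
```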
data/lib/vectra/batch.rb
CHANGED

@@ -17,6 +17,17 @@ module Vectra
   #   )
   #   puts "Upserted: #{result[:upserted_count]}"
   #
+  # @example With progress tracking
+  #   batch.upsert_async(
+  #     index: 'docs',
+  #     vectors: large_array,
+  #     on_progress: ->(stats) {
+  #       puts "Progress: #{stats[:percentage]}% (#{stats[:processed]}/#{stats[:total]})"
+  #       puts "  Chunk #{stats[:current_chunk] + 1}/#{stats[:total_chunks]}"
+  #       puts "  Success: #{stats[:success_count]}, Failed: #{stats[:failed_count]}"
+  #     }
+  #   )
+  #
   class Batch
     DEFAULT_CONCURRENCY = 4
     DEFAULT_CHUNK_SIZE = 100

@@ -38,12 +49,23 @@ module Vectra
     # @param vectors [Array<Hash>] vectors to upsert
     # @param namespace [String, nil] optional namespace
     # @param chunk_size [Integer] vectors per chunk (default: 100)
+    # @param on_progress [Proc, nil] optional callback called after each chunk completes
+    #   Callback receives hash with: processed, total, percentage, current_chunk, total_chunks, success_count, failed_count
     # @return [Hash] aggregated result with :upserted_count, :chunks, :errors
-
+    #
+    # @example With progress callback
+    #   batch.upsert_async(
+    #     index: 'docs',
+    #     vectors: large_array,
+    #     on_progress: ->(stats) {
+    #       puts "Progress: #{stats[:percentage]}% (#{stats[:processed]}/#{stats[:total]})"
+    #     }
+    #   )
+    def upsert_async(index:, vectors:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE, on_progress: nil)
       chunks = vectors.each_slice(chunk_size).to_a
       return { upserted_count: 0, chunks: 0, errors: [] } if chunks.empty?

-      results = process_chunks_concurrently(chunks) do |chunk|
+      results = process_chunks_concurrently(chunks, total_items: vectors.size, on_progress: on_progress) do |chunk|
         client.upsert(index: index, vectors: chunk, namespace: namespace)
       end

@@ -56,12 +78,14 @@ module Vectra
     # @param ids [Array<String>] IDs to delete
     # @param namespace [String, nil] optional namespace
     # @param chunk_size [Integer] IDs per chunk (default: 100)
+    # @param on_progress [Proc, nil] optional callback called after each chunk completes
+    #   Callback receives hash with: processed, total, percentage, current_chunk, total_chunks, success_count, failed_count
     # @return [Hash] aggregated result
-    def delete_async(index:, ids:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE)
+    def delete_async(index:, ids:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE, on_progress: nil)
       chunks = ids.each_slice(chunk_size).to_a
       return { deleted_count: 0, chunks: 0, errors: [] } if chunks.empty?

-      results = process_chunks_concurrently(chunks) do |chunk|
+      results = process_chunks_concurrently(chunks, total_items: ids.size, on_progress: on_progress) do |chunk|
         client.delete(index: index, ids: chunk, namespace: namespace)
       end

@@ -74,12 +98,14 @@ module Vectra
     # @param ids [Array<String>] IDs to fetch
     # @param namespace [String, nil] optional namespace
     # @param chunk_size [Integer] IDs per chunk (default: 100)
+    # @param on_progress [Proc, nil] optional callback called after each chunk completes
+    #   Callback receives hash with: processed, total, percentage, current_chunk, total_chunks, success_count, failed_count
     # @return [Hash<String, Vector>] merged results
-    def fetch_async(index:, ids:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE)
+    def fetch_async(index:, ids:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE, on_progress: nil)
       chunks = ids.each_slice(chunk_size).to_a
       return {} if chunks.empty?

-      results = process_chunks_concurrently(chunks) do |chunk|
+      results = process_chunks_concurrently(chunks, total_items: ids.size, on_progress: on_progress) do |chunk|
         client.fetch(index: index, ids: chunk, namespace: namespace)
       end

@@ -88,15 +114,43 @@ module Vectra

     private

-
+    # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockLength
+    def process_chunks_concurrently(chunks, total_items: nil, on_progress: nil)
       pool = Concurrent::FixedThreadPool.new(concurrency)
       futures = []
+      progress_mutex = Mutex.new
+      completed_count = Concurrent::AtomicFixnum.new(0)
+      success_count = Concurrent::AtomicFixnum.new(0)
+      failed_count = Concurrent::AtomicFixnum.new(0)

       chunks.each_with_index do |chunk, index|
         futures << Concurrent::Future.execute(executor: pool) do
-
+          result = yield(chunk)
+          success_count.increment
+          { index: index, result: result, error: nil }
         rescue StandardError => e
+          failed_count.increment
           { index: index, result: nil, error: e }
+        ensure
+          # Call progress callback when chunk completes
+          if on_progress
+            completed = completed_count.increment
+            total_size = chunks.size * chunks.first.size
+            processed = [completed * chunks.first.size, total_items || total_size].min
+            percentage = total_items ? (processed.to_f / total_items * 100).round(2) : (completed.to_f / chunks.size * 100).round(2)
+
+            progress_mutex.synchronize do
+              on_progress.call(
+                processed: processed,
+                total: total_items || total_size,
+                percentage: percentage,
+                current_chunk: completed - 1,
+                total_chunks: chunks.size,
+                success_count: success_count.value,
+                failed_count: failed_count.value
+              )
+            end
+          end
         end
       end

@@ -107,6 +161,7 @@ module Vectra

       results.sort_by { |r| r[:index] }
     end
+    # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockLength

     def aggregate_results(results, total_vectors)
       errors = results.select { |r| r[:error] }.map { |r| r[:error] }