vectra-client 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +86 -37
- data/SECURITY.md +134 -4
- data/docs/_layouts/page.html +2 -0
- data/docs/guides/monitoring.md +860 -0
- data/docs/guides/runbooks/cache-issues.md +267 -0
- data/docs/guides/runbooks/high-error-rate.md +152 -0
- data/docs/guides/runbooks/high-latency.md +287 -0
- data/docs/guides/runbooks/pool-exhausted.md +216 -0
- data/docs/guides/security.md +348 -0
- data/lib/vectra/audit_log.rb +225 -0
- data/lib/vectra/circuit_breaker.rb +336 -0
- data/lib/vectra/client.rb +2 -0
- data/lib/vectra/credential_rotation.rb +199 -0
- data/lib/vectra/health_check.rb +254 -0
- data/lib/vectra/instrumentation/honeybadger.rb +128 -0
- data/lib/vectra/instrumentation/sentry.rb +117 -0
- data/lib/vectra/logging.rb +242 -0
- data/lib/vectra/rate_limiter.rb +304 -0
- data/lib/vectra/version.rb +1 -1
- data/lib/vectra.rb +6 -0
- metadata +15 -1
data/docs/guides/runbooks/cache-issues.md
@@ -0,0 +1,267 @@
---
layout: page
title: "Runbook: Cache Issues"
permalink: /guides/runbooks/cache-issues/
---

# Runbook: Cache Issues

**Alert:** `VectraLowCacheHitRatio`
**Severity:** Warning
**Threshold:** Cache hit ratio <50% for 10 minutes

## Symptoms

- High cache miss rate
- Increased database load
- Higher latency than expected
- Stale data being returned

## Quick Diagnosis

```ruby
cache = Vectra::Cache.new
stats = cache.stats

puts "Size: #{stats[:size]} / #{stats[:max_size]}"
puts "TTL: #{stats[:ttl]} seconds"
puts "Keys: #{stats[:keys].count}"
```

```promql
# Prometheus: Check hit ratio
sum(vectra_cache_hits_total) /
(sum(vectra_cache_hits_total) + sum(vectra_cache_misses_total))
```

## Investigation Steps

### 1. Check Cache Configuration

```ruby
# Current config
puts Vectra.configuration.cache_enabled  # Should be true
puts Vectra.configuration.cache_ttl      # Default: 300
puts Vectra.configuration.cache_max_size # Default: 1000
```

### 2. Analyze Access Patterns

```ruby
# Check what's being cached
cache.stats[:keys].each do |key|
  parts = key.split(":")
  puts "Index: #{parts[0]}, Type: #{parts[1]}"
end

# Count by type
keys = cache.stats[:keys]
queries = keys.count { |k| k.include?(":q:") }
fetches = keys.count { |k| k.include?(":f:") }
puts "Query cache entries: #{queries}"
puts "Fetch cache entries: #{fetches}"
```

### 3. Check for Cache Thrashing

```ruby
# If max_size is too small, the cache thrashes:
# entries are evicted immediately after creation.
# Solution: increase max_size.

stats = cache.stats
if stats[:size] >= stats[:max_size] * 0.9
  puts "WARNING: Cache near capacity - consider increasing max_size"
end
```

### 4. Check TTL Appropriateness

```ruby
# If TTL is too short, cache misses are high.
# If TTL is too long, stale data is served.

# Match TTL to data freshness requirements:
# - Real-time data:   TTL 30-60s
# - Semi-static data: TTL 300-600s
# - Static data:      TTL 3600s+
```

## Resolution Steps

### Low Hit Ratio

#### Increase Cache Size

```ruby
cache = Vectra::Cache.new(
  ttl: 300,
  max_size: 5000 # Increase from 1000
)
cached_client = Vectra::CachedClient.new(client, cache: cache)
```

#### Adjust TTL

```ruby
# For high-churn data
cache = Vectra::Cache.new(ttl: 60) # 1 minute

# For stable data
cache = Vectra::Cache.new(ttl: 3600) # 1 hour
```

#### Cache Warming

```ruby
# Pre-populate the cache on startup
common_queries = load_common_queries
common_queries.each do |q|
  cached_client.query(
    index: q[:index],
    vector: q[:vector],
    top_k: q[:top_k]
  )
end
```

### Stale Data

#### Reduce TTL

```ruby
cache = Vectra::Cache.new(ttl: 60) # Reduce from 300
```

#### Implement Cache Invalidation

```ruby
# After an upsert, invalidate the affected cache entries
def upsert_with_invalidation(index:, vectors:)
  result = client.upsert(index: index, vectors: vectors)
  cached_client.invalidate_index(index)
  result
end
```

#### Use Cache-Aside Pattern

```ruby
def get_vector(id)
  # Check cache first
  cached = cache.get("vector:#{id}")
  return cached if cached

  # Fetch from source
  vector = client.fetch(index: "main", ids: [id])[id]

  # Cache with appropriate TTL
  cache.set("vector:#{id}", vector)
  vector
end
```

### Cache Thrashing

#### Increase Max Size

```ruby
# Rule of thumb: max_size = unique_queries_per_ttl * 1.5
# Example: 1000 unique queries per 5 min -> max_size = 1500
cache = Vectra::Cache.new(
  ttl: 300,
  max_size: 1500
)
```

#### Implement Tiered Caching

```ruby
# Hot cache: small, short TTL
hot_cache = Vectra::Cache.new(ttl: 60, max_size: 100)

# Warm cache: large, longer TTL
warm_cache = Vectra::Cache.new(ttl: 600, max_size: 5000)

# Check hot first, then warm (hot_cache/warm_cache must be
# visible here, e.g. as methods or memoized accessors)
def cached_query(key, **params)
  hot_cache.fetch(key) do
    warm_cache.fetch(key) do
      client.query(**params)
    end
  end
end
```

### Memory Issues

#### Monitor Memory Usage

```ruby
# Estimate cache memory usage
# Approximation: ~1KB per cached query result
estimated_mb = cache.stats[:size] * 1.0 / 1000
puts "Estimated cache memory: #{estimated_mb} MB"
```

#### Implement LRU Eviction

```ruby
# Vectra::Cache already implements LRU eviction.
# If memory is still an issue, reduce max_size.
cache = Vectra::Cache.new(max_size: 500)
```

## Prevention

### 1. Right-size Cache

```ruby
# Calculate based on query patterns
unique_queries_per_minute = 100
ttl_minutes = 5
buffer = 1.5

max_size = unique_queries_per_minute * ttl_minutes * buffer
# = 100 * 5 * 1.5 = 750
```
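
Feeding the result back into the constructor (a sketch; `ttl_minutes` and `max_size` come from the block above):

```ruby
cache = Vectra::Cache.new(ttl: ttl_minutes * 60, max_size: max_size.to_i)
```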

### 2. Monitor Cache Metrics

```promql
# Alert on low hit ratio
sum(rate(vectra_cache_hits_total[5m])) /
(sum(rate(vectra_cache_hits_total[5m])) +
 sum(rate(vectra_cache_misses_total[5m]))) < 0.5
```

### 3. Implement Cache Warm-up

```ruby
# In application boot
Rails.application.config.after_initialize do
  VectraCacheWarmer.perform_async
end
```
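
`VectraCacheWarmer` here is an application-defined job, not part of the gem. A minimal Sidekiq-style sketch, reusing the hypothetical `load_common_queries` helper from the Cache Warming section above:

```ruby
# Hypothetical warm-up job - adapt to your background-job framework.
# Note: an in-process cache warmed in a worker only helps that worker;
# run this in (or share a cache store with) the process serving queries.
class VectraCacheWarmer
  include Sidekiq::Job

  def perform
    client = Vectra::Client.new
    cache = Vectra::Cache.new(ttl: 300, max_size: 1000)
    cached_client = Vectra::CachedClient.new(client, cache: cache)

    load_common_queries.each do |q|
      cached_client.query(index: q[:index], vector: q[:vector], top_k: q[:top_k])
    end
  end
end
```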

### 4. Use Cache Namespacing

```ruby
# Separate caches for different use cases
search_cache = Vectra::Cache.new(ttl: 60)  # Fast invalidation
embed_cache = Vectra::Cache.new(ttl: 3600) # Long-lived embeddings
```

## Escalation

| Time | Action |
|------|--------|
| 10 min | Adjust TTL/max_size |
| 30 min | Implement cache warming |
| 1 hour | Review access patterns |
| 2 hours | Consider Redis/Memcached |

## Related

- [Performance Guide]({{ site.baseurl }}/guides/performance)
- [Monitoring Guide]({{ site.baseurl }}/guides/monitoring)
data/docs/guides/runbooks/high-error-rate.md
@@ -0,0 +1,152 @@
---
layout: page
title: "Runbook: High Error Rate"
permalink: /guides/runbooks/high-error-rate/
---

# Runbook: High Error Rate

**Alert:** `VectraHighErrorRate`
**Severity:** Critical
**Threshold:** Error rate >5% for 5 minutes

## Symptoms

- Alert firing for high error rate
- Users reporting failed operations
- Increased latency alongside errors

## Quick Diagnosis

```bash
# Check recent errors in logs
grep -i "vectra.*error" /var/log/app.log | tail -50

# Check error breakdown by type
curl -sG 'localhost:9090/api/v1/query' \
  --data-urlencode 'query=sum(vectra_errors_total) by (error_type)' | jq
```

## Investigation Steps

### 1. Identify Error Type

```ruby
# In Rails console
Vectra::Client.new.stats(index: "your-index")
```

| Error Type | Likely Cause | Action |
|------------|--------------|--------|
| `AuthenticationError` | Invalid/expired API key | Check credentials |
| `RateLimitError` | Too many requests | Implement backoff |
| `ServerError` | Provider outage | Check provider status |
| `ConnectionError` | Network issues | Check connectivity |
| `ValidationError` | Bad request data | Check input validation |
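
A minimal triage sketch based on the table above (the index name and query vector are placeholders; `Vectra::ConnectionError` and `Vectra::ValidationError` are assumed to live in the same namespace as the error classes grepped below):

```ruby
vec = [0.1] * 384 # Placeholder query vector

begin
  client.query(index: "your-index", vector: vec, top_k: 10)
rescue Vectra::AuthenticationError
  warn "Auth failure - check API key and credentials"
rescue Vectra::RateLimitError
  warn "Rate limited - add backoff (see Rate Limit Errors below)"
rescue Vectra::ServerError, Vectra::ConnectionError => e
  warn "Provider or network issue (#{e.class}) - check provider status"
rescue Vectra::ValidationError => e
  warn "Bad request data: #{e.message}"
end
```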

### 2. Check Provider Status

- **Pinecone:** [status.pinecone.io](https://status.pinecone.io)
- **Qdrant:** Check self-hosted logs or cloud dashboard
- **pgvector:** `SELECT * FROM pg_stat_activity WHERE state = 'active';`

### 3. Check Application Logs

```bash
# Filter by error class
grep "Vectra::RateLimitError" /var/log/app.log | wc -l
grep "Vectra::ServerError" /var/log/app.log | wc -l
grep "Vectra::AuthenticationError" /var/log/app.log | wc -l
```

## Resolution Steps

### Authentication Errors

```ruby
# Verify the API key is set
puts ENV['PINECONE_API_KEY'].nil? ? "MISSING" : "SET"

# Test the connection
client = Vectra::Client.new
client.list_indexes
```

### Rate Limit Errors

```ruby
# Configure retries with backoff
Vectra.configure do |config|
  config.max_retries = 5
  config.retry_delay = 2 # Start with a 2s delay
end

# Or use batch operations with a lower concurrency limit
batch = Vectra::Batch.new(client, concurrency: 2) # Reduce from 4
```
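
If the configured retries are exhausted, a small wrapper can add exponential backoff at the call site. A sketch, assuming only the `Vectra::RateLimitError` class from above (`with_backoff` is not a gem API):

```ruby
# Hand-rolled exponential backoff: sleeps 2s, 4s, 8s, ... between attempts
def with_backoff(max_attempts: 5, base_delay: 2)
  attempts = 0
  begin
    yield
  rescue Vectra::RateLimitError
    attempts += 1
    raise if attempts >= max_attempts
    sleep(base_delay * (2**(attempts - 1)))
    retry
  end
end

with_backoff { client.query(index: "my-index", vector: [0.1] * 384, top_k: 10) }
```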

### Server Errors

1. Check the provider status page
2. If the provider is down, enable a fallback or circuit breaker
3. Consider failover to a backup provider

```ruby
# Simple circuit breaker (minimal sketch; the helpers are illustrative)
class VectraCircuitBreaker
  COOL_OFF = 60 # Seconds to stay open after a failure

  def self.call
    return cached_response if circuit_open?

    yield
  rescue Vectra::ServerError
    open_circuit!
    cached_response
  end

  def self.circuit_open? = @open_until && Time.now < @open_until
  def self.open_circuit! = (@open_until = Time.now + COOL_OFF)
  def self.cached_response = nil # Serve a stale cached result here if you keep one
end
```

### Connection Errors

```bash
# Test network connectivity
curl -I https://api.pinecone.io/health

# Check DNS resolution
nslookup api.pinecone.io

# Check firewall rules
iptables -L -n | grep -i pinecone
```

## Prevention

1. **Set up retry logic:**
   ```ruby
   config.max_retries = 3
   config.retry_delay = 1
   ```

2. **Monitor error rate trends:**
   ```promql
   increase(vectra_errors_total[1h])
   ```

3. **Implement circuit breakers** for provider outages (see the sketch under Server Errors above)

4. **Cache frequently accessed data:**
   ```ruby
   cached_client = Vectra::CachedClient.new(client)
   ```

## Escalation

| Time | Action |
|------|--------|
| 5 min | Page on-call engineer |
| 15 min | Escalate to team lead |
| 30 min | Consider provider failover |
| 1 hour | Engage provider support |

## Related

- [High Latency Runbook]({{ site.baseurl }}/guides/runbooks/high-latency)
- [Monitoring Guide]({{ site.baseurl }}/guides/monitoring)
data/docs/guides/runbooks/high-latency.md
@@ -0,0 +1,287 @@
---
layout: page
title: "Runbook: High Latency"
permalink: /guides/runbooks/high-latency/
---

# Runbook: High Latency

**Alert:** `VectraHighLatency`
**Severity:** Warning
**Threshold:** P95 latency >2s for 5 minutes

## Symptoms

- Slow vector operations
- Request timeouts
- User-facing latency issues
- Queue backlog building up

## Quick Diagnosis

```promql
# Check current latency by operation
histogram_quantile(0.95,
  sum(rate(vectra_request_duration_seconds_bucket[5m])) by (le, operation)
)
```

```ruby
# Test latency in console
require 'benchmark'

time = Benchmark.realtime do
  client.query(index: "test", vector: [0.1] * 384, top_k: 10)
end
puts "Query latency: #{(time * 1000).round}ms"
```

## Investigation Steps

### 1. Identify Slow Operations

```promql
# Which operations are slow?
topk(5,
  histogram_quantile(0.95,
    sum(rate(vectra_request_duration_seconds_bucket[5m])) by (le, operation)
  )
)
```

| Operation | Expected P95 | Alert Threshold |
|-----------|--------------|-----------------|
| query | <500ms | >2s |
| upsert (single) | <200ms | >1s |
| upsert (batch 100) | <2s | >5s |
| fetch | <100ms | >500ms |
| delete | <200ms | >1s |
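
As a quick spot check against the expected P95s above (a sketch: the index name, IDs, and the `{ id:, values: }` payload shape are placeholders, not confirmed gem types):

```ruby
require 'benchmark'

thresholds = { query: 0.5, fetch: 0.1, upsert: 0.2 } # Seconds, from the table
vec = [0.1] * 384

thresholds.each do |op, limit|
  elapsed = Benchmark.realtime do
    case op
    when :query  then client.query(index: "test", vector: vec, top_k: 10)
    when :fetch  then client.fetch(index: "test", ids: ["id1"])
    when :upsert then client.upsert(index: "test", vectors: [{ id: "id1", values: vec }])
    end
  end
  puts format("%-6s %6.0fms %s", op, elapsed * 1000, elapsed > limit ? "SLOW" : "ok")
end
```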

### 2. Check Provider Status

```bash
# Test provider connectivity
curl -w "@curl-format.txt" -o /dev/null -s https://api.pinecone.io/health

# curl-format.txt:
#   time_namelookup:    %{time_namelookup}\n
#   time_connect:       %{time_connect}\n
#   time_starttransfer: %{time_starttransfer}\n
#   time_total:         %{time_total}\n
```

### 3. Check Network Latency

```bash
# Ping the provider endpoint
ping -c 10 api.pinecone.io

# Check for packet loss
mtr api.pinecone.io

# DNS resolution time
time nslookup api.pinecone.io
```

### 4. Check Vector Dimensions

```ruby
# Large vectors = slower operations
client.describe_index(index: "my-index")
# => { dimension: 1536, ... }

# Consider using smaller embeddings:
# - text-embedding-3-small: 512-1536 dims
# - text-embedding-ada-002: 1536 dims
# - all-MiniLM-L6-v2: 384 dims (faster!)
```

### 5. Check Index Size

```ruby
stats = client.stats(index: "my-index")
puts "Vector count: #{stats[:total_vector_count]}"
puts "Index fullness: #{stats[:index_fullness]}"

# Large indexes may need optimization:
# - Pinecone: check pod type
# - pgvector: check IVFFlat parameters
# - Qdrant: check HNSW parameters
```

## Resolution Steps

### Immediate: Increase Timeouts

```ruby
Vectra.configure do |config|
  config.timeout = 60      # Increase from 30
  config.open_timeout = 20 # Increase from 10
end
```

### Enable Caching

```ruby
cache = Vectra::Cache.new(ttl: 300, max_size: 1000)
cached_client = Vectra::CachedClient.new(client, cache: cache)

# Repeat queries are then served from memory
```

### Optimize Batch Operations

```ruby
# Use smaller batches for faster responses
batch = Vectra::Batch.new(client, concurrency: 2)

result = batch.upsert_async(
  index: "my-index",
  vectors: vectors,
  chunk_size: 50 # Smaller chunks = faster individual operations
)
```

### Reduce top_k

```ruby
# Fewer results = faster query
results = client.query(
  index: "my-index",
  vector: query_vec,
  top_k: 5 # Instead of 100
)
```

### Provider-Specific Optimizations

#### Pinecone

```ruby
# Use serverless for auto-scaling,
# or upgrade the pod type for more capacity
```

#### pgvector

```sql
-- Check if an index exists
SELECT indexname FROM pg_indexes WHERE tablename = 'your_table';

-- Create an IVFFlat index for faster queries
CREATE INDEX ON your_table
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);

-- Increase probes for the accuracy vs. speed trade-off
SET ivfflat.probes = 10; -- Default: 1
```

#### Qdrant

```ruby
# Optimize HNSW parameters
client.provider.create_index(
  name: "optimized",
  dimension: 384,
  metric: "cosine",
  hnsw_config: {
    m: 16,            # Connections per node
    ef_construct: 100 # Build-time accuracy
  }
)
```

### Connection Pooling (pgvector)

```ruby
# Warm up connections to avoid cold-start latency
client.provider.warmup_pool(5)

# Increase pool size for parallel queries
Vectra.configure do |config|
  config.pool_size = 20
end
```

## Prevention

### 1. Monitor Latency Trends

```promql
# Alert on an increasing latency trend
rate(vectra_request_duration_seconds_sum[1h]) /
rate(vectra_request_duration_seconds_count[1h]) > 1
```

### 2. Implement Request Timeouts

```ruby
# Fail fast instead of hanging
Vectra.configure do |config|
  config.timeout = 10 # Strict timeout
end
```

### 3. Use Async Operations

```ruby
# Don't block on upserts
Thread.new do
  batch.upsert_async(index: "bg-index", vectors: vectors)
end
```

### 4. Index Maintenance

```sql
-- pgvector: reindex periodically
REINDEX INDEX your_ivfflat_index;

-- Analyze for the query planner
ANALYZE your_table;
```

### 5. Geographic Optimization

```ruby
# Use the region closest to your servers
# - Pinecone: us-east-1, us-west-2, eu-west-1
# - Qdrant Cloud: select the nearest region
```

## Benchmarking

```ruby
# Run a benchmark to establish a baseline
# (vec and vectors_100 prepared beforehand)
require 'benchmark'

Benchmark.bm do |x|
  x.report("query") do
    100.times { client.query(index: "test", vector: vec, top_k: 10) }
  end

  x.report("upsert") do
    client.upsert(index: "test", vectors: vectors_100)
  end

  x.report("fetch") do
    100.times { client.fetch(index: "test", ids: ["id1"]) }
  end
end
```

## Escalation

| Time | Action |
|------|--------|
| 5 min | Enable caching, increase timeouts |
| 15 min | Check provider status, optimize queries |
| 30 min | Scale up provider resources |
| 1 hour | Engage provider support |

## Related

- [High Error Rate Runbook]({{ site.baseurl }}/guides/runbooks/high-error-rate)
- [Performance Guide]({{ site.baseurl }}/guides/performance)
- [Monitoring Guide]({{ site.baseurl }}/guides/monitoring)