vectra-client 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +86 -37
- data/SECURITY.md +134 -4
- data/docs/_layouts/page.html +2 -0
- data/docs/guides/monitoring.md +860 -0
- data/docs/guides/runbooks/cache-issues.md +267 -0
- data/docs/guides/runbooks/high-error-rate.md +152 -0
- data/docs/guides/runbooks/high-latency.md +287 -0
- data/docs/guides/runbooks/pool-exhausted.md +216 -0
- data/docs/guides/security.md +348 -0
- data/lib/vectra/audit_log.rb +225 -0
- data/lib/vectra/circuit_breaker.rb +336 -0
- data/lib/vectra/client.rb +2 -0
- data/lib/vectra/credential_rotation.rb +199 -0
- data/lib/vectra/health_check.rb +254 -0
- data/lib/vectra/instrumentation/honeybadger.rb +128 -0
- data/lib/vectra/instrumentation/sentry.rb +117 -0
- data/lib/vectra/logging.rb +242 -0
- data/lib/vectra/rate_limiter.rb +304 -0
- data/lib/vectra/version.rb +1 -1
- data/lib/vectra.rb +6 -0
- metadata +15 -1
@@ -0,0 +1,860 @@
---
layout: page
title: Monitoring & Observability
permalink: /guides/monitoring/
---

# Monitoring & Observability

Complete guide for monitoring Vectra in production with Prometheus, Grafana, and APM tools.

## Quick Setup

```ruby
Vectra.configure do |config|
  config.provider = :pinecone
  config.api_key = ENV['PINECONE_API_KEY']
  config.instrumentation = true # Enable metrics
end
```

## Prometheus Metrics

### Exporter Setup

Create `config/initializers/vectra_metrics.rb`:

```ruby
# frozen_string_literal: true

require "prometheus/client"

module VectraMetrics
  REGISTRY = Prometheus::Client.registry

  # Request counters
  REQUESTS_TOTAL = REGISTRY.counter(
    :vectra_requests_total,
    docstring: "Total Vectra requests",
    labels: [:provider, :operation, :status]
  )

  # Latency histogram
  REQUEST_DURATION = REGISTRY.histogram(
    :vectra_request_duration_seconds,
    docstring: "Request duration in seconds",
    labels: [:provider, :operation],
    buckets: [0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]
  )

  # Vector counts
  VECTORS_PROCESSED = REGISTRY.counter(
    :vectra_vectors_processed_total,
    docstring: "Total vectors processed",
    labels: [:provider, :operation]
  )

  # Cache metrics
  CACHE_HITS = REGISTRY.counter(
    :vectra_cache_hits_total,
    docstring: "Cache hit count"
  )

  CACHE_MISSES = REGISTRY.counter(
    :vectra_cache_misses_total,
    docstring: "Cache miss count"
  )

  # Pool metrics (pgvector)
  POOL_SIZE = REGISTRY.gauge(
    :vectra_pool_connections,
    docstring: "Connection pool size",
    labels: [:state] # available, checked_out
  )

  # Error counter
  ERRORS_TOTAL = REGISTRY.counter(
    :vectra_errors_total,
    docstring: "Total errors",
    labels: [:provider, :error_type]
  )
end

# Custom instrumentation handler
Vectra::Instrumentation.register(:prometheus) do |event|
  labels = {
    provider: event[:provider],
    operation: event[:operation]
  }

  # Record request
  status = event[:error] ? "error" : "success"
  VectraMetrics::REQUESTS_TOTAL.increment(labels: labels.merge(status: status))

  # Record duration
  if event[:duration]
    VectraMetrics::REQUEST_DURATION.observe(event[:duration], labels: labels)
  end

  # Record vector count
  if event[:metadata]&.dig(:vector_count)
    VectraMetrics::VECTORS_PROCESSED.increment(
      by: event[:metadata][:vector_count],
      labels: labels
    )
  end

  # Record errors
  if event[:error]
    VectraMetrics::ERRORS_TOTAL.increment(
      labels: labels.merge(error_type: event[:error].class.name)
    )
  end
end
```
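
The cache counters and the pool gauge above are declared, but the handler only sees per-request events and never updates them. One way to feed the pool gauge, sketched below, is a small poller that reuses the health check described later in this guide; the 15-second interval and the hash keys are assumptions to adapt to your setup.

```ruby
# A minimal sketch: poll pool stats and mirror them into the gauge.
# The interval and the :pool / :available / :checked_out keys are assumptions
# based on the health check output shown in the Health Check section.
Thread.new do
  client = Vectra::Client.new

  loop do
    begin
      pool = client.health_check(include_stats: true).to_h[:pool] || {}

      VectraMetrics::POOL_SIZE.set(pool[:available].to_i, labels: { state: "available" })
      VectraMetrics::POOL_SIZE.set(pool[:checked_out].to_i, labels: { state: "checked_out" })
    rescue StandardError
      # Keep polling even if a single check fails
    end

    sleep 15
  end
end
```

The cache counters can be wired up the same way from whatever cache statistics your deployment exposes.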

### Prometheus Scrape Config

Add to `prometheus.yml`:

```yaml
scrape_configs:
  - job_name: 'vectra'
    static_configs:
      - targets: ['localhost:9292']
    metrics_path: '/metrics'
    scrape_interval: 15s
```

### Expose Metrics Endpoint (Rack)

```ruby
# config.ru
require "prometheus/middleware/exporter"

use Prometheus::Middleware::Exporter
run YourApp
```
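
On Rails the same middleware can be added from an initializer instead of `config.ru`; a minimal sketch:

```ruby
# config/initializers/prometheus_exporter.rb
require "prometheus/middleware/exporter"

# Serves the default Prometheus::Client registry at /metrics
Rails.application.config.middleware.use Prometheus::Middleware::Exporter
```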

## Grafana Dashboard

### Dashboard JSON Template

Save as `vectra-dashboard.json` and import into Grafana:

```json
{
  "dashboard": {
    "title": "Vectra Vector Database Metrics",
    "uid": "vectra-metrics",
    "timezone": "browser",
    "refresh": "30s",
    "panels": [
      {
        "title": "Request Rate",
        "type": "graph",
        "gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 },
        "targets": [
          {
            "expr": "sum(rate(vectra_requests_total[5m])) by (operation)",
            "legendFormat": "{{ "{{operation}}" }}"
          }
        ]
      },
      {
        "title": "Request Latency (p95)",
        "type": "graph",
        "gridPos": { "x": 12, "y": 0, "w": 12, "h": 8 },
        "targets": [
          {
            "expr": "histogram_quantile(0.95, sum(rate(vectra_request_duration_seconds_bucket[5m])) by (le, operation))",
            "legendFormat": "{{ "{{operation}}" }} p95"
          }
        ]
      },
      {
        "title": "Error Rate",
        "type": "graph",
        "gridPos": { "x": 0, "y": 8, "w": 12, "h": 8 },
        "targets": [
          {
            "expr": "sum(rate(vectra_errors_total[5m])) by (error_type)",
            "legendFormat": "{{ "{{error_type}}" }}"
          }
        ]
      },
      {
        "title": "Vectors Processed",
        "type": "stat",
        "gridPos": { "x": 12, "y": 8, "w": 6, "h": 8 },
        "targets": [
          {
            "expr": "sum(increase(vectra_vectors_processed_total[24h]))",
            "legendFormat": "24h Total"
          }
        ]
      },
      {
        "title": "Cache Hit Ratio",
        "type": "gauge",
        "gridPos": { "x": 18, "y": 8, "w": 6, "h": 8 },
        "targets": [
          {
            "expr": "sum(vectra_cache_hits_total) / (sum(vectra_cache_hits_total) + sum(vectra_cache_misses_total)) * 100"
          }
        ],
        "fieldConfig": {
          "defaults": {
            "unit": "percent",
            "max": 100,
            "thresholds": {
              "steps": [
                { "color": "red", "value": 0 },
                { "color": "yellow", "value": 50 },
                { "color": "green", "value": 80 }
              ]
            }
          }
        }
      },
      {
        "title": "Connection Pool (pgvector)",
        "type": "graph",
        "gridPos": { "x": 0, "y": 16, "w": 12, "h": 8 },
        "targets": [
          {
            "expr": "vectra_pool_connections{state='available'}",
            "legendFormat": "Available"
          },
          {
            "expr": "vectra_pool_connections{state='checked_out'}",
            "legendFormat": "In Use"
          }
        ]
      },
      {
        "title": "Operations by Provider",
        "type": "piechart",
        "gridPos": { "x": 12, "y": 16, "w": 12, "h": 8 },
        "targets": [
          {
            "expr": "sum(vectra_requests_total) by (provider)",
            "legendFormat": "{{ "{{provider}}" }}"
          }
        ]
      }
    ],
    "templating": {
      "list": [
        {
          "name": "provider",
          "type": "query",
          "query": "label_values(vectra_requests_total, provider)",
          "multi": true,
          "includeAll": true
        }
      ]
    }
  }
}
```

## APM Integration

### Datadog

```ruby
# config/initializers/vectra_datadog.rb
require "vectra/instrumentation/datadog"

Vectra.configure do |config|
  config.instrumentation = true
end

# Auto-traces all Vectra operations with:
# - Service name: vectra
# - Resource: operation name (upsert, query, etc.)
# - Tags: provider, index, vector_count
```

#### Datadog Dashboard JSON

```json
{
  "title": "Vectra Performance",
  "widgets": [
    {
      "definition": {
        "title": "Request Rate by Operation",
        "type": "timeseries",
        "requests": [
          {
            "q": "sum:vectra.request.count{*} by {operation}.as_rate()",
            "display_type": "bars"
          }
        ]
      }
    },
    {
      "definition": {
        "title": "P95 Latency",
        "type": "timeseries",
        "requests": [
          {
            "q": "p95:vectra.request.duration{*} by {operation}"
          }
        ]
      }
    },
    {
      "definition": {
        "title": "Error Rate",
        "type": "query_value",
        "requests": [
          {
            "q": "sum:vectra.error.count{*}.as_rate() / sum:vectra.request.count{*}.as_rate() * 100"
          }
        ],
        "precision": 2,
        "custom_unit": "%"
      }
    }
  ]
}
```

### New Relic

```ruby
# config/initializers/vectra_newrelic.rb
require "vectra/instrumentation/new_relic"

Vectra.configure do |config|
  config.instrumentation = true
end

# Records custom events: VectraOperation
# Attributes: provider, operation, duration, vector_count, error
```

#### New Relic NRQL Queries

```sql
-- Request throughput
SELECT rate(count(*), 1 minute) FROM VectraOperation FACET operation TIMESERIES

-- Average latency by operation
SELECT average(duration) FROM VectraOperation FACET operation TIMESERIES

-- Error rate
SELECT percentage(count(*), WHERE error IS NOT NULL) FROM VectraOperation TIMESERIES

-- Slowest operations
SELECT max(duration) FROM VectraOperation FACET operation WHERE duration > 1
```

### Sentry

```ruby
# config/initializers/vectra_sentry.rb
require 'vectra/instrumentation/sentry'

Vectra.configure do |config|
  config.instrumentation = true
end

# Setup with options
Vectra::Instrumentation::Sentry.setup!(
  capture_all_errors: false,     # Only capture failures
  fingerprint_by_operation: true # Group errors by operation
)

# Features:
# - Breadcrumbs for all operations
# - Error context with provider/operation/index
# - Custom fingerprinting for error grouping
# - Severity levels based on error type
```

### Honeybadger

```ruby
# config/initializers/vectra_honeybadger.rb
require 'vectra/instrumentation/honeybadger'

Vectra.configure do |config|
  config.instrumentation = true
end

Vectra::Instrumentation::Honeybadger.setup!(
  notify_on_rate_limit: false, # Don't spam on rate limits
  notify_on_validation: false  # Don't spam on validation errors
)

# Features:
# - Breadcrumbs for operation tracing
# - Context with vectra metadata
# - Severity tags (critical, high, medium, low)
# - Custom fingerprinting
```

### OpenTelemetry

```ruby
# config/initializers/vectra_otel.rb
require "opentelemetry/sdk"
require "opentelemetry/exporter/otlp"

OpenTelemetry::SDK.configure do |c|
  c.service_name = "vectra-service"
  c.use_all
end

# Custom OpenTelemetry handler
Vectra::Instrumentation.register(:opentelemetry) do |event|
  tracer = OpenTelemetry.tracer_provider.tracer("vectra")

  tracer.in_span("vectra.#{event[:operation]}") do |span|
    span.set_attribute("vectra.provider", event[:provider].to_s)
    span.set_attribute("vectra.index", event[:index]) if event[:index]
    span.set_attribute("vectra.vector_count", event[:metadata][:vector_count]) if event.dig(:metadata, :vector_count)

    if event[:error]
      span.record_exception(event[:error])
      span.status = OpenTelemetry::Trace::Status.error(event[:error].message)
    end
  end
end
```

## Alert Configurations

### Prometheus Alerting Rules

Save as `vectra-alerts.yml`:

```yaml
groups:
  - name: vectra
    rules:
      # High error rate
      - alert: VectraHighErrorRate
        expr: |
          sum(rate(vectra_errors_total[5m]))
          / sum(rate(vectra_requests_total[5m])) > 0.05
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High Vectra error rate"
          description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)"

      # High latency
      - alert: VectraHighLatency
        expr: |
          histogram_quantile(0.95,
            sum(rate(vectra_request_duration_seconds_bucket[5m])) by (le, operation)
          ) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High Vectra latency for {{ $labels.operation }}"
          description: "P95 latency is {{ $value | humanizeDuration }}"

      # Connection pool exhausted (pgvector)
      - alert: VectraPoolExhausted
        expr: vectra_pool_connections{state="available"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Vectra connection pool exhausted"
          description: "No available connections in pool"

      # Low cache hit ratio
      - alert: VectraLowCacheHitRatio
        expr: |
          sum(rate(vectra_cache_hits_total[5m]))
          / (sum(rate(vectra_cache_hits_total[5m])) + sum(rate(vectra_cache_misses_total[5m]))) < 0.5
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Low Vectra cache hit ratio"
          description: "Cache hit ratio is {{ $value | humanizePercentage }}"

      # No requests (service down?)
      - alert: VectraNoRequests
        expr: sum(rate(vectra_requests_total[5m])) == 0
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "No Vectra requests"
          description: "No requests in the last 10 minutes"
```

### PagerDuty Integration

```yaml
# alertmanager.yml
receivers:
  - name: 'vectra-critical'
    pagerduty_configs:
      - service_key: '<your-pagerduty-key>'
        severity: critical
        description: '{{ .GroupLabels.alertname }}'
        details:
          summary: '{{ .Annotations.summary }}'
          description: '{{ .Annotations.description }}'

route:
  receiver: 'vectra-critical'
  routes:
    - match:
        severity: critical
      receiver: 'vectra-critical'
```

### Slack Alerts

```yaml
# alertmanager.yml
receivers:
  - name: 'vectra-slack'
    slack_configs:
      - api_url: '<your-slack-webhook>'
        channel: '#alerts'
        title: '{{ .GroupLabels.alertname }}'
        text: '{{ .Annotations.description }}'
        color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}'
```

## Circuit Breaker Pattern

Prevent cascading failures with the built-in circuit breaker:

```ruby
# Create circuit breaker for provider
breaker = Vectra::CircuitBreaker.new(
  name: "pinecone",
  failure_threshold: 5, # Open after 5 failures
  success_threshold: 3, # Close after 3 successes in half-open
  recovery_timeout: 30  # Try half-open after 30 seconds
)

# Use with operations
result = breaker.call do
  client.query(index: "my-index", vector: vec, top_k: 10)
end

# With fallback
result = breaker.call(fallback: -> { cached_results }) do
  client.query(...)
end
```

### Circuit States

| State | Description | Behavior |
|-------|-------------|----------|
| `closed` | Normal operation | Requests pass through |
| `open` | Failing | Requests fail immediately (or use fallback) |
| `half_open` | Testing recovery | Limited requests allowed |
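
The state can also drive routing: while a provider's circuit reports `open`, it is often better to send reads straight to a backup than to burn the call on an immediate failure. A minimal sketch using the registry stats shown in the next section (`primary_client` and `backup_client` are placeholders):

```ruby
# Sketch only: prefer the primary provider, but skip straight to a backup
# while its circuit reports open. Client objects and index name are assumed.
def resilient_query(vector)
  state = Vectra::CircuitBreakerRegistry.stats.dig(:pinecone, :state)
  return backup_client.query(index: "my-index", vector: vector, top_k: 10) if state == :open

  Vectra::CircuitBreakerRegistry[:pinecone].call do
    primary_client.query(index: "my-index", vector: vector, top_k: 10)
  end
end
```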

### Per-Provider Circuits

```ruby
# Register circuits for each provider
Vectra::CircuitBreakerRegistry.register(:pinecone, failure_threshold: 3)
Vectra::CircuitBreakerRegistry.register(:qdrant, failure_threshold: 5)

# Use registered circuit
Vectra::CircuitBreakerRegistry[:pinecone].call do
  pinecone_client.query(...)
end

# Get all circuit stats
Vectra::CircuitBreakerRegistry.stats
# => { pinecone: { state: :closed, ... }, qdrant: { state: :open, ... } }

# Reset all circuits
Vectra::CircuitBreakerRegistry.reset_all!
```

### Circuit Breaker Metrics

```ruby
# Add to Prometheus metrics.
# CIRCUIT_STATE is assumed to be a gauge registered alongside the other
# metrics, e.g.:
#   CIRCUIT_STATE = REGISTRY.gauge(:vectra_circuit_state,
#                                  docstring: "Circuit open (1) / closed (0)",
#                                  labels: [:provider])
Vectra::Instrumentation.on_operation do |event|
  circuit = Vectra::CircuitBreakerRegistry[event.provider]
  next unless circuit

  CIRCUIT_STATE.set(
    circuit.open? ? 1 : 0,
    labels: { provider: event.provider.to_s }
  )
end
```

### Circuit Breaker Alerts

```yaml
# prometheus-alerts.yml
- alert: VectraCircuitOpen
  expr: vectra_circuit_state == 1
  for: 1m
  labels:
    severity: critical
  annotations:
    summary: "Circuit breaker open for {{ $labels.provider }}"
```

## Health Check

Built-in health check functionality:

```ruby
# Basic health check
client = Vectra::Client.new(provider: :pinecone, ...)
result = client.health_check

if result.healthy?
  puts "Provider is healthy! Latency: #{result.latency_ms}ms"
else
  puts "Error: #{result.error_message}"
end

# Quick boolean check
client.healthy? # => true/false

# Detailed health check with stats
result = client.health_check(
  index: "my-index",
  include_stats: true
)

puts result.to_json
# => {
#   "healthy": true,
#   "provider": "pinecone",
#   "latency_ms": 45.2,
#   "indexes_available": 3,
#   "index": "my-index",
#   "stats": { "vector_count": 1000, "dimension": 384 },
#   "pool": { "available": 5, "checked_out": 2 }
# }
```

### Aggregate Health Check

Check multiple providers at once:

```ruby
checker = Vectra::AggregateHealthCheck.new(
  primary: pinecone_client,
  backup: qdrant_client,
  local: pgvector_client
)

result = checker.check_all
# => {
#   overall_healthy: true,
#   healthy_count: 3,
#   total_count: 3,
#   results: { ... }
# }

# Quick checks
checker.all_healthy? # => true/false
checker.any_healthy? # => true/false
```

### Health Check Endpoint (Rails)

```ruby
# app/controllers/health_controller.rb
class HealthController < ApplicationController
  def vectra
    client = Vectra::Client.new
    result = client.health_check(include_stats: true)

    render json: result.to_h, status: result.healthy? ? :ok : :service_unavailable
  end
end
```
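
And a matching route (the path is arbitrary):

```ruby
# config/routes.rb
get "/health/vectra", to: "health#vectra"
```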

## Structured JSON Logging

Built-in JSON logger for machine-readable logs:

```ruby
# Setup JSON logging
require 'vectra/logging'

Vectra::Logging.setup!(
  output: "log/vectra.json.log",
  app: "my-service",
  env: Rails.env
)

# All operations automatically logged as JSON:
# {
#   "timestamp": "2025-01-08T12:00:00.123Z",
#   "level": "info",
#   "logger": "vectra",
#   "message": "vectra.query",
#   "provider": "pinecone",
#   "operation": "query",
#   "index": "embeddings",
#   "duration_ms": 45.2,
#   "success": true,
#   "result_count": 10
# }
```

### Custom Logging

```ruby
# Log custom events
Vectra::Logging.log(:info, "Custom event", custom_key: "value")

# Use with standard Logger
logger = Logger.new(STDOUT)
logger.formatter = Vectra::JsonFormatter.new(service: "vectra-api")
```

### Log Levels

- `debug` - Detailed debugging information
- `info` - Successful operations
- `warn` - Warnings (rate limits, retries)
- `error` - Failed operations
- `fatal` - Critical errors (auth failures)
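
When routing output through a standard `Logger` as in the Custom Logging example above, you can filter below a given level the usual way; a small sketch:

```ruby
require 'logger'

logger = Logger.new(STDOUT)
logger.level = Logger::WARN # keep warn/error/fatal, drop debug/info
logger.formatter = Vectra::JsonFormatter.new(service: "vectra-api")
```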

## Rate Limiting

Proactive rate limiting to prevent API rate limit errors:

```ruby
# Create rate limiter (token bucket algorithm)
limiter = Vectra::RateLimiter.new(
  requests_per_second: 10,
  burst_size: 20
)

# Use with operations
limiter.acquire do
  client.query(index: "my-index", vector: vec, top_k: 10)
end

# Or wrap entire client
rate_limited_client = Vectra::RateLimitedClient.new(
  client,
  requests_per_second: 100,
  burst_size: 200
)

# All operations automatically rate limited
rate_limited_client.query(...)
rate_limited_client.upsert(...)
```

### Per-Provider Rate Limits

```ruby
# Configure rate limits per provider
Vectra::RateLimiterRegistry.configure(:pinecone, requests_per_second: 100)
Vectra::RateLimiterRegistry.configure(:qdrant, requests_per_second: 50)

# Use in operations
limiter = Vectra::RateLimiterRegistry[:pinecone]
limiter.acquire { client.query(...) }

# Get stats
Vectra::RateLimiterRegistry.stats
```

### Rate Limiter Stats

```ruby
stats = limiter.stats
# => {
#   requests_per_second: 10,
#   burst_size: 20,
#   available_tokens: 15.5,
#   time_until_token: 0.05
# }
```
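
These numbers are useful for pre-flight checks, for example shedding optional work when the bucket is nearly empty (a sketch; the threshold is arbitrary and `cached_results` is a placeholder):

```ruby
# Sketch: only hit the provider when a token is available, otherwise fall
# back to something cheaper.
stats = limiter.stats

if stats[:available_tokens] >= 1
  limiter.acquire { client.query(index: "my-index", vector: vec, top_k: 10) }
else
  cached_results
end
```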

## Quick Reference

| Metric | Description | Alert Threshold |
|--------|-------------|-----------------|
| `vectra_requests_total` | Total requests | - |
| `vectra_request_duration_seconds` | Request latency | p95 > 2s |
| `vectra_errors_total` | Error count | > 5% error rate |
| `vectra_vectors_processed_total` | Vectors processed | - |
| `vectra_cache_hits_total` | Cache hits | < 50% hit ratio |
| `vectra_pool_connections` | Pool connections | 0 available |
| `vectra_rate_limit_tokens` | Available rate limit tokens | < 10% capacity |
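
`vectra_rate_limit_tokens` is not part of the exporter snippet earlier in this guide. A minimal sketch of exporting it from the limiter stats (the gauge wiring and update cadence are assumptions):

```ruby
# Register alongside the other VectraMetrics definitions.
RATE_LIMIT_TOKENS = VectraMetrics::REGISTRY.gauge(
  :vectra_rate_limit_tokens,
  docstring: "Available rate limit tokens",
  labels: [:provider]
)

# Refresh on whatever cadence suits you, e.g. from the same poller thread
# used for the pool gauge above.
RATE_LIMIT_TOKENS.set(
  Vectra::RateLimiterRegistry[:pinecone].stats[:available_tokens],
  labels: { provider: "pinecone" }
)
```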

## Monitoring Cost Optimization

Optimize your monitoring infrastructure costs:

| Setting | Default | Low-Cost | Notes |
|---------|---------|----------|-------|
| Scrape interval | 15s | 30s | Reduces storage by ~50% |
| Retention | 15d | 7d | Adjust via `--storage.tsdb.retention.time` |
| Histogram buckets | 10 | 5 | Fewer buckets = less cardinality |

**Metric Cardinality Estimates:**

- ~100 timeseries per provider
- ~500 timeseries for multi-provider setup
- Cache/pool metrics add ~20 timeseries

**Cost Reduction Tips:**

```yaml
# prometheus.yml - Longer scrape interval
scrape_configs:
  - job_name: 'vectra'
    scrape_interval: 30s # Instead of 15s
    scrape_timeout: 10s
```

```ruby
# Reduce histogram buckets
REQUEST_DURATION = REGISTRY.histogram(
  :vectra_request_duration_seconds,
  docstring: "Request duration",
  labels: [:provider, :operation],
  buckets: [0.1, 0.5, 1, 5, 10] # 5 instead of 10 buckets
)
```

**Downsampling for Long-term Storage:**

```yaml
# Thanos/Cortex downsampling rules
- record: vectra:request_rate:5m
  expr: sum(rate(vectra_requests_total[5m])) by (provider, operation)
```

## Troubleshooting Runbooks

Quick links to incident response procedures:

- [High Error Rate Runbook]({{ site.baseurl }}/guides/runbooks/high-error-rate) - Error rate >5%
- [Pool Exhaustion Runbook]({{ site.baseurl }}/guides/runbooks/pool-exhausted) - No available connections
- [Cache Issues Runbook]({{ site.baseurl }}/guides/runbooks/cache-issues) - Low hit ratio, stale data
- [High Latency Runbook]({{ site.baseurl }}/guides/runbooks/high-latency) - P95 >2s

## Next Steps

- [Performance Guide]({{ site.baseurl }}/guides/performance)
- [API Reference]({{ site.baseurl }}/api/overview)
- [Provider Guides]({{ site.baseurl }}/providers)