vectra-client 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,860 @@
1
+ ---
2
+ layout: page
3
+ title: Monitoring & Observability
4
+ permalink: /guides/monitoring/
5
+ ---
6
+
7
+ # Monitoring & Observability
8
+
9
+ Complete guide for monitoring Vectra in production with Prometheus, Grafana, and APM tools.
10
+
11
+ ## Quick Setup
12
+
13
+ ```ruby
14
+ Vectra.configure do |config|
15
+ config.provider = :pinecone
16
+ config.api_key = ENV['PINECONE_API_KEY']
17
+ config.instrumentation = true # Enable metrics
18
+ end
19
+ ```
20
+
21
+ ## Prometheus Metrics
22
+
23
+ ### Exporter Setup
24
+
25
+ Create `config/initializers/vectra_metrics.rb`:
26
+
27
+ ```ruby
28
+ # frozen_string_literal: true
29
+
30
+ require "prometheus/client"
31
+
32
+ module VectraMetrics
33
+ REGISTRY = Prometheus::Client.registry
34
+
35
+ # Request counters
36
+ REQUESTS_TOTAL = REGISTRY.counter(
37
+ :vectra_requests_total,
38
+ docstring: "Total Vectra requests",
39
+ labels: [:provider, :operation, :status]
40
+ )
41
+
42
+ # Latency histogram
43
+ REQUEST_DURATION = REGISTRY.histogram(
44
+ :vectra_request_duration_seconds,
45
+ docstring: "Request duration in seconds",
46
+ labels: [:provider, :operation],
47
+ buckets: [0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]
48
+ )
49
+
50
+ # Vector counts
51
+ VECTORS_PROCESSED = REGISTRY.counter(
52
+ :vectra_vectors_processed_total,
53
+ docstring: "Total vectors processed",
54
+ labels: [:provider, :operation]
55
+ )
56
+
57
+ # Cache metrics
58
+ CACHE_HITS = REGISTRY.counter(
59
+ :vectra_cache_hits_total,
60
+ docstring: "Cache hit count"
61
+ )
62
+
63
+ CACHE_MISSES = REGISTRY.counter(
64
+ :vectra_cache_misses_total,
65
+ docstring: "Cache miss count"
66
+ )
67
+
68
+ # Pool metrics (pgvector)
69
+ POOL_SIZE = REGISTRY.gauge(
70
+ :vectra_pool_connections,
71
+ docstring: "Connection pool size",
72
+ labels: [:state] # available, checked_out
73
+ )
74
+
75
+ # Error counter
76
+ ERRORS_TOTAL = REGISTRY.counter(
77
+ :vectra_errors_total,
78
+ docstring: "Total errors",
79
+ labels: [:provider, :error_type]
80
+ )
81
+ end
82
+
83
+ # Custom instrumentation handler
84
+ Vectra::Instrumentation.register(:prometheus) do |event|
85
+ labels = {
86
+ provider: event[:provider],
87
+ operation: event[:operation]
88
+ }
89
+
90
+ # Record request
91
+ status = event[:error] ? "error" : "success"
92
+ VectraMetrics::REQUESTS_TOTAL.increment(labels: labels.merge(status: status))
93
+
94
+ # Record duration
95
+ if event[:duration]
96
+ VectraMetrics::REQUEST_DURATION.observe(event[:duration], labels: labels)
97
+ end
98
+
99
+ # Record vector count
100
+ if event[:metadata]&.dig(:vector_count)
101
+ VectraMetrics::VECTORS_PROCESSED.increment(
102
+ by: event[:metadata][:vector_count],
103
+ labels: labels
104
+ )
105
+ end
106
+
107
+ # Record errors
108
+ if event[:error]
109
+ VectraMetrics::ERRORS_TOTAL.increment(
110
+ labels: labels.merge(error_type: event[:error].class.name)
111
+ )
112
+ end
113
+ end
114
+ ```
115
+
116
+ ### Prometheus Scrape Config
117
+
118
+ Add to `prometheus.yml`:
119
+
120
+ ```yaml
121
+ scrape_configs:
122
+ - job_name: 'vectra'
123
+ static_configs:
124
+ - targets: ['localhost:9292']
125
+ metrics_path: '/metrics'
126
+ scrape_interval: 15s
127
+ ```
128
+
129
+ ### Expose Metrics Endpoint (Rack)
130
+
131
+ ```ruby
132
+ # config.ru
133
+ require "prometheus/middleware/exporter"
134
+
135
+ use Prometheus::Middleware::Exporter
136
+ run YourApp
137
+ ```
138
+
139
+ ## Grafana Dashboard
140
+
141
+ ### Dashboard JSON Template
142
+
143
+ Save as `vectra-dashboard.json` and import into Grafana:
144
+
145
+ ```json
146
+ {
147
+ "dashboard": {
148
+ "title": "Vectra Vector Database Metrics",
149
+ "uid": "vectra-metrics",
150
+ "timezone": "browser",
151
+ "refresh": "30s",
152
+ "panels": [
153
+ {
154
+ "title": "Request Rate",
155
+ "type": "graph",
156
+ "gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 },
157
+ "targets": [
158
+ {
159
+ "expr": "sum(rate(vectra_requests_total[5m])) by (operation)",
160
+ "legendFormat": "{{ "{{operation}}" }}"
161
+ }
162
+ ]
163
+ },
164
+ {
165
+ "title": "Request Latency (p95)",
166
+ "type": "graph",
167
+ "gridPos": { "x": 12, "y": 0, "w": 12, "h": 8 },
168
+ "targets": [
169
+ {
170
+ "expr": "histogram_quantile(0.95, sum(rate(vectra_request_duration_seconds_bucket[5m])) by (le, operation))",
171
+ "legendFormat": "{{ "{{operation}}" }} p95"
172
+ }
173
+ ]
174
+ },
175
+ {
176
+ "title": "Error Rate",
177
+ "type": "graph",
178
+ "gridPos": { "x": 0, "y": 8, "w": 12, "h": 8 },
179
+ "targets": [
180
+ {
181
+ "expr": "sum(rate(vectra_errors_total[5m])) by (error_type)",
182
+ "legendFormat": "{{ "{{error_type}}" }}"
183
+ }
184
+ ]
185
+ },
186
+ {
187
+ "title": "Vectors Processed",
188
+ "type": "stat",
189
+ "gridPos": { "x": 12, "y": 8, "w": 6, "h": 8 },
190
+ "targets": [
191
+ {
192
+ "expr": "sum(increase(vectra_vectors_processed_total[24h]))",
193
+ "legendFormat": "24h Total"
194
+ }
195
+ ]
196
+ },
197
+ {
198
+ "title": "Cache Hit Ratio",
199
+ "type": "gauge",
200
+ "gridPos": { "x": 18, "y": 8, "w": 6, "h": 8 },
201
+ "targets": [
202
+ {
203
+ "expr": "sum(vectra_cache_hits_total) / (sum(vectra_cache_hits_total) + sum(vectra_cache_misses_total)) * 100"
204
+ }
205
+ ],
206
+ "fieldConfig": {
207
+ "defaults": {
208
+ "unit": "percent",
209
+ "max": 100,
210
+ "thresholds": {
211
+ "steps": [
212
+ { "color": "red", "value": 0 },
213
+ { "color": "yellow", "value": 50 },
214
+ { "color": "green", "value": 80 }
215
+ ]
216
+ }
217
+ }
218
+ }
219
+ },
220
+ {
221
+ "title": "Connection Pool (pgvector)",
222
+ "type": "graph",
223
+ "gridPos": { "x": 0, "y": 16, "w": 12, "h": 8 },
224
+ "targets": [
225
+ {
226
+ "expr": "vectra_pool_connections{state='available'}",
227
+ "legendFormat": "Available"
228
+ },
229
+ {
230
+ "expr": "vectra_pool_connections{state='checked_out'}",
231
+ "legendFormat": "In Use"
232
+ }
233
+ ]
234
+ },
235
+ {
236
+ "title": "Operations by Provider",
237
+ "type": "piechart",
238
+ "gridPos": { "x": 12, "y": 16, "w": 12, "h": 8 },
239
+ "targets": [
240
+ {
241
+ "expr": "sum(vectra_requests_total) by (provider)",
242
+ "legendFormat": "{{ "{{provider}}" }}"
243
+ }
244
+ ]
245
+ }
246
+ ],
247
+ "templating": {
248
+ "list": [
249
+ {
250
+ "name": "provider",
251
+ "type": "query",
252
+ "query": "label_values(vectra_requests_total, provider)",
253
+ "multi": true,
254
+ "includeAll": true
255
+ }
256
+ ]
257
+ }
258
+ }
259
+ }
260
+ ```
261
+
262
+ ## APM Integration
263
+
264
+ ### Datadog
265
+
266
+ ```ruby
267
+ # config/initializers/vectra_datadog.rb
268
+ require "vectra/instrumentation/datadog"
269
+
270
+ Vectra.configure do |config|
271
+ config.instrumentation = true
272
+ end
273
+
274
+ # Auto-traces all Vectra operations with:
275
+ # - Service name: vectra
276
+ # - Resource: operation name (upsert, query, etc.)
277
+ # - Tags: provider, index, vector_count
278
+ ```
279
+
280
+ #### Datadog Dashboard JSON
281
+
282
+ ```json
283
+ {
284
+ "title": "Vectra Performance",
285
+ "widgets": [
286
+ {
287
+ "definition": {
288
+ "title": "Request Rate by Operation",
289
+ "type": "timeseries",
290
+ "requests": [
291
+ {
292
+ "q": "sum:vectra.request.count{*} by {operation}.as_rate()",
293
+ "display_type": "bars"
294
+ }
295
+ ]
296
+ }
297
+ },
298
+ {
299
+ "definition": {
300
+ "title": "P95 Latency",
301
+ "type": "timeseries",
302
+ "requests": [
303
+ {
304
+ "q": "p95:vectra.request.duration{*} by {operation}"
305
+ }
306
+ ]
307
+ }
308
+ },
309
+ {
310
+ "definition": {
311
+ "title": "Error Rate",
312
+ "type": "query_value",
313
+ "requests": [
314
+ {
315
+ "q": "sum:vectra.error.count{*}.as_rate() / sum:vectra.request.count{*}.as_rate() * 100"
316
+ }
317
+ ],
318
+ "precision": 2,
319
+ "custom_unit": "%"
320
+ }
321
+ }
322
+ ]
323
+ }
324
+ ```
325
+
326
+ ### New Relic
327
+
328
+ ```ruby
329
+ # config/initializers/vectra_newrelic.rb
330
+ require "vectra/instrumentation/new_relic"
331
+
332
+ Vectra.configure do |config|
333
+ config.instrumentation = true
334
+ end
335
+
336
+ # Records custom events: VectraOperation
337
+ # Attributes: provider, operation, duration, vector_count, error
338
+ ```
339
+
340
+ #### New Relic NRQL Queries
341
+
342
+ ```sql
343
+ -- Request throughput
344
+ SELECT rate(count(*), 1 minute) FROM VectraOperation FACET operation TIMESERIES
345
+
346
+ -- Average latency by operation
347
+ SELECT average(duration) FROM VectraOperation FACET operation TIMESERIES
348
+
349
+ -- Error rate
350
+ SELECT percentage(count(*), WHERE error IS NOT NULL) FROM VectraOperation TIMESERIES
351
+
352
+ -- Slowest operations
353
+ SELECT max(duration) FROM VectraOperation WHERE duration > 1 FACET operation
354
+ ```
355
+
356
+ ### Sentry
357
+
358
+ ```ruby
359
+ # config/initializers/vectra_sentry.rb
360
+ require 'vectra/instrumentation/sentry'
361
+
362
+ Vectra.configure do |config|
363
+ config.instrumentation = true
364
+ end
365
+
366
+ # Setup with options
367
+ Vectra::Instrumentation::Sentry.setup!(
368
+ capture_all_errors: false, # Only capture failures
369
+ fingerprint_by_operation: true # Group errors by operation
370
+ )
371
+
372
+ # Features:
373
+ # - Breadcrumbs for all operations
374
+ # - Error context with provider/operation/index
375
+ # - Custom fingerprinting for error grouping
376
+ # - Severity levels based on error type
377
+ ```
378
+
379
+ ### Honeybadger
380
+
381
+ ```ruby
382
+ # config/initializers/vectra_honeybadger.rb
383
+ require 'vectra/instrumentation/honeybadger'
384
+
385
+ Vectra.configure do |config|
386
+ config.instrumentation = true
387
+ end
388
+
389
+ Vectra::Instrumentation::Honeybadger.setup!(
390
+ notify_on_rate_limit: false, # Don't spam on rate limits
391
+ notify_on_validation: false # Don't spam on validation errors
392
+ )
393
+
394
+ # Features:
395
+ # - Breadcrumbs for operation tracing
396
+ # - Context with vectra metadata
397
+ # - Severity tags (critical, high, medium, low)
398
+ # - Custom fingerprinting
399
+ ```
400
+
401
+ ### OpenTelemetry
402
+
403
+ ```ruby
404
+ # config/initializers/vectra_otel.rb
405
+ require "opentelemetry/sdk"
406
+ require "opentelemetry/exporter/otlp"
407
+
408
+ OpenTelemetry::SDK.configure do |c|
409
+ c.service_name = "vectra-service"
410
+ c.use_all
411
+ end
412
+
413
+ # Custom OpenTelemetry handler
414
+ Vectra::Instrumentation.register(:opentelemetry) do |event|
415
+ tracer = OpenTelemetry.tracer_provider.tracer("vectra")
416
+
417
+ tracer.in_span("vectra.#{event[:operation]}") do |span|
418
+ span.set_attribute("vectra.provider", event[:provider].to_s)
419
+ span.set_attribute("vectra.index", event[:index]) if event[:index]
420
+ span.set_attribute("vectra.vector_count", event[:metadata][:vector_count]) if event.dig(:metadata, :vector_count)
421
+
422
+ if event[:error]
423
+ span.record_exception(event[:error])
424
+ span.status = OpenTelemetry::Trace::Status.error(event[:error].message)
425
+ end
426
+ end
427
+ end
428
+ ```
429
+
430
+ ## Alert Configurations
431
+
432
+ ### Prometheus Alerting Rules
433
+
434
+ Save as `vectra-alerts.yml`:
435
+
436
+ ```yaml
437
+ groups:
438
+ - name: vectra
439
+ rules:
440
+ # High error rate
441
+ - alert: VectraHighErrorRate
442
+ expr: |
443
+ sum(rate(vectra_errors_total[5m]))
444
+ / sum(rate(vectra_requests_total[5m])) > 0.05
445
+ for: 5m
446
+ labels:
447
+ severity: critical
448
+ annotations:
449
+ summary: "High Vectra error rate"
450
+ description: "Error rate is {{ $value | humanizePercentage }} (threshold: 5%)"
451
+
452
+ # High latency
453
+ - alert: VectraHighLatency
454
+ expr: |
455
+ histogram_quantile(0.95,
456
+ sum(rate(vectra_request_duration_seconds_bucket[5m])) by (le, operation)
457
+ ) > 2
458
+ for: 5m
459
+ labels:
460
+ severity: warning
461
+ annotations:
462
+ summary: "High Vectra latency for {{ $labels.operation }}"
463
+ description: "P95 latency is {{ $value | humanizeDuration }}"
464
+
465
+ # Connection pool exhausted (pgvector)
466
+ - alert: VectraPoolExhausted
467
+ expr: vectra_pool_connections{state="available"} == 0
468
+ for: 1m
469
+ labels:
470
+ severity: critical
471
+ annotations:
472
+ summary: "Vectra connection pool exhausted"
473
+ description: "No available connections in pool"
474
+
475
+ # Low cache hit ratio
476
+ - alert: VectraLowCacheHitRatio
477
+ expr: |
478
+ sum(rate(vectra_cache_hits_total[5m]))
479
+ / (sum(rate(vectra_cache_hits_total[5m])) + sum(rate(vectra_cache_misses_total[5m]))) < 0.5
480
+ for: 10m
481
+ labels:
482
+ severity: warning
483
+ annotations:
484
+ summary: "Low Vectra cache hit ratio"
485
+ description: "Cache hit ratio is {{ $value | humanizePercentage }}"
486
+
487
+ # No requests (service down?)
488
+ - alert: VectraNoRequests
489
+ expr: sum(rate(vectra_requests_total[5m])) == 0
490
+ for: 10m
491
+ labels:
492
+ severity: warning
493
+ annotations:
494
+ summary: "No Vectra requests"
495
+ description: "No requests in the last 10 minutes"
496
+ ```
497
+
498
+ ### PagerDuty Integration
499
+
500
+ ```yaml
501
+ # alertmanager.yml
502
+ receivers:
503
+ - name: 'vectra-critical'
504
+ pagerduty_configs:
505
+ - service_key: '<your-pagerduty-key>'
506
+ severity: critical
507
+ description: '{{ .GroupLabels.alertname }}'
508
+ details:
509
+ summary: '{{ .Annotations.summary }}'
510
+ description: '{{ .Annotations.description }}'
511
+
512
+ route:
513
+ receiver: 'vectra-critical'
514
+ routes:
515
+ - match:
516
+ severity: critical
517
+ receiver: 'vectra-critical'
518
+ ```
519
+
520
+ ### Slack Alerts
521
+
522
+ ```yaml
523
+ # alertmanager.yml
524
+ receivers:
525
+ - name: 'vectra-slack'
526
+ slack_configs:
527
+ - api_url: '<your-slack-webhook>'
528
+ channel: '#alerts'
529
+ title: '{{ .GroupLabels.alertname }}'
530
+ text: '{{ .Annotations.description }}'
531
+ color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}'
532
+ ```
533
+
534
+ ## Circuit Breaker Pattern
535
+
536
+ Prevent cascading failures with built-in circuit breaker:
537
+
538
+ ```ruby
539
+ # Create circuit breaker for provider
540
+ breaker = Vectra::CircuitBreaker.new(
541
+ name: "pinecone",
542
+ failure_threshold: 5, # Open after 5 failures
543
+ success_threshold: 3, # Close after 3 successes in half-open
544
+ recovery_timeout: 30 # Try half-open after 30 seconds
545
+ )
546
+
547
+ # Use with operations
548
+ result = breaker.call do
549
+ client.query(index: "my-index", vector: vec, top_k: 10)
550
+ end
551
+
552
+ # With fallback
553
+ result = breaker.call(fallback: -> { cached_results }) do
554
+ client.query(...)
555
+ end
556
+ ```
557
+
558
+ ### Circuit States
559
+
560
+ | State | Description | Behavior |
561
+ |-------|-------------|----------|
562
+ | `closed` | Normal operation | Requests pass through |
563
+ | `open` | Failing | Requests fail immediately (or use fallback) |
564
+ | `half_open` | Testing recovery | Limited requests allowed |
565
+
566
+ ### Per-Provider Circuits
567
+
568
+ ```ruby
569
+ # Register circuits for each provider
570
+ Vectra::CircuitBreakerRegistry.register(:pinecone, failure_threshold: 3)
571
+ Vectra::CircuitBreakerRegistry.register(:qdrant, failure_threshold: 5)
572
+
573
+ # Use registered circuit
574
+ Vectra::CircuitBreakerRegistry[:pinecone].call do
575
+ pinecone_client.query(...)
576
+ end
577
+
578
+ # Get all circuit stats
579
+ Vectra::CircuitBreakerRegistry.stats
580
+ # => { pinecone: { state: :closed, ... }, qdrant: { state: :open, ... } }
581
+
582
+ # Reset all circuits
583
+ Vectra::CircuitBreakerRegistry.reset_all!
584
+ ```
585
+
586
+ ### Circuit Breaker Metrics
587
+
588
+ ```ruby
589
+ # Add to Prometheus metrics
590
+ Vectra::Instrumentation.on_operation do |event|
591
+ circuit = Vectra::CircuitBreakerRegistry[event.provider]
592
+ next unless circuit
593
+
594
+ CIRCUIT_STATE.set(
595
+ circuit.open? ? 1 : 0,
596
+ labels: { provider: event.provider.to_s }
597
+ )
598
+ end
599
+ ```
600
+
601
+ ### Circuit Breaker Alerts
602
+
603
+ ```yaml
604
+ # prometheus-alerts.yml
605
+ - alert: VectraCircuitOpen
606
+ expr: vectra_circuit_state == 1
607
+ for: 1m
608
+ labels:
609
+ severity: critical
610
+ annotations:
611
+ summary: "Circuit breaker open for {{ $labels.provider }}"
612
+ ```
613
+
614
+ ## Health Check
615
+
616
+ Built-in health check functionality:
617
+
618
+ ```ruby
619
+ # Basic health check
620
+ client = Vectra::Client.new(provider: :pinecone, ...)
621
+ result = client.health_check
622
+
623
+ if result.healthy?
624
+ puts "Provider is healthy! Latency: #{result.latency_ms}ms"
625
+ else
626
+ puts "Error: #{result.error_message}"
627
+ end
628
+
629
+ # Quick boolean check
630
+ client.healthy? # => true/false
631
+
632
+ # Detailed health check with stats
633
+ result = client.health_check(
634
+ index: "my-index",
635
+ include_stats: true
636
+ )
637
+
638
+ puts result.to_json
639
+ # => {
640
+ # "healthy": true,
641
+ # "provider": "pinecone",
642
+ # "latency_ms": 45.2,
643
+ # "indexes_available": 3,
644
+ # "index": "my-index",
645
+ # "stats": { "vector_count": 1000, "dimension": 384 },
646
+ # "pool": { "available": 5, "checked_out": 2 }
647
+ # }
648
+ ```
649
+
650
+ ### Aggregate Health Check
651
+
652
+ Check multiple providers at once:
653
+
654
+ ```ruby
655
+ checker = Vectra::AggregateHealthCheck.new(
656
+ primary: pinecone_client,
657
+ backup: qdrant_client,
658
+ local: pgvector_client
659
+ )
660
+
661
+ result = checker.check_all
662
+ # => {
663
+ # overall_healthy: true,
664
+ # healthy_count: 3,
665
+ # total_count: 3,
666
+ # results: { ... }
667
+ # }
668
+
669
+ # Quick checks
670
+ checker.all_healthy? # => true/false
671
+ checker.any_healthy? # => true/false
672
+ ```
673
+
674
+ ### Health Check Endpoint (Rails)
675
+
676
+ ```ruby
677
+ # app/controllers/health_controller.rb
678
+ class HealthController < ApplicationController
679
+ def vectra
680
+ client = Vectra::Client.new
681
+ result = client.health_check(include_stats: true)
682
+
683
+ render json: result.to_h, status: result.healthy? ? :ok : :service_unavailable
684
+ end
685
+ end
686
+ ```
687
+
688
+ ## Structured JSON Logging
689
+
690
+ Built-in JSON logger for machine-readable logs:
691
+
692
+ ```ruby
693
+ # Setup JSON logging
694
+ require 'vectra/logging'
695
+
696
+ Vectra::Logging.setup!(
697
+ output: "log/vectra.json.log",
698
+ app: "my-service",
699
+ env: Rails.env
700
+ )
701
+
702
+ # All operations automatically logged as JSON:
703
+ # {
704
+ # "timestamp": "2025-01-08T12:00:00.123Z",
705
+ # "level": "info",
706
+ # "logger": "vectra",
707
+ # "message": "vectra.query",
708
+ # "provider": "pinecone",
709
+ # "operation": "query",
710
+ # "index": "embeddings",
711
+ # "duration_ms": 45.2,
712
+ # "success": true,
713
+ # "result_count": 10
714
+ # }
715
+ ```
716
+
717
+ ### Custom Logging
718
+
719
+ ```ruby
720
+ # Log custom events
721
+ Vectra::Logging.log(:info, "Custom event", custom_key: "value")
722
+
723
+ # Use with standard Logger
724
+ logger = Logger.new(STDOUT)
725
+ logger.formatter = Vectra::JsonFormatter.new(service: "vectra-api")
726
+ ```
727
+
728
+ ### Log Levels
729
+
730
+ - `debug` - Detailed debugging information
731
+ - `info` - Successful operations
732
+ - `warn` - Warnings (rate limits, retries)
733
+ - `error` - Failed operations
734
+ - `fatal` - Critical errors (auth failures)
735
+
736
+ ## Rate Limiting
737
+
738
+ Proactive rate limiting to prevent API rate limit errors:
739
+
740
+ ```ruby
741
+ # Create rate limiter (token bucket algorithm)
742
+ limiter = Vectra::RateLimiter.new(
743
+ requests_per_second: 10,
744
+ burst_size: 20
745
+ )
746
+
747
+ # Use with operations
748
+ limiter.acquire do
749
+ client.query(index: "my-index", vector: vec, top_k: 10)
750
+ end
751
+
752
+ # Or wrap entire client
753
+ rate_limited_client = Vectra::RateLimitedClient.new(
754
+ client,
755
+ requests_per_second: 100,
756
+ burst_size: 200
757
+ )
758
+
759
+ # All operations automatically rate limited
760
+ rate_limited_client.query(...)
761
+ rate_limited_client.upsert(...)
762
+ ```
763
+
764
+ ### Per-Provider Rate Limits
765
+
766
+ ```ruby
767
+ # Configure rate limits per provider
768
+ Vectra::RateLimiterRegistry.configure(:pinecone, requests_per_second: 100)
769
+ Vectra::RateLimiterRegistry.configure(:qdrant, requests_per_second: 50)
770
+
771
+ # Use in operations
772
+ limiter = Vectra::RateLimiterRegistry[:pinecone]
773
+ limiter.acquire { client.query(...) }
774
+
775
+ # Get stats
776
+ Vectra::RateLimiterRegistry.stats
777
+ ```
778
+
779
+ ### Rate Limiter Stats
780
+
781
+ ```ruby
782
+ stats = limiter.stats
783
+ # => {
784
+ # requests_per_second: 10,
785
+ # burst_size: 20,
786
+ # available_tokens: 15.5,
787
+ # time_until_token: 0.05
788
+ # }
789
+ ```
790
+
791
+ ## Quick Reference
792
+
793
+ | Metric | Description | Alert Threshold |
794
+ |--------|-------------|-----------------|
795
+ | `vectra_requests_total` | Total requests | - |
796
+ | `vectra_request_duration_seconds` | Request latency | p95 > 2s |
797
+ | `vectra_errors_total` | Error count | > 5% error rate |
798
+ | `vectra_vectors_processed_total` | Vectors processed | - |
799
+ | `vectra_cache_hits_total` | Cache hits | < 50% hit ratio |
800
+ | `vectra_pool_connections` | Pool connections | 0 available |
801
+ | `vectra_rate_limit_tokens` | Available rate limit tokens | < 10% capacity |
802
+
803
+ ## Monitoring Cost Optimization
804
+
805
+ Optimize your monitoring infrastructure costs:
806
+
807
+ | Setting | Default | Low-Cost | Notes |
808
+ |---------|---------|----------|-------|
809
+ | Scrape interval | 15s | 30s | Reduces storage by ~50% |
810
+ | Retention | 15d | 7d | Adjust via `--storage.tsdb.retention.time` |
811
+ | Histogram buckets | 10 | 5 | Fewer buckets = less cardinality |
812
+
813
+ **Metric Cardinality Estimates:**
814
+
815
+ - ~100 timeseries per provider
816
+ - ~500 timeseries for multi-provider setup
817
+ - Cache/pool metrics add ~20 timeseries
818
+
819
+ **Cost Reduction Tips:**
820
+
821
+ ```yaml
822
+ # prometheus.yml - Longer scrape interval
823
+ scrape_configs:
824
+ - job_name: 'vectra'
825
+ scrape_interval: 30s # Instead of 15s
826
+ scrape_timeout: 10s
827
+ ```
828
+
829
+ ```ruby
830
+ # Reduce histogram buckets
831
+ REQUEST_DURATION = REGISTRY.histogram(
832
+ :vectra_request_duration_seconds,
833
+ docstring: "Request duration",
834
+ labels: [:provider, :operation],
835
+ buckets: [0.1, 0.5, 1, 5, 10] # 5 instead of 10 buckets
836
+ )
837
+ ```
838
+
839
+ **Downsampling for Long-term Storage:**
840
+
841
+ ```yaml
842
+ # Thanos/Cortex downsampling rules
843
+ - record: vectra:request_rate:5m
844
+ expr: sum(rate(vectra_requests_total[5m])) by (provider, operation)
845
+ ```
846
+
847
+ ## Troubleshooting Runbooks
848
+
849
+ Quick links to incident response procedures:
850
+
851
+ - [High Error Rate Runbook]({{ site.baseurl }}/guides/runbooks/high-error-rate) - Error rate >5%
852
+ - [Pool Exhaustion Runbook]({{ site.baseurl }}/guides/runbooks/pool-exhausted) - No available connections
853
+ - [Cache Issues Runbook]({{ site.baseurl }}/guides/runbooks/cache-issues) - Low hit ratio, stale data
854
+ - [High Latency Runbook]({{ site.baseurl }}/guides/runbooks/high-latency) - P95 >2s
855
+
856
+ ## Next Steps
857
+
858
+ - [Performance Guide]({{ site.baseurl }}/guides/performance)
859
+ - [API Reference]({{ site.baseurl }}/api/overview)
860
+ - [Provider Guides]({{ site.baseurl }}/providers)