vectra-client 0.2.2 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +77 -37
- data/CHANGELOG.md +85 -6
- data/README.md +56 -203
- data/docs/Gemfile +0 -1
- data/docs/_config.yml +19 -2
- data/docs/_layouts/default.html +6 -6
- data/docs/_layouts/home.html +183 -29
- data/docs/_layouts/page.html +81 -18
- data/docs/assets/style.css +806 -174
- data/docs/examples/index.md +46 -24
- data/docs/guides/monitoring.md +860 -0
- data/docs/guides/performance.md +200 -0
- data/docs/guides/runbooks/cache-issues.md +267 -0
- data/docs/guides/runbooks/high-error-rate.md +152 -0
- data/docs/guides/runbooks/high-latency.md +287 -0
- data/docs/guides/runbooks/pool-exhausted.md +216 -0
- data/docs/index.md +22 -38
- data/docs/providers/index.md +58 -39
- data/lib/vectra/batch.rb +148 -0
- data/lib/vectra/cache.rb +261 -0
- data/lib/vectra/circuit_breaker.rb +336 -0
- data/lib/vectra/client.rb +2 -0
- data/lib/vectra/configuration.rb +6 -1
- data/lib/vectra/health_check.rb +254 -0
- data/lib/vectra/instrumentation/honeybadger.rb +128 -0
- data/lib/vectra/instrumentation/sentry.rb +117 -0
- data/lib/vectra/logging.rb +242 -0
- data/lib/vectra/pool.rb +256 -0
- data/lib/vectra/rate_limiter.rb +304 -0
- data/lib/vectra/streaming.rb +153 -0
- data/lib/vectra/version.rb +1 -1
- data/lib/vectra.rb +8 -0
- metadata +31 -1
data/lib/vectra/circuit_breaker.rb

@@ -0,0 +1,336 @@

# frozen_string_literal: true

module Vectra
  # Circuit Breaker pattern for handling provider failures
  #
  # Prevents cascading failures by temporarily stopping requests to a failing provider.
  # The circuit has three states:
  # - :closed - Normal operation, requests pass through
  # - :open - Requests fail immediately without calling provider
  # - :half_open - Limited requests allowed to test if provider recovered
  #
  # @example Basic usage
  #   breaker = Vectra::CircuitBreaker.new(
  #     failure_threshold: 5,
  #     recovery_timeout: 30
  #   )
  #
  #   breaker.call do
  #     client.query(index: "my-index", vector: vec, top_k: 10)
  #   end
  #
  # @example With fallback
  #   breaker.call(fallback: -> { cached_results }) do
  #     client.query(...)
  #   end
  #
  # @example Per-provider circuit breakers
  #   breakers = {
  #     pinecone: Vectra::CircuitBreaker.new(name: "pinecone"),
  #     qdrant: Vectra::CircuitBreaker.new(name: "qdrant")
  #   }
  #
  class CircuitBreaker
    STATES = [:closed, :open, :half_open].freeze

    # Error raised when circuit is open
    class OpenCircuitError < Vectra::Error
      attr_reader :circuit_name, :failures, :opened_at

      def initialize(circuit_name:, failures:, opened_at:)
        @circuit_name = circuit_name
        @failures = failures
        @opened_at = opened_at
        super("Circuit '#{circuit_name}' is open after #{failures} failures")
      end
    end

    attr_reader :name, :state, :failure_count, :success_count,
                :last_failure_at, :opened_at

    # Initialize a new circuit breaker
    #
    # @param name [String] Circuit name for logging/metrics
    # @param failure_threshold [Integer] Failures before opening circuit (default: 5)
    # @param success_threshold [Integer] Successes in half-open to close (default: 3)
    # @param recovery_timeout [Integer] Seconds before trying half-open (default: 30)
    # @param monitored_errors [Array<Class>] Errors that count as failures
    def initialize(
      name: "default",
      failure_threshold: 5,
      success_threshold: 3,
      recovery_timeout: 30,
      monitored_errors: nil
    )
      @name = name
      @failure_threshold = failure_threshold
      @success_threshold = success_threshold
      @recovery_timeout = recovery_timeout
      @monitored_errors = monitored_errors || default_monitored_errors

      @state = :closed
      @failure_count = 0
      @success_count = 0
      @last_failure_at = nil
      @opened_at = nil
      @mutex = Mutex.new
    end

    # Execute block through circuit breaker
    #
    # @param fallback [Proc, nil] Fallback to call when circuit is open
    # @yield The operation to execute
    # @return [Object] Result of block or fallback
    # @raise [OpenCircuitError] If circuit is open and no fallback provided
    def call(fallback: nil, &)
      check_state!

      if open?
        return handle_open_circuit(fallback)
      end

      execute_with_monitoring(&)
    rescue *@monitored_errors => e
      record_failure(e)
      raise
    end

    # Force circuit to closed state (manual reset)
    #
    # @return [void]
    def reset!
      @mutex.synchronize do
        transition_to(:closed)
        @failure_count = 0
        @success_count = 0
        @last_failure_at = nil
        @opened_at = nil
      end
    end

    # Force circuit to open state (manual trip)
    #
    # @return [void]
    def trip!
      @mutex.synchronize do
        transition_to(:open)
        @opened_at = Time.now
      end
    end

    # Check if circuit is closed (normal operation)
    #
    # @return [Boolean]
    def closed?
      state == :closed
    end

    # Check if circuit is open (blocking requests)
    #
    # @return [Boolean]
    def open?
      state == :open
    end

    # Check if circuit is half-open (testing recovery)
    #
    # @return [Boolean]
    def half_open?
      state == :half_open
    end

    # Get circuit statistics
    #
    # @return [Hash]
    def stats
      {
        name: name,
        state: state,
        failure_count: failure_count,
        success_count: success_count,
        failure_threshold: @failure_threshold,
        success_threshold: @success_threshold,
        recovery_timeout: @recovery_timeout,
        last_failure_at: last_failure_at,
        opened_at: opened_at
      }
    end

    private

    def default_monitored_errors
      [
        Vectra::ServerError,
        Vectra::ConnectionError,
        Vectra::TimeoutError
      ]
    end

    def check_state!
      @mutex.synchronize do
        # Check if we should transition from open to half-open
        if open? && recovery_timeout_elapsed?
          transition_to(:half_open)
          @success_count = 0
        end
      end
    end

    def recovery_timeout_elapsed?
      return false unless opened_at

      Time.now - opened_at >= @recovery_timeout
    end

    def handle_open_circuit(fallback)
      if fallback
        log_fallback
        fallback.call
      else
        raise OpenCircuitError.new(
          circuit_name: name,
          failures: failure_count,
          opened_at: opened_at
        )
      end
    end

    def execute_with_monitoring
      result = yield
      record_success
      result
    end

    def record_success
      @mutex.synchronize do
        @success_count += 1

        # In half-open, check if we should close
        if half_open? && @success_count >= @success_threshold
          transition_to(:closed)
          @failure_count = 0
          log_circuit_closed
        end
      end
    end

    def record_failure(error)
      @mutex.synchronize do
        @failure_count += 1
        @last_failure_at = Time.now

        # In half-open, immediately open again
        if half_open?
          transition_to(:open)
          @opened_at = Time.now
          log_circuit_reopened(error)
          return
        end

        # In closed, check threshold
        if closed? && @failure_count >= @failure_threshold
          transition_to(:open)
          @opened_at = Time.now
          log_circuit_opened(error)
        end
      end
    end

    def transition_to(new_state)
      @state = new_state
    end

    def log_circuit_opened(error)
      logger&.error(
        "[Vectra::CircuitBreaker] Circuit '#{name}' opened after #{failure_count} failures. " \
        "Last error: #{error.class} - #{error.message}"
      )
    end

    def log_circuit_closed
      logger&.info(
        "[Vectra::CircuitBreaker] Circuit '#{name}' closed after #{success_count} successes"
      )
    end

    def log_circuit_reopened(error)
      logger&.warn(
        "[Vectra::CircuitBreaker] Circuit '#{name}' reopened. " \
        "Recovery failed: #{error.class} - #{error.message}"
      )
    end

    def log_fallback
      logger&.info(
        "[Vectra::CircuitBreaker] Circuit '#{name}' open, using fallback"
      )
    end

    def logger
      Vectra.configuration.logger
    end
  end

  # Circuit breaker registry for managing multiple circuits
  #
  # @example
  #   Vectra::CircuitBreakerRegistry.register(:pinecone, failure_threshold: 3)
  #   Vectra::CircuitBreakerRegistry.register(:qdrant, failure_threshold: 5)
  #
  #   Vectra::CircuitBreakerRegistry[:pinecone].call { ... }
  #
  module CircuitBreakerRegistry
    class << self
      # Get or create a circuit breaker
      #
      # @param name [Symbol, String] Circuit name
      # @return [CircuitBreaker]
      def [](name)
        circuits[name.to_sym]
      end

      # Register a new circuit breaker
      #
      # @param name [Symbol, String] Circuit name
      # @param options [Hash] CircuitBreaker options
      # @return [CircuitBreaker]
      def register(name, **options)
        circuits[name.to_sym] = CircuitBreaker.new(name: name.to_s, **options)
      end

      # Get all registered circuits
      #
      # @return [Hash<Symbol, CircuitBreaker>]
      def all
        circuits.dup
      end

      # Reset all circuits
      #
      # @return [void]
      def reset_all!
        circuits.each_value(&:reset!)
      end

      # Get stats for all circuits
      #
      # @return [Hash<Symbol, Hash>]
      def stats
        circuits.transform_values(&:stats)
      end

      # Clear all registered circuits
      #
      # @return [void]
      def clear!
        @circuits = {}
      end

      private

      def circuits
        @circuits ||= {}
      end
    end
  end
end
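The breaker and registry above expose only `register`, `[]`, `call`, `stats`, `reset!`, and `trip!`, so a minimal usage sketch looks like the following. The `client`, `embedding`, and `cached_results` names are placeholders for illustration; they are not defined anywhere in this diff.

```ruby
# Minimal sketch: `client`, `embedding`, and `cached_results` are placeholders,
# not part of vectra-client itself.
require "vectra"

Vectra::CircuitBreakerRegistry.register(:pinecone, failure_threshold: 3, recovery_timeout: 15)
breaker = Vectra::CircuitBreakerRegistry[:pinecone]

results = breaker.call(fallback: -> { cached_results }) do
  client.query(index: "my-index", vector: embedding, top_k: 10)
end

# After `failure_threshold` failures the circuit opens: further calls raise
# OpenCircuitError (or use the fallback) until `recovery_timeout` seconds pass
# and `success_threshold` half-open probes succeed.
puts breaker.stats[:state] # => :closed, :open, or :half_open
```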
data/lib/vectra/client.rb CHANGED

data/lib/vectra/configuration.rb CHANGED
@@ -15,7 +15,8 @@ module Vectra
 
     attr_accessor :api_key, :environment, :host, :timeout, :open_timeout,
                   :max_retries, :retry_delay, :logger, :pool_size, :pool_timeout,
-                  :batch_size, :instrumentation
+                  :batch_size, :instrumentation, :cache_enabled, :cache_ttl,
+                  :cache_max_size, :async_concurrency
 
     attr_reader :provider
 
@@ -33,6 +34,10 @@ module Vectra
       @pool_timeout = 5
       @batch_size = 100
      @instrumentation = false
+      @cache_enabled = false
+      @cache_ttl = 300
+      @cache_max_size = 1000
+      @async_concurrency = 4
     end
 
     # Set the provider
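A hedged sketch of the new 0.3.x settings follows; the defaults come from `Configuration#initialize` above. This diff only shows `Vectra.configuration` being read (for example in the circuit breaker's `logger` helper), so the exact setup style is an assumption — adjust it to however the gem's README configures the client.

```ruby
# Sketch only: assumes settings are applied directly through Vectra.configuration.
config = Vectra.configuration
config.cache_enabled     = true   # default: false
config.cache_ttl         = 600    # presumably seconds; default: 300
config.cache_max_size    = 5_000  # presumably entries; default: 1000
config.async_concurrency = 8      # default: 4
```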
data/lib/vectra/health_check.rb

@@ -0,0 +1,254 @@

# frozen_string_literal: true

require "timeout"

module Vectra
  # Health check functionality for Vectra clients
  #
  # Provides health check methods to verify connectivity and status
  # of vector database providers.
  #
  # @example Basic health check
  #   client = Vectra::Client.new(provider: :pinecone, ...)
  #   result = client.health_check
  #   puts result[:healthy] # => true/false
  #
  # @example Detailed health check
  #   result = client.health_check(
  #     index: "my-index",
  #     include_stats: true
  #   )
  #
  module HealthCheck
    # Perform health check on the provider
    #
    # @param index [String, nil] Index to check (uses first available if nil)
    # @param include_stats [Boolean] Include index statistics
    # @param timeout [Float] Health check timeout in seconds
    # @return [HealthCheckResult]
    def health_check(index: nil, include_stats: false, timeout: 5)
      start_time = Time.now

      indexes = with_timeout(timeout) { list_indexes }
      index_name = index || indexes.first&.dig(:name)

      result = base_result(start_time, indexes)
      add_index_stats(result, index_name, include_stats, timeout)
      add_pool_stats(result)

      HealthCheckResult.new(**result)
    rescue StandardError => e
      failure_result(start_time, e)
    end

    # Quick health check - just tests connectivity
    #
    # @param timeout [Float] Timeout in seconds
    # @return [Boolean] true if healthy
    def healthy?(timeout: 5)
      health_check(timeout: timeout).healthy?
    end

    private

    def with_timeout(seconds, &)
      Timeout.timeout(seconds, &)
    rescue Timeout::Error
      raise Vectra::TimeoutError, "Health check timed out after #{seconds}s"
    end

    def base_result(start_time, indexes)
      {
        healthy: true,
        provider: provider_name,
        latency_ms: latency_since(start_time),
        indexes_available: indexes.size,
        checked_at: current_time_iso
      }
    end

    def add_index_stats(result, index_name, include_stats, timeout)
      return unless include_stats && index_name

      stats = with_timeout(timeout) { stats(index: index_name) }
      result[:index] = index_name
      result[:stats] = {
        vector_count: stats[:total_vector_count],
        dimension: stats[:dimension]
      }.compact
    end

    def add_pool_stats(result)
      return unless provider.respond_to?(:pool_stats)

      pool = provider.pool_stats
      result[:pool] = pool unless pool[:status] == "not_initialized"
    end

    def failure_result(start_time, error)
      HealthCheckResult.new(
        healthy: false,
        provider: provider_name,
        latency_ms: latency_since(start_time),
        error: error.class.name,
        error_message: error.message,
        checked_at: current_time_iso
      )
    end

    def latency_since(start_time)
      ((Time.now - start_time) * 1000).round(2)
    end

    def current_time_iso
      Time.now.utc.iso8601
    end
  end

  # Health check result object
  #
  # @example
  #   result = client.health_check
  #   if result.healthy?
  #     puts "All good! Latency: #{result.latency_ms}ms"
  #   else
  #     puts "Error: #{result.error_message}"
  #   end
  #
  class HealthCheckResult
    attr_reader :provider, :latency_ms, :indexes_available, :checked_at,
                :index, :stats, :pool, :error, :error_message

    def initialize(healthy:, provider:, latency_ms:, checked_at:,
                   indexes_available: nil, index: nil, stats: nil,
                   pool: nil, error: nil, error_message: nil)
      @healthy = healthy
      @provider = provider
      @latency_ms = latency_ms
      @checked_at = checked_at
      @indexes_available = indexes_available
      @index = index
      @stats = stats
      @pool = pool
      @error = error
      @error_message = error_message
    end

    # Check if the health check passed
    #
    # @return [Boolean]
    def healthy?
      @healthy
    end

    # Check if the health check failed
    #
    # @return [Boolean]
    def unhealthy?
      !@healthy
    end

    # Convert to hash
    #
    # @return [Hash]
    def to_h
      {
        healthy: @healthy,
        provider: provider,
        latency_ms: latency_ms,
        checked_at: checked_at,
        indexes_available: indexes_available,
        index: index,
        stats: stats,
        pool: pool,
        error: error,
        error_message: error_message
      }.compact
    end

    # Convert to JSON
    #
    # @return [String]
    def to_json(*)
      JSON.generate(to_h)
    end
  end

  # Aggregate health checker for multiple providers
  #
  # @example
  #   checker = Vectra::AggregateHealthCheck.new(
  #     pinecone: pinecone_client,
  #     qdrant: qdrant_client,
  #     pgvector: pgvector_client
  #   )
  #
  #   result = checker.check_all
  #   puts result[:overall_healthy]
  #
  class AggregateHealthCheck
    attr_reader :clients

    # Initialize aggregate health checker
    #
    # @param clients [Hash<Symbol, Client>] Named clients to check
    def initialize(**clients)
      @clients = clients
    end

    # Check health of all clients
    #
    # @param parallel [Boolean] Run checks in parallel
    # @param timeout [Float] Timeout per check
    # @return [Hash] Aggregate results
    def check_all(parallel: true, timeout: 5)
      start_time = Time.now

      results = if parallel
                  check_parallel(timeout)
                else
                  check_sequential(timeout)
                end

      healthy_count = results.count { |_, r| r.healthy? }
      all_healthy = healthy_count == results.size

      {
        overall_healthy: all_healthy,
        healthy_count: healthy_count,
        total_count: results.size,
        total_latency_ms: ((Time.now - start_time) * 1000).round(2),
        checked_at: Time.now.utc.iso8601,
        results: results.transform_values(&:to_h)
      }
    end

    # Check if all providers are healthy
    #
    # @return [Boolean]
    def all_healthy?(timeout: 5)
      check_all(timeout: timeout)[:overall_healthy]
    end

    # Check if any provider is healthy
    #
    # @return [Boolean]
    def any_healthy?(timeout: 5)
      check_all(timeout: timeout)[:healthy_count].positive?
    end

    private

    def check_parallel(timeout)
      threads = clients.map do |name, client|
        Thread.new { [name, client.health_check(timeout: timeout)] }
      end

      threads.to_h(&:value)
    end

    def check_sequential(timeout)
      clients.transform_values { |client| client.health_check(timeout: timeout) }
    end
  end
end
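As an illustration of how this might plug into monitoring, here is a hypothetical Rack health endpoint built only on `AggregateHealthCheck#check_all` and the result hash it returns; `primary_client` and `replica_client` stand in for `Vectra::Client` instances constructed elsewhere.

```ruby
# Hypothetical Rack endpoint; `primary_client` and `replica_client` are
# placeholders for clients you build yourself.
require "json"
require "vectra"

HealthApp = lambda do |_env|
  checker = Vectra::AggregateHealthCheck.new(primary: primary_client, replica: replica_client)
  report  = checker.check_all(parallel: true, timeout: 2)

  status = report[:overall_healthy] ? 200 : 503
  [status, { "content-type" => "application/json" }, [JSON.generate(report)]]
end
```

A simpler liveness probe can call `client.healthy?(timeout: 2)` on a single client and map the boolean to 200/503 in the same way.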