parishad 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parishad/__init__.py +70 -0
- parishad/__main__.py +10 -0
- parishad/checker/__init__.py +25 -0
- parishad/checker/deterministic.py +644 -0
- parishad/checker/ensemble.py +496 -0
- parishad/checker/retrieval.py +546 -0
- parishad/cli/__init__.py +6 -0
- parishad/cli/code.py +3254 -0
- parishad/cli/main.py +1158 -0
- parishad/cli/prarambh.py +99 -0
- parishad/cli/sthapana.py +368 -0
- parishad/config/modes.py +139 -0
- parishad/config/pipeline.core.yaml +128 -0
- parishad/config/pipeline.extended.yaml +172 -0
- parishad/config/pipeline.fast.yaml +89 -0
- parishad/config/user_config.py +115 -0
- parishad/data/catalog.py +118 -0
- parishad/data/models.json +108 -0
- parishad/memory/__init__.py +79 -0
- parishad/models/__init__.py +181 -0
- parishad/models/backends/__init__.py +247 -0
- parishad/models/backends/base.py +211 -0
- parishad/models/backends/huggingface.py +318 -0
- parishad/models/backends/llama_cpp.py +239 -0
- parishad/models/backends/mlx_lm.py +141 -0
- parishad/models/backends/ollama.py +253 -0
- parishad/models/backends/openai_api.py +193 -0
- parishad/models/backends/transformers_hf.py +198 -0
- parishad/models/costs.py +385 -0
- parishad/models/downloader.py +1557 -0
- parishad/models/optimizations.py +871 -0
- parishad/models/profiles.py +610 -0
- parishad/models/reliability.py +876 -0
- parishad/models/runner.py +651 -0
- parishad/models/tokenization.py +287 -0
- parishad/orchestrator/__init__.py +24 -0
- parishad/orchestrator/config_loader.py +210 -0
- parishad/orchestrator/engine.py +1113 -0
- parishad/orchestrator/exceptions.py +14 -0
- parishad/roles/__init__.py +71 -0
- parishad/roles/base.py +712 -0
- parishad/roles/dandadhyaksha.py +163 -0
- parishad/roles/darbari.py +246 -0
- parishad/roles/majumdar.py +274 -0
- parishad/roles/pantapradhan.py +150 -0
- parishad/roles/prerak.py +357 -0
- parishad/roles/raja.py +345 -0
- parishad/roles/sacheev.py +203 -0
- parishad/roles/sainik.py +427 -0
- parishad/roles/sar_senapati.py +164 -0
- parishad/roles/vidushak.py +69 -0
- parishad/tools/__init__.py +7 -0
- parishad/tools/base.py +57 -0
- parishad/tools/fs.py +110 -0
- parishad/tools/perception.py +96 -0
- parishad/tools/retrieval.py +74 -0
- parishad/tools/shell.py +103 -0
- parishad/utils/__init__.py +7 -0
- parishad/utils/hardware.py +122 -0
- parishad/utils/logging.py +79 -0
- parishad/utils/scanner.py +164 -0
- parishad/utils/text.py +61 -0
- parishad/utils/tracing.py +133 -0
- parishad-0.1.0.dist-info/METADATA +256 -0
- parishad-0.1.0.dist-info/RECORD +68 -0
- parishad-0.1.0.dist-info/WHEEL +4 -0
- parishad-0.1.0.dist-info/entry_points.txt +2 -0
- parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,876 @@
"""
Reliability and error handling for Parishad model inference.

Provides:
- RetryPolicy: Configurable retry with exponential backoff
- TimeoutManager: Request timeout handling
- CircuitBreaker: Fail-fast when backend is unhealthy
- FallbackChain: Try multiple backends in sequence
- HealthChecker: Backend health monitoring

These components ensure robust operation even with unreliable backends.
"""

from __future__ import annotations

import asyncio
import logging
import random
import threading
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from functools import wraps
from typing import Any, Callable, Generic, Optional, TypeVar

from .backends import BackendError, BackendResult, ModelBackend


logger = logging.getLogger(__name__)

T = TypeVar("T")
F = TypeVar("F", bound=Callable[..., Any])


# =============================================================================
# Retry Policy
# =============================================================================


class RetryStrategy(Enum):
    """Retry backoff strategies."""
    FIXED = "fixed"
    LINEAR = "linear"
    EXPONENTIAL = "exponential"
    EXPONENTIAL_JITTER = "exponential_jitter"


@dataclass
class RetryPolicy:
    """
    Configurable retry policy with backoff.

    Supports fixed, linear, and exponential backoff strategies
    with optional jitter to prevent thundering herd.

    Usage:
        policy = RetryPolicy(max_retries=3, strategy=RetryStrategy.EXPONENTIAL)

        @policy.wrap
        def make_request():
            return api.call()

        # Or manually:
        for attempt in policy.attempts():
            try:
                return make_request()
            except Exception as e:
                if not policy.should_retry(e, attempt):
                    raise
    """
    max_retries: int = 3
    strategy: RetryStrategy = RetryStrategy.EXPONENTIAL_JITTER
    base_delay: float = 1.0
    max_delay: float = 60.0
    jitter_factor: float = 0.1

    # Exception types to retry on
    retryable_exceptions: tuple = field(default_factory=lambda: (
        ConnectionError,
        TimeoutError,
        BackendError,
    ))

    # Error messages to retry on
    retryable_messages: list[str] = field(default_factory=lambda: [
        "rate limit",
        "overloaded",
        "temporarily unavailable",
        "server error",
        "502",
        "503",
        "504",
    ])

    def get_delay(self, attempt: int) -> float:
        """
        Calculate delay for attempt number.

        Args:
            attempt: Current attempt number (0-indexed)

        Returns:
            Delay in seconds
        """
        if self.strategy == RetryStrategy.FIXED:
            delay = self.base_delay

        elif self.strategy == RetryStrategy.LINEAR:
            delay = self.base_delay * (attempt + 1)

        elif self.strategy == RetryStrategy.EXPONENTIAL:
            delay = self.base_delay * (2 ** attempt)

        elif self.strategy == RetryStrategy.EXPONENTIAL_JITTER:
            delay = self.base_delay * (2 ** attempt)
            jitter = delay * self.jitter_factor * random.random()
            delay += jitter

        else:
            delay = self.base_delay

        return min(delay, self.max_delay)

    def should_retry(self, exception: Exception, attempt: int) -> bool:
        """
        Check if exception should trigger a retry.

        Args:
            exception: The exception that occurred
            attempt: Current attempt number

        Returns:
            True if should retry
        """
        if attempt >= self.max_retries:
            return False

        # Check exception type
        if isinstance(exception, self.retryable_exceptions):
            return True

        # Check error message
        error_msg = str(exception).lower()
        for pattern in self.retryable_messages:
            if pattern.lower() in error_msg:
                return True

        return False

    def attempts(self):
        """
        Generator yielding attempt numbers.

        Usage:
            for attempt in policy.attempts():
                try:
                    return make_request()
                except Exception as e:
                    if not policy.should_retry(e, attempt):
                        raise
                    time.sleep(policy.get_delay(attempt))
        """
        for attempt in range(self.max_retries + 1):
            yield attempt

    def wrap(self, func: F) -> F:
        """
        Decorator to apply retry policy to a function.

        Args:
            func: Function to wrap

        Returns:
            Wrapped function with retry logic
        """
        @wraps(func)
        def wrapper(*args, **kwargs):
            last_exception = None

            for attempt in self.attempts():
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    last_exception = e

                    if not self.should_retry(e, attempt):
                        raise

                    delay = self.get_delay(attempt)
                    logger.warning(
                        f"Retry attempt {attempt + 1}/{self.max_retries} "
                        f"after {delay:.1f}s: {e}"
                    )
                    time.sleep(delay)

            raise last_exception  # type: ignore

        return wrapper  # type: ignore
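
A minimal sketch of the schedule this policy produces, assuming the module is importable as parishad.models.reliability (the path recorded in this wheel); it is illustrative only and not part of reliability.py:

    from parishad.models.reliability import RetryPolicy, RetryStrategy

    policy = RetryPolicy(max_retries=3, base_delay=1.0, max_delay=60.0,
                         strategy=RetryStrategy.EXPONENTIAL)

    # Attempts 0..3 back off as 1s, 2s, 4s, 8s, capped at max_delay.
    print([policy.get_delay(a) for a in policy.attempts()])  # [1.0, 2.0, 4.0, 8.0]

    # Retries are matched either by exception type or by message substring.
    print(policy.should_retry(ConnectionError("connection reset"), attempt=0))  # True
    print(policy.should_retry(RuntimeError("503 from upstream"), attempt=0))    # True ("503")
    print(policy.should_retry(ValueError("bad prompt"), attempt=0))             # False

With EXPONENTIAL_JITTER (the default) each delay additionally gains up to jitter_factor * delay of random padding before the max_delay cap is applied.
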
# =============================================================================
# Timeout Manager
# =============================================================================


class TimeoutError(Exception):
    """Raised when an operation times out."""
    pass


@dataclass
class TimeoutConfig:
    """Timeout configuration."""
    connect_timeout: float = 10.0   # Connection timeout
    read_timeout: float = 60.0      # Read/response timeout
    total_timeout: float = 120.0    # Total request timeout

    def as_tuple(self) -> tuple[float, float]:
        """Return as (connect, read) tuple for requests library."""
        return (self.connect_timeout, self.read_timeout)


class TimeoutManager:
    """
    Manages request timeouts.

    Provides context manager for enforcing timeouts on operations.

    Usage:
        manager = TimeoutManager(total_timeout=30.0)

        with manager.timeout():
            result = slow_operation()
    """

    def __init__(self, config: Optional[TimeoutConfig] = None):
        """Initialize with timeout configuration."""
        self.config = config or TimeoutConfig()

    def timeout(self, seconds: Optional[float] = None):
        """
        Context manager for timeout enforcement.

        Note: This is a basic implementation. For true timeout enforcement
        in synchronous code, consider using signals or threading.
        """
        timeout_seconds = seconds or self.config.total_timeout
        return _TimeoutContext(timeout_seconds)

    def with_timeout(self, func: Callable[..., T], *args, **kwargs) -> T:
        """
        Execute function with timeout.

        Uses threading for timeout enforcement.
        """
        result: list[T] = []
        exception: list[Exception] = []

        def target():
            try:
                result.append(func(*args, **kwargs))
            except Exception as e:
                exception.append(e)

        thread = threading.Thread(target=target)
        thread.start()
        thread.join(timeout=self.config.total_timeout)

        if thread.is_alive():
            # Thread is still running - timeout occurred
            raise TimeoutError(
                f"Operation timed out after {self.config.total_timeout}s"
            )

        if exception:
            raise exception[0]

        return result[0]


class _TimeoutContext:
    """Context manager for basic timeout tracking."""

    def __init__(self, timeout: float):
        self.timeout = timeout
        self.start_time = 0.0

    def __enter__(self):
        self.start_time = time.time()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        elapsed = time.time() - self.start_time
        if elapsed > self.timeout:
            logger.warning(f"Operation took {elapsed:.1f}s (timeout: {self.timeout}s)")
        return False

    @property
    def remaining(self) -> float:
        """Get remaining time."""
        elapsed = time.time() - self.start_time
        return max(0, self.timeout - elapsed)
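
A short sketch of the two timeout paths above, not part of reliability.py and assuming the parishad.models.reliability import path: with_timeout() runs the callable in a worker thread and raises the module's TimeoutError when the budget is exceeded, while timeout() only measures and logs. Note that the constructor takes a TimeoutConfig, not the total_timeout keyword shown in the class docstring.

    import time

    from parishad.models.reliability import TimeoutConfig, TimeoutError, TimeoutManager

    manager = TimeoutManager(TimeoutConfig(total_timeout=2.0))

    # Hard path: the worker thread is abandoned after 2s (it keeps running in the background).
    try:
        manager.with_timeout(time.sleep, 5)
    except TimeoutError:
        print("timed out after 2s")

    # Soft path: the context manager only tracks elapsed time and warns afterwards.
    with manager.timeout(seconds=0.5) as ctx:
        time.sleep(0.1)
        print(f"{ctx.remaining:.2f}s left in the budget")
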
# =============================================================================
# Circuit Breaker
# =============================================================================


class CircuitState(Enum):
    """Circuit breaker states."""
    CLOSED = "closed"        # Normal operation
    OPEN = "open"            # Failing fast
    HALF_OPEN = "half_open"  # Testing recovery


@dataclass
class CircuitBreakerConfig:
    """Circuit breaker configuration."""
    failure_threshold: int = 5    # Failures before opening
    success_threshold: int = 2    # Successes before closing
    timeout: float = 30.0         # Seconds before half-open
    half_open_max_calls: int = 3  # Max calls in half-open state


class CircuitBreaker:
    """
    Circuit breaker for fail-fast behavior.

    When a backend fails repeatedly, the circuit opens and fails fast
    instead of waiting for timeouts. After a cooldown, it tests the
    backend again before fully recovering.

    Usage:
        breaker = CircuitBreaker()

        @breaker.protect
        def call_backend():
            return backend.generate(prompt)

        try:
            result = call_backend()
        except CircuitOpenError:
            # Circuit is open, use fallback
            result = fallback()
    """

    def __init__(self, config: Optional[CircuitBreakerConfig] = None):
        """Initialize circuit breaker."""
        self.config = config or CircuitBreakerConfig()

        self._state = CircuitState.CLOSED
        self._failure_count = 0
        self._success_count = 0
        self._last_failure_time = 0.0
        self._half_open_calls = 0
        self._lock = threading.Lock()

    @property
    def state(self) -> CircuitState:
        """Get current circuit state."""
        with self._lock:
            # Check if we should transition from OPEN to HALF_OPEN
            if self._state == CircuitState.OPEN:
                if time.time() - self._last_failure_time >= self.config.timeout:
                    self._state = CircuitState.HALF_OPEN
                    self._half_open_calls = 0
                    self._success_count = 0
                    logger.info("Circuit breaker entering half-open state")

            return self._state

    def record_success(self) -> None:
        """Record a successful call."""
        with self._lock:
            if self._state == CircuitState.HALF_OPEN:
                self._success_count += 1
                if self._success_count >= self.config.success_threshold:
                    self._state = CircuitState.CLOSED
                    self._failure_count = 0
                    logger.info("Circuit breaker closed after recovery")
            else:
                self._failure_count = 0

    def record_failure(self, exception: Exception) -> None:
        """Record a failed call."""
        with self._lock:
            self._failure_count += 1
            self._last_failure_time = time.time()

            if self._state == CircuitState.HALF_OPEN:
                self._state = CircuitState.OPEN
                logger.warning(f"Circuit breaker re-opened: {exception}")

            elif self._failure_count >= self.config.failure_threshold:
                self._state = CircuitState.OPEN
                logger.warning(
                    f"Circuit breaker opened after {self._failure_count} failures"
                )

    def allow_request(self) -> bool:
        """Check if a request is allowed."""
        state = self.state  # This may update state

        if state == CircuitState.CLOSED:
            return True

        if state == CircuitState.OPEN:
            return False

        # HALF_OPEN - allow limited calls
        with self._lock:
            if self._half_open_calls < self.config.half_open_max_calls:
                self._half_open_calls += 1
                return True
            return False

    def protect(self, func: F) -> F:
        """
        Decorator to protect a function with circuit breaker.

        Args:
            func: Function to protect

        Returns:
            Protected function
        """
        @wraps(func)
        def wrapper(*args, **kwargs):
            if not self.allow_request():
                raise CircuitOpenError(
                    f"Circuit breaker is {self.state.value}"
                )

            try:
                result = func(*args, **kwargs)
                self.record_success()
                return result
            except Exception as e:
                self.record_failure(e)
                raise

        return wrapper  # type: ignore

    def reset(self) -> None:
        """Reset circuit breaker to closed state."""
        with self._lock:
            self._state = CircuitState.CLOSED
            self._failure_count = 0
            self._success_count = 0
            self._half_open_calls = 0

    def get_stats(self) -> dict:
        """Get circuit breaker statistics."""
        return {
            "state": self.state.value,
            "failure_count": self._failure_count,
            "success_count": self._success_count,
            "last_failure": self._last_failure_time,
        }


class CircuitOpenError(Exception):
    """Raised when circuit breaker is open."""
    pass
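
An illustrative walk through the state machine above, not part of reliability.py; the thresholds are shrunk only to keep the example short, and the import path is the one implied by this wheel's layout.

    import time

    from parishad.models.reliability import CircuitBreaker, CircuitBreakerConfig, CircuitState

    breaker = CircuitBreaker(CircuitBreakerConfig(
        failure_threshold=2, success_threshold=2, timeout=0.5,
    ))

    # Two consecutive failures reach failure_threshold: the circuit opens.
    breaker.record_failure(RuntimeError("backend down"))
    breaker.record_failure(RuntimeError("backend down"))
    assert breaker.state is CircuitState.OPEN
    assert breaker.allow_request() is False   # fail fast instead of waiting on the backend

    # After the cooldown the breaker admits a limited number of trial calls...
    time.sleep(0.6)
    assert breaker.allow_request() is True    # now HALF_OPEN

    # ...and closes again once success_threshold of them succeed.
    breaker.record_success()
    breaker.record_success()
    assert breaker.state is CircuitState.CLOSED
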
# =============================================================================
# Fallback Chain
# =============================================================================


class FallbackChain:
    """
    Chain of backends with automatic fallback.

    Tries backends in order until one succeeds. Useful for having
    primary/secondary/tertiary backend configurations.

    Usage:
        chain = FallbackChain([
            primary_backend,
            secondary_backend,
            stub_backend,
        ])

        result = chain.generate(prompt, config)
    """

    def __init__(
        self,
        backends: list[ModelBackend],
        circuit_breaker_enabled: bool = True,
    ):
        """
        Initialize fallback chain.

        Args:
            backends: List of backends in priority order
            circuit_breaker_enabled: Use circuit breakers per backend
        """
        self.backends = backends

        self._circuit_breakers: dict[int, CircuitBreaker] = {}
        if circuit_breaker_enabled:
            for i in range(len(backends)):
                self._circuit_breakers[i] = CircuitBreaker()

    def generate(
        self,
        prompt: str,
        max_tokens: int = 1024,
        temperature: float = 0.5,
        **kwargs,
    ) -> tuple[BackendResult, int]:
        """
        Generate using fallback chain.

        Args:
            prompt: Input prompt
            max_tokens: Maximum tokens
            temperature: Sampling temperature
            **kwargs: Additional parameters

        Returns:
            Tuple of (BackendResult, backend_index)

        Raises:
            BackendError: If all backends fail
        """
        last_error: Optional[Exception] = None

        for i, backend in enumerate(self.backends):
            # Check circuit breaker
            if i in self._circuit_breakers:
                if not self._circuit_breakers[i].allow_request():
                    logger.debug(f"Skipping backend {i}: circuit open")
                    continue

            try:
                result = backend.generate(
                    prompt=prompt,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    **kwargs,
                )

                if i in self._circuit_breakers:
                    self._circuit_breakers[i].record_success()

                if i > 0:
                    logger.info(f"Using fallback backend {i}")

                return result, i

            except Exception as e:
                last_error = e
                logger.warning(f"Backend {i} failed: {e}")

                if i in self._circuit_breakers:
                    self._circuit_breakers[i].record_failure(e)

        raise BackendError(f"All backends failed. Last error: {last_error}")

    def get_stats(self) -> dict:
        """Get chain statistics."""
        return {
            "backends": len(self.backends),
            "circuit_breakers": {
                i: cb.get_stats()
                for i, cb in self._circuit_breakers.items()
            },
        }
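
A usage sketch for the chain above, not part of reliability.py. The names primary and local_fallback are hypothetical placeholders for any two already-constructed ModelBackend instances; their construction is backend-specific and omitted here.

    from parishad.models.reliability import FallbackChain

    # primary, local_fallback: hypothetical ModelBackend instances (for example a
    # hosted API backend and a local model), listed in priority order.
    chain = FallbackChain([primary, local_fallback])

    result, backend_index = chain.generate(
        "Summarize the release notes.",
        max_tokens=256,
        temperature=0.2,
    )
    if backend_index > 0:
        print("primary failed or was skipped; the fallback served this request")

    # Per-backend circuit breaker state is exposed for monitoring.
    print(chain.get_stats()["circuit_breakers"][0]["state"])
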
# =============================================================================
# Health Checker
# =============================================================================


@dataclass
class HealthStatus:
    """Health status of a backend."""
    healthy: bool
    latency_ms: float
    error: Optional[str] = None
    checked_at: float = field(default_factory=time.time)


class HealthChecker:
    """
    Backend health monitoring.

    Periodically checks backend health and tracks metrics.

    Usage:
        checker = HealthChecker(backend)

        # One-time check
        status = checker.check()

        # Start background monitoring
        checker.start_monitoring(interval=30)
    """

    def __init__(
        self,
        backend: ModelBackend,
        test_prompt: str = "Hello",
    ):
        """
        Initialize health checker.

        Args:
            backend: Backend to monitor
            test_prompt: Prompt for health checks
        """
        self.backend = backend
        self.test_prompt = test_prompt

        self._history: list[HealthStatus] = []
        self._max_history = 100
        self._monitoring = False
        self._monitor_thread: Optional[threading.Thread] = None

    def check(self) -> HealthStatus:
        """
        Perform a health check.

        Returns:
            HealthStatus with results
        """
        start = time.time()

        try:
            result = self.backend.generate(
                prompt=self.test_prompt,
                max_tokens=5,
                temperature=0,
            )

            latency_ms = (time.time() - start) * 1000

            status = HealthStatus(
                healthy=True,
                latency_ms=latency_ms,
            )

        except Exception as e:
            latency_ms = (time.time() - start) * 1000

            status = HealthStatus(
                healthy=False,
                latency_ms=latency_ms,
                error=str(e),
            )

        self._record(status)
        return status

    def _record(self, status: HealthStatus) -> None:
        """Record health status."""
        self._history.append(status)

        # Trim history
        if len(self._history) > self._max_history:
            self._history = self._history[-self._max_history:]

    def start_monitoring(self, interval: float = 30.0) -> None:
        """Start background health monitoring."""
        if self._monitoring:
            return

        self._monitoring = True

        def monitor_loop():
            while self._monitoring:
                try:
                    self.check()
                except Exception as e:
                    logger.error(f"Health check failed: {e}")

                time.sleep(interval)

        self._monitor_thread = threading.Thread(target=monitor_loop, daemon=True)
        self._monitor_thread.start()

    def stop_monitoring(self) -> None:
        """Stop background health monitoring."""
        self._monitoring = False

    @property
    def is_healthy(self) -> bool:
        """Check if backend is currently healthy."""
        if not self._history:
            return True  # Assume healthy if no data

        # Check last 3 statuses
        recent = self._history[-3:]
        healthy_count = sum(1 for s in recent if s.healthy)

        return healthy_count >= 2

    @property
    def avg_latency(self) -> float:
        """Average latency over recent history."""
        if not self._history:
            return 0.0

        recent = self._history[-10:]
        return sum(s.latency_ms for s in recent) / len(recent)

    def get_stats(self) -> dict:
        """Get health statistics."""
        if not self._history:
            return {"status": "unknown", "checks": 0}

        recent = self._history[-10:]

        return {
            "status": "healthy" if self.is_healthy else "unhealthy",
            "checks": len(self._history),
            "recent_healthy": sum(1 for s in recent if s.healthy),
            "recent_total": len(recent),
            "avg_latency_ms": self.avg_latency,
            "last_check": self._history[-1].checked_at,
            "last_error": next(
                (s.error for s in reversed(self._history) if s.error),
                None
            ),
        }
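
A sketch of one-shot and background health checks, not part of reliability.py; backend again stands in for any loaded ModelBackend, and the import path is the one implied by this wheel's layout.

    from parishad.models.reliability import HealthChecker

    checker = HealthChecker(backend, test_prompt="ping")  # backend: hypothetical placeholder

    status = checker.check()                 # single probe: a tiny 5-token generation
    print(status.healthy, f"{status.latency_ms:.0f} ms", status.error)

    checker.start_monitoring(interval=60.0)  # daemon thread, one probe per minute
    # ... serve traffic ...
    checker.stop_monitoring()                # loop exits after its current sleep

    print(checker.get_stats()["status"])     # "healthy" if at least 2 of the last 3 probes passed
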
# =============================================================================
# Resilient Backend Wrapper
# =============================================================================


class ResilientBackend:
    """
    Wrapper that adds all reliability features to a backend.

    Combines retry, timeout, circuit breaker, and health checking.

    Usage:
        backend = LlamaCppBackend()
        resilient = ResilientBackend(backend)

        result = resilient.generate(prompt, max_tokens=100)
    """

    def __init__(
        self,
        backend: ModelBackend,
        retry_policy: Optional[RetryPolicy] = None,
        timeout_config: Optional[TimeoutConfig] = None,
        circuit_config: Optional[CircuitBreakerConfig] = None,
        enable_health_check: bool = True,
    ):
        """
        Initialize resilient backend.

        Args:
            backend: Base backend to wrap
            retry_policy: Retry configuration
            timeout_config: Timeout configuration
            circuit_config: Circuit breaker configuration
            enable_health_check: Enable health monitoring
        """
        self.backend = backend
        self.retry_policy = retry_policy or RetryPolicy()
        self.timeout_manager = TimeoutManager(timeout_config)
        self.circuit_breaker = CircuitBreaker(circuit_config)

        self._health_checker: Optional[HealthChecker] = None
        if enable_health_check:
            self._health_checker = HealthChecker(backend)

    @property
    def name(self) -> str:
        """Backend name."""
        return f"resilient({self.backend.name})"

    def load(self, config) -> None:
        """Load model."""
        self.backend.load(config)

    def unload(self) -> None:
        """Unload model."""
        self.backend.unload()

    def generate(
        self,
        prompt: str,
        max_tokens: int = 1024,
        temperature: float = 0.5,
        **kwargs,
    ) -> BackendResult:
        """
        Generate with all reliability features.

        Args:
            prompt: Input prompt
            max_tokens: Maximum tokens
            temperature: Sampling temperature
            **kwargs: Additional parameters

        Returns:
            BackendResult from generation
        """
        # Check circuit breaker
        if not self.circuit_breaker.allow_request():
            raise CircuitOpenError("Circuit breaker is open")

        last_error: Optional[Exception] = None

        for attempt in self.retry_policy.attempts():
            try:
                # Apply timeout
                with self.timeout_manager.timeout():
                    result = self.backend.generate(
                        prompt=prompt,
                        max_tokens=max_tokens,
                        temperature=temperature,
                        **kwargs,
                    )

                self.circuit_breaker.record_success()
                return result

            except Exception as e:
                last_error = e

                if not self.retry_policy.should_retry(e, attempt):
                    self.circuit_breaker.record_failure(e)
                    raise

                delay = self.retry_policy.get_delay(attempt)
                logger.warning(f"Retry {attempt + 1}: {e}, waiting {delay:.1f}s")
                time.sleep(delay)

        self.circuit_breaker.record_failure(last_error)  # type: ignore
        raise last_error  # type: ignore

    def get_stats(self) -> dict:
        """Get reliability statistics."""
        stats = {
            "circuit_breaker": self.circuit_breaker.get_stats(),
        }
        if self._health_checker:
            stats["health"] = self._health_checker.get_stats()
        return stats


__all__ = [
    # Retry
    "RetryStrategy",
    "RetryPolicy",
    # Timeout
    "TimeoutError",
    "TimeoutConfig",
    "TimeoutManager",
    # Circuit breaker
    "CircuitState",
    "CircuitBreakerConfig",
    "CircuitBreaker",
    "CircuitOpenError",
    # Fallback
    "FallbackChain",
    # Health check
    "HealthStatus",
    "HealthChecker",
    # Combined
    "ResilientBackend",
]
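
Putting the pieces together: a combined sketch, not part of reliability.py. The import paths are inferred from this wheel's file list and may differ; LlamaCppBackend() mirrors the ResilientBackend docstring above, and its real constructor arguments and load() config are not shown in this diff.

    from parishad.models.backends.llama_cpp import LlamaCppBackend  # inferred module path
    from parishad.models.reliability import (
        CircuitBreakerConfig,
        CircuitOpenError,
        ResilientBackend,
        RetryPolicy,
        TimeoutConfig,
    )

    resilient = ResilientBackend(
        LlamaCppBackend(),                     # as in the class docstring
        retry_policy=RetryPolicy(max_retries=2),
        timeout_config=TimeoutConfig(total_timeout=90.0),
        circuit_config=CircuitBreakerConfig(failure_threshold=3),
    )

    try:
        result = resilient.generate("Hello, council.", max_tokens=64)
    except CircuitOpenError:
        result = None                          # breaker already open: fail fast, no backend call

    print(resilient.name)                      # "resilient(<wrapped backend name>)"
    print(resilient.get_stats()["circuit_breaker"]["state"])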