parishad-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. parishad/__init__.py +70 -0
  2. parishad/__main__.py +10 -0
  3. parishad/checker/__init__.py +25 -0
  4. parishad/checker/deterministic.py +644 -0
  5. parishad/checker/ensemble.py +496 -0
  6. parishad/checker/retrieval.py +546 -0
  7. parishad/cli/__init__.py +6 -0
  8. parishad/cli/code.py +3254 -0
  9. parishad/cli/main.py +1158 -0
  10. parishad/cli/prarambh.py +99 -0
  11. parishad/cli/sthapana.py +368 -0
  12. parishad/config/modes.py +139 -0
  13. parishad/config/pipeline.core.yaml +128 -0
  14. parishad/config/pipeline.extended.yaml +172 -0
  15. parishad/config/pipeline.fast.yaml +89 -0
  16. parishad/config/user_config.py +115 -0
  17. parishad/data/catalog.py +118 -0
  18. parishad/data/models.json +108 -0
  19. parishad/memory/__init__.py +79 -0
  20. parishad/models/__init__.py +181 -0
  21. parishad/models/backends/__init__.py +247 -0
  22. parishad/models/backends/base.py +211 -0
  23. parishad/models/backends/huggingface.py +318 -0
  24. parishad/models/backends/llama_cpp.py +239 -0
  25. parishad/models/backends/mlx_lm.py +141 -0
  26. parishad/models/backends/ollama.py +253 -0
  27. parishad/models/backends/openai_api.py +193 -0
  28. parishad/models/backends/transformers_hf.py +198 -0
  29. parishad/models/costs.py +385 -0
  30. parishad/models/downloader.py +1557 -0
  31. parishad/models/optimizations.py +871 -0
  32. parishad/models/profiles.py +610 -0
  33. parishad/models/reliability.py +876 -0
  34. parishad/models/runner.py +651 -0
  35. parishad/models/tokenization.py +287 -0
  36. parishad/orchestrator/__init__.py +24 -0
  37. parishad/orchestrator/config_loader.py +210 -0
  38. parishad/orchestrator/engine.py +1113 -0
  39. parishad/orchestrator/exceptions.py +14 -0
  40. parishad/roles/__init__.py +71 -0
  41. parishad/roles/base.py +712 -0
  42. parishad/roles/dandadhyaksha.py +163 -0
  43. parishad/roles/darbari.py +246 -0
  44. parishad/roles/majumdar.py +274 -0
  45. parishad/roles/pantapradhan.py +150 -0
  46. parishad/roles/prerak.py +357 -0
  47. parishad/roles/raja.py +345 -0
  48. parishad/roles/sacheev.py +203 -0
  49. parishad/roles/sainik.py +427 -0
  50. parishad/roles/sar_senapati.py +164 -0
  51. parishad/roles/vidushak.py +69 -0
  52. parishad/tools/__init__.py +7 -0
  53. parishad/tools/base.py +57 -0
  54. parishad/tools/fs.py +110 -0
  55. parishad/tools/perception.py +96 -0
  56. parishad/tools/retrieval.py +74 -0
  57. parishad/tools/shell.py +103 -0
  58. parishad/utils/__init__.py +7 -0
  59. parishad/utils/hardware.py +122 -0
  60. parishad/utils/logging.py +79 -0
  61. parishad/utils/scanner.py +164 -0
  62. parishad/utils/text.py +61 -0
  63. parishad/utils/tracing.py +133 -0
  64. parishad-0.1.0.dist-info/METADATA +256 -0
  65. parishad-0.1.0.dist-info/RECORD +68 -0
  66. parishad-0.1.0.dist-info/WHEEL +4 -0
  67. parishad-0.1.0.dist-info/entry_points.txt +2 -0
  68. parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
parishad/models/reliability.py
@@ -0,0 +1,876 @@
+ """
+ Reliability and error handling for Parishad model inference.
+
+ Provides:
+ - RetryPolicy: Configurable retry with exponential backoff
+ - TimeoutManager: Request timeout handling
+ - CircuitBreaker: Fail-fast when backend is unhealthy
+ - FallbackChain: Try multiple backends in sequence
+ - HealthChecker: Backend health monitoring
+
+ These components ensure robust operation even with unreliable backends.
+ """
+
+ from __future__ import annotations
+
+ import asyncio
+ import logging
+ import random
+ import threading
+ import time
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass, field
+ from enum import Enum
+ from functools import wraps
+ from typing import Any, Callable, Generic, Optional, TypeVar
+
+ from .backends import BackendError, BackendResult, ModelBackend
+
+
+ logger = logging.getLogger(__name__)
+
+ T = TypeVar("T")
+ F = TypeVar("F", bound=Callable[..., Any])
+
+
+ # =============================================================================
+ # Retry Policy
+ # =============================================================================
+
+
+ class RetryStrategy(Enum):
+     """Retry backoff strategies."""
+     FIXED = "fixed"
+     LINEAR = "linear"
+     EXPONENTIAL = "exponential"
+     EXPONENTIAL_JITTER = "exponential_jitter"
+
+
+ @dataclass
+ class RetryPolicy:
+     """
+     Configurable retry policy with backoff.
+
+     Supports fixed, linear, and exponential backoff strategies
+     with optional jitter to prevent thundering herd.
+
+     Usage:
+         policy = RetryPolicy(max_retries=3, strategy=RetryStrategy.EXPONENTIAL)
+
+         @policy.wrap
+         def make_request():
+             return api.call()
+
+         # Or manually:
+         for attempt in policy.attempts():
+             try:
+                 return make_request()
+             except Exception as e:
+                 if not policy.should_retry(e, attempt):
+                     raise
+     """
+     max_retries: int = 3
+     strategy: RetryStrategy = RetryStrategy.EXPONENTIAL_JITTER
+     base_delay: float = 1.0
+     max_delay: float = 60.0
+     jitter_factor: float = 0.1
+
+     # Exception types to retry on
+     retryable_exceptions: tuple = field(default_factory=lambda: (
+         ConnectionError,
+         TimeoutError,
+         BackendError,
+     ))
+
+     # Error messages to retry on
+     retryable_messages: list[str] = field(default_factory=lambda: [
+         "rate limit",
+         "overloaded",
+         "temporarily unavailable",
+         "server error",
+         "502",
+         "503",
+         "504",
+     ])
+
+     def get_delay(self, attempt: int) -> float:
+         """
+         Calculate delay for attempt number.
+
+         Args:
+             attempt: Current attempt number (0-indexed)
+
+         Returns:
+             Delay in seconds
+         """
+         if self.strategy == RetryStrategy.FIXED:
+             delay = self.base_delay
+
+         elif self.strategy == RetryStrategy.LINEAR:
+             delay = self.base_delay * (attempt + 1)
+
+         elif self.strategy == RetryStrategy.EXPONENTIAL:
+             delay = self.base_delay * (2 ** attempt)
+
+         elif self.strategy == RetryStrategy.EXPONENTIAL_JITTER:
+             delay = self.base_delay * (2 ** attempt)
+             jitter = delay * self.jitter_factor * random.random()
+             delay += jitter
+
+         else:
+             delay = self.base_delay
+
+         return min(delay, self.max_delay)
+
+     def should_retry(self, exception: Exception, attempt: int) -> bool:
+         """
+         Check if exception should trigger a retry.
+
+         Args:
+             exception: The exception that occurred
+             attempt: Current attempt number
+
+         Returns:
+             True if should retry
+         """
+         if attempt >= self.max_retries:
+             return False
+
+         # Check exception type
+         if isinstance(exception, self.retryable_exceptions):
+             return True
+
+         # Check error message
+         error_msg = str(exception).lower()
+         for pattern in self.retryable_messages:
+             if pattern.lower() in error_msg:
+                 return True
+
+         return False
+
+     def attempts(self):
+         """
+         Generator yielding attempt numbers.
+
+         Usage:
+             for attempt in policy.attempts():
+                 try:
+                     return make_request()
+                 except Exception as e:
+                     if not policy.should_retry(e, attempt):
+                         raise
+                     time.sleep(policy.get_delay(attempt))
+         """
+         for attempt in range(self.max_retries + 1):
+             yield attempt
+
+     def wrap(self, func: F) -> F:
+         """
+         Decorator to apply retry policy to a function.
+
+         Args:
+             func: Function to wrap
+
+         Returns:
+             Wrapped function with retry logic
+         """
+         @wraps(func)
+         def wrapper(*args, **kwargs):
+             last_exception = None
+
+             for attempt in self.attempts():
+                 try:
+                     return func(*args, **kwargs)
+                 except Exception as e:
+                     last_exception = e
+
+                     if not self.should_retry(e, attempt):
+                         raise
+
+                     delay = self.get_delay(attempt)
+                     logger.warning(
+                         f"Retry attempt {attempt + 1}/{self.max_retries} "
+                         f"after {delay:.1f}s: {e}"
+                     )
+                     time.sleep(delay)
+
+             raise last_exception  # type: ignore
+
+         return wrapper  # type: ignore
+
+
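The backoff arithmetic above is worth making concrete: with base_delay=1.0 and the EXPONENTIAL strategy, get_delay returns 1s, 2s, 4s for attempts 0-2; EXPONENTIAL_JITTER adds up to jitter_factor (10%) on top, and every delay is capped at max_delay. Below is a minimal, purely illustrative sketch of both the decorator and the manual loop from the docstring; flaky_call is a hypothetical stand-in for a real backend request, not part of the package.

import time

from parishad.models.reliability import RetryPolicy, RetryStrategy

policy = RetryPolicy(max_retries=2, strategy=RetryStrategy.EXPONENTIAL, base_delay=0.5)

def flaky_call() -> str:
    # Hypothetical stand-in that always fails with a retryable error.
    raise ConnectionError("temporarily unavailable")

# Decorator form: waits 0.5s, then 1.0s, then re-raises the ConnectionError.
guarded = policy.wrap(flaky_call)

# Manual form, mirroring the docstring:
try:
    for attempt in policy.attempts():
        try:
            answer = flaky_call()
            break
        except Exception as exc:
            if not policy.should_retry(exc, attempt):
                raise
            time.sleep(policy.get_delay(attempt))
except ConnectionError:
    print("all attempts failed")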
+ # =============================================================================
+ # Timeout Manager
+ # =============================================================================
+
+
+ class TimeoutError(Exception):
+     """Raised when an operation times out."""
+     pass
+
+
+ @dataclass
+ class TimeoutConfig:
+     """Timeout configuration."""
+     connect_timeout: float = 10.0  # Connection timeout
+     read_timeout: float = 60.0  # Read/response timeout
+     total_timeout: float = 120.0  # Total request timeout
+
+     def as_tuple(self) -> tuple[float, float]:
+         """Return as (connect, read) tuple for requests library."""
+         return (self.connect_timeout, self.read_timeout)
+
+
+ class TimeoutManager:
+     """
+     Manages request timeouts.
+
+     Provides context manager for enforcing timeouts on operations.
+
+     Usage:
+         manager = TimeoutManager(TimeoutConfig(total_timeout=30.0))
+
+         with manager.timeout():
+             result = slow_operation()
+     """
+
+     def __init__(self, config: Optional[TimeoutConfig] = None):
+         """Initialize with timeout configuration."""
+         self.config = config or TimeoutConfig()
+
+     def timeout(self, seconds: Optional[float] = None):
+         """
+         Context manager for timeout enforcement.
+
+         Note: This is a basic implementation. For true timeout enforcement
+         in synchronous code, consider using signals or threading.
+         """
+         timeout_seconds = seconds or self.config.total_timeout
+         return _TimeoutContext(timeout_seconds)
+
+     def with_timeout(self, func: Callable[..., T], *args, **kwargs) -> T:
+         """
+         Execute function with timeout.
+
+         Uses threading for timeout enforcement.
+         """
+         result: list[T] = []
+         exception: list[Exception] = []
+
+         def target():
+             try:
+                 result.append(func(*args, **kwargs))
+             except Exception as e:
+                 exception.append(e)
+
+         thread = threading.Thread(target=target)
+         thread.start()
+         thread.join(timeout=self.config.total_timeout)
+
+         if thread.is_alive():
+             # Thread is still running - timeout occurred
+             raise TimeoutError(
+                 f"Operation timed out after {self.config.total_timeout}s"
+             )
+
+         if exception:
+             raise exception[0]
+
+         return result[0]
+
+
+ class _TimeoutContext:
+     """Context manager for basic timeout tracking."""
+
+     def __init__(self, timeout: float):
+         self.timeout = timeout
+         self.start_time = 0.0
+
+     def __enter__(self):
+         self.start_time = time.time()
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         elapsed = time.time() - self.start_time
+         if elapsed > self.timeout:
+             logger.warning(f"Operation took {elapsed:.1f}s (timeout: {self.timeout}s)")
+         return False
+
+     @property
+     def remaining(self) -> float:
+         """Get remaining time."""
+         elapsed = time.time() - self.start_time
+         return max(0, self.timeout - elapsed)
+
+
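A short sketch of bounding a slow call with with_timeout; slow_operation is hypothetical. Two points follow directly from the code above: the module defines its own TimeoutError (an Exception subclass distinct from the builtin), so it must be imported to be caught, and the worker thread is not cancelled on timeout, it is merely abandoned.

import time

from parishad.models.reliability import TimeoutConfig, TimeoutError, TimeoutManager

manager = TimeoutManager(TimeoutConfig(total_timeout=2.0))

def slow_operation() -> str:
    # Hypothetical long-running call; sleeps well past the 2s budget.
    time.sleep(10)
    return "done"

try:
    value = manager.with_timeout(slow_operation)
except TimeoutError as exc:
    # Raised after ~2s; the sleeping worker thread keeps running in the background.
    print(f"gave up: {exc}")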
+ # =============================================================================
+ # Circuit Breaker
+ # =============================================================================
+
+
+ class CircuitState(Enum):
+     """Circuit breaker states."""
+     CLOSED = "closed"  # Normal operation
+     OPEN = "open"  # Failing fast
+     HALF_OPEN = "half_open"  # Testing recovery
+
+
+ @dataclass
+ class CircuitBreakerConfig:
+     """Circuit breaker configuration."""
+     failure_threshold: int = 5  # Failures before opening
+     success_threshold: int = 2  # Successes before closing
+     timeout: float = 30.0  # Seconds before half-open
+     half_open_max_calls: int = 3  # Max calls in half-open state
+
+
+ class CircuitBreaker:
+     """
+     Circuit breaker for fail-fast behavior.
+
+     When a backend fails repeatedly, the circuit opens and fails fast
+     instead of waiting for timeouts. After a cooldown, it tests the
+     backend again before fully recovering.
+
+     Usage:
+         breaker = CircuitBreaker()
+
+         @breaker.protect
+         def call_backend():
+             return backend.generate(prompt)
+
+         try:
+             result = call_backend()
+         except CircuitOpenError:
+             # Circuit is open, use fallback
+             result = fallback()
+     """
+
+     def __init__(self, config: Optional[CircuitBreakerConfig] = None):
+         """Initialize circuit breaker."""
+         self.config = config or CircuitBreakerConfig()
+
+         self._state = CircuitState.CLOSED
+         self._failure_count = 0
+         self._success_count = 0
+         self._last_failure_time = 0.0
+         self._half_open_calls = 0
+         self._lock = threading.Lock()
+
+     @property
+     def state(self) -> CircuitState:
+         """Get current circuit state."""
+         with self._lock:
+             # Check if we should transition from OPEN to HALF_OPEN
+             if self._state == CircuitState.OPEN:
+                 if time.time() - self._last_failure_time >= self.config.timeout:
+                     self._state = CircuitState.HALF_OPEN
+                     self._half_open_calls = 0
+                     self._success_count = 0
+                     logger.info("Circuit breaker entering half-open state")
+
+             return self._state
+
+     def record_success(self) -> None:
+         """Record a successful call."""
+         with self._lock:
+             if self._state == CircuitState.HALF_OPEN:
+                 self._success_count += 1
+                 if self._success_count >= self.config.success_threshold:
+                     self._state = CircuitState.CLOSED
+                     self._failure_count = 0
+                     logger.info("Circuit breaker closed after recovery")
+             else:
+                 self._failure_count = 0
+
+     def record_failure(self, exception: Exception) -> None:
+         """Record a failed call."""
+         with self._lock:
+             self._failure_count += 1
+             self._last_failure_time = time.time()
+
+             if self._state == CircuitState.HALF_OPEN:
+                 self._state = CircuitState.OPEN
+                 logger.warning(f"Circuit breaker re-opened: {exception}")
+
+             elif self._failure_count >= self.config.failure_threshold:
+                 self._state = CircuitState.OPEN
+                 logger.warning(
+                     f"Circuit breaker opened after {self._failure_count} failures"
+                 )
+
+     def allow_request(self) -> bool:
+         """Check if a request is allowed."""
+         state = self.state  # This may update state
+
+         if state == CircuitState.CLOSED:
+             return True
+
+         if state == CircuitState.OPEN:
+             return False
+
+         # HALF_OPEN - allow limited calls
+         with self._lock:
+             if self._half_open_calls < self.config.half_open_max_calls:
+                 self._half_open_calls += 1
+                 return True
+             return False
+
+     def protect(self, func: F) -> F:
+         """
+         Decorator to protect a function with circuit breaker.
+
+         Args:
+             func: Function to protect
+
+         Returns:
+             Protected function
+         """
+         @wraps(func)
+         def wrapper(*args, **kwargs):
+             if not self.allow_request():
+                 raise CircuitOpenError(
+                     f"Circuit breaker is {self.state.value}"
+                 )
+
+             try:
+                 result = func(*args, **kwargs)
+                 self.record_success()
+                 return result
+             except Exception as e:
+                 self.record_failure(e)
+                 raise
+
+         return wrapper  # type: ignore
+
+     def reset(self) -> None:
+         """Reset circuit breaker to closed state."""
+         with self._lock:
+             self._state = CircuitState.CLOSED
+             self._failure_count = 0
+             self._success_count = 0
+             self._half_open_calls = 0
+
+     def get_stats(self) -> dict:
+         """Get circuit breaker statistics."""
+         return {
+             "state": self.state.value,
+             "failure_count": self._failure_count,
+             "success_count": self._success_count,
+             "last_failure": self._last_failure_time,
+         }
+
+
+ class CircuitOpenError(Exception):
+     """Raised when circuit breaker is open."""
+     pass
+
+
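A rough sketch of the breaker's lifecycle with a hypothetical call_backend that always fails: with failure_threshold=3 the circuit opens on the third failure, subsequent calls fail fast with CircuitOpenError, and after the 10s cooldown the next call would run in the half-open state.

from parishad.models.reliability import (
    CircuitBreaker,
    CircuitBreakerConfig,
    CircuitOpenError,
)

breaker = CircuitBreaker(CircuitBreakerConfig(failure_threshold=3, timeout=10.0))

@breaker.protect
def call_backend() -> str:
    # Hypothetical backend call that is currently failing.
    raise ConnectionError("backend down")

for _ in range(5):
    try:
        call_backend()
    except CircuitOpenError:
        print("circuit open, failing fast")   # calls 4 and 5
    except ConnectionError:
        pass                                  # calls 1-3 reach the backend

print(breaker.get_stats())   # {'state': 'open', 'failure_count': 3, ...}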
+ # =============================================================================
+ # Fallback Chain
+ # =============================================================================
+
+
+ class FallbackChain:
+     """
+     Chain of backends with automatic fallback.
+
+     Tries backends in order until one succeeds. Useful for having
+     primary/secondary/tertiary backend configurations.
+
+     Usage:
+         chain = FallbackChain([
+             primary_backend,
+             secondary_backend,
+             stub_backend,
+         ])
+
+         result, backend_index = chain.generate(prompt)
+     """
+
+     def __init__(
+         self,
+         backends: list[ModelBackend],
+         circuit_breaker_enabled: bool = True,
+     ):
+         """
+         Initialize fallback chain.
+
+         Args:
+             backends: List of backends in priority order
+             circuit_breaker_enabled: Use circuit breakers per backend
+         """
+         self.backends = backends
+
+         self._circuit_breakers: dict[int, CircuitBreaker] = {}
+         if circuit_breaker_enabled:
+             for i in range(len(backends)):
+                 self._circuit_breakers[i] = CircuitBreaker()
+
+     def generate(
+         self,
+         prompt: str,
+         max_tokens: int = 1024,
+         temperature: float = 0.5,
+         **kwargs,
+     ) -> tuple[BackendResult, int]:
+         """
+         Generate using fallback chain.
+
+         Args:
+             prompt: Input prompt
+             max_tokens: Maximum tokens
+             temperature: Sampling temperature
+             **kwargs: Additional parameters
+
+         Returns:
+             Tuple of (BackendResult, backend_index)
+
+         Raises:
+             BackendError: If all backends fail
+         """
+         last_error: Optional[Exception] = None
+
+         for i, backend in enumerate(self.backends):
+             # Check circuit breaker
+             if i in self._circuit_breakers:
+                 if not self._circuit_breakers[i].allow_request():
+                     logger.debug(f"Skipping backend {i}: circuit open")
+                     continue
+
+             try:
+                 result = backend.generate(
+                     prompt=prompt,
+                     max_tokens=max_tokens,
+                     temperature=temperature,
+                     **kwargs,
+                 )
+
+                 if i in self._circuit_breakers:
+                     self._circuit_breakers[i].record_success()
+
+                 if i > 0:
+                     logger.info(f"Using fallback backend {i}")
+
+                 return result, i
+
+             except Exception as e:
+                 last_error = e
+                 logger.warning(f"Backend {i} failed: {e}")
+
+                 if i in self._circuit_breakers:
+                     self._circuit_breakers[i].record_failure(e)
+
+         raise BackendError(f"All backends failed. Last error: {last_error}")
+
+     def get_stats(self) -> dict:
+         """Get chain statistics."""
+         return {
+             "backends": len(self.backends),
+             "circuit_breakers": {
+                 i: cb.get_stats()
+                 for i, cb in self._circuit_breakers.items()
+             },
+         }
+
+
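A self-contained sketch of the chain; _EchoBackend is a hypothetical duck-typed stand-in, whereas real callers would pass the package's ModelBackend implementations (Ollama, llama.cpp, etc.). The second element of the returned tuple tells you which backend actually served the request.

from parishad.models.reliability import FallbackChain

class _EchoBackend:
    """Hypothetical stand-in exposing the generate() signature the chain expects."""

    def __init__(self, fail: bool) -> None:
        self.fail = fail

    def generate(self, prompt, max_tokens=1024, temperature=0.5, **kwargs):
        if self.fail:
            raise ConnectionError("primary unavailable")
        return f"echo: {prompt}"

chain = FallbackChain([_EchoBackend(fail=True), _EchoBackend(fail=False)])

result, backend_index = chain.generate("Summarize the design.", max_tokens=64)
print(backend_index)      # 1 -> the fallback handled the request
print(chain.get_stats())  # per-backend circuit-breaker state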
+ # =============================================================================
+ # Health Checker
+ # =============================================================================
+
+
+ @dataclass
+ class HealthStatus:
+     """Health status of a backend."""
+     healthy: bool
+     latency_ms: float
+     error: Optional[str] = None
+     checked_at: float = field(default_factory=time.time)
+
+
+ class HealthChecker:
+     """
+     Backend health monitoring.
+
+     Periodically checks backend health and tracks metrics.
+
+     Usage:
+         checker = HealthChecker(backend)
+
+         # One-time check
+         status = checker.check()
+
+         # Start background monitoring
+         checker.start_monitoring(interval=30)
+     """
+
+     def __init__(
+         self,
+         backend: ModelBackend,
+         test_prompt: str = "Hello",
+     ):
+         """
+         Initialize health checker.
+
+         Args:
+             backend: Backend to monitor
+             test_prompt: Prompt for health checks
+         """
+         self.backend = backend
+         self.test_prompt = test_prompt
+
+         self._history: list[HealthStatus] = []
+         self._max_history = 100
+         self._monitoring = False
+         self._monitor_thread: Optional[threading.Thread] = None
+
+     def check(self) -> HealthStatus:
+         """
+         Perform a health check.
+
+         Returns:
+             HealthStatus with results
+         """
+         start = time.time()
+
+         try:
+             result = self.backend.generate(
+                 prompt=self.test_prompt,
+                 max_tokens=5,
+                 temperature=0,
+             )
+
+             latency_ms = (time.time() - start) * 1000
+
+             status = HealthStatus(
+                 healthy=True,
+                 latency_ms=latency_ms,
+             )
+
+         except Exception as e:
+             latency_ms = (time.time() - start) * 1000
+
+             status = HealthStatus(
+                 healthy=False,
+                 latency_ms=latency_ms,
+                 error=str(e),
+             )
+
+         self._record(status)
+         return status
+
+     def _record(self, status: HealthStatus) -> None:
+         """Record health status."""
+         self._history.append(status)
+
+         # Trim history
+         if len(self._history) > self._max_history:
+             self._history = self._history[-self._max_history:]
+
+     def start_monitoring(self, interval: float = 30.0) -> None:
+         """Start background health monitoring."""
+         if self._monitoring:
+             return
+
+         self._monitoring = True
+
+         def monitor_loop():
+             while self._monitoring:
+                 try:
+                     self.check()
+                 except Exception as e:
+                     logger.error(f"Health check failed: {e}")
+
+                 time.sleep(interval)
+
+         self._monitor_thread = threading.Thread(target=monitor_loop, daemon=True)
+         self._monitor_thread.start()
+
+     def stop_monitoring(self) -> None:
+         """Stop background health monitoring."""
+         self._monitoring = False
+
+     @property
+     def is_healthy(self) -> bool:
+         """Check if backend is currently healthy."""
+         if not self._history:
+             return True  # Assume healthy if no data
+
+         # Check last 3 statuses
+         recent = self._history[-3:]
+         healthy_count = sum(1 for s in recent if s.healthy)
+
+         return healthy_count >= 2
+
+     @property
+     def avg_latency(self) -> float:
+         """Average latency over recent history."""
+         if not self._history:
+             return 0.0
+
+         recent = self._history[-10:]
+         return sum(s.latency_ms for s in recent) / len(recent)
+
+     def get_stats(self) -> dict:
+         """Get health statistics."""
+         if not self._history:
+             return {"status": "unknown", "checks": 0}
+
+         recent = self._history[-10:]
+
+         return {
+             "status": "healthy" if self.is_healthy else "unhealthy",
+             "checks": len(self._history),
+             "recent_healthy": sum(1 for s in recent if s.healthy),
+             "recent_total": len(recent),
+             "avg_latency_ms": self.avg_latency,
+             "last_check": self._history[-1].checked_at,
+             "last_error": next(
+                 (s.error for s in reversed(self._history) if s.error),
+                 None
+             ),
+         }
+
+
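A sketch of one-off and background checking; _StubBackend is hypothetical and simply answers every probe, while real use would pass the ModelBackend being monitored.

from parishad.models.reliability import HealthChecker

class _StubBackend:
    """Hypothetical backend that answers every health probe."""

    def generate(self, prompt, max_tokens=1024, temperature=0.5, **kwargs):
        return "ok"

checker = HealthChecker(_StubBackend())

status = checker.check()                  # single probe: a 5-token generation
print(status.healthy, f"{status.latency_ms:.1f} ms")

checker.start_monitoring(interval=30.0)   # daemon thread, one probe every 30s
checker.stop_monitoring()
print(checker.get_stats())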
+ # =============================================================================
+ # Resilient Backend Wrapper
+ # =============================================================================
+
+
+ class ResilientBackend:
+     """
+     Wrapper that adds all reliability features to a backend.
+
+     Combines retry, timeout, circuit breaker, and health checking.
+
+     Usage:
+         backend = LlamaCppBackend()
+         resilient = ResilientBackend(backend)
+
+         result = resilient.generate(prompt, max_tokens=100)
+     """
+
+     def __init__(
+         self,
+         backend: ModelBackend,
+         retry_policy: Optional[RetryPolicy] = None,
+         timeout_config: Optional[TimeoutConfig] = None,
+         circuit_config: Optional[CircuitBreakerConfig] = None,
+         enable_health_check: bool = True,
+     ):
+         """
+         Initialize resilient backend.
+
+         Args:
+             backend: Base backend to wrap
+             retry_policy: Retry configuration
+             timeout_config: Timeout configuration
+             circuit_config: Circuit breaker configuration
+             enable_health_check: Enable health monitoring
+         """
+         self.backend = backend
+         self.retry_policy = retry_policy or RetryPolicy()
+         self.timeout_manager = TimeoutManager(timeout_config)
+         self.circuit_breaker = CircuitBreaker(circuit_config)
+
+         self._health_checker: Optional[HealthChecker] = None
+         if enable_health_check:
+             self._health_checker = HealthChecker(backend)
+
+     @property
+     def name(self) -> str:
+         """Backend name."""
+         return f"resilient({self.backend.name})"
+
+     def load(self, config) -> None:
+         """Load model."""
+         self.backend.load(config)
+
+     def unload(self) -> None:
+         """Unload model."""
+         self.backend.unload()
+
+     def generate(
+         self,
+         prompt: str,
+         max_tokens: int = 1024,
+         temperature: float = 0.5,
+         **kwargs,
+     ) -> BackendResult:
+         """
+         Generate with all reliability features.
+
+         Args:
+             prompt: Input prompt
+             max_tokens: Maximum tokens
+             temperature: Sampling temperature
+             **kwargs: Additional parameters
+
+         Returns:
+             BackendResult from generation
+         """
+         # Check circuit breaker
+         if not self.circuit_breaker.allow_request():
+             raise CircuitOpenError("Circuit breaker is open")
+
+         last_error: Optional[Exception] = None
+
+         for attempt in self.retry_policy.attempts():
+             try:
+                 # Apply timeout
+                 with self.timeout_manager.timeout():
+                     result = self.backend.generate(
+                         prompt=prompt,
+                         max_tokens=max_tokens,
+                         temperature=temperature,
+                         **kwargs,
+                     )
+
+                 self.circuit_breaker.record_success()
+                 return result
+
+             except Exception as e:
+                 last_error = e
+
+                 if not self.retry_policy.should_retry(e, attempt):
+                     self.circuit_breaker.record_failure(e)
+                     raise
+
+                 delay = self.retry_policy.get_delay(attempt)
+                 logger.warning(f"Retry {attempt + 1}: {e}, waiting {delay:.1f}s")
+                 time.sleep(delay)
+
+         self.circuit_breaker.record_failure(last_error)  # type: ignore
+         raise last_error  # type: ignore
+
+     def get_stats(self) -> dict:
+         """Get reliability statistics."""
+         stats = {
+             "circuit_breaker": self.circuit_breaker.get_stats(),
+         }
+         if self._health_checker:
+             stats["health"] = self._health_checker.get_stats()
+         return stats
+
+
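Finally, a sketch of the combined wrapper with non-default settings; as elsewhere, _StubBackend is a hypothetical placeholder for a real backend such as the LlamaCppBackend mentioned in the docstring above.

from parishad.models.reliability import (
    CircuitBreakerConfig,
    ResilientBackend,
    RetryPolicy,
    RetryStrategy,
    TimeoutConfig,
)

class _StubBackend:
    """Hypothetical placeholder for a loaded ModelBackend."""

    name = "stub"

    def generate(self, prompt, max_tokens=1024, temperature=0.5, **kwargs):
        return "ok"

resilient = ResilientBackend(
    _StubBackend(),
    retry_policy=RetryPolicy(max_retries=2, strategy=RetryStrategy.EXPONENTIAL_JITTER),
    timeout_config=TimeoutConfig(total_timeout=90.0),
    circuit_config=CircuitBreakerConfig(failure_threshold=3),
)

result = resilient.generate("Hello", max_tokens=16)
print(resilient.get_stats())   # circuit-breaker state plus health-check history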
+ __all__ = [
+     # Retry
+     "RetryStrategy",
+     "RetryPolicy",
+     # Timeout
+     "TimeoutError",
+     "TimeoutConfig",
+     "TimeoutManager",
+     # Circuit breaker
+     "CircuitState",
+     "CircuitBreakerConfig",
+     "CircuitBreaker",
+     "CircuitOpenError",
+     # Fallback
+     "FallbackChain",
+     # Health check
+     "HealthStatus",
+     "HealthChecker",
+     # Combined
+     "ResilientBackend",
+ ]