puffinflow 2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. puffinflow/__init__.py +132 -0
  2. puffinflow/core/__init__.py +110 -0
  3. puffinflow/core/agent/__init__.py +320 -0
  4. puffinflow/core/agent/base.py +1635 -0
  5. puffinflow/core/agent/checkpoint.py +50 -0
  6. puffinflow/core/agent/context.py +521 -0
  7. puffinflow/core/agent/decorators/__init__.py +90 -0
  8. puffinflow/core/agent/decorators/builder.py +454 -0
  9. puffinflow/core/agent/decorators/flexible.py +714 -0
  10. puffinflow/core/agent/decorators/inspection.py +144 -0
  11. puffinflow/core/agent/dependencies.py +57 -0
  12. puffinflow/core/agent/scheduling/__init__.py +21 -0
  13. puffinflow/core/agent/scheduling/builder.py +160 -0
  14. puffinflow/core/agent/scheduling/exceptions.py +35 -0
  15. puffinflow/core/agent/scheduling/inputs.py +137 -0
  16. puffinflow/core/agent/scheduling/parser.py +209 -0
  17. puffinflow/core/agent/scheduling/scheduler.py +413 -0
  18. puffinflow/core/agent/state.py +141 -0
  19. puffinflow/core/config.py +62 -0
  20. puffinflow/core/coordination/__init__.py +137 -0
  21. puffinflow/core/coordination/agent_group.py +359 -0
  22. puffinflow/core/coordination/agent_pool.py +629 -0
  23. puffinflow/core/coordination/agent_team.py +577 -0
  24. puffinflow/core/coordination/coordinator.py +720 -0
  25. puffinflow/core/coordination/deadlock.py +1759 -0
  26. puffinflow/core/coordination/fluent_api.py +421 -0
  27. puffinflow/core/coordination/primitives.py +478 -0
  28. puffinflow/core/coordination/rate_limiter.py +520 -0
  29. puffinflow/core/observability/__init__.py +47 -0
  30. puffinflow/core/observability/agent.py +139 -0
  31. puffinflow/core/observability/alerting.py +73 -0
  32. puffinflow/core/observability/config.py +127 -0
  33. puffinflow/core/observability/context.py +88 -0
  34. puffinflow/core/observability/core.py +147 -0
  35. puffinflow/core/observability/decorators.py +105 -0
  36. puffinflow/core/observability/events.py +71 -0
  37. puffinflow/core/observability/interfaces.py +196 -0
  38. puffinflow/core/observability/metrics.py +137 -0
  39. puffinflow/core/observability/tracing.py +209 -0
  40. puffinflow/core/reliability/__init__.py +27 -0
  41. puffinflow/core/reliability/bulkhead.py +96 -0
  42. puffinflow/core/reliability/circuit_breaker.py +149 -0
  43. puffinflow/core/reliability/leak_detector.py +122 -0
  44. puffinflow/core/resources/__init__.py +77 -0
  45. puffinflow/core/resources/allocation.py +790 -0
  46. puffinflow/core/resources/pool.py +645 -0
  47. puffinflow/core/resources/quotas.py +567 -0
  48. puffinflow/core/resources/requirements.py +217 -0
  49. puffinflow/version.py +21 -0
  50. puffinflow-2.dev0.dist-info/METADATA +334 -0
  51. puffinflow-2.dev0.dist-info/RECORD +55 -0
  52. puffinflow-2.dev0.dist-info/WHEEL +5 -0
  53. puffinflow-2.dev0.dist-info/entry_points.txt +3 -0
  54. puffinflow-2.dev0.dist-info/licenses/LICENSE +21 -0
  55. puffinflow-2.dev0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,196 @@
1
+ import uuid
2
+ from abc import ABC, abstractmethod
3
+ from collections.abc import Iterator
4
+ from contextlib import contextmanager
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime
7
+ from enum import Enum
8
+ from typing import Any, Optional
9
+
10
+
11
class SpanType(Enum):
    """Category assigned to a span so traces can be grouped by kind."""

    # Each member's value is the lowercase string form of its name.
    WORKFLOW = "workflow"
    STATE = "state"
    RESOURCE = "resource"
    BUSINESS = "business"
    SYSTEM = "system"
19
+
20
+
21
class MetricType(Enum):
    """Kinds of metric a metrics provider can create."""

    # Each member's value is the lowercase string form of its name.
    COUNTER = "counter"
    GAUGE = "gauge"
    HISTOGRAM = "histogram"
27
+
28
+
29
class AlertSeverity(Enum):
    """Severity levels that can be attached to an alert."""

    # Members are declared from least to most severe.
    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
    CRITICAL = "critical"
36
+
37
+
38
@dataclass
class SpanContext:
    """Identifiers that correlate a span with its trace, parent and metadata."""

    # trace_id and span_id default to freshly generated UUID4 strings.
    trace_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    span_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    # None when no parent span id was supplied.
    parent_span_id: Optional[str] = None
    # Optional correlation metadata, copied unchanged into child contexts.
    workflow_id: Optional[str] = None
    agent_name: Optional[str] = None
    state_name: Optional[str] = None
    user_id: Optional[str] = None
    session_id: Optional[str] = None

    def child_context(self) -> "SpanContext":
        """Return a new context for a span nested under this one.

        The child keeps this context's trace id and metadata fields, receives
        a newly generated span id, and records this context's span id as its
        parent.
        """
        inherited = {
            "trace_id": self.trace_id,
            "workflow_id": self.workflow_id,
            "agent_name": self.agent_name,
            "state_name": self.state_name,
            "user_id": self.user_id,
            "session_id": self.session_id,
        }
        return SpanContext(
            span_id=str(uuid.uuid4()),
            parent_span_id=self.span_id,
            **inherited,
        )
63
+
64
+
65
@dataclass
class ObservabilityEvent:
    """A single structured event handed to event processors."""

    # Required fields; callers must supply all five.
    timestamp: datetime
    event_type: str
    source: str
    level: str
    message: str
    # Free-form key/value payload; each instance gets its own empty dict.
    attributes: dict[str, Any] = field(default_factory=dict)
    # Optional tracing correlation for the event.
    span_context: Optional[SpanContext] = None
76
+
77
+
78
class Span(ABC):
    """Interface every span implementation must provide."""

    @abstractmethod
    def set_attribute(self, key: str, value: Any) -> None:
        """Attach the attribute ``key`` with ``value`` to this span."""

    @abstractmethod
    def set_status(self, status: str, description: Optional[str] = None) -> None:
        """Set this span's status, with an optional description."""

    @abstractmethod
    def add_event(self, name: str, attributes: Optional[dict[str, Any]] = None) -> None:
        """Add a named event, with optional attributes, to this span."""

    @abstractmethod
    def record_exception(self, exception: Exception) -> None:
        """Record ``exception`` on this span."""

    @abstractmethod
    def end(self) -> None:
        """End this span."""

    @property
    @abstractmethod
    def context(self) -> SpanContext:
        """Return the ``SpanContext`` associated with this span."""
105
+
106
+
107
class TracingProvider(ABC):
    """Interface for components that create spans and track the active one."""

    @abstractmethod
    def start_span(
        self,
        name: str,
        span_type: SpanType = SpanType.SYSTEM,
        parent: Optional[SpanContext] = None,
        **attributes: Any,
    ) -> Span:
        """Start and return a new span called ``name``."""

    @abstractmethod
    def get_current_span(self) -> Optional[Span]:
        """Return the currently active span, or ``None`` if there is none."""

    @contextmanager
    def span(
        self,
        name: str,
        span_type: SpanType = SpanType.SYSTEM,
        parent: Optional[SpanContext] = None,
        **attributes: Any,
    ) -> Iterator[Span]:
        """Run the ``with`` body inside a span created by ``start_span``.

        The span's status is set to ``"ok"`` when the body completes without
        raising. An ``Exception`` raised by the body is recorded on the span
        and re-raised. The span is ended in every case.
        """
        active_span = self.start_span(name, span_type, parent, **attributes)
        try:
            yield active_span
            # Reached only when the body finished without raising.
            active_span.set_status("ok")
        except Exception as exc:
            active_span.record_exception(exc)
            raise
        finally:
            active_span.end()
142
+
143
+
144
class Metric(ABC):
    """Interface for a single recordable metric."""

    @abstractmethod
    def record(self, value: float, **labels: Any) -> None:
        """Record ``value`` for the series identified by ``labels``."""
150
+
151
+
152
class MetricsProvider(ABC):
    """Interface for components that create metrics and export them."""

    @abstractmethod
    def counter(
        self, name: str, description: str = "", labels: Optional[list[str]] = None
    ) -> Metric:
        """Return a counter metric called ``name``."""

    @abstractmethod
    def gauge(
        self, name: str, description: str = "", labels: Optional[list[str]] = None
    ) -> Metric:
        """Return a gauge metric called ``name``."""

    @abstractmethod
    def histogram(
        self, name: str, description: str = "", labels: Optional[list[str]] = None
    ) -> Metric:
        """Return a histogram metric called ``name``."""

    @abstractmethod
    def export_metrics(self) -> str:
        """Return the collected metrics as Prometheus-format text."""
176
+
177
+
178
class AlertingProvider(ABC):
    """Interface for components that deliver alerts."""

    @abstractmethod
    async def send_alert(
        self,
        message: str,
        severity: AlertSeverity,
        attributes: Optional[dict[str, Any]] = None,
    ) -> None:
        """Send an alert carrying ``message`` at the given ``severity``."""
189
+
190
+
191
class EventProcessor(ABC):
    """Interface for components that consume observability events."""

    @abstractmethod
    async def process_event(self, event: ObservabilityEvent) -> None:
        """Handle a single ``ObservabilityEvent``."""
@@ -0,0 +1,137 @@
1
+ import threading
2
+ from typing import Any, Optional
3
+
4
+ from prometheus_client import CollectorRegistry, generate_latest
5
+ from prometheus_client import Counter as PrometheusCounter
6
+ from prometheus_client import Gauge as PrometheusGauge
7
+ from prometheus_client import Histogram as PrometheusHistogram
8
+
9
+ from .config import MetricsConfig
10
+ from .interfaces import Metric, MetricsProvider, MetricType
11
+
12
+
13
class PrometheusMetric(Metric):
    """Adapter that records values on a wrapped ``prometheus_client`` metric.

    Cardinality protection limits the number of *distinct label sets* this
    metric will record. Values for label sets that have already been seen are
    always recorded; only a brand-new label set arriving after the limit is
    reached is dropped.
    """

    def __init__(
        self, prometheus_metric: Any, metric_type: MetricType, cardinality_limit: int
    ) -> None:
        """Wrap ``prometheus_metric``.

        Args:
            prometheus_metric: The underlying counter, gauge or histogram.
            metric_type: Selects which operation ``record`` performs
                (``inc``, ``set`` or ``observe``).
            cardinality_limit: Maximum number of distinct label sets to accept.
        """
        self._prometheus_metric = prometheus_metric
        self._metric_type = metric_type
        self._cardinality_limit = cardinality_limit
        # Distinct label sets accepted so far, each stored as a sorted tuple
        # of (label name, label value) pairs.
        self._seen_series: set[tuple[tuple[str, str], ...]] = set()
        # Kept as a plain attribute for existing readers; always equals
        # len(self._seen_series).
        self._series_count = 0
        self._lock = threading.Lock()

    def record(self, value: float, **labels: Any) -> None:
        """Record ``value`` for the series identified by ``labels``.

        Label values are converted to strings and labels whose value is
        ``None`` are omitted. Failures raised by the underlying metric are
        reported on stdout and swallowed so that metrics can never break the
        application.
        """
        str_labels = {k: str(v) for k, v in labels.items() if v is not None}
        series_key = tuple(sorted(str_labels.items()))

        # Reserve a slot for a new series under the lock so concurrent callers
        # cannot push the number of series past the limit.
        with self._lock:
            is_new_series = series_key not in self._seen_series
            if is_new_series:
                if len(self._seen_series) >= self._cardinality_limit:
                    return  # Skip to prevent memory issues
                self._seen_series.add(series_key)
                self._series_count = len(self._seen_series)

        try:
            target = (
                self._prometheus_metric.labels(**str_labels)
                if str_labels
                else self._prometheus_metric
            )
            if self._metric_type == MetricType.COUNTER:
                target.inc(value)
            elif self._metric_type == MetricType.GAUGE:
                target.set(value)
            elif self._metric_type == MetricType.HISTOGRAM:
                target.observe(value)
        except Exception as e:
            # A series that failed on its first recording does not count
            # against the limit.
            if is_new_series:
                with self._lock:
                    self._seen_series.discard(series_key)
                    self._series_count = len(self._seen_series)
            # Log error but don't fail the application
            print(f"Failed to record metric: {e}")
56
+
57
+
58
class PrometheusMetricsProvider(MetricsProvider):
    """Metrics provider backed by its own ``prometheus_client`` registry."""

    def __init__(self, config: MetricsConfig):
        """Create an empty registry and metric cache for ``config``."""
        self.config = config
        self._registry = CollectorRegistry()
        self._metrics_cache: dict[str, Metric] = {}
        self._lock = threading.Lock()

    def counter(
        self, name: str, description: str = "", labels: Optional[list[str]] = None
    ) -> Metric:
        """Return the counter called ``name``, creating it on first use."""
        label_names = labels or []
        return self._get_or_create_metric(
            name, MetricType.COUNTER, description, label_names
        )

    def gauge(
        self, name: str, description: str = "", labels: Optional[list[str]] = None
    ) -> Metric:
        """Return the gauge called ``name``, creating it on first use."""
        label_names = labels or []
        return self._get_or_create_metric(
            name, MetricType.GAUGE, description, label_names
        )

    def histogram(
        self, name: str, description: str = "", labels: Optional[list[str]] = None
    ) -> Metric:
        """Return the histogram called ``name``, creating it on first use."""
        label_names = labels or []
        return self._get_or_create_metric(
            name, MetricType.HISTOGRAM, description, label_names
        )

    def _get_or_create_metric(
        self, name: str, metric_type: MetricType, description: str, labels: list[str]
    ) -> Metric:
        """Return the cached metric for ``name`` or build and cache a new one.

        The cache key is ``"<namespace>_<name>"``; the metric type is not part
        of the key, so a cached metric is returned whatever type is requested.

        Raises:
            ValueError: If ``metric_type`` is not counter, gauge or histogram.
        """
        full_name = f"{self.config.namespace}_{name}"

        with self._lock:
            if full_name in self._metrics_cache:
                return self._metrics_cache[full_name]

            # Map each supported type to the prometheus_client constructor.
            constructors: dict[MetricType, Any] = {
                MetricType.COUNTER: PrometheusCounter,
                MetricType.GAUGE: PrometheusGauge,
                MetricType.HISTOGRAM: PrometheusHistogram,
            }
            constructor = constructors.get(metric_type)
            if constructor is None:
                raise ValueError(f"Unsupported metric type: {metric_type}")

            backing_metric: Any = constructor(
                full_name,
                description,
                labelnames=labels or [],
                registry=self._registry,
            )

            wrapped = PrometheusMetric(
                backing_metric, metric_type, self.config.cardinality_limit
            )
            self._metrics_cache[full_name] = wrapped
            return wrapped

    def export_metrics(self) -> str:
        """Return this provider's registry rendered as UTF-8 decoded text."""
        payload: bytes = generate_latest(self._registry)
        return payload.decode("utf-8")
@@ -0,0 +1,209 @@
1
+ import threading
2
+ import time
3
+ from collections.abc import Iterator
4
+ from contextlib import contextmanager
5
+ from typing import Any, Optional
6
+
7
+ try:
8
+ from opentelemetry import trace
9
+ from opentelemetry.exporter.jaeger.thrift import JaegerExporter
10
+ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
11
+ from opentelemetry.sdk.resources import Resource
12
+ from opentelemetry.sdk.trace import TracerProvider
13
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
14
+ from opentelemetry.trace import Status, StatusCode
15
+
16
+ _OPENTELEMETRY_AVAILABLE = True
17
+ except ImportError:
18
+ # Create mock classes for when OpenTelemetry is not available
19
+ trace = None
20
+ JaegerExporter = None
21
+ OTLPSpanExporter = None
22
+ Resource = None
23
+ TracerProvider = None
24
+ BatchSpanProcessor = None
25
+ ConsoleSpanExporter = None
26
+ Status = None
27
+ StatusCode = None
28
+ _OPENTELEMETRY_AVAILABLE = False
29
+
30
+ from .config import TracingConfig
31
+ from .interfaces import Span, SpanContext, SpanType, TracingProvider
32
+
33
+
34
class OpenTelemetrySpan(Span):
    """Span that forwards to an OpenTelemetry span when one is available.

    Every forwarding operation is a no-op when the OpenTelemetry import failed
    or when no underlying span was supplied.
    """

    def __init__(self, otel_span: Any, span_context: SpanContext):
        """Wrap ``otel_span`` and copy correlation fields from ``span_context``."""
        self._span = otel_span
        self._context = span_context
        self._start_time = time.time()

        if not (_OPENTELEMETRY_AVAILABLE and self._span):
            return

        # Only populated correlation fields become span attributes.
        correlation_fields = (
            ("workflow.id", span_context.workflow_id),
            ("agent.name", span_context.agent_name),
            ("state.name", span_context.state_name),
            ("user.id", span_context.user_id),
        )
        for attr_key, attr_value in correlation_fields:
            if attr_value:
                self._span.set_attribute(attr_key, attr_value)

    def set_attribute(self, key: str, value: Any) -> None:
        """Set ``key`` on the underlying span.

        Empty keys and ``None`` values are ignored. Dict and list values are
        converted with ``str()`` before being forwarded.
        """
        if not (_OPENTELEMETRY_AVAILABLE and self._span):
            return
        if not key or value is None:
            return
        forwarded = str(value) if isinstance(value, (dict, list)) else value
        self._span.set_attribute(key, forwarded)

    def set_status(self, status: str, description: Optional[str] = None) -> None:
        """Map ``status`` to an OpenTelemetry status code and apply it.

        ``"ok"``/``"success"`` map to ``StatusCode.OK`` and ``"error"``/
        ``"failed"`` to ``StatusCode.ERROR`` (case-insensitive). Any other
        value leaves the span's status untouched.
        """
        if not (_OPENTELEMETRY_AVAILABLE and self._span):
            return
        normalized = status.lower()
        if normalized in ("ok", "success"):
            code = StatusCode.OK
        elif normalized in ("error", "failed"):
            code = StatusCode.ERROR
        else:
            return
        self._span.set_status(Status(code, description))

    def add_event(self, name: str, attributes: Optional[dict[str, Any]] = None) -> None:
        """Add event ``name`` to the span, dropping attributes that are ``None``."""
        if not (_OPENTELEMETRY_AVAILABLE and self._span):
            return
        supplied = attributes or {}
        cleaned = {}
        for attr_key, attr_value in supplied.items():
            if attr_value is not None:
                cleaned[attr_key] = attr_value
        self._span.add_event(name, cleaned)

    def record_exception(self, exception: Exception) -> None:
        """Record ``exception`` on the span and mark the span as errored."""
        if not (_OPENTELEMETRY_AVAILABLE and self._span):
            return
        self._span.record_exception(exception)
        self.set_status("error", str(exception))

    def end(self) -> None:
        """Attach the elapsed time in milliseconds, then end the span."""
        elapsed_ms = (time.time() - self._start_time) * 1000
        self.set_attribute("span.duration_ms", elapsed_ms)
        if not (_OPENTELEMETRY_AVAILABLE and self._span):
            return
        self._span.end()

    @property
    def context(self) -> SpanContext:
        """Return the ``SpanContext`` supplied at construction."""
        return self._context
92
+
93
+
94
class OpenTelemetryTracingProvider(TracingProvider):
    """Tracing provider that creates spans through OpenTelemetry when installed.

    The currently active span is tracked per thread. When OpenTelemetry is not
    importable, spans are still created and tracked, but nothing is forwarded
    to an OpenTelemetry tracer.
    """

    def __init__(self, config: TracingConfig):
        """Store ``config`` and configure OpenTelemetry if it is available."""
        self.config = config
        # Thread-local holder for the active span (attribute: current_span).
        self._current_context = threading.local()
        self._tracer: Any = None
        if _OPENTELEMETRY_AVAILABLE:
            self._setup_tracing()

    def _setup_tracing(self) -> None:
        """Create the tracer provider, attach exporters and obtain a tracer.

        A ``BatchSpanProcessor`` is added for each exporter enabled in the
        configuration: OTLP, Jaeger and console.
        """
        if not _OPENTELEMETRY_AVAILABLE:
            return

        resource = Resource.create(
            {
                "service.name": self.config.service_name,
                "service.version": self.config.service_version,
            }
        )

        provider = TracerProvider(resource=resource)
        # NOTE(review): if a global tracer provider was already configured
        # elsewhere, this call may not replace it - confirm against the
        # OpenTelemetry API documentation.
        trace.set_tracer_provider(provider)

        # Setup exporters
        processors = []

        if self.config.otlp_endpoint:
            otlp_exporter = OTLPSpanExporter(endpoint=self.config.otlp_endpoint)
            processors.append(BatchSpanProcessor(otlp_exporter))

        if self.config.jaeger_endpoint:
            # Endpoint is "host" or "host:port"; the port defaults to 6831.
            endpoint_parts = self.config.jaeger_endpoint.split(":")
            jaeger_exporter = JaegerExporter(
                agent_host_name=endpoint_parts[0],
                agent_port=int(endpoint_parts[1]) if len(endpoint_parts) > 1 else 6831,
            )
            processors.append(BatchSpanProcessor(jaeger_exporter))

        if self.config.console_enabled:
            console_exporter = ConsoleSpanExporter()
            processors.append(BatchSpanProcessor(console_exporter))

        for processor in processors:
            provider.add_span_processor(processor)

        self._tracer = trace.get_tracer(
            instrumenting_module_name="puffinflow.observability",
            instrumenting_library_version="1.0.0",
        )

    def start_span(
        self,
        name: str,
        span_type: SpanType = SpanType.SYSTEM,
        parent: Optional[SpanContext] = None,
        **attributes: Any,
    ) -> Span:
        """Start a span and make it the current span for this thread.

        The span context is derived from ``parent`` when given, otherwise from
        the thread's current span, otherwise a fresh root context is created.
        The caller is responsible for ending the returned span; use ``span()``
        to have the previous current span restored automatically.
        """
        # Create span context
        if parent:
            span_context = parent.child_context()
        else:
            current_span = self.get_current_span()
            if current_span:
                span_context = current_span.context.child_context()
            else:
                span_context = SpanContext()

        # Start OpenTelemetry span if available
        otel_span = None
        if _OPENTELEMETRY_AVAILABLE and self._tracer:
            otel_span = self._tracer.start_span(name)

        # Create wrapper
        span = OpenTelemetrySpan(otel_span, span_context)

        # Set additional attributes
        span.set_attribute("span.type", span_type.value)
        for key, value in attributes.items():
            span.set_attribute(key, value)

        self._set_current_span(span)
        return span

    def get_current_span(self) -> Optional[Span]:
        """Return this thread's active span, or ``None`` if there is none."""
        return getattr(self._current_context, "current_span", None)

    def _set_current_span(self, span: Optional[Span]) -> None:
        """Set (or clear, with ``None``) this thread's active span."""
        self._current_context.current_span = span

    @contextmanager
    def span(
        self,
        name: str,
        span_type: SpanType = SpanType.SYSTEM,
        parent: Optional[SpanContext] = None,
        **attributes: Any,
    ) -> Iterator[Span]:
        """Run the ``with`` body inside a new span.

        The span's status is set to ``"ok"`` when the body completes without
        raising; an ``Exception`` from the body is recorded on the span and
        re-raised. On exit the span is ended and the span that was current
        before entry is made current again, so nested ``span()`` blocks
        unwind correctly and later siblings still see the outer span as their
        parent.
        """
        # Remember the enclosing span before start_span() replaces it.
        previous_span = self.get_current_span()
        span = self.start_span(name, span_type, parent, **attributes)
        try:
            yield span
            span.set_status("ok")
        except Exception as e:
            span.record_exception(e)
            raise
        finally:
            try:
                span.end()
            finally:
                # Restore even if end() raises, so the thread is never left
                # pointing at a finished span.
                self._set_current_span(previous_span)
@@ -0,0 +1,27 @@
1
+ """Reliability patterns for production workflows."""
2
+
3
+ # Import submodules for import path tests
4
+ from . import bulkhead, circuit_breaker, leak_detector
5
+ from .bulkhead import Bulkhead, BulkheadConfig, BulkheadFullError
6
+ from .circuit_breaker import (
7
+ CircuitBreaker,
8
+ CircuitBreakerConfig,
9
+ CircuitBreakerError,
10
+ CircuitState,
11
+ )
12
+ from .leak_detector import ResourceLeak, ResourceLeakDetector
13
+
14
# Public names exported by ``from <package> import *``.
__all__ = [
    # Bulkhead pattern
    "Bulkhead",
    "BulkheadConfig",
    "BulkheadFullError",
    # Circuit breaker pattern
    "CircuitBreaker",
    "CircuitBreakerConfig",
    "CircuitBreakerError",
    "CircuitState",
    # Resource leak detection
    "ResourceLeak",
    "ResourceLeakDetector",
    # Submodules, exported so they are reachable as package attributes
    "bulkhead",
    "circuit_breaker",
    "leak_detector",
]
@@ -0,0 +1,96 @@
1
+ """Bulkhead pattern for resource isolation."""
2
+
3
+ import asyncio
4
+ from collections.abc import AsyncIterator
5
+ from contextlib import asynccontextmanager
6
+ from dataclasses import dataclass
7
+ from typing import Any, Optional
8
+
9
+
10
@dataclass
class BulkheadConfig:
    """Settings for a single bulkhead."""

    # Identifier used in error messages and metrics.
    name: str
    # Initial value of the bulkhead's semaphore.
    max_concurrent: int
    # Admission limit checked before a caller is allowed to wait for a slot.
    max_queue_size: int = 100
    # Timeout passed to asyncio.wait_for while waiting for a slot.
    timeout: float = 30.0
16
+
17
+
18
class BulkheadFullError(Exception):
    """Signals that a bulkhead could not admit another caller."""
22
+
23
+
24
class Bulkhead:
    """Cap concurrent use of a resource to prevent cascading failures.

    Callers enter through ``isolate()``. At most ``config.max_concurrent``
    callers hold a slot at once. ``queue_size`` counts every caller currently
    inside ``isolate()`` - both those waiting for a slot and those holding
    one - and admission is refused once it reaches ``config.max_queue_size``.
    """

    def __init__(self, config: "BulkheadConfig"):
        """Create the semaphore and counters described by ``config``."""
        self.config = config
        self._semaphore = asyncio.Semaphore(config.max_concurrent)
        self._queue_size = 0
        # Tasks that currently hold a slot; maintained by isolate().
        self._active_tasks: set[asyncio.Task] = set()

    @asynccontextmanager
    async def isolate(self) -> AsyncIterator[None]:
        """Run the ``async with`` body while holding one bulkhead slot.

        Raises:
            BulkheadFullError: If the admission limit is already reached, or
                if no slot becomes free within ``config.timeout`` seconds.
        """
        # Check queue capacity
        if self._queue_size >= self.config.max_queue_size:
            raise BulkheadFullError(f"Bulkhead {self.config.name} queue full")

        self._queue_size += 1
        try:
            # Wait for semaphore with timeout
            try:
                await asyncio.wait_for(
                    self._semaphore.acquire(), timeout=self.config.timeout
                )
            except asyncio.TimeoutError as e:
                raise BulkheadFullError(
                    f"Bulkhead {self.config.name} timeout waiting for slot"
                ) from e

            # Register the running task so the "active_tasks" metric reflects
            # reality. If the same task is already registered (nested use),
            # the outer entry keeps ownership of the registration.
            task = asyncio.current_task()
            registered_here = task is not None and task not in self._active_tasks
            if registered_here:
                self._active_tasks.add(task)

            try:
                yield
            finally:
                if registered_here:
                    self._active_tasks.discard(task)
                self._semaphore.release()
        finally:
            self._queue_size -= 1

    def get_metrics(self) -> dict[str, Any]:
        """Return a snapshot of this bulkhead's configuration and load."""
        return {
            "name": self.config.name,
            "max_concurrent": self.config.max_concurrent,
            # asyncio.Semaphore exposes no public counter, so the private
            # value is read directly.
            "available_slots": self._semaphore._value,
            "queue_size": self._queue_size,
            "max_queue_size": self.config.max_queue_size,
            "active_tasks": len(self._active_tasks),
        }
69
+
70
+
71
+ # Global bulkhead registry
72
class BulkheadRegistry:
    """Name-indexed collection of ``Bulkhead`` instances."""

    def __init__(self) -> None:
        """Start with no registered bulkheads."""
        self._bulkheads: dict[str, Bulkhead] = {}

    def get_or_create(
        self, name: str, config: Optional[BulkheadConfig] = None
    ) -> Bulkhead:
        """Return the bulkhead registered under ``name``, creating it if absent.

        ``config`` is used only when a new bulkhead is created. When it is
        omitted, the new bulkhead gets a default configuration with
        ``max_concurrent=5``.
        """
        try:
            return self._bulkheads[name]
        except KeyError:
            pass

        effective_config = (
            config
            if config is not None
            else BulkheadConfig(name=name, max_concurrent=5)
        )
        created = Bulkhead(effective_config)
        self._bulkheads[name] = created
        return created

    def get_all_metrics(self) -> dict[str, dict[str, Any]]:
        """Return ``get_metrics()`` output for every bulkhead, keyed by name."""
        snapshot: dict[str, dict[str, Any]] = {}
        for registered_name, registered in self._bulkheads.items():
            snapshot[registered_name] = registered.get_metrics()
        return snapshot


# Module-level registry instance created at import time
bulkhead_registry = BulkheadRegistry()