puffinflow 2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. puffinflow/__init__.py +132 -0
  2. puffinflow/core/__init__.py +110 -0
  3. puffinflow/core/agent/__init__.py +320 -0
  4. puffinflow/core/agent/base.py +1635 -0
  5. puffinflow/core/agent/checkpoint.py +50 -0
  6. puffinflow/core/agent/context.py +521 -0
  7. puffinflow/core/agent/decorators/__init__.py +90 -0
  8. puffinflow/core/agent/decorators/builder.py +454 -0
  9. puffinflow/core/agent/decorators/flexible.py +714 -0
  10. puffinflow/core/agent/decorators/inspection.py +144 -0
  11. puffinflow/core/agent/dependencies.py +57 -0
  12. puffinflow/core/agent/scheduling/__init__.py +21 -0
  13. puffinflow/core/agent/scheduling/builder.py +160 -0
  14. puffinflow/core/agent/scheduling/exceptions.py +35 -0
  15. puffinflow/core/agent/scheduling/inputs.py +137 -0
  16. puffinflow/core/agent/scheduling/parser.py +209 -0
  17. puffinflow/core/agent/scheduling/scheduler.py +413 -0
  18. puffinflow/core/agent/state.py +141 -0
  19. puffinflow/core/config.py +62 -0
  20. puffinflow/core/coordination/__init__.py +137 -0
  21. puffinflow/core/coordination/agent_group.py +359 -0
  22. puffinflow/core/coordination/agent_pool.py +629 -0
  23. puffinflow/core/coordination/agent_team.py +577 -0
  24. puffinflow/core/coordination/coordinator.py +720 -0
  25. puffinflow/core/coordination/deadlock.py +1759 -0
  26. puffinflow/core/coordination/fluent_api.py +421 -0
  27. puffinflow/core/coordination/primitives.py +478 -0
  28. puffinflow/core/coordination/rate_limiter.py +520 -0
  29. puffinflow/core/observability/__init__.py +47 -0
  30. puffinflow/core/observability/agent.py +139 -0
  31. puffinflow/core/observability/alerting.py +73 -0
  32. puffinflow/core/observability/config.py +127 -0
  33. puffinflow/core/observability/context.py +88 -0
  34. puffinflow/core/observability/core.py +147 -0
  35. puffinflow/core/observability/decorators.py +105 -0
  36. puffinflow/core/observability/events.py +71 -0
  37. puffinflow/core/observability/interfaces.py +196 -0
  38. puffinflow/core/observability/metrics.py +137 -0
  39. puffinflow/core/observability/tracing.py +209 -0
  40. puffinflow/core/reliability/__init__.py +27 -0
  41. puffinflow/core/reliability/bulkhead.py +96 -0
  42. puffinflow/core/reliability/circuit_breaker.py +149 -0
  43. puffinflow/core/reliability/leak_detector.py +122 -0
  44. puffinflow/core/resources/__init__.py +77 -0
  45. puffinflow/core/resources/allocation.py +790 -0
  46. puffinflow/core/resources/pool.py +645 -0
  47. puffinflow/core/resources/quotas.py +567 -0
  48. puffinflow/core/resources/requirements.py +217 -0
  49. puffinflow/version.py +21 -0
  50. puffinflow-2.dev0.dist-info/METADATA +334 -0
  51. puffinflow-2.dev0.dist-info/RECORD +55 -0
  52. puffinflow-2.dev0.dist-info/WHEEL +5 -0
  53. puffinflow-2.dev0.dist-info/entry_points.txt +3 -0
  54. puffinflow-2.dev0.dist-info/licenses/LICENSE +21 -0
  55. puffinflow-2.dev0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,520 @@
1
+ """Rate limiting implementations."""
2
+
3
+ import asyncio
4
+ import math
5
+ import time
6
+ from collections import deque
7
+ from dataclasses import dataclass, field
8
+ from enum import Enum, auto
9
+ from typing import Any, Optional
10
+
11
+ import structlog
12
+
13
+ logger = structlog.get_logger(__name__)
14
+
15
+
16
class RateLimitStrategy(Enum):
    """Admission algorithms selectable through ``RateLimiter.strategy``."""

    # Values are spelled out explicitly; they are the same integers that
    # ``auto()`` assigns (1, 2, 3, ... in declaration order).
    TOKEN_BUCKET = 1
    LEAKY_BUCKET = 2
    FIXED_WINDOW = 3
    SLIDING_WINDOW = 4
23
+
24
+
25
@dataclass
class RateLimiter:
    """Asynchronous rate limiter supporting several admission strategies.

    The meaning of ``max_rate`` depends on ``strategy``:

    * ``TOKEN_BUCKET`` - tokens refill at ``max_rate`` per second up to
      ``burst_size``; every admitted request consumes one token.
    * ``LEAKY_BUCKET``, ``FIXED_WINDOW``, ``SLIDING_WINDOW`` - a request is
      admitted while fewer than ``max_rate`` requests have been admitted in
      the relevant window of ``window_size`` seconds.

    All timing uses ``time.time()``.
    """

    max_rate: float
    burst_size: int = 1  # Token-bucket capacity and initial token count
    strategy: RateLimitStrategy = RateLimitStrategy.TOKEN_BUCKET
    window_size: float = 1.0  # For windowed strategies

    _tokens: float = field(init=False)
    _last_update: float = field(init=False)
    # Admission timestamp -> number of requests admitted at that timestamp.
    _window_requests: dict[float, int] = field(default_factory=dict)
    _lock: asyncio.Lock = field(default_factory=asyncio.Lock)

    def __post_init__(self) -> None:
        """Start with a full bucket and the refill clock set to now."""
        self._tokens = self.burst_size
        self._last_update = time.time()

    def _prune_window(self, cutoff: float) -> None:
        """Forget admissions recorded at or before ``cutoff``."""
        self._window_requests = {
            ts: count for ts, count in self._window_requests.items() if ts > cutoff
        }

    def _count_since(self, start: float) -> int:
        """Return how many requests were admitted at or after ``start``."""
        return sum(
            count for ts, count in self._window_requests.items() if ts >= start
        )

    async def acquire(self) -> bool:
        """Try to admit one request without waiting.

        Returns:
            ``True`` if the request was admitted and counted against the
            limit, ``False`` if the limit is currently exhausted.
        """
        async with self._lock:
            now = time.time()

            if self.strategy == RateLimitStrategy.TOKEN_BUCKET:
                # Refill for the elapsed time, capped at the bucket capacity.
                time_passed = now - self._last_update
                self._tokens = min(
                    self.burst_size, self._tokens + time_passed * self.max_rate
                )
                self._last_update = now

                if self._tokens >= 1:
                    self._tokens -= 1
                    return True
                return False

            if self.strategy == RateLimitStrategy.LEAKY_BUCKET:
                # Only the trailing window is kept, so everything left counts.
                self._prune_window(now - self.window_size)
                in_window = sum(self._window_requests.values())
            elif self.strategy == RateLimitStrategy.FIXED_WINDOW:
                # Keep two windows of history to bound memory use.
                self._prune_window(now - (self.window_size * 2))
                window_start = math.floor(now / self.window_size) * self.window_size
                in_window = self._count_since(window_start)
            elif self.strategy == RateLimitStrategy.SLIDING_WINDOW:
                self._prune_window(now - (self.window_size * 2))
                in_window = self._count_since(now - self.window_size)
            else:
                return False

            if in_window < self.max_rate:
                # Increment rather than overwrite: two requests can share the
                # same ``time.time()`` value and must both be counted.
                self._window_requests[now] = self._window_requests.get(now, 0) + 1
                return True

            return False

    async def wait_for_token(self, timeout: Optional[float] = None) -> bool:
        """Retry :meth:`acquire` until it succeeds or ``timeout`` elapses.

        Args:
            timeout: Maximum number of seconds to wait; ``None`` waits
                indefinitely.

        Returns:
            ``True`` once a request is admitted, ``False`` on timeout.
        """
        start_time = time.time()

        while True:
            if await self.acquire():
                return True

            if timeout is not None and (time.time() - start_time) >= timeout:
                return False

            wait_time = self._calculate_wait_time()

            if timeout is not None:
                remaining_timeout = timeout - (time.time() - start_time)
                if remaining_timeout <= 0:
                    return False
                wait_time = min(wait_time, remaining_timeout)

            # Always yield to the event loop, even when no wait is needed,
            # so the retry loop cannot spin.
            await asyncio.sleep(wait_time if wait_time > 0 else 0.001)

    def _calculate_wait_time(self) -> float:
        """Estimate how many seconds to sleep before retrying :meth:`acquire`.

        Returns a 0.1 second polling delay whenever no better estimate is
        available (leaky bucket, a non-positive ``max_rate``, or a limiter
        that is not currently saturated).
        """
        now = time.time()

        if self.strategy == RateLimitStrategy.TOKEN_BUCKET:
            # A non-positive rate never refills; dividing by it would raise or
            # produce a meaningless wait, so fall through to polling instead.
            if self._tokens < 1 and self.max_rate > 0:
                return (1 - self._tokens) / self.max_rate

        elif self.strategy == RateLimitStrategy.FIXED_WINDOW:
            current_window_start = math.floor(now / self.window_size) * self.window_size
            next_window_start = current_window_start + self.window_size
            # Small buffer so the retry lands inside the next window.
            return max(0, next_window_start - now) + 0.001

        elif self.strategy == RateLimitStrategy.SLIDING_WINDOW:
            window_start = now - self.window_size
            in_window = {
                ts: count
                for ts, count in self._window_requests.items()
                if ts >= window_start
            }
            # Compare the number of requests (not distinct timestamps) with
            # the limit; an empty window has nothing to wait for.
            if in_window and sum(in_window.values()) >= self.max_rate:
                # Space frees up when the oldest admission leaves the window.
                oldest_request_ts = min(in_window)
                time_to_wait = (oldest_request_ts + self.window_size) - now
                return max(0, time_to_wait) + 0.001

        # For Leaky Bucket or other cases, a small polling delay is the simplest approach
        return 0.1

    def get_stats(self) -> dict[str, Any]:
        """Return a snapshot of the limiter's configuration and counters."""
        return {
            "strategy": self.strategy.name,
            "max_rate": self.max_rate,
            "burst_size": self.burst_size,
            "current_tokens": getattr(self, "_tokens", 0),
            "window_requests": len(self._window_requests),
        }
189
+
190
+
191
+ # Specialized rate limiter implementations
192
class TokenBucket(RateLimiter):
    """Token bucket limiter that also supports consuming several tokens at once.

    Args:
        rate: Refill rate in tokens per second.
        capacity: Maximum number of tokens the bucket can hold.
        initial_tokens: Starting token count; defaults to a full bucket.
    """

    def __init__(
        self, rate: float, capacity: int, initial_tokens: Optional[float] = None
    ):
        super().__init__(
            max_rate=rate, burst_size=capacity, strategy=RateLimitStrategy.TOKEN_BUCKET
        )
        if initial_tokens is not None:
            self._tokens = initial_tokens

    def _refill(self) -> None:
        """Credit tokens for the time elapsed since the last refill, capped at capacity."""
        now = time.time()
        time_passed = now - self._last_update
        self._tokens = min(self.burst_size, self._tokens + time_passed * self.max_rate)
        self._last_update = now

    @property
    def tokens(self) -> float:
        """Current token count, refreshed for elapsed time before it is returned."""
        self._refill()
        return self._tokens

    @property
    def capacity(self) -> int:
        """Maximum number of tokens the bucket can hold."""
        return self.burst_size

    async def consume(self, tokens: int = 1) -> bool:
        """Atomically take ``tokens`` tokens if that many are available.

        Args:
            tokens: Number of tokens to take; must not be negative.

        Returns:
            ``True`` if the tokens were taken, ``False`` if the bucket did not
            hold enough (in which case nothing is taken).

        Raises:
            ValueError: If ``tokens`` is negative. A negative amount would
                otherwise pass the availability check and *add* tokens,
                pushing the bucket above its capacity.
        """
        if tokens < 0:
            raise ValueError(f"tokens must be non-negative, got {tokens}")

        async with self._lock:
            self._refill()

            if self._tokens >= tokens:
                self._tokens -= tokens
                return True
            return False
234
+
235
+
236
class LeakyBucket(RateLimiter):
    """Leaky bucket limiter.

    Admitted requests are queued in a bucket of ``capacity`` slots, and slots
    are freed ("leak") at ``rate`` per second. A request is rejected when the
    bucket is full.

    Args:
        rate: Number of slots freed per second.
        capacity: Maximum number of requests held in the bucket.
    """

    def __init__(self, rate: float, capacity: int):
        super().__init__(
            max_rate=rate, burst_size=capacity, strategy=RateLimitStrategy.LEAKY_BUCKET
        )
        self._bucket: deque = deque(maxlen=capacity)
        self._last_leak = time.time()

    async def acquire(self) -> bool:
        """Add one request to the bucket if it has a free slot.

        Returns:
            ``True`` if the request was queued, ``False`` if the bucket is full.
        """
        async with self._lock:
            now = time.time()

            # Free the slots that have drained since the last call.
            self._leak(now)

            if len(self._bucket) < self.burst_size:
                self._bucket.append(now)
                return True

            return False

    def _leak(self, now: float) -> None:
        """Remove the requests that have drained out of the bucket by ``now``."""
        time_passed = now - self._last_leak
        leak_count = int(time_passed * self.max_rate)

        if leak_count <= 0:
            # Not enough time for a whole leak yet. Leave the clock untouched
            # so the partial progress keeps accumulating across calls.
            return

        for _ in range(min(leak_count, len(self._bucket))):
            self._bucket.popleft()

        if self._bucket:
            # Advance the clock by the whole leaks performed only, so the
            # fractional remainder counts towards the next leak.
            self._last_leak += leak_count / self.max_rate
        else:
            # An empty bucket has nothing left to drain; restart the clock.
            self._last_leak = now
272
+
273
+
274
class SlidingWindow(RateLimiter):
    """Sliding window limiter backed by a log of admission timestamps.

    Args:
        rate: Maximum number of requests admitted per window.
        window_size: Window length in seconds.
    """

    def __init__(self, rate: float, window_size: float = 60.0):
        super().__init__(
            max_rate=rate,
            strategy=RateLimitStrategy.SLIDING_WINDOW,
            window_size=window_size,
        )
        # Admission timestamps, oldest first.
        self._request_log: deque = deque()

    async def acquire(self) -> bool:
        """Admit the request if the trailing window still has room.

        Returns:
            ``True`` if the request was admitted and logged, else ``False``.
        """
        async with self._lock:
            now = time.time()
            window_start = now - self.window_size

            # Drop admissions that have slid out of the window.
            while self._request_log and self._request_log[0] < window_start:
                self._request_log.popleft()

            # max_rate is requests per window, not per second
            if len(self._request_log) < self.max_rate:
                self._request_log.append(now)
                return True

            return False

    def _calculate_wait_time(self) -> float:
        """Estimate the wait until the oldest logged request leaves the window.

        The inherited implementation inspects ``_window_requests``, which this
        class never fills, so it could only ever return the polling delay.
        """
        if self._request_log and len(self._request_log) >= self.max_rate:
            expires_in = (self._request_log[0] + self.window_size) - time.time()
            # Small buffer so the retry happens after the entry has expired.
            return max(0, expires_in) + 0.001
        return 0.1

    def get_stats(self) -> dict[str, Any]:
        """Return limiter statistics, counting the entries held in the request log."""
        stats = super().get_stats()
        # The base class counts ``_window_requests``, which is unused here.
        stats["window_requests"] = len(self._request_log)
        return stats

    @property
    def current_rate(self) -> float:
        """Average admitted requests per second over the trailing window."""
        now = time.time()
        window_start = now - self.window_size

        recent_count = sum(1 for t in self._request_log if t >= window_start)

        return recent_count / self.window_size
312
+
313
+
314
class FixedWindow(RateLimiter):
    """Fixed window limiter that counts admissions per numbered window.

    Args:
        rate: Maximum number of requests admitted per window.
        window_size: Window length in seconds.
    """

    def __init__(self, rate: float, window_size: float = 60.0):
        super().__init__(
            max_rate=rate,
            strategy=RateLimitStrategy.FIXED_WINDOW,
            window_size=window_size,
        )
        # Window index -> number of requests admitted in that window.
        self._window_counts: dict[int, int] = {}

    async def acquire(self) -> bool:
        """Admit the request if the current window is not yet full.

        Returns:
            ``True`` if the request was admitted and counted, else ``False``.
        """
        async with self._lock:
            current_window = int(time.time() / self.window_size)
            used = self._window_counts.get(current_window, 0)

            # max_rate is requests per window
            if not (used < self.max_rate):
                return False

            self._window_counts[current_window] = used + 1

            # Keep only the current and the previous window.
            stale_before = current_window - 2
            self._window_counts = {
                window: admitted
                for window, admitted in self._window_counts.items()
                if window > stale_before
            }
            return True
347
+
348
+
349
class AdaptiveRateLimiter:
    """Rate limiter that tunes its own rate from the observed admit/reject ratio.

    Every ``_adjustment_interval`` seconds (10.0) the share of successful
    ``acquire`` calls since the last adjustment is used to raise or lower the
    rate, which is always kept within ``[min_rate, max_rate]``.

    Args:
        base_rate: Initial rate.
        min_rate: Lower bound for the adjusted rate.
        max_rate: Upper bound for the adjusted rate.
        strategy: Strategy used by the underlying :class:`RateLimiter`.
    """

    def __init__(
        self,
        base_rate: float,
        min_rate: float,
        max_rate: float,
        strategy: RateLimitStrategy = RateLimitStrategy.TOKEN_BUCKET,
    ):
        self.base_rate = base_rate
        self.min_rate = min_rate
        self.max_rate = max_rate
        self.strategy = strategy

        self._current_rate = base_rate
        self._limiter = self._create_limiter(base_rate)
        self._success_count = 0
        self._failure_count = 0
        self._last_adjustment = time.time()
        self._adjustment_interval = 10.0  # seconds
        self._lock = asyncio.Lock()

    def _create_limiter(self, rate: float) -> RateLimiter:
        """Build the underlying limiter for ``rate`` with a burst of about twice the rate."""
        burst_size = int(rate * 2)  # Allow some burst
        if rate > 0:
            # ``int(rate * 2)`` is 0 for rates below 0.5; a token bucket with
            # zero capacity can never hold a token and would reject forever.
            burst_size = max(1, burst_size)
        return RateLimiter(
            max_rate=rate,
            burst_size=burst_size,
            strategy=self.strategy,
        )

    async def acquire(self) -> bool:
        """Try to admit one request, adjusting the rate first when it is due.

        Returns:
            ``True`` if the underlying limiter admitted the request.
        """
        now = time.time()

        # Check and adjust inside one critical section so that concurrent
        # callers cannot both trigger an adjustment for the same interval.
        async with self._lock:
            if now - self._last_adjustment >= self._adjustment_interval:
                await self._adjust_rate()
                self._last_adjustment = now

        success = await self._limiter.acquire()

        # Record the outcome; it feeds the next adjustment.
        async with self._lock:
            if success:
                self._success_count += 1
            else:
                self._failure_count += 1

        return success

    async def _adjust_rate(self) -> None:
        """Recompute the rate from the success ratio and reset the counters.

        Does nothing when no calls were recorded since the last adjustment.
        The underlying limiter is replaced only when the rate changes.
        """
        total = self._success_count + self._failure_count
        if total == 0:
            return

        success_ratio = self._success_count / total

        if success_ratio > 0.95:
            # Very high success rate, can increase
            new_rate = min(self._current_rate * 1.1, self.max_rate)
        elif success_ratio > 0.8:
            # Good success rate, slight increase
            new_rate = min(self._current_rate * 1.05, self.max_rate)
        elif success_ratio < 0.5:
            # Low success rate, decrease significantly
            new_rate = max(self._current_rate * 0.8, self.min_rate)
        elif success_ratio < 0.7:
            # Moderate success rate, slight decrease
            new_rate = max(self._current_rate * 0.95, self.min_rate)
        else:
            # Between 0.7 and 0.8 inclusive: keep current rate
            new_rate = self._current_rate

        if new_rate != self._current_rate:
            logger.info(
                "rate_adjusted",
                old_rate=self._current_rate,
                new_rate=new_rate,
                success_ratio=success_ratio,
            )

            self._current_rate = new_rate
            self._limiter = self._create_limiter(new_rate)

        # Reset counters
        self._success_count = 0
        self._failure_count = 0

    @property
    def current_rate(self) -> float:
        """Rate currently applied by the underlying limiter."""
        return self._current_rate

    def get_stats(self) -> dict[str, Any]:
        """Return the adaptive state plus the underlying limiter's statistics."""
        return {
            "current_rate": self._current_rate,
            "base_rate": self.base_rate,
            "min_rate": self.min_rate,
            "max_rate": self.max_rate,
            "success_count": self._success_count,
            "failure_count": self._failure_count,
            "limiter_stats": self._limiter.get_stats(),
        }
464
+
465
+
466
+ # Composite rate limiter for multiple limits
467
class CompositeRateLimiter:
    """Combine several limiters; a request passes only if every one admits it.

    Args:
        limiters: Limiters to consult, in order.
    """

    def __init__(self, limiters: list[RateLimiter]):
        self.limiters = limiters

    async def acquire(self) -> bool:
        """Try to acquire from every limiter in order, stopping at the first refusal.

        Note:
            Limiters consulted before the refusing one have already counted
            the request; there is no rollback. Checking all limiters first and
            only then acquiring would be needed to avoid that.

        Returns:
            ``True`` if all limiters admitted the request (also when there are
            no limiters), ``False`` as soon as one refuses.
        """
        for limiter in self.limiters:
            if not await limiter.acquire():
                return False
        return True

    @staticmethod
    def _wait_hint(limiter: Any) -> float:
        """Return the limiter's own wait estimate, or a 0.1 s polling delay.

        Limiters that only provide ``acquire`` (no ``_calculate_wait_time``)
        are polled instead of raising ``AttributeError``.
        """
        calculate = getattr(limiter, "_calculate_wait_time", None)
        return calculate() if callable(calculate) else 0.1

    async def wait_for_all(self, timeout: Optional[float] = None) -> bool:
        """Retry :meth:`acquire` until it succeeds or ``timeout`` elapses.

        Args:
            timeout: Maximum number of seconds to wait; ``None`` waits
                indefinitely.

        Returns:
            ``True`` once every limiter admitted the request, ``False`` on
            timeout.
        """
        start_time = time.time()

        while True:
            if await self.acquire():
                return True

            if timeout is not None and (time.time() - start_time) >= timeout:
                return False

            # The slowest limiter determines how long to wait.
            max_wait_time = max(
                [0.0, *(self._wait_hint(limiter) for limiter in self.limiters)]
            )

            if timeout is not None:
                remaining_timeout = timeout - (time.time() - start_time)
                if remaining_timeout <= 0:
                    return False
                max_wait_time = min(max_wait_time, remaining_timeout)

            # Always sleep at least briefly so the retry loop cannot spin.
            await asyncio.sleep(max_wait_time if max_wait_time > 0 else 0.001)
@@ -0,0 +1,47 @@
1
"""PuffinFlow Observability System"""

# Import the submodules explicitly so that each one is bound as an attribute
# of this package; every submodule name is also listed in ``__all__`` below.

from . import (
    agent,
    alerting,
    config,
    context,
    core,
    decorators,
    events,
    interfaces,
    metrics,
    tracing,
)
from .agent import ObservableAgent
from .config import ObservabilityConfig
from .context import ObservableContext
from .core import ObservabilityManager, get_observability, setup_observability
from .decorators import observe, trace_state

# NOTE: a former cleanup step that deleted the ``interfaces``, ``tracing``,
# ``metrics``, ``alerting`` and ``events`` names is disabled. Those submodules
# stay bound here and are exported through ``__all__``.

# Public API: the re-exported classes/functions plus the submodules themselves.
__all__ = [
    "ObservabilityConfig",
    "ObservabilityManager",
    "ObservableAgent",
    "ObservableContext",
    "agent",
    "alerting",
    "config",
    "context",
    "core",
    "decorators",
    "events",
    "get_observability",
    "interfaces",
    "metrics",
    "observe",
    "setup_observability",
    "trace_state",
    "tracing",
]
@@ -0,0 +1,139 @@
1
+ import time
2
+ from typing import Any, Optional
3
+
4
+ from ..agent.base import Agent, AgentResult
5
+ from .context import ObservableContext
6
+ from .core import ObservabilityManager
7
+ from .interfaces import SpanType
8
+
9
+
10
class ObservableAgent(Agent):
    """Agent that wraps workflow and state execution in spans and duration metrics.

    Spans and metrics are only emitted when an observability manager with
    tracing enabled is supplied; otherwise the parent implementation runs
    unchanged.
    """

    def __init__(
        self,
        name: str,
        observability: Optional[ObservabilityManager] = None,
        **kwargs: Any,
    ) -> None:
        """Create the agent.

        Args:
            name: Agent name, passed to the parent constructor.
            observability: Observability manager; ``None`` disables
                instrumentation.
            **kwargs: ``workflow_id`` is consumed here (default
                ``workflow_<unix seconds>``); the rest goes to the parent
                constructor.
        """
        # Extract workflow_id before passing kwargs to parent
        self.workflow_id = kwargs.pop("workflow_id", f"workflow_{int(time.time())}")

        # Set observability BEFORE calling parent __init__
        self._observability = observability

        # Initialize cleanup handlers before calling parent __init__
        self._cleanup_handlers: list[Any] = []

        super().__init__(name, **kwargs)

        # Store any additional attributes that weren't handled by the parent
        for key, value in kwargs.items():
            if not hasattr(self, key):
                setattr(self, key, value)

        # Setup basic metrics
        if self._observability and self._observability.metrics:
            self.workflow_duration = self._observability.metrics.histogram(
                "workflow_duration_seconds",
                "Workflow execution duration",
                ["agent_name", "status"],
            )

            self.state_duration = self._observability.metrics.histogram(
                "state_execution_duration_seconds",
                "State execution duration",
                ["agent_name", "state_name", "status"],
            )

        # ``run`` and ``run_state`` read these attributes whenever tracing is
        # enabled, so they must exist even when no metrics backend is
        # configured; otherwise those methods raise AttributeError.
        for attr in ("workflow_duration", "state_duration"):
            if not hasattr(self, attr):
                setattr(self, attr, None)

    def _create_context(self, shared_state: dict[str, Any]) -> ObservableContext:
        """Create an observable context pre-populated with the agent name and workflow id."""
        context = ObservableContext(shared_state, self._observability)
        context.set_variable("agent_name", self.name)
        context.set_variable("workflow_id", self.workflow_id)
        return context

    async def run(self, timeout: Optional[float] = None) -> AgentResult:
        """Run the workflow, recording a workflow span and duration when tracing is on.

        Args:
            timeout: Passed through to the parent ``run``.

        Returns:
            The result of the parent ``run``.

        Raises:
            Exception: Any exception from the parent ``run`` is recorded on
                the span and re-raised.
        """
        workflow_start = time.time()

        if self._observability and self._observability.tracing:
            with self._observability.tracing.span(
                f"workflow.{self.name}",
                SpanType.WORKFLOW,
                agent_name=self.name,
                workflow_id=self.workflow_id,
            ) as span:
                try:
                    result = await super().run(timeout)

                    duration = time.time() - workflow_start
                    if span:
                        span.set_attribute("workflow.duration", duration)
                        span.set_status("ok")

                    if self.workflow_duration:
                        self.workflow_duration.record(
                            duration, agent_name=self.name, status="success"
                        )

                    return result

                except Exception as e:
                    duration = time.time() - workflow_start
                    if span:
                        span.record_exception(e)

                    if self.workflow_duration:
                        self.workflow_duration.record(
                            duration, agent_name=self.name, status="error"
                        )
                    raise
        else:
            return await super().run(timeout)

    async def run_state(self, state_name: str) -> None:
        """Run one state, recording a state span and duration when tracing is on.

        Args:
            state_name: Key of the state in ``self.states``.

        Raises:
            Exception: Any exception from the state is recorded on the span
                and re-raised.
        """
        state_start = time.time()

        if self._observability and self._observability.tracing:
            with self._observability.tracing.span(
                f"state.{state_name}",
                SpanType.STATE,
                agent_name=self.name,
                state_name=state_name,
            ) as span:
                try:
                    # NOTE(review): the traced path calls the state function
                    # directly instead of delegating to the parent
                    # ``run_state`` as the untraced path does - confirm that
                    # skipping the parent's handling is intended.
                    context = self._create_context(self.shared_state)
                    context.set_variable("current_state", state_name)

                    await self.states[state_name](context)

                    duration = time.time() - state_start
                    if span:
                        span.set_attribute("state.duration", duration)
                        span.set_status("ok")

                    if self.state_duration:
                        self.state_duration.record(
                            duration,
                            agent_name=self.name,
                            state_name=state_name,
                            status="success",
                        )

                except Exception as e:
                    duration = time.time() - state_start
                    if span:
                        span.record_exception(e)

                    if self.state_duration:
                        self.state_duration.record(
                            duration,
                            agent_name=self.name,
                            state_name=state_name,
                            status="error",
                        )
                    raise
        else:
            await super().run_state(state_name)