puffinflow 2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. puffinflow/__init__.py +132 -0
  2. puffinflow/core/__init__.py +110 -0
  3. puffinflow/core/agent/__init__.py +320 -0
  4. puffinflow/core/agent/base.py +1635 -0
  5. puffinflow/core/agent/checkpoint.py +50 -0
  6. puffinflow/core/agent/context.py +521 -0
  7. puffinflow/core/agent/decorators/__init__.py +90 -0
  8. puffinflow/core/agent/decorators/builder.py +454 -0
  9. puffinflow/core/agent/decorators/flexible.py +714 -0
  10. puffinflow/core/agent/decorators/inspection.py +144 -0
  11. puffinflow/core/agent/dependencies.py +57 -0
  12. puffinflow/core/agent/scheduling/__init__.py +21 -0
  13. puffinflow/core/agent/scheduling/builder.py +160 -0
  14. puffinflow/core/agent/scheduling/exceptions.py +35 -0
  15. puffinflow/core/agent/scheduling/inputs.py +137 -0
  16. puffinflow/core/agent/scheduling/parser.py +209 -0
  17. puffinflow/core/agent/scheduling/scheduler.py +413 -0
  18. puffinflow/core/agent/state.py +141 -0
  19. puffinflow/core/config.py +62 -0
  20. puffinflow/core/coordination/__init__.py +137 -0
  21. puffinflow/core/coordination/agent_group.py +359 -0
  22. puffinflow/core/coordination/agent_pool.py +629 -0
  23. puffinflow/core/coordination/agent_team.py +577 -0
  24. puffinflow/core/coordination/coordinator.py +720 -0
  25. puffinflow/core/coordination/deadlock.py +1759 -0
  26. puffinflow/core/coordination/fluent_api.py +421 -0
  27. puffinflow/core/coordination/primitives.py +478 -0
  28. puffinflow/core/coordination/rate_limiter.py +520 -0
  29. puffinflow/core/observability/__init__.py +47 -0
  30. puffinflow/core/observability/agent.py +139 -0
  31. puffinflow/core/observability/alerting.py +73 -0
  32. puffinflow/core/observability/config.py +127 -0
  33. puffinflow/core/observability/context.py +88 -0
  34. puffinflow/core/observability/core.py +147 -0
  35. puffinflow/core/observability/decorators.py +105 -0
  36. puffinflow/core/observability/events.py +71 -0
  37. puffinflow/core/observability/interfaces.py +196 -0
  38. puffinflow/core/observability/metrics.py +137 -0
  39. puffinflow/core/observability/tracing.py +209 -0
  40. puffinflow/core/reliability/__init__.py +27 -0
  41. puffinflow/core/reliability/bulkhead.py +96 -0
  42. puffinflow/core/reliability/circuit_breaker.py +149 -0
  43. puffinflow/core/reliability/leak_detector.py +122 -0
  44. puffinflow/core/resources/__init__.py +77 -0
  45. puffinflow/core/resources/allocation.py +790 -0
  46. puffinflow/core/resources/pool.py +645 -0
  47. puffinflow/core/resources/quotas.py +567 -0
  48. puffinflow/core/resources/requirements.py +217 -0
  49. puffinflow/version.py +21 -0
  50. puffinflow-2.dev0.dist-info/METADATA +334 -0
  51. puffinflow-2.dev0.dist-info/RECORD +55 -0
  52. puffinflow-2.dev0.dist-info/WHEEL +5 -0
  53. puffinflow-2.dev0.dist-info/entry_points.txt +3 -0
  54. puffinflow-2.dev0.dist-info/licenses/LICENSE +21 -0
  55. puffinflow-2.dev0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,645 @@
1
+ """Resource pool implementation with advanced features.
2
+
3
+ Provides a comprehensive resource management system with leak detection,
4
+ quota enforcement, preemption capabilities, and detailed usage tracking.
5
+ Supports CPU, memory, I/O, network, and GPU resources.
6
+ """
7
+
8
+ import asyncio
9
+ import logging
10
+ import time
11
+ from collections import defaultdict
12
+ from dataclasses import dataclass
13
+ from typing import Any, Optional
14
+
15
+ # Import from the canonical source to ensure consistent enum instances
16
+ from .requirements import (
17
+ ResourceRequirements,
18
+ ResourceType,
19
+ get_resource_amount,
20
+ safe_check_resource_type,
21
+ )
22
+
23
+ # Import leak detector with fallback
24
# The pool reports allocations and releases to ``leak_detector``.  The real
# object is imported from the sibling ``reliability`` package; if that import
# fails, a stand-in exposing the same four methods is bound to the same name
# so the rest of this module can call it unconditionally.
try:
    from ..reliability.leak_detector import leak_detector
except ImportError:

    class MockLeakDetector:
        """No-op replacement used when the leak detector cannot be imported."""

        def track_allocation(
            self, state_name: Any, agent_name: Any, resources: Any
        ) -> None:
            """Accept an allocation record and discard it."""
            return None

        def track_release(self, state_name: Any, agent_name: Any) -> None:
            """Accept a release record and discard it."""
            return None

        def detect_leaks(self) -> list[Any]:
            """Report no leaks: nothing is ever tracked."""
            return []

        def get_metrics(self) -> dict[str, Any]:
            """Return a marker dict identifying this object as the mock."""
            return {"leak_detection": "mock"}

    leak_detector = MockLeakDetector()  # type: ignore

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
46
+
47
+
48
class ResourceAllocationError(Exception):
    """Root of the exception hierarchy raised for resource allocation failures.

    The overflow and quota errors defined in this module derive from it, so
    catching this type catches both.
    """
52
+
53
+
54
class ResourceOverflowError(ResourceAllocationError):
    """Signals a request that would exceed system limits.

    ``ResourcePool`` raises it when a single request asks for more of a
    resource than the pool's total capacity for that resource.
    """
58
+
59
+
60
class ResourceQuotaExceededError(ResourceAllocationError):
    """Signals that a state/agent went over the resource quota assigned to it.

    ``ResourcePool.acquire`` raises it when the quota check for the
    requesting state fails.
    """
64
+
65
+
66
@dataclass
class ResourceUsageStats:
    """Per-resource-type counters maintained by ``ResourcePool``.

    Every field has a default, so ``ResourceUsageStats()`` is an all-zero
    record with no allocation timestamp.
    """

    # Highest value ``current_usage`` has reached.
    peak_usage: float = 0.0
    # Amount currently held, summed over all states.
    current_usage: float = 0.0
    # Count of successful allocations.
    total_allocations: int = 0
    # Count of failed allocation attempts.
    failed_allocations: int = 0
    # ``time.time()`` of the latest successful allocation; None until one occurs.
    last_allocation_time: Optional[float] = None
    # Seconds spent inside ``acquire`` before allocation, accumulated.
    total_wait_time: float = 0.0
76
+
77
+
78
class ResourcePool:
    """Asyncio resource pool with optional quotas, preemption and leak tracking.

    The pool tracks five resource kinds (CPU, memory, I/O, network, GPU).
    A state reserves capacity with :meth:`acquire` and hands it back with
    :meth:`release`; callers that cannot be served immediately wait on an
    ``asyncio.Condition`` until capacity is returned.  Holdings are kept per
    state name and are cumulative: repeated acquisitions by the same state
    add up and are all returned by a single :meth:`release`.
    """

    def __init__(
        self,
        total_cpu: float = 4.0,
        total_memory: float = 1024.0,
        total_io: float = 100.0,
        total_network: float = 100.0,
        total_gpu: float = 0.0,
        enable_quotas: bool = False,
        enable_preemption: bool = False,
        enable_leak_detection: bool = True,
    ):
        """Initialize the pool with its capacities and feature switches.

        Args:
            total_cpu: Total CPU capacity.
            total_memory: Total memory capacity.
            total_io: Total I/O capacity.
            total_network: Total network capacity.
            total_gpu: Total GPU capacity.
            enable_quotas: Allow per-state quotas (see :meth:`set_quota`).
            enable_preemption: Allow :meth:`acquire` to evict other states.
            enable_leak_detection: Report allocations and releases to the
                module-level ``leak_detector``.
        """
        # Fixed capacity per resource type.
        self.resources = {
            ResourceType.CPU: total_cpu,
            ResourceType.MEMORY: total_memory,
            ResourceType.IO: total_io,
            ResourceType.NETWORK: total_network,
            ResourceType.GPU: total_gpu,
        }

        # Capacity not currently handed out.
        self.available = self.resources.copy()

        # The condition shares the lock, so waiters and set_quota serialize.
        self._lock = asyncio.Lock()
        self._condition = asyncio.Condition(self._lock)

        # state name -> {resource type -> amount held}, and when it last grew.
        self._allocations: dict[str, dict[ResourceType, float]] = {}
        self._allocation_times: dict[str, float] = {}

        # One statistics record per concrete resource type.
        self._usage_stats = {
            rt: ResourceUsageStats()
            for rt in ResourceType
            if rt != ResourceType.NONE and rt != ResourceType.ALL
        }

        # Feature flags and their bookkeeping.
        self.enable_quotas = enable_quotas
        self._quotas: dict[str, dict[ResourceType, float]] = {}

        self.enable_preemption = enable_preemption
        self._preempted_states: set[str] = set()

        # Time-stamped history, trimmed to the last _history_retention seconds.
        self._allocation_history: dict[ResourceType, list[tuple]] = defaultdict(list)
        self._usage_history: list[tuple] = []
        self._history_retention = 3600

        # States currently blocked inside acquire().
        self._waiting_states: set[str] = set()

        # Leak detection: state name -> agent name used when reporting.
        self.enable_leak_detection = enable_leak_detection
        self._agent_names: dict[str, str] = {}

    def _requested_types(
        self, requirements: ResourceRequirements
    ) -> list[ResourceType]:
        """Return the tracked resource types that ``requirements`` asks for."""
        return [
            resource_type
            for resource_type in (
                ResourceType.CPU,
                ResourceType.MEMORY,
                ResourceType.IO,
                ResourceType.NETWORK,
                ResourceType.GPU,
            )
            if safe_check_resource_type(requirements, resource_type)
        ]

    def _refresh_current_usage(self, resource_types: list[ResourceType]) -> None:
        """Recompute ``current_usage`` for ``resource_types`` from live holdings."""
        for resource_type in resource_types:
            stats = self._usage_stats.get(resource_type)
            if stats is not None:
                stats.current_usage = sum(
                    held.get(resource_type, 0.0)
                    for held in self._allocations.values()
                )

    async def set_quota(
        self, state_name: str, resource_type: ResourceType, limit: float
    ) -> None:
        """Set the quota of ``resource_type`` for ``state_name``.

        Raises:
            RuntimeError: If the pool was created with quotas disabled.
        """
        if not self.enable_quotas:
            raise RuntimeError("Quotas are not enabled for this resource pool")

        async with self._lock:
            self._quotas.setdefault(state_name, {})[resource_type] = limit

    def _check_quota(self, state_name: str, requirements: ResourceRequirements) -> bool:
        """Return False if granting ``requirements`` would exceed a quota.

        Only resource types that are both requested and have a quota set for
        ``state_name`` are checked; what the state already holds counts
        towards the quota.  Always True when quotas are disabled.
        """
        if not self.enable_quotas:
            return True

        held = self._allocations.get(state_name, {})
        state_quotas = self._quotas.get(state_name, {})

        for resource_type in self._requested_types(requirements):
            quota = state_quotas.get(resource_type)
            if quota is None:
                continue

            required = get_resource_amount(requirements, resource_type)
            current = held.get(resource_type, 0.0)

            if current + required > quota:
                logger.warning(
                    f"Quota exceeded for {state_name}: {resource_type.name} "
                    f"(current: {current}, required: {required}, quota: {quota})"
                )
                return False

        return True

    async def acquire(
        self,
        state_name: str,
        requirements: ResourceRequirements,
        timeout: Optional[float] = None,
        allow_preemption: bool = False,
        agent_name: Optional[str] = None,
    ) -> bool:
        """Reserve ``requirements`` for ``state_name``, waiting if necessary.

        Args:
            state_name: Name the allocation is recorded under.
            requirements: Resources being requested.
            timeout: Maximum seconds to wait.  ``None`` waits indefinitely;
                ``0`` makes a single non-blocking attempt.
            allow_preemption: Permit evicting other states; only effective
                when the pool itself has preemption enabled.
            agent_name: Name reported to the leak detector for this state.

        Returns:
            True once the resources are allocated, False if the timeout
            expired first.

        Raises:
            ValueError: A requested amount is negative.
            ResourceOverflowError: A requested amount exceeds the pool total.
            ResourceQuotaExceededError: The request would exceed a quota.
        """
        start_time = time.time()

        # Remember the agent name so leak reports can attribute the state.
        if agent_name and self.enable_leak_detection:
            self._agent_names[state_name] = agent_name

        acquired = False
        try:
            requirements = self._validate_and_fix_requirements(requirements)

            async with self._condition:
                try:
                    self._validate_requirements_against_total(requirements)

                    if not self._check_quota(state_name, requirements):
                        raise ResourceQuotaExceededError(
                            f"Quota exceeded for {state_name}"
                        )

                    while not self._can_allocate(requirements):
                        self._waiting_states.add(state_name)

                        if (
                            allow_preemption
                            and self.enable_preemption
                            and self._try_preemption(state_name, requirements)
                        ):
                            # Preemption may have freed more than this request
                            # needs; let other waiters re-check availability.
                            self._condition.notify_all()
                            break

                        # "is not None" so that timeout=0 means "do not wait"
                        # rather than "wait forever".
                        if timeout is not None:
                            remaining_time = timeout - (time.time() - start_time)
                            if remaining_time <= 0:
                                self._update_stats_failure(requirements)
                                return False

                            try:
                                await asyncio.wait_for(
                                    self._condition.wait(), timeout=remaining_time
                                )
                            except asyncio.TimeoutError:
                                self._update_stats_failure(requirements)
                                return False
                        else:
                            await self._condition.wait()
                finally:
                    # Runs on success, timeout, error and cancellation alike,
                    # so a state never stays listed as waiting.
                    self._waiting_states.discard(state_name)

                self._allocate(state_name, requirements)

                if self.enable_leak_detection:
                    agent = self._agent_names.get(state_name, "unknown")
                    resource_dict = self._build_resource_dict(requirements)
                    leak_detector.track_allocation(state_name, agent, resource_dict)

                self._update_stats(state_name, requirements, start_time)

                acquired = True
                return True

        except Exception as e:
            self._update_stats_failure(requirements)
            logger.error(f"Error acquiring resources for {state_name}: {e}")
            raise
        finally:
            # A failed attempt by a state that holds nothing must not leave
            # its agent name behind.
            if not acquired and state_name not in self._allocations:
                self._agent_names.pop(state_name, None)

    def _validate_and_fix_requirements(
        self, requirements: ResourceRequirements
    ) -> ResourceRequirements:
        """Reject negative amounts and repair unusable requirement objects.

        Returns ``requirements`` unchanged when it validates.  If anything
        other than a negative amount goes wrong (for example the
        ``resource_types`` attribute does not support ``&``), a fallback
        ``ResourceRequirements`` requesting all resource types is built from
        whatever amount attributes are present.

        Raises:
            ValueError: If any amount attribute is negative.
        """
        try:
            for attr_name in (
                "cpu_units",
                "memory_mb",
                "io_weight",
                "network_weight",
                "gpu_units",
            ):
                value = getattr(requirements, attr_name, 0.0)
                if value < 0:
                    raise ValueError(
                        f"Negative resource requirement: {attr_name}={value}"
                    )

            # Probe only: raises if resource_types cannot be combined with a
            # ResourceType flag, which sends us to the fallback below.
            _ = requirements.resource_types & ResourceType.CPU
            logger.debug(f"Requirements validation passed: {requirements}")

            return requirements

        except ValueError:
            # Negative amounts are a caller error, not something to repair.
            raise
        except Exception as e:
            logger.error(f"Error validating requirements: {e}")
            fallback = ResourceRequirements(
                cpu_units=getattr(requirements, "cpu_units", 1.0),
                memory_mb=getattr(requirements, "memory_mb", 100.0),
                io_weight=getattr(requirements, "io_weight", 1.0),
                network_weight=getattr(requirements, "network_weight", 1.0),
                gpu_units=getattr(requirements, "gpu_units", 0.0),
                resource_types=ResourceType.ALL,
            )
            logger.info(f"Using fallback requirements: {fallback}")
            return fallback

    def _validate_requirements_against_total(
        self, requirements: ResourceRequirements
    ) -> None:
        """Ensure no requested amount exceeds the pool's total capacity.

        Raises:
            ResourceOverflowError: If a request can never be satisfied.
        """
        for resource_type in self._requested_types(requirements):
            required = get_resource_amount(requirements, resource_type)
            total_available = self.resources.get(resource_type, 0.0)

            if required > total_available:
                raise ResourceOverflowError(
                    f"Required {resource_type.name} ({required}) exceeds total available ({total_available})"
                )

    def _build_resource_dict(
        self, requirements: ResourceRequirements
    ) -> dict[str, float]:
        """Map lower-cased resource names to the positive amounts requested."""
        resource_dict = {}

        for resource_type in self._requested_types(requirements):
            amount = get_resource_amount(requirements, resource_type)
            if amount > 0 and resource_type.name:
                resource_dict[resource_type.name.lower()] = amount

        return resource_dict

    def _can_allocate(self, requirements: ResourceRequirements) -> bool:
        """Return True if every requested amount is available right now.

        Any error while inspecting ``requirements`` is logged and treated as
        "cannot allocate".
        """
        try:
            logger.debug(f"Checking allocation for: {requirements}")

            for resource_type in self._requested_types(requirements):
                required = get_resource_amount(requirements, resource_type)
                available = self.available.get(resource_type, 0.0)

                logger.debug(
                    f"Resource {resource_type.name}: required={required}, available={available}"
                )

                if required > available:
                    logger.debug(
                        f"Cannot allocate - insufficient {resource_type.name}"
                    )
                    return False

            return True

        except Exception as e:
            logger.error(f"Error in _can_allocate: {e}")
            logger.error(f"Requirements: {requirements}")
            logger.error(f"Requirements type: {type(requirements)}")
            # Refusing is the safe answer when the request cannot be read.
            return False

    def _allocate(self, state_name: str, requirements: ResourceRequirements) -> None:
        """Move the requested amounts from the pool to ``state_name``.

        Amounts are added to whatever the state already holds, so that
        ``release`` returns everything the state has taken.
        """
        try:
            held = self._allocations.setdefault(state_name, {})
            self._allocation_times[state_name] = time.time()

            for resource_type in self._requested_types(requirements):
                amount = get_resource_amount(requirements, resource_type)
                if amount > 0:
                    # Accumulate: overwriting would lose track of capacity
                    # taken by an earlier acquire of the same state.
                    held[resource_type] = held.get(resource_type, 0.0) + amount
                    self.available[resource_type] -= amount

                    logger.debug(
                        f"Allocated {amount} {resource_type.name} to {state_name}"
                    )

        except Exception as e:
            logger.error(f"Error in _allocate: {e}")
            raise

    def _try_preemption(
        self, state_name: str, requirements: ResourceRequirements
    ) -> bool:
        """Evict other states until ``requirements`` would fit.

        Candidates are considered largest holder first.  A candidate is only
        evicted if it holds some resource that is still lacking, and eviction
        stops as soon as the request fits.  Nothing is evicted when even
        evicting every candidate would not be enough.

        Returns:
            True if the request now fits, False otherwise (including on any
            internal error, which is logged).
        """
        if not self.enable_preemption:
            return False

        try:
            candidates = sorted(
                (
                    (allocated_state, sum(resources.values()))
                    for allocated_state, resources in self._allocations.items()
                    if allocated_state != state_name
                ),
                key=lambda item: item[1],
                reverse=True,
            )
            if not candidates:
                return False

            needed = {
                resource_type: get_resource_amount(requirements, resource_type)
                for resource_type in self._requested_types(requirements)
            }
            would_free: dict[ResourceType, float] = defaultdict(float)

            def lacking() -> set:
                # Resource types still short after the simulated evictions.
                return {
                    rt
                    for rt, required in needed.items()
                    if required > self.available[rt] + would_free[rt]
                }

            # Simulate first; evict only if the simulation succeeds.
            preempt_list = []
            short = lacking()
            for candidate_state, _ in candidates:
                if not short:
                    break
                candidate_resources = self._allocations[candidate_state]
                if not any(candidate_resources.get(rt, 0.0) > 0 for rt in short):
                    continue  # evicting this state would not help
                for rt, amount in candidate_resources.items():
                    would_free[rt] += amount
                preempt_list.append(candidate_state)
                short = lacking()

            if short:
                return False

            for preempt_state in preempt_list:
                self._preempt_state(preempt_state)
            return True

        except Exception as e:
            logger.error(f"Error in preemption: {e}")
            return False

    def _preempt_state(self, state_name: str) -> None:
        """Take back everything ``state_name`` holds and mark it preempted.

        Performs the same bookkeeping cleanup as ``release``; errors are
        logged, not raised.
        """
        try:
            if state_name in self._allocations:
                held = self._allocations[state_name]
                for resource_type, amount in held.items():
                    self.available[resource_type] += amount

                self._preempted_states.add(state_name)
                del self._allocations[state_name]
                self._allocation_times.pop(state_name, None)
                self._refresh_current_usage(list(held))

                if self.enable_leak_detection:
                    agent = self._agent_names.get(state_name, "unknown")
                    leak_detector.track_release(state_name, agent)
                    self._agent_names.pop(state_name, None)

                logger.warning(f"Preempted state {state_name}")

        except Exception as e:
            logger.error(f"Error preempting state {state_name}: {e}")

    async def release(self, state_name: str) -> None:
        """Return everything ``state_name`` holds and wake waiting states.

        Releasing a state that holds nothing is a no-op apart from the
        wake-up.  Errors are logged, not raised.
        """
        try:
            async with self._condition:
                if state_name in self._allocations:
                    held = self._allocations[state_name]
                    for resource_type, amount in held.items():
                        self.available[resource_type] += amount
                        logger.debug(
                            f"Released {amount} {resource_type.name} from {state_name}"
                        )

                    del self._allocations[state_name]
                    self._allocation_times.pop(state_name, None)
                    # Keep current_usage in step with what is actually held.
                    self._refresh_current_usage(list(held))

                    if self.enable_leak_detection:
                        agent = self._agent_names.get(state_name, "unknown")
                        leak_detector.track_release(state_name, agent)
                        self._agent_names.pop(state_name, None)

                self._condition.notify_all()

        except Exception as e:
            logger.error(f"Error releasing resources for {state_name}: {e}")

    def _update_stats(
        self, state_name: str, requirements: ResourceRequirements, start_time: float
    ) -> None:
        """Record a successful allocation in the statistics and history.

        ``start_time`` is when the ``acquire`` call began; the elapsed time
        is booked as wait time.  Errors are logged, not raised.
        """
        try:
            current_time = time.time()
            wait_time = current_time - start_time

            self._usage_history.append((current_time, self.available.copy()))

            for resource_type in self._requested_types(requirements):
                amount = get_resource_amount(requirements, resource_type)
                if amount <= 0:
                    continue

                stats = self._usage_stats[resource_type]  # type: ignore
                stats.total_allocations += 1
                stats.total_wait_time += wait_time
                stats.last_allocation_time = current_time

                current_usage = sum(
                    alloc.get(resource_type, 0.0)
                    for alloc in self._allocations.values()
                )
                stats.current_usage = current_usage
                stats.peak_usage = max(stats.peak_usage, current_usage)

                self._allocation_history[resource_type].append(
                    (current_time, current_usage)
                )

            # Drop history entries older than the retention window.
            cutoff = current_time - self._history_retention
            self._usage_history = [
                (t, usage) for t, usage in self._usage_history if t >= cutoff
            ]

            for resource_type in self._allocation_history:
                self._allocation_history[resource_type] = [
                    (t, usage)
                    for t, usage in self._allocation_history[resource_type]
                    if t >= cutoff
                ]

        except Exception as e:
            logger.error(f"Error updating stats: {e}")

    def _update_stats_failure(self, requirements: ResourceRequirements) -> None:
        """Count a failed attempt for every resource requested in a positive amount."""
        try:
            for resource_type in self._requested_types(requirements):
                amount = get_resource_amount(requirements, resource_type)
                if amount > 0:
                    self._usage_stats[resource_type].failed_allocations += 1  # type: ignore
        except Exception as e:
            logger.error(f"Error updating failure stats: {e}")

    # Information methods
    def get_usage_stats(self) -> dict[ResourceType, ResourceUsageStats]:
        """Return a new dict of the per-type statistics records."""
        return dict(self._usage_stats)  # type: ignore

    def get_state_allocations(self) -> dict[str, dict[ResourceType, float]]:
        """Return a snapshot of current holdings by state.

        The per-state dicts are copied too, so changing the snapshot cannot
        corrupt the pool's own bookkeeping.
        """
        return {state: dict(held) for state, held in self._allocations.items()}

    def get_waiting_states(self) -> set[str]:
        """Return a copy of the set of states waiting for resources."""
        return set(self._waiting_states)

    def get_preempted_states(self) -> set[str]:
        """Return a copy of the set of states that were preempted."""
        return set(self._preempted_states)

    def check_leaks(self) -> list[Any]:
        """Return the leak detector's findings; empty if disabled or on error."""
        if not self.enable_leak_detection:
            return []
        try:
            return leak_detector.detect_leaks()
        except Exception as e:
            logger.error(f"Error checking leaks: {e}")
            return []

    def get_leak_metrics(self) -> dict[str, Any]:
        """Return leak detector metrics, or a status dict if disabled or failing."""
        if not self.enable_leak_detection:
            return {"leak_detection": "disabled"}
        try:
            return leak_detector.get_metrics()
        except Exception as e:
            logger.error(f"Error getting leak metrics: {e}")
            return {"leak_detection": "error", "error": str(e)}

    async def force_release(self, state_name: str) -> None:
        """Log a warning, then release ``state_name`` exactly as ``release`` does."""
        logger.warning(f"Force releasing resources for state {state_name}")
        await self.release(state_name)