kailash 0.8.4__py3-none-any.whl → 0.8.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +1 -7
- kailash/cli/__init__.py +11 -1
- kailash/cli/validation_audit.py +570 -0
- kailash/core/actors/supervisor.py +1 -1
- kailash/core/resilience/circuit_breaker.py +71 -1
- kailash/core/resilience/health_monitor.py +172 -0
- kailash/edge/compliance.py +33 -0
- kailash/edge/consistency.py +609 -0
- kailash/edge/coordination/__init__.py +30 -0
- kailash/edge/coordination/global_ordering.py +355 -0
- kailash/edge/coordination/leader_election.py +217 -0
- kailash/edge/coordination/partition_detector.py +296 -0
- kailash/edge/coordination/raft.py +485 -0
- kailash/edge/discovery.py +63 -1
- kailash/edge/migration/__init__.py +19 -0
- kailash/edge/migration/edge_migrator.py +832 -0
- kailash/edge/monitoring/__init__.py +21 -0
- kailash/edge/monitoring/edge_monitor.py +736 -0
- kailash/edge/prediction/__init__.py +10 -0
- kailash/edge/prediction/predictive_warmer.py +591 -0
- kailash/edge/resource/__init__.py +102 -0
- kailash/edge/resource/cloud_integration.py +796 -0
- kailash/edge/resource/cost_optimizer.py +949 -0
- kailash/edge/resource/docker_integration.py +919 -0
- kailash/edge/resource/kubernetes_integration.py +893 -0
- kailash/edge/resource/platform_integration.py +913 -0
- kailash/edge/resource/predictive_scaler.py +959 -0
- kailash/edge/resource/resource_analyzer.py +824 -0
- kailash/edge/resource/resource_pools.py +610 -0
- kailash/integrations/dataflow_edge.py +261 -0
- kailash/mcp_server/registry_integration.py +1 -1
- kailash/monitoring/__init__.py +18 -0
- kailash/monitoring/alerts.py +646 -0
- kailash/monitoring/metrics.py +677 -0
- kailash/nodes/__init__.py +2 -0
- kailash/nodes/ai/semantic_memory.py +2 -2
- kailash/nodes/base.py +545 -0
- kailash/nodes/edge/__init__.py +36 -0
- kailash/nodes/edge/base.py +240 -0
- kailash/nodes/edge/cloud_node.py +710 -0
- kailash/nodes/edge/coordination.py +239 -0
- kailash/nodes/edge/docker_node.py +825 -0
- kailash/nodes/edge/edge_data.py +582 -0
- kailash/nodes/edge/edge_migration_node.py +392 -0
- kailash/nodes/edge/edge_monitoring_node.py +421 -0
- kailash/nodes/edge/edge_state.py +673 -0
- kailash/nodes/edge/edge_warming_node.py +393 -0
- kailash/nodes/edge/kubernetes_node.py +652 -0
- kailash/nodes/edge/platform_node.py +766 -0
- kailash/nodes/edge/resource_analyzer_node.py +378 -0
- kailash/nodes/edge/resource_optimizer_node.py +501 -0
- kailash/nodes/edge/resource_scaler_node.py +397 -0
- kailash/nodes/ports.py +676 -0
- kailash/runtime/local.py +344 -1
- kailash/runtime/validation/__init__.py +20 -0
- kailash/runtime/validation/connection_context.py +119 -0
- kailash/runtime/validation/enhanced_error_formatter.py +202 -0
- kailash/runtime/validation/error_categorizer.py +164 -0
- kailash/runtime/validation/metrics.py +380 -0
- kailash/runtime/validation/performance.py +615 -0
- kailash/runtime/validation/suggestion_engine.py +212 -0
- kailash/testing/fixtures.py +2 -2
- kailash/workflow/builder.py +230 -4
- kailash/workflow/contracts.py +418 -0
- kailash/workflow/edge_infrastructure.py +369 -0
- kailash/workflow/migration.py +3 -3
- kailash/workflow/type_inference.py +669 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/METADATA +43 -27
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/RECORD +73 -27
- kailash/nexus/__init__.py +0 -21
- kailash/nexus/cli/__init__.py +0 -5
- kailash/nexus/cli/__main__.py +0 -6
- kailash/nexus/cli/main.py +0 -176
- kailash/nexus/factory.py +0 -413
- kailash/nexus/gateway.py +0 -545
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/WHEEL +0 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/entry_points.txt +0 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,610 @@
|
|
1
|
+
"""Resource pool management for edge computing.
|
2
|
+
|
3
|
+
This module provides unified resource abstraction and management
|
4
|
+
across different types of computing resources.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import asyncio
|
8
|
+
import logging
|
9
|
+
import uuid
|
10
|
+
from dataclasses import dataclass, field
|
11
|
+
from datetime import datetime, timedelta
|
12
|
+
from enum import Enum
|
13
|
+
from typing import Any, Dict, List, Optional, Set
|
14
|
+
|
15
|
+
|
16
|
+
class AllocationStrategy(Enum):
    """Resource allocation strategies.

    Selects how a pool picks a slot for a request.  NOTE(review): the
    strategy is stored on ResourcePool but not yet consulted by the
    allocation path visible in this module — confirm before relying on it.
    """

    FIRST_FIT = "first_fit"  # First available slot
    BEST_FIT = "best_fit"  # Smallest adequate slot
    WORST_FIT = "worst_fit"  # Largest available slot
    ROUND_ROBIN = "round_robin"  # Distribute evenly
    PRIORITY_BASED = "priority_based"  # Based on request priority
    FAIR_SHARE = "fair_share"  # Equal distribution
|
25
|
+
|
26
|
+
|
27
|
+
class ResourceUnit(Enum):
    """Units for different resource types.

    The unit is carried on each ResourceSpec and surfaced (as its string
    value) in utilization reports; no unit conversion is performed here.
    """

    CORES = "cores"  # CPU cores
    MEGABYTES = "MB"  # Memory
    GIGABYTES = "GB"  # Storage
    MBPS = "Mbps"  # Network bandwidth
    PERCENTAGE = "percent"  # Generic percentage
    COUNT = "count"  # Generic count
|
36
|
+
|
37
|
+
|
38
|
+
@dataclass
class ResourceSpec:
    """Specification for a resource type.

    Describes one kind of resource (CPU, memory, ...) offered by an edge
    node: its total capacity and its sharing/preemption semantics.
    """

    resource_type: str  # pool-wide key, e.g. "cpu" or "memory"
    capacity: float  # total amount available, expressed in `unit`
    unit: ResourceUnit  # measurement unit for `capacity`
    shareable: bool = True  # NOTE(review): declared but not read by ResourcePool — confirm intent
    preemptible: bool = False  # NOTE(review): preemption checks use per-request flags, not this — confirm
    metadata: Dict[str, Any] = field(default_factory=dict)  # free-form extra info
|
48
|
+
|
49
|
+
|
50
|
+
@dataclass
class ResourceRequest:
    """Request for resource allocation.

    Captures what a requester wants (amounts per resource type), how
    important the request is, and how long the allocation should live.
    """

    request_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    requester: str = ""
    resources: Dict[str, float] = field(default_factory=dict)  # type -> amount
    priority: int = 5  # 1-10, higher is more important
    duration: Optional[int] = None  # Seconds, None = indefinite
    preemptible: bool = True
    constraints: Dict[str, Any] = field(default_factory=dict)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the request into a plain dictionary."""
        keys = (
            "request_id",
            "requester",
            "resources",
            "priority",
            "duration",
            "preemptible",
            "constraints",
            "metadata",
        )
        return {key: getattr(self, key) for key in keys}
|
75
|
+
|
76
|
+
|
77
|
+
@dataclass
class ResourceAllocation:
    """Allocated resource information.

    Records one successful allocation on an edge node.  ``metadata`` is
    declared explicitly here because ResourcePool stores the requester,
    priority, and preemptible flag in it and later reads it during
    preemption; previously it was only set by attribute assignment after
    construction, so reading it on a directly-constructed instance
    raised AttributeError.
    """

    allocation_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    request_id: str = ""
    edge_node: str = ""
    resources: Dict[str, float] = field(default_factory=dict)  # type -> amount
    allocated_at: datetime = field(default_factory=datetime.now)
    expires_at: Optional[datetime] = None  # None = never expires
    status: str = "active"  # active, expired, released
    # BUG FIX: declared as a proper field (was assigned ad hoc by the pool).
    metadata: Dict[str, Any] = field(default_factory=dict)

    @property
    def is_expired(self) -> bool:
        """Return True once the current time has passed ``expires_at``."""
        return bool(self.expires_at and datetime.now() > self.expires_at)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary (datetimes rendered as ISO-8601 strings)."""
        return {
            "allocation_id": self.allocation_id,
            "request_id": self.request_id,
            "edge_node": self.edge_node,
            "resources": self.resources,
            "allocated_at": self.allocated_at.isoformat(),
            "expires_at": self.expires_at.isoformat() if self.expires_at else None,
            "status": self.status,
            "is_expired": self.is_expired,
        }
|
108
|
+
|
109
|
+
|
110
|
+
@dataclass
class AllocationResult:
    """Result of allocation attempt."""

    success: bool
    allocations: List[ResourceAllocation] = field(default_factory=list)
    reason: Optional[str] = None  # human-readable failure cause
    partial: bool = False  # True when only part of the request was satisfied
    suggestions: List[str] = field(default_factory=list)  # remediation hints

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the result, expanding nested allocations."""
        payload: Dict[str, Any] = {
            "success": self.success,
            "allocations": [entry.to_dict() for entry in self.allocations],
            "reason": self.reason,
            "partial": self.partial,
            "suggestions": self.suggestions,
        }
        return payload
|
129
|
+
|
130
|
+
|
131
|
+
class ResourcePool:
    """Manages a pool of resources for an edge node.

    Tracks per-type capacity and current allocations, and supports
    expiry-based cleanup and priority-based preemption.  All mutating
    operations are serialized through a single ``asyncio.Lock``; that
    lock is NOT reentrant, so internal code that already holds it must
    release allocations via ``_release_unlocked`` rather than
    ``release``.
    """

    def __init__(
        self,
        edge_node: str,
        resources: List[ResourceSpec],
        allocation_strategy: AllocationStrategy = AllocationStrategy.BEST_FIT,
        oversubscription_ratio: float = 1.0,
    ):
        """Initialize resource pool.

        Args:
            edge_node: Edge node identifier
            resources: Resource specifications
            allocation_strategy: Strategy for allocation (stored; not yet
                consulted by the allocation path in this module)
            oversubscription_ratio: Allow oversubscription (>1.0 scales
                every spec's capacity when checking availability)
        """
        self.edge_node = edge_node
        self.allocation_strategy = allocation_strategy
        self.oversubscription_ratio = oversubscription_ratio

        # Resource tracking: spec and currently-allocated amount per type.
        self.resources: Dict[str, ResourceSpec] = {
            r.resource_type: r for r in resources
        }
        self.allocated: Dict[str, float] = {r.resource_type: 0.0 for r in resources}
        self.allocations: Dict[str, ResourceAllocation] = {}

        # Request tracking for fair share (per-requester allocation totals).
        self.request_history: Dict[str, List[float]] = {}

        # Serializes all mutating operations.  asyncio.Lock is not
        # reentrant — see _release_unlocked.
        self._lock = asyncio.Lock()

        self.logger = logging.getLogger(__name__)

    async def allocate(self, request: ResourceRequest) -> AllocationResult:
        """Allocate resources for a request.

        Args:
            request: Resource request

        Returns:
            Allocation result; on failure, ``suggestions`` explains how
            the request might still be satisfied.
        """
        async with self._lock:
            # Check if resources are available
            available = await self._check_availability(request)

            if not available["sufficient"]:
                return AllocationResult(
                    success=False,
                    reason=available["reason"],
                    suggestions=await self._get_allocation_suggestions(request),
                )

            # Perform allocation
            allocation = await self._perform_allocation(request)

            return AllocationResult(success=True, allocations=[allocation])

    async def release(self, allocation_id: str) -> bool:
        """Release allocated resources.

        Args:
            allocation_id: Allocation to release

        Returns:
            Success status (False when the id is unknown)
        """
        async with self._lock:
            return self._release_unlocked(allocation_id)

    def _release_unlocked(self, allocation_id: str) -> bool:
        """Release an allocation; caller MUST already hold ``self._lock``.

        Extracted from ``release`` so that ``cleanup_expired`` and
        ``preempt_resources`` — which run with the lock held — can free
        allocations without re-acquiring it.  Previously they awaited
        ``self.release`` from inside the locked section, which deadlocked
        because asyncio.Lock is not reentrant.
        """
        if allocation_id not in self.allocations:
            return False

        allocation = self.allocations[allocation_id]

        # Return resources to pool
        for rtype, amount in allocation.resources.items():
            self.allocated[rtype] -= amount

        # Update status
        allocation.status = "released"
        del self.allocations[allocation_id]

        self.logger.info(f"Released allocation {allocation_id}")
        return True

    async def get_utilization(self) -> Dict[str, Any]:
        """Get current resource utilization.

        Returns:
            Utilization information per resource type, plus allocation
            counts for this node.
        """
        utilization = {}

        for rtype, spec in self.resources.items():
            allocated = self.allocated.get(rtype, 0)
            # Effective capacity includes any oversubscription headroom.
            capacity = spec.capacity * self.oversubscription_ratio

            utilization[rtype] = {
                "allocated": allocated,
                "capacity": capacity,
                "available": capacity - allocated,
                "utilization_percent": (
                    (allocated / capacity * 100) if capacity > 0 else 0
                ),
                "unit": spec.unit.value,
            }

        return {
            "edge_node": self.edge_node,
            "resources": utilization,
            "total_allocations": len(self.allocations),
            "active_allocations": len(
                [a for a in self.allocations.values() if not a.is_expired]
            ),
        }

    async def cleanup_expired(self) -> int:
        """Clean up expired allocations.

        Returns:
            Number of allocations cleaned
        """
        async with self._lock:
            expired = [
                aid
                for aid, allocation in self.allocations.items()
                if allocation.is_expired
            ]

            for aid in expired:
                # BUG FIX: was ``await self.release(aid)``, which deadlocks
                # because we already hold the non-reentrant lock.
                self._release_unlocked(aid)

            return len(expired)

    async def preempt_resources(self, request: ResourceRequest) -> List[str]:
        """Preempt lower priority allocations if needed.

        Args:
            request: High priority request (priority >= 8)

        Returns:
            List of preempted allocation IDs
        """
        if request.priority < 8:  # Only high priority can preempt
            return []

        async with self._lock:
            preempted = []
            needed = dict(request.resources)

            # Sort active allocations by priority (ascending) so the
            # least important are preempted first.
            sorted_allocs = sorted(
                [
                    (aid, a)
                    for aid, a in self.allocations.items()
                    if a.status == "active"
                ],
                key=lambda x: x[1].metadata.get("priority", 5),
            )

            for aid, allocation in sorted_allocs:
                if allocation.metadata.get("priority", 5) >= request.priority:
                    continue  # Can't preempt equal or higher priority

                if not allocation.metadata.get("preemptible", True):
                    continue  # Can't preempt non-preemptible

                # Only preempt if this allocation frees something we still need.
                helps = any(
                    needed.get(rtype, 0) > 0 for rtype in allocation.resources
                )

                if helps:
                    preempted.append(aid)
                    # BUG FIX: was ``await self.release(aid)`` — deadlock
                    # (lock already held); release inline instead.
                    self._release_unlocked(aid)

                    # Update needed resources
                    for rtype, amount in allocation.resources.items():
                        if rtype in needed:
                            needed[rtype] = max(0, needed[rtype] - amount)

                    # Check if we have enough now
                    if all(n <= 0 for n in needed.values()):
                        break

            return preempted

    async def _check_availability(self, request: ResourceRequest) -> Dict[str, Any]:
        """Check if resources are available.

        Args:
            request: Resource request

        Returns:
            ``{"sufficient": bool}`` plus a ``"reason"`` string when not.
        """
        insufficient_resources = []

        for rtype, requested in request.resources.items():
            if rtype not in self.resources:
                insufficient_resources.append(f"{rtype} not available")
                continue

            spec = self.resources[rtype]
            allocated = self.allocated.get(rtype, 0)
            capacity = spec.capacity * self.oversubscription_ratio
            available = capacity - allocated

            if requested > available:
                insufficient_resources.append(
                    f"{rtype}: requested {requested}, available {available:.2f}"
                )

        if insufficient_resources:
            return {
                "sufficient": False,
                "reason": "Insufficient resources: "
                + ", ".join(insufficient_resources),
            }

        return {"sufficient": True}

    async def _perform_allocation(self, request: ResourceRequest) -> ResourceAllocation:
        """Perform the actual allocation (caller holds the lock).

        Args:
            request: Resource request

        Returns:
            Resource allocation
        """
        # Update allocated amounts
        for rtype, amount in request.resources.items():
            self.allocated[rtype] += amount

        # Create allocation record
        allocation = ResourceAllocation(
            request_id=request.request_id,
            edge_node=self.edge_node,
            resources=dict(request.resources),
            expires_at=(
                datetime.now() + timedelta(seconds=request.duration)
                if request.duration
                else None
            ),
        )

        # Store metadata (read back during preemption checks).
        allocation.metadata = {
            "requester": request.requester,
            "priority": request.priority,
            "preemptible": request.preemptible,
        }

        self.allocations[allocation.allocation_id] = allocation

        # Track for fair share
        if request.requester not in self.request_history:
            self.request_history[request.requester] = []
        self.request_history[request.requester].append(sum(request.resources.values()))

        self.logger.info(
            f"Allocated resources for {request.requester}: "
            f"{request.resources} (allocation_id: {allocation.allocation_id})"
        )

        return allocation

    async def _get_allocation_suggestions(self, request: ResourceRequest) -> List[str]:
        """Get suggestions for failed allocation.

        Args:
            request: Failed resource request

        Returns:
            List of suggestions
        """
        suggestions = []

        # Check if reducing request would help.  Use the same effective
        # capacity as _check_availability (CONSISTENCY FIX: previously
        # ignored oversubscription_ratio here).
        for rtype, requested in request.resources.items():
            if rtype in self.resources:
                capacity = self.resources[rtype].capacity * self.oversubscription_ratio
                available = capacity - self.allocated.get(rtype, 0)
                if available > 0:
                    suggestions.append(
                        f"Reduce {rtype} request to {available:.2f} or less"
                    )

        # Check if waiting would help
        upcoming_releases = [
            allocation.expires_at
            for allocation in self.allocations.values()
            if allocation.expires_at and not allocation.is_expired
        ]

        if upcoming_releases:
            next_release = min(upcoming_releases)
            wait_time = (next_release - datetime.now()).total_seconds()
            suggestions.append(f"Wait {wait_time:.0f}s for resources to be released")

        # Suggest preemption if applicable
        if request.priority >= 8:
            preemptible_count = sum(
                1
                for a in self.allocations.values()
                if a.metadata.get("preemptible", True)
                and a.metadata.get("priority", 5) < request.priority
            )
            if preemptible_count > 0:
                suggestions.append(
                    f"Enable preemption to free resources from "
                    f"{preemptible_count} lower priority allocations"
                )

        return suggestions
|
452
|
+
|
453
|
+
|
454
|
+
class ResourcePoolManager:
    """Manages multiple resource pools across edge nodes."""

    def __init__(self):
        """Initialize resource pool manager."""
        self.pools: Dict[str, ResourcePool] = {}
        self.logger = logging.getLogger(__name__)

    def add_pool(self, pool: ResourcePool):
        """Register a resource pool, keyed by its edge node.

        Args:
            pool: Resource pool to add
        """
        self.pools[pool.edge_node] = pool
        self.logger.info(f"Added resource pool for {pool.edge_node}")

    async def allocate(
        self, request: ResourceRequest, preferred_nodes: Optional[List[str]] = None
    ) -> AllocationResult:
        """Allocate resources across pools.

        Preferred nodes are attempted first in the given order; any
        remaining pool is tried afterwards.

        Args:
            request: Resource request
            preferred_nodes: Preferred edge nodes

        Returns:
            Allocation result
        """
        preferred = preferred_nodes or []

        # First pass: honour the caller's preference order.
        for node in preferred:
            pool = self.pools.get(node)
            if pool is None:
                continue
            outcome = await pool.allocate(request)
            if outcome.success:
                return outcome

        # Second pass: every pool not already attempted.
        for node, pool in self.pools.items():
            if node in preferred:
                continue
            outcome = await pool.allocate(request)
            if outcome.success:
                return outcome

        # No pool could satisfy the request.
        return AllocationResult(
            success=False,
            reason="No edge node has sufficient resources",
            suggestions=[
                "Consider splitting the request",
                "Wait for resources to be freed",
            ],
        )

    async def get_global_utilization(self) -> Dict[str, Any]:
        """Get utilization across all pools.

        Returns:
            Per-node snapshots plus per-resource-type aggregates.
        """
        per_node: Dict[str, Any] = {}
        rollup: Dict[str, Dict[str, float]] = {}

        for node, pool in self.pools.items():
            snapshot = await pool.get_utilization()
            per_node[node] = snapshot

            # Fold each node's figures into the per-type totals.
            for rtype, info in snapshot["resources"].items():
                bucket = rollup.setdefault(
                    rtype, {"allocated": 0, "capacity": 0, "count": 0}
                )
                bucket["allocated"] += info["allocated"]
                bucket["capacity"] += info["capacity"]
                bucket["count"] += 1

        aggregates = {
            rtype: {
                "total_allocated": totals["allocated"],
                "total_capacity": totals["capacity"],
                "average_utilization": (
                    totals["allocated"] / totals["capacity"] * 100
                    if totals["capacity"] > 0
                    else 0
                ),
                "node_count": totals["count"],
            }
            for rtype, totals in rollup.items()
        }

        return {
            "by_node": per_node,
            "aggregates": aggregates,
            "total_nodes": len(self.pools),
        }

    async def find_best_node(
        self, request: ResourceRequest, strategy: str = "least_loaded"
    ) -> Optional[str]:
        """Find best node for allocation.

        Args:
            request: Resource request
            strategy: Selection strategy ("least_loaded", "most_capacity",
                or anything else for a balanced score)

        Returns:
            Best node ID or None when no pool can satisfy the request
        """
        best_node: Optional[str] = None
        best_score: Optional[float] = None

        for node, pool in self.pools.items():
            # Skip nodes that cannot satisfy the request at all.
            check = await pool._check_availability(request)
            if not check["sufficient"]:
                continue

            snapshot = await pool.get_utilization()
            stats = list(snapshot["resources"].values())

            percents = [entry["utilization_percent"] for entry in stats]
            mean_util = sum(percents) / len(percents) if percents else 0

            if strategy == "least_loaded":
                # Higher score = less loaded.
                score = 100 - mean_util
            elif strategy == "most_capacity":
                score = sum(entry["available"] for entry in stats)
            else:  # balanced: mix headroom with raw capacity
                total_capacity = sum(entry["capacity"] for entry in stats)
                score = (100 - mean_util) * 0.5 + total_capacity * 0.5

            # Keep the first node seen with the highest score.
            if best_score is None or score > best_score:
                best_node, best_score = node, score

        return best_node
|