kailash 0.8.3__py3-none-any.whl → 0.8.5__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- kailash/__init__.py +1 -7
- kailash/cli/__init__.py +11 -1
- kailash/cli/validation_audit.py +570 -0
- kailash/core/actors/supervisor.py +1 -1
- kailash/core/resilience/circuit_breaker.py +71 -1
- kailash/core/resilience/health_monitor.py +172 -0
- kailash/edge/compliance.py +33 -0
- kailash/edge/consistency.py +609 -0
- kailash/edge/coordination/__init__.py +30 -0
- kailash/edge/coordination/global_ordering.py +355 -0
- kailash/edge/coordination/leader_election.py +217 -0
- kailash/edge/coordination/partition_detector.py +296 -0
- kailash/edge/coordination/raft.py +485 -0
- kailash/edge/discovery.py +63 -1
- kailash/edge/migration/__init__.py +19 -0
- kailash/edge/migration/edge_migrator.py +832 -0
- kailash/edge/monitoring/__init__.py +21 -0
- kailash/edge/monitoring/edge_monitor.py +736 -0
- kailash/edge/prediction/__init__.py +10 -0
- kailash/edge/prediction/predictive_warmer.py +591 -0
- kailash/edge/resource/__init__.py +102 -0
- kailash/edge/resource/cloud_integration.py +796 -0
- kailash/edge/resource/cost_optimizer.py +949 -0
- kailash/edge/resource/docker_integration.py +919 -0
- kailash/edge/resource/kubernetes_integration.py +893 -0
- kailash/edge/resource/platform_integration.py +913 -0
- kailash/edge/resource/predictive_scaler.py +959 -0
- kailash/edge/resource/resource_analyzer.py +824 -0
- kailash/edge/resource/resource_pools.py +610 -0
- kailash/integrations/dataflow_edge.py +261 -0
- kailash/mcp_server/registry_integration.py +1 -1
- kailash/monitoring/__init__.py +18 -0
- kailash/monitoring/alerts.py +646 -0
- kailash/monitoring/metrics.py +677 -0
- kailash/nodes/__init__.py +2 -0
- kailash/nodes/ai/__init__.py +17 -0
- kailash/nodes/ai/a2a.py +1914 -43
- kailash/nodes/ai/a2a_backup.py +1807 -0
- kailash/nodes/ai/hybrid_search.py +972 -0
- kailash/nodes/ai/semantic_memory.py +558 -0
- kailash/nodes/ai/streaming_analytics.py +947 -0
- kailash/nodes/base.py +545 -0
- kailash/nodes/edge/__init__.py +36 -0
- kailash/nodes/edge/base.py +240 -0
- kailash/nodes/edge/cloud_node.py +710 -0
- kailash/nodes/edge/coordination.py +239 -0
- kailash/nodes/edge/docker_node.py +825 -0
- kailash/nodes/edge/edge_data.py +582 -0
- kailash/nodes/edge/edge_migration_node.py +392 -0
- kailash/nodes/edge/edge_monitoring_node.py +421 -0
- kailash/nodes/edge/edge_state.py +673 -0
- kailash/nodes/edge/edge_warming_node.py +393 -0
- kailash/nodes/edge/kubernetes_node.py +652 -0
- kailash/nodes/edge/platform_node.py +766 -0
- kailash/nodes/edge/resource_analyzer_node.py +378 -0
- kailash/nodes/edge/resource_optimizer_node.py +501 -0
- kailash/nodes/edge/resource_scaler_node.py +397 -0
- kailash/nodes/ports.py +676 -0
- kailash/runtime/local.py +344 -1
- kailash/runtime/validation/__init__.py +20 -0
- kailash/runtime/validation/connection_context.py +119 -0
- kailash/runtime/validation/enhanced_error_formatter.py +202 -0
- kailash/runtime/validation/error_categorizer.py +164 -0
- kailash/runtime/validation/metrics.py +380 -0
- kailash/runtime/validation/performance.py +615 -0
- kailash/runtime/validation/suggestion_engine.py +212 -0
- kailash/testing/fixtures.py +2 -2
- kailash/workflow/builder.py +234 -8
- kailash/workflow/contracts.py +418 -0
- kailash/workflow/edge_infrastructure.py +369 -0
- kailash/workflow/migration.py +3 -3
- kailash/workflow/type_inference.py +669 -0
- {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/METADATA +44 -27
- {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/RECORD +78 -28
- kailash/nexus/__init__.py +0 -21
- kailash/nexus/cli/__init__.py +0 -5
- kailash/nexus/cli/__main__.py +0 -6
- kailash/nexus/cli/main.py +0 -176
- kailash/nexus/factory.py +0 -413
- kailash/nexus/gateway.py +0 -545
- {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/WHEEL +0 -0
- {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/entry_points.txt +0 -0
- {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/top_level.txt +0 -0
kailash/edge/resource/predictive_scaler.py
@@ -0,0 +1,959 @@
"""Predictive scaler for intelligent edge resource scaling.

This module provides ML-based demand prediction and preemptive scaling
decisions for edge computing resources.
"""

import asyncio
import json
import logging
from collections import defaultdict, deque
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
from scipy import stats

# For time series forecasting
try:
    from statsmodels.tsa.arima.model import ARIMA
    from statsmodels.tsa.holtwinters import ExponentialSmoothing

    STATSMODELS_AVAILABLE = True
except ImportError:
    STATSMODELS_AVAILABLE = False


class ScalingStrategy(Enum):
    """Strategies for predictive scaling."""

    REACTIVE = "reactive"  # Scale based on current metrics
    PREDICTIVE = "predictive"  # Scale based on predictions
    SCHEDULED = "scheduled"  # Scale based on time patterns
    HYBRID = "hybrid"  # Combine multiple strategies
    AGGRESSIVE = "aggressive"  # Scale early and generously
    CONSERVATIVE = "conservative"  # Scale cautiously


class PredictionHorizon(Enum):
    """Time horizons for predictions."""

    IMMEDIATE = 300  # 5 minutes
    SHORT_TERM = 900  # 15 minutes
    MEDIUM_TERM = 3600  # 1 hour
    LONG_TERM = 86400  # 24 hours


@dataclass
class ScalingPrediction:
    """Prediction for resource scaling needs."""

    timestamp: datetime
    horizon: PredictionHorizon
    resource_type: str
    edge_node: str
    current_usage: float
    predicted_usage: float
    confidence: float
    recommended_capacity: float
    scaling_action: str  # scale_up, scale_down, maintain
    urgency: str  # immediate, soon, planned
    reasoning: List[str] = field(default_factory=list)

    @property
    def scaling_factor(self) -> float:
        """Calculate scaling factor."""
        if self.current_usage > 0:
            return self.predicted_usage / self.current_usage
        return 1.0

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        return {
            "timestamp": self.timestamp.isoformat(),
            "horizon": self.horizon.value,
            "resource_type": self.resource_type,
            "edge_node": self.edge_node,
            "current_usage": self.current_usage,
            "predicted_usage": self.predicted_usage,
            "confidence": self.confidence,
            "recommended_capacity": self.recommended_capacity,
            "scaling_action": self.scaling_action,
            "scaling_factor": self.scaling_factor,
            "urgency": self.urgency,
            "reasoning": self.reasoning,
        }


@dataclass
class ScalingDecision:
    """Scaling decision with execution plan."""

    decision_id: str
    predictions: List[ScalingPrediction]
    strategy: ScalingStrategy
    action_plan: Dict[str, Any]
    estimated_cost: float
    risk_assessment: Dict[str, Any]
    approval_required: bool

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        return {
            "decision_id": self.decision_id,
            "predictions": [p.to_dict() for p in self.predictions],
            "strategy": self.strategy.value,
            "action_plan": self.action_plan,
            "estimated_cost": self.estimated_cost,
            "risk_assessment": self.risk_assessment,
            "approval_required": self.approval_required,
        }


class PredictiveScaler:
    """ML-based predictive scaler for edge resources."""

    def __init__(
        self,
        prediction_window: int = 3600,  # 1 hour of history for predictions
        update_interval: int = 60,  # Update predictions every minute
        confidence_threshold: float = 0.7,
        scale_up_threshold: float = 0.8,  # 80% utilization triggers scale up
        scale_down_threshold: float = 0.3,  # 30% utilization triggers scale down
        min_data_points: int = 30,
    ):
        """Initialize predictive scaler.

        Args:
            prediction_window: Historical data window for predictions
            update_interval: How often to update predictions
            confidence_threshold: Minimum confidence for actions
            scale_up_threshold: Utilization threshold for scaling up
            scale_down_threshold: Utilization threshold for scaling down
            min_data_points: Minimum data points for predictions
        """
        self.prediction_window = prediction_window
        self.update_interval = update_interval
        self.confidence_threshold = confidence_threshold
        self.scale_up_threshold = scale_up_threshold
        self.scale_down_threshold = scale_down_threshold
        self.min_data_points = min_data_points

        # Historical data storage
        self.usage_history: Dict[str, deque] = defaultdict(lambda: deque(maxlen=1000))

        # Prediction models cache
        self.models: Dict[str, Any] = {}
        self.last_model_update: Dict[str, datetime] = {}

        # Scaling history for learning
        self.scaling_history: List[Dict[str, Any]] = []

        # Background task
        self._prediction_task: Optional[asyncio.Task] = None

        self.logger = logging.getLogger(__name__)

    async def start(self):
        """Start background prediction updates."""
        if not self._prediction_task:
            self._prediction_task = asyncio.create_task(self._prediction_loop())
            self.logger.info("Predictive scaler started")

    async def stop(self):
        """Stop background prediction updates."""
        if self._prediction_task:
            self._prediction_task.cancel()
            try:
                await self._prediction_task
            except asyncio.CancelledError:
                pass
            self._prediction_task = None
            self.logger.info("Predictive scaler stopped")

    async def record_usage(
        self,
        edge_node: str,
        resource_type: str,
        usage: float,
        capacity: float,
        timestamp: Optional[datetime] = None,
    ):
        """Record resource usage data point.

        Args:
            edge_node: Edge node identifier
            resource_type: Type of resource
            usage: Current usage amount
            capacity: Total capacity
            timestamp: Usage timestamp
        """
        if timestamp is None:
            timestamp = datetime.now()

        key = f"{edge_node}:{resource_type}"

        # Store normalized utilization (0-100%)
        utilization = (usage / capacity * 100) if capacity > 0 else 0

        self.usage_history[key].append(
            {
                "timestamp": timestamp,
                "usage": usage,
                "capacity": capacity,
                "utilization": utilization,
            }
        )

    async def predict_scaling_needs(
        self,
        strategy: ScalingStrategy = ScalingStrategy.HYBRID,
        horizons: Optional[List[PredictionHorizon]] = None,
    ) -> List[ScalingDecision]:
        """Predict scaling needs across all resources.

        Args:
            strategy: Scaling strategy to use
            horizons: Prediction horizons to consider

        Returns:
            List of scaling decisions
        """
        if horizons is None:
            horizons = [PredictionHorizon.IMMEDIATE, PredictionHorizon.SHORT_TERM]

        decisions = []

        # Group predictions by edge node
        predictions_by_node: Dict[str, List[ScalingPrediction]] = defaultdict(list)

        for key, history in self.usage_history.items():
            if len(history) < self.min_data_points:
                continue

            edge_node, resource_type = key.split(":")

            # Generate predictions for each horizon
            for horizon in horizons:
                prediction = await self._predict_for_resource(
                    edge_node, resource_type, history, horizon
                )

                if prediction and prediction.confidence >= self.confidence_threshold:
                    predictions_by_node[edge_node].append(prediction)

        # Create scaling decisions
        for edge_node, predictions in predictions_by_node.items():
            if not predictions:
                continue

            decision = await self._create_scaling_decision(
                edge_node, predictions, strategy
            )

            if decision:
                decisions.append(decision)

        return decisions

    async def get_resource_forecast(
        self, edge_node: str, resource_type: str, forecast_minutes: int = 60
    ) -> Dict[str, Any]:
        """Get detailed forecast for a specific resource.

        Args:
            edge_node: Edge node identifier
            resource_type: Type of resource
            forecast_minutes: Minutes to forecast

        Returns:
            Forecast details
        """
        key = f"{edge_node}:{resource_type}"
        history = self.usage_history.get(key, [])

        if len(history) < self.min_data_points:
            return {
                "error": "Insufficient data for forecast",
                "data_points": len(history),
                "required": self.min_data_points,
            }

        # Prepare time series data
        timestamps = [h["timestamp"] for h in history]
        utilizations = [h["utilization"] for h in history]

        # Generate forecast
        forecast = await self._generate_forecast(
            timestamps, utilizations, forecast_minutes
        )

        return {
            "edge_node": edge_node,
            "resource_type": resource_type,
            "current_utilization": utilizations[-1] if utilizations else 0,
            "forecast": forecast,
            "confidence_intervals": self._calculate_confidence_intervals(forecast),
        }

    async def evaluate_scaling_decision(
        self,
        decision_id: str,
        actual_usage: Dict[str, float],
        feedback: Optional[str] = None,
    ):
        """Evaluate a past scaling decision for learning.

        Args:
            decision_id: Decision to evaluate
            actual_usage: Actual usage that occurred
            feedback: Optional human feedback
        """
        # Find the decision in history
        decision_record = None
        for record in self.scaling_history:
            if record.get("decision_id") == decision_id:
                decision_record = record
                break

        if not decision_record:
            self.logger.warning(f"Decision {decision_id} not found in history")
            return

        # Calculate prediction accuracy
        predictions = decision_record.get("predictions", [])
        accuracy_scores = []

        for pred in predictions:
            key = f"{pred['edge_node']}:{pred['resource_type']}"
            if key in actual_usage:
                predicted = pred["predicted_usage"]
                actual = actual_usage[key]

                # Calculate error percentage
                error = abs(predicted - actual) / actual if actual > 0 else 0
                accuracy = max(0, 1 - error)
                accuracy_scores.append(accuracy)

        # Update decision record
        decision_record["evaluation"] = {
            "actual_usage": actual_usage,
            "accuracy_scores": accuracy_scores,
            "average_accuracy": np.mean(accuracy_scores) if accuracy_scores else 0,
            "feedback": feedback,
            "evaluated_at": datetime.now().isoformat(),
        }

        # Learn from the evaluation
        await self._update_models_from_feedback(decision_record)

    async def _prediction_loop(self):
        """Background loop for updating predictions."""
        while True:
            try:
                await asyncio.sleep(self.update_interval)

                # Update models if needed
                await self._update_prediction_models()

                # Clean old history
                await self._cleanup_old_history()

            except asyncio.CancelledError:
                break
            except Exception as e:
                self.logger.error(f"Prediction loop error: {e}")

    async def _predict_for_resource(
        self,
        edge_node: str,
        resource_type: str,
        history: deque,
        horizon: PredictionHorizon,
    ) -> Optional[ScalingPrediction]:
        """Generate prediction for a specific resource.

        Args:
            edge_node: Edge node identifier
            resource_type: Type of resource
            history: Usage history
            horizon: Prediction horizon

        Returns:
            Scaling prediction or None
        """
        # Extract time series
        utilizations = [h["utilization"] for h in history]
        timestamps = [h["timestamp"] for h in history]

        if not utilizations:
            return None

        current_usage = utilizations[-1]
        current_capacity = history[-1]["capacity"]

        # Predict future usage
        predicted_usage, confidence = await self._predict_usage(
            utilizations, timestamps, horizon.value
        )

        # Determine scaling action
        if predicted_usage > self.scale_up_threshold * 100:
            scaling_action = "scale_up"
            urgency = "immediate" if horizon == PredictionHorizon.IMMEDIATE else "soon"
        elif predicted_usage < self.scale_down_threshold * 100:
            scaling_action = "scale_down"
            urgency = "planned"
        else:
            scaling_action = "maintain"
            urgency = "none"

        # Calculate recommended capacity
        if scaling_action == "scale_up":
            # Add 20% buffer above predicted usage
            recommended_capacity = current_capacity * (predicted_usage / 100) * 1.2
        elif scaling_action == "scale_down":
            # Keep 50% buffer above predicted usage
            recommended_capacity = current_capacity * (predicted_usage / 100) * 1.5
        else:
            recommended_capacity = current_capacity

        # Build reasoning
        reasoning = []
        if scaling_action == "scale_up":
            reasoning.append(
                f"Predicted utilization ({predicted_usage:.1f}%) exceeds threshold ({self.scale_up_threshold * 100}%)"
            )
        elif scaling_action == "scale_down":
            reasoning.append(
                f"Predicted utilization ({predicted_usage:.1f}%) below threshold ({self.scale_down_threshold * 100}%)"
            )

        # Add trend analysis
        trend = self._analyze_trend(utilizations)
        if trend > 0.1:
            reasoning.append(f"Upward trend detected ({trend:.2f}% per interval)")
        elif trend < -0.1:
            reasoning.append(f"Downward trend detected ({trend:.2f}% per interval)")

        return ScalingPrediction(
            timestamp=datetime.now(),
            horizon=horizon,
            resource_type=resource_type,
            edge_node=edge_node,
            current_usage=current_usage,
            predicted_usage=predicted_usage,
            confidence=confidence,
            recommended_capacity=recommended_capacity,
            scaling_action=scaling_action,
            urgency=urgency,
            reasoning=reasoning,
        )

    async def _predict_usage(
        self,
        utilizations: List[float],
        timestamps: List[datetime],
        horizon_seconds: int,
    ) -> Tuple[float, float]:
        """Predict future usage using time series models.

        Args:
            utilizations: Historical utilization values
            timestamps: Timestamps for utilization values
            horizon_seconds: Prediction horizon in seconds

        Returns:
            Tuple of (predicted_usage, confidence)
        """
        if not STATSMODELS_AVAILABLE:
            # Fallback to simple linear regression
            return self._simple_prediction(utilizations, timestamps, horizon_seconds)

        try:
            # Use ARIMA for time series prediction
            model = ARIMA(utilizations, order=(1, 1, 1))
            model_fit = model.fit()

            # Calculate steps ahead
            interval_seconds = (
                (timestamps[-1] - timestamps[-2]).total_seconds()
                if len(timestamps) > 1
                else 60
            )
            steps_ahead = int(horizon_seconds / interval_seconds)

            # Make prediction
            forecast = model_fit.forecast(steps=steps_ahead)
            predicted_usage = float(forecast[-1])

            # Calculate confidence based on model metrics
            confidence = 0.8  # Base confidence for ARIMA

            # Adjust confidence based on data quality
            if len(utilizations) > 100:
                confidence += 0.1
            if np.std(utilizations) < 10:  # Low variance
                confidence += 0.05

            return max(0, min(100, predicted_usage)), min(1.0, confidence)

        except Exception as e:
            self.logger.warning(
                f"ARIMA prediction failed: {e}, falling back to simple prediction"
            )
            return self._simple_prediction(utilizations, timestamps, horizon_seconds)

    def _simple_prediction(
        self,
        utilizations: List[float],
        timestamps: List[datetime],
        horizon_seconds: int,
    ) -> Tuple[float, float]:
        """Simple prediction using linear regression.

        Args:
            utilizations: Historical utilization values
            timestamps: Timestamps for utilization values
            horizon_seconds: Prediction horizon in seconds

        Returns:
            Tuple of (predicted_usage, confidence)
        """
        if len(utilizations) < 2:
            return utilizations[-1] if utilizations else 0, 0.5

        # Convert timestamps to seconds from first timestamp
        time_values = [(t - timestamps[0]).total_seconds() for t in timestamps]

        # Linear regression
        slope, intercept, r_value, _, _ = stats.linregress(time_values, utilizations)

        # Predict future value
        future_time = time_values[-1] + horizon_seconds
        predicted_usage = intercept + slope * future_time

        # Confidence based on R-squared
        confidence = abs(r_value) ** 2

        # Add exponential smoothing for better short-term predictions
        if len(utilizations) > 5:
            alpha = 0.3  # Smoothing factor
            smoothed = utilizations[-1]
            for i in range(len(utilizations) - 2, -1, -1):
                smoothed = alpha * utilizations[i] + (1 - alpha) * smoothed

            # Blend linear prediction with smoothed value
            predicted_usage = 0.7 * predicted_usage + 0.3 * smoothed
            confidence = (
                confidence * 0.9
            )  # Slightly reduce confidence for blended prediction

        return max(0, min(100, predicted_usage)), confidence

    def _analyze_trend(self, utilizations: List[float]) -> float:
        """Analyze trend in utilization data.

        Args:
            utilizations: Utilization values

        Returns:
            Trend slope (percentage per interval)
        """
        if len(utilizations) < 3:
            return 0.0

        # Use recent data for trend
        recent = utilizations[-10:] if len(utilizations) > 10 else utilizations

        # Linear regression for trend
        x = list(range(len(recent)))
        slope, _, _, _, _ = stats.linregress(x, recent)

        return slope

    async def _create_scaling_decision(
        self,
        edge_node: str,
        predictions: List[ScalingPrediction],
        strategy: ScalingStrategy,
    ) -> Optional[ScalingDecision]:
        """Create scaling decision from predictions.

        Args:
            edge_node: Edge node identifier
            predictions: List of predictions
            strategy: Scaling strategy

        Returns:
            Scaling decision or None
        """
        # Filter predictions by strategy
        relevant_predictions = []

        if strategy == ScalingStrategy.REACTIVE:
            # Only immediate predictions
            relevant_predictions = [
                p for p in predictions if p.horizon == PredictionHorizon.IMMEDIATE
            ]
        elif strategy == ScalingStrategy.PREDICTIVE:
            # All predictions
            relevant_predictions = predictions
        elif strategy == ScalingStrategy.SCHEDULED:
            # Focus on longer-term predictions
            relevant_predictions = [
                p
                for p in predictions
                if p.horizon
                in [PredictionHorizon.MEDIUM_TERM, PredictionHorizon.LONG_TERM]
            ]
        elif strategy == ScalingStrategy.HYBRID:
            # Use all predictions with weighting
            relevant_predictions = predictions

        if not relevant_predictions:
            return None

        # Check if any action needed
        actions_needed = [
            p for p in relevant_predictions if p.scaling_action != "maintain"
        ]

        if not actions_needed:
            return None

        # Create action plan
        action_plan = await self._create_action_plan(
            edge_node, actions_needed, strategy
        )

        # Estimate cost
        estimated_cost = self._estimate_scaling_cost(action_plan)

        # Risk assessment
        risk_assessment = self._assess_scaling_risk(actions_needed)

        # Determine if approval needed
        approval_required = (
            estimated_cost > 100  # Cost threshold
            or risk_assessment.get("risk_level", "low") == "high"
            or strategy == ScalingStrategy.AGGRESSIVE
        )

        decision = ScalingDecision(
            decision_id=f"{edge_node}_{datetime.now().timestamp()}",
            predictions=actions_needed,
            strategy=strategy,
            action_plan=action_plan,
            estimated_cost=estimated_cost,
            risk_assessment=risk_assessment,
            approval_required=approval_required,
        )

        # Store in history
        self.scaling_history.append(decision.to_dict())

        return decision

    async def _create_action_plan(
        self,
        edge_node: str,
        predictions: List[ScalingPrediction],
        strategy: ScalingStrategy,
    ) -> Dict[str, Any]:
        """Create detailed action plan.

        Args:
            edge_node: Edge node identifier
            predictions: Predictions requiring action
            strategy: Scaling strategy

        Returns:
            Action plan
        """
        actions = []

        # Group by resource type
        by_resource = defaultdict(list)
        for pred in predictions:
            by_resource[pred.resource_type].append(pred)

        for resource_type, preds in by_resource.items():
            # Find most urgent prediction
            most_urgent = min(preds, key=lambda p: p.horizon.value)

            if most_urgent.scaling_action == "scale_up":
                actions.append(
                    {
                        "action": "increase_capacity",
                        "resource_type": resource_type,
                        "current_capacity": most_urgent.current_usage
                        / (most_urgent.predicted_usage / 100),
                        "target_capacity": most_urgent.recommended_capacity,
                        "urgency": most_urgent.urgency,
                        "execute_at": (
                            datetime.now() + timedelta(seconds=60)
                            if most_urgent.urgency == "immediate"
                            else datetime.now()
                            + timedelta(seconds=most_urgent.horizon.value / 2)
                        ).isoformat(),
                    }
                )
            elif most_urgent.scaling_action == "scale_down":
                actions.append(
                    {
                        "action": "decrease_capacity",
                        "resource_type": resource_type,
                        "current_capacity": most_urgent.current_usage
                        / (most_urgent.predicted_usage / 100),
                        "target_capacity": most_urgent.recommended_capacity,
                        "urgency": most_urgent.urgency,
                        "execute_at": (
                            datetime.now()
                            + timedelta(seconds=most_urgent.horizon.value)
                        ).isoformat(),
                    }
                )

        return {
            "edge_node": edge_node,
            "actions": actions,
            "strategy": strategy.value,
            "created_at": datetime.now().isoformat(),
        }

    def _estimate_scaling_cost(self, action_plan: Dict[str, Any]) -> float:
        """Estimate cost of scaling actions.

        Args:
            action_plan: Action plan

        Returns:
            Estimated cost
        """
        total_cost = 0.0

        # Simple cost model
        resource_costs = {
            "cpu": 0.1,  # Per core per hour
            "memory": 0.01,  # Per GB per hour
            "gpu": 1.0,  # Per GPU per hour
            "storage": 0.05,  # Per GB per month
            "network": 0.02,  # Per Mbps
        }

        for action in action_plan.get("actions", []):
            resource_type = action["resource_type"]

            if action["action"] == "increase_capacity":
                capacity_increase = (
                    action["target_capacity"] - action["current_capacity"]
                )
                cost_per_unit = resource_costs.get(resource_type, 0.05)

                # Estimate hours until scale down (assume 4 hours)
                hours = 4
                total_cost += capacity_increase * cost_per_unit * hours

        return round(total_cost, 2)

    def _assess_scaling_risk(
        self, predictions: List[ScalingPrediction]
    ) -> Dict[str, Any]:
        """Assess risk of scaling actions.

        Args:
            predictions: Scaling predictions

        Returns:
            Risk assessment
        """
        risks = []

        # Check for conflicting predictions
        scale_up_count = sum(1 for p in predictions if p.scaling_action == "scale_up")
        scale_down_count = sum(
            1 for p in predictions if p.scaling_action == "scale_down"
        )

        if scale_up_count > 0 and scale_down_count > 0:
            risks.append(
                {
                    "type": "conflicting_predictions",
                    "severity": "medium",
                    "description": "Both scale up and scale down predicted",
                }
            )

        # Check confidence levels
        low_confidence = [p for p in predictions if p.confidence < 0.6]
        if low_confidence:
            risks.append(
                {
                    "type": "low_confidence",
                    "severity": "low",
                    "description": f"{len(low_confidence)} predictions with low confidence",
                }
            )

        # Check for aggressive scaling
        high_scale_factors = [p for p in predictions if p.scaling_factor > 2.0]
        if high_scale_factors:
            risks.append(
                {
                    "type": "aggressive_scaling",
                    "severity": "high",
                    "description": f"{len(high_scale_factors)} predictions require >2x scaling",
                }
            )

        # Determine overall risk level
        if any(r["severity"] == "high" for r in risks):
            risk_level = "high"
        elif any(r["severity"] == "medium" for r in risks):
            risk_level = "medium"
        else:
            risk_level = "low"

        return {
            "risk_level": risk_level,
            "risks": risks,
            "mitigation_suggestions": self._get_mitigation_suggestions(risks),
        }

    def _get_mitigation_suggestions(self, risks: List[Dict[str, Any]]) -> List[str]:
        """Get risk mitigation suggestions.

        Args:
            risks: Identified risks

        Returns:
            Mitigation suggestions
        """
        suggestions = []

        for risk in risks:
            if risk["type"] == "conflicting_predictions":
                suggestions.append(
                    "Review resource allocation patterns and consider phased scaling"
                )
            elif risk["type"] == "low_confidence":
                suggestions.append(
                    "Collect more historical data before aggressive scaling"
                )
            elif risk["type"] == "aggressive_scaling":
                suggestions.append(
                    "Consider gradual scaling with monitoring checkpoints"
                )

        return suggestions

    async def _update_prediction_models(self):
        """Update prediction models based on new data."""
        for key in self.usage_history:
            # Check if model needs update
            last_update = self.last_model_update.get(key)
            if last_update and (datetime.now() - last_update).total_seconds() < 3600:
                continue  # Update models hourly

            # Update model for this resource
            # In production, this would retrain ML models
            self.models[key] = {"updated": datetime.now().isoformat()}
            self.last_model_update[key] = datetime.now()

    async def _cleanup_old_history(self):
        """Clean up old historical data."""
        cutoff = datetime.now() - timedelta(seconds=self.prediction_window * 2)

        for key, history in self.usage_history.items():
            # Remove old entries
            while history and history[0]["timestamp"] < cutoff:
                history.popleft()

    async def _update_models_from_feedback(self, decision_record: Dict[str, Any]):
        """Update models based on decision evaluation.

        Args:
            decision_record: Evaluated decision record
        """
        evaluation = decision_record.get("evaluation", {})
        accuracy = evaluation.get("average_accuracy", 0)

        # Simple learning: adjust thresholds based on accuracy
        if accuracy < 0.7:  # Poor prediction
            # Make predictions more conservative
            self.confidence_threshold = min(0.9, self.confidence_threshold + 0.05)
            self.logger.info(
                f"Adjusted confidence threshold to {self.confidence_threshold}"
            )
        elif accuracy > 0.9:  # Good prediction
            # Can be slightly less conservative
            self.confidence_threshold = max(0.6, self.confidence_threshold - 0.02)

    def _calculate_confidence_intervals(
        self, forecast: List[float]
    ) -> Dict[str, List[float]]:
        """Calculate confidence intervals for forecast.

        Args:
            forecast: Forecast values

        Returns:
            Confidence intervals
        """
        # Simple confidence intervals based on historical variance
        std_dev = np.std(forecast) if len(forecast) > 1 else 5.0

        return {
            "lower_95": [max(0, v - 1.96 * std_dev) for v in forecast],
            "upper_95": [min(100, v + 1.96 * std_dev) for v in forecast],
            "lower_68": [max(0, v - std_dev) for v in forecast],
            "upper_68": [min(100, v + std_dev) for v in forecast],
        }

    async def _generate_forecast(
        self, timestamps: List[datetime], values: List[float], forecast_minutes: int
    ) -> List[Dict[str, Any]]:
        """Generate detailed forecast.

        Args:
            timestamps: Historical timestamps
            values: Historical values
            forecast_minutes: Minutes to forecast

        Returns:
            Forecast points
        """
        if not timestamps or not values:
            return []

        # Calculate interval
        interval_seconds = (
            (timestamps[-1] - timestamps[-2]).total_seconds()
            if len(timestamps) > 1
            else 60
        )
        points_to_forecast = int(forecast_minutes * 60 / interval_seconds)

        forecast_points = []

        for i in range(1, points_to_forecast + 1):
            future_time = timestamps[-1] + timedelta(seconds=interval_seconds * i)

            # Predict value
            predicted_value, confidence = await self._predict_usage(
                values, timestamps, interval_seconds * i
            )

            forecast_points.append(
                {
                    "timestamp": future_time.isoformat(),
                    "value": predicted_value,
                    "confidence": confidence,
                    "minutes_ahead": i * interval_seconds / 60,
                }
            )

        return forecast_points