kailash 0.8.4__py3-none-any.whl → 0.8.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79):
  1. kailash/__init__.py +1 -7
  2. kailash/cli/__init__.py +11 -1
  3. kailash/cli/validation_audit.py +570 -0
  4. kailash/core/actors/supervisor.py +1 -1
  5. kailash/core/resilience/circuit_breaker.py +71 -1
  6. kailash/core/resilience/health_monitor.py +172 -0
  7. kailash/edge/compliance.py +33 -0
  8. kailash/edge/consistency.py +609 -0
  9. kailash/edge/coordination/__init__.py +30 -0
  10. kailash/edge/coordination/global_ordering.py +355 -0
  11. kailash/edge/coordination/leader_election.py +217 -0
  12. kailash/edge/coordination/partition_detector.py +296 -0
  13. kailash/edge/coordination/raft.py +485 -0
  14. kailash/edge/discovery.py +63 -1
  15. kailash/edge/migration/__init__.py +19 -0
  16. kailash/edge/migration/edge_migrator.py +832 -0
  17. kailash/edge/monitoring/__init__.py +21 -0
  18. kailash/edge/monitoring/edge_monitor.py +736 -0
  19. kailash/edge/prediction/__init__.py +10 -0
  20. kailash/edge/prediction/predictive_warmer.py +591 -0
  21. kailash/edge/resource/__init__.py +102 -0
  22. kailash/edge/resource/cloud_integration.py +796 -0
  23. kailash/edge/resource/cost_optimizer.py +949 -0
  24. kailash/edge/resource/docker_integration.py +919 -0
  25. kailash/edge/resource/kubernetes_integration.py +893 -0
  26. kailash/edge/resource/platform_integration.py +913 -0
  27. kailash/edge/resource/predictive_scaler.py +959 -0
  28. kailash/edge/resource/resource_analyzer.py +824 -0
  29. kailash/edge/resource/resource_pools.py +610 -0
  30. kailash/integrations/dataflow_edge.py +261 -0
  31. kailash/mcp_server/registry_integration.py +1 -1
  32. kailash/monitoring/__init__.py +18 -0
  33. kailash/monitoring/alerts.py +646 -0
  34. kailash/monitoring/metrics.py +677 -0
  35. kailash/nodes/__init__.py +2 -0
  36. kailash/nodes/ai/semantic_memory.py +2 -2
  37. kailash/nodes/base.py +545 -0
  38. kailash/nodes/edge/__init__.py +36 -0
  39. kailash/nodes/edge/base.py +240 -0
  40. kailash/nodes/edge/cloud_node.py +710 -0
  41. kailash/nodes/edge/coordination.py +239 -0
  42. kailash/nodes/edge/docker_node.py +825 -0
  43. kailash/nodes/edge/edge_data.py +582 -0
  44. kailash/nodes/edge/edge_migration_node.py +392 -0
  45. kailash/nodes/edge/edge_monitoring_node.py +421 -0
  46. kailash/nodes/edge/edge_state.py +673 -0
  47. kailash/nodes/edge/edge_warming_node.py +393 -0
  48. kailash/nodes/edge/kubernetes_node.py +652 -0
  49. kailash/nodes/edge/platform_node.py +766 -0
  50. kailash/nodes/edge/resource_analyzer_node.py +378 -0
  51. kailash/nodes/edge/resource_optimizer_node.py +501 -0
  52. kailash/nodes/edge/resource_scaler_node.py +397 -0
  53. kailash/nodes/ports.py +676 -0
  54. kailash/runtime/local.py +344 -1
  55. kailash/runtime/validation/__init__.py +20 -0
  56. kailash/runtime/validation/connection_context.py +119 -0
  57. kailash/runtime/validation/enhanced_error_formatter.py +202 -0
  58. kailash/runtime/validation/error_categorizer.py +164 -0
  59. kailash/runtime/validation/metrics.py +380 -0
  60. kailash/runtime/validation/performance.py +615 -0
  61. kailash/runtime/validation/suggestion_engine.py +212 -0
  62. kailash/testing/fixtures.py +2 -2
  63. kailash/workflow/builder.py +230 -4
  64. kailash/workflow/contracts.py +418 -0
  65. kailash/workflow/edge_infrastructure.py +369 -0
  66. kailash/workflow/migration.py +3 -3
  67. kailash/workflow/type_inference.py +669 -0
  68. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/METADATA +43 -27
  69. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/RECORD +73 -27
  70. kailash/nexus/__init__.py +0 -21
  71. kailash/nexus/cli/__init__.py +0 -5
  72. kailash/nexus/cli/__main__.py +0 -6
  73. kailash/nexus/cli/main.py +0 -176
  74. kailash/nexus/factory.py +0 -413
  75. kailash/nexus/gateway.py +0 -545
  76. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/WHEEL +0 -0
  77. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/entry_points.txt +0 -0
  78. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/licenses/LICENSE +0 -0
  79. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,949 @@
1
+ """Cost optimizer for intelligent edge resource cost management.
2
+
3
+ This module provides multi-cloud cost optimization, spot instance management,
4
+ reserved capacity planning, and ROI-based allocation decisions.
5
+ """
6
+
7
+ import asyncio
8
+ import json
9
+ import logging
10
+ from collections import defaultdict
11
+ from dataclasses import dataclass, field
12
+ from datetime import datetime, timedelta
13
+ from enum import Enum
14
+ from typing import Any, Dict, List, Optional, Tuple
15
+
16
+ import numpy as np
17
+
18
+
19
class CloudProvider(Enum):
    """Identifiers of the infrastructure providers the cost optimizer knows about.

    Each member's value is the lowercase string form of the provider name,
    which is what gets written out when cost records are serialized.
    Declaration order is significant: iterating the enum yields providers in
    the order listed below.
    """

    # Public cloud vendors.
    AWS = "aws"
    GCP = "gcp"
    AZURE = "azure"
    ALIBABA = "alibaba"
    # Capacity that is not attributed to one of the vendors above.
    EDGE_LOCAL = "edge_local"
27
+
28
+
29
class InstanceType(Enum):
    """Pricing / purchasing model under which a resource is billed.

    The string values double as the keys used when costs are grouped per
    pricing model (for example ``"on_demand"`` and ``"spot"``).
    """

    # Pay-as-you-go capacity with no commitment.
    ON_DEMAND = "on_demand"
    # Discounted capacity billed under the "spot" model.
    SPOT = "spot"
    # Commitment-based pricing models.
    RESERVED = "reserved"
    SAVINGS_PLAN = "savings_plan"
    # Dedicated capacity.
    DEDICATED = "dedicated"
37
+
38
+
39
class OptimizationStrategy(Enum):
    """Goal that steers which cost recommendations are produced.

    The strategy is passed to the optimizer when recommendations are
    generated; each member's value is its lowercase string name.
    """

    # Cost-first, balanced, and performance-first goals.
    MINIMIZE_COST = "minimize_cost"
    BALANCE_COST_PERFORMANCE = "balance_cost_performance"
    MAXIMIZE_PERFORMANCE = "maximize_performance"
    # Goals that trade savings for stability.
    PREDICTABLE_COST = "predictable_cost"
    RISK_AVERSE = "risk_averse"
47
+
48
+
49
@dataclass
class CostMetric:
    """One recorded cost observation for a resource on an edge node.

    The record is a plain value object: nothing is validated or derived on
    construction, so ``total_cost`` is stored exactly as supplied by the
    caller.
    """

    # When the cost was observed.
    timestamp: datetime
    # Edge node the cost is attributed to.
    edge_node: str
    # Resource category, e.g. "cpu" or "memory".
    resource_type: str
    # Provider and pricing model the resource was billed under.
    provider: CloudProvider
    instance_type: InstanceType
    # Billing figures as reported by the caller.
    cost_per_hour: float
    usage_hours: float
    total_cost: float
    currency: str = "USD"
    # Free-form extra data; every instance gets its own dict.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a dictionary view of this metric.

        The timestamp is rendered with ``isoformat()`` and both enum fields
        are replaced by their ``.value``; all other fields are passed through
        unchanged (``metadata`` is the same dict object, not a copy).

        Returns:
            Mapping of field name to serialized value, in field order.
        """
        return dict(
            timestamp=self.timestamp.isoformat(),
            edge_node=self.edge_node,
            resource_type=self.resource_type,
            provider=self.provider.value,
            instance_type=self.instance_type.value,
            cost_per_hour=self.cost_per_hour,
            usage_hours=self.usage_hours,
            total_cost=self.total_cost,
            currency=self.currency,
            metadata=self.metadata,
        )
78
+
79
+
80
@dataclass
class CostOptimization:
    """A single cost-saving recommendation for an edge node.

    Describes the current configuration, the proposed one, and the expected
    effect of switching, together with qualitative effort/risk labels and the
    human-readable reasons behind the proposal.
    """

    # Unique identifier of this recommendation.
    optimization_id: str
    # Edge node the recommendation applies to.
    edge_node: str
    # Free-form descriptions of the present and the proposed configuration.
    current_setup: Dict[str, Any]
    recommended_setup: Dict[str, Any]
    # Expected savings, absolute and as a percentage of the current cost.
    estimated_savings: float
    savings_percentage: float
    # Confidence score attached to the recommendation.
    confidence: float
    implementation_effort: str  # low, medium, high
    risk_level: str  # low, medium, high
    # Explanatory bullet points; every instance gets its own list.
    reasoning: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a dictionary, in declaration order.

        Values are passed through as-is (nested dicts and the reasoning list
        are not copied).

        Returns:
            Mapping of field name to field value.
        """
        exported = (
            "optimization_id",
            "edge_node",
            "current_setup",
            "recommended_setup",
            "estimated_savings",
            "savings_percentage",
            "confidence",
            "implementation_effort",
            "risk_level",
            "reasoning",
        )
        return {name: getattr(self, name) for name in exported}
109
+
110
+
111
@dataclass
class SpotInstanceRecommendation:
    """Proposal to move an edge node's on-demand spend to spot capacity."""

    # Edge node the proposal applies to.
    edge_node: str
    # Present on-demand cost and the cost expected under spot pricing.
    current_on_demand_cost: float
    spot_cost: float
    # Absolute savings expected from the switch.
    potential_savings: float
    # Estimated interruption risk attached to the proposal.
    interruption_risk: float
    # Name of the suggested rollout strategy.
    recommended_strategy: str
    # Free-form description of the fallback behaviour.
    backup_plan: Dict[str, Any]

    def to_dict(self) -> Dict[str, Any]:
        """Return the recommendation as a dictionary.

        Besides the stored fields, the result carries a derived
        ``savings_percentage`` (potential savings relative to the current
        on-demand cost, times 100). It is ``0`` when the on-demand cost is
        not positive, which avoids dividing by zero.

        Returns:
            Mapping of field name to value, with ``savings_percentage``
            inserted after ``potential_savings``.
        """
        if self.current_on_demand_cost > 0:
            pct_saved = self.potential_savings / self.current_on_demand_cost * 100
        else:
            pct_saved = 0

        return {
            "edge_node": self.edge_node,
            "current_on_demand_cost": self.current_on_demand_cost,
            "spot_cost": self.spot_cost,
            "potential_savings": self.potential_savings,
            "savings_percentage": pct_saved,
            "interruption_risk": self.interruption_risk,
            "recommended_strategy": self.recommended_strategy,
            "backup_plan": self.backup_plan,
        }
139
+
140
+
141
@dataclass
class ReservationRecommendation:
    """Reserved capacity recommendation.

    Describes a proposed capacity reservation for one resource type at one
    provider, together with the figures needed to judge it.
    """

    # Resource category the reservation covers, e.g. "cpu".
    resource_type: str
    provider: CloudProvider
    commitment_length: int  # months
    # One-time payment due when the reservation is made.
    upfront_cost: float
    # Recurring cost under the reservation.
    monthly_cost: float
    # On-demand cost the reservation is compared against.
    # NOTE(review): treated as a per-month figure, matching how
    # CostOptimizer._analyze_reservation_opportunity fills it in.
    on_demand_equivalent: float
    # Savings accumulated over the whole commitment.
    total_savings: float
    breakeven_months: int
    utilization_requirement: float

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary.

        In addition to the stored fields the result contains a derived
        ``savings_percentage``. ``total_savings`` spans the whole commitment
        while ``on_demand_equivalent`` is a single month's cost, so the
        on-demand baseline is scaled by ``commitment_length`` before the
        ratio is taken. Dividing by one month's cost alone would overstate a
        12-month commitment's savings twelve-fold.

        Returns:
            Mapping of field name to value; ``provider`` is emitted as its
            enum value. ``savings_percentage`` is ``0`` when the scaled
            baseline is not positive.
        """
        # On-demand spend over the same period that total_savings covers.
        on_demand_baseline = self.on_demand_equivalent * self.commitment_length
        if on_demand_baseline > 0:
            savings_percentage = self.total_savings / on_demand_baseline * 100
        else:
            savings_percentage = 0

        return {
            "resource_type": self.resource_type,
            "provider": self.provider.value,
            "commitment_length": self.commitment_length,
            "upfront_cost": self.upfront_cost,
            "monthly_cost": self.monthly_cost,
            "on_demand_equivalent": self.on_demand_equivalent,
            "total_savings": self.total_savings,
            "savings_percentage": savings_percentage,
            "breakeven_months": self.breakeven_months,
            "utilization_requirement": self.utilization_requirement,
        }
173
+
174
+
175
class CostOptimizer:
    """Multi-cloud cost optimizer for edge resources.

    Collects ``CostMetric`` records, derives spot, reserved-capacity and
    right-sizing recommendations from them, and can re-run the analysis
    periodically in a background asyncio task (see ``start`` / ``stop``).

    Pricing data and resource utilization are simplified placeholders; see
    ``_initialize_pricing_data`` and ``_get_resource_utilization``.
    """

    def __init__(
        self,
        cost_history_days: int = 30,
        optimization_interval: int = 3600,  # 1 hour
        savings_threshold: float = 0.1,  # 10% minimum savings
        risk_tolerance: str = "medium",
    ):
        """Initialize cost optimizer.

        Args:
            cost_history_days: Days of cost history to analyze
            optimization_interval: How often to run optimization, in seconds
            savings_threshold: Minimum savings to recommend, as a fraction
                (0.1 means 10%)
            risk_tolerance: Risk tolerance (low, medium, high)
        """
        self.cost_history_days = cost_history_days
        self.optimization_interval = optimization_interval
        self.savings_threshold = savings_threshold
        self.risk_tolerance = risk_tolerance

        # Cost data storage
        self.cost_metrics: List[CostMetric] = []
        self.provider_pricing: Dict[str, Dict[str, Any]] = {}

        # Optimization history
        self.optimizations: List[CostOptimization] = []
        self.implemented_optimizations: List[str] = []

        # Background task
        self._optimization_task: Optional[asyncio.Task] = None

        self.logger = logging.getLogger(__name__)

        # Initialize default pricing data
        self._initialize_pricing_data()

    async def start(self):
        """Start background optimization.

        Does nothing while a background task is still running. A task that
        has already finished (for example because it was cancelled from
        outside) is replaced, so the optimizer can be started again.
        """
        task = self._optimization_task
        if task is None or task.done():
            self._optimization_task = asyncio.create_task(self._optimization_loop())
            self.logger.info("Cost optimizer started")

    async def stop(self):
        """Stop background optimization and wait for the task to finish."""
        if self._optimization_task:
            self._optimization_task.cancel()
            try:
                await self._optimization_task
            except asyncio.CancelledError:
                pass
            self._optimization_task = None
            self.logger.info("Cost optimizer stopped")

    async def record_cost(self, cost_metric: CostMetric):
        """Record a cost metric and drop metrics older than the history window.

        Args:
            cost_metric: Cost metric to record
        """
        self.cost_metrics.append(cost_metric)

        # Keep only recent history.
        # NOTE(review): the cutoff is a naive local datetime, so metric
        # timestamps are assumed to be naive as well - confirm with callers.
        cutoff = datetime.now() - timedelta(days=self.cost_history_days)
        self.cost_metrics = [m for m in self.cost_metrics if m.timestamp > cutoff]

    async def optimize_costs(
        self,
        strategy: OptimizationStrategy = OptimizationStrategy.BALANCE_COST_PERFORMANCE,
        edge_nodes: Optional[List[str]] = None,
    ) -> List[CostOptimization]:
        """Generate cost optimization recommendations.

        Recommendations that reach the savings threshold are appended to
        ``self.optimizations`` and returned.

        Args:
            strategy: Optimization strategy
            edge_nodes: Specific edge nodes to optimize; ``None`` or an empty
                list means every node that has recorded metrics

        Returns:
            List of cost optimizations
        """
        optimizations = []

        # Get nodes to analyze
        nodes_to_analyze = edge_nodes if edge_nodes else self._get_all_edge_nodes()

        for node in nodes_to_analyze:
            # Analyze current costs
            current_costs = self._analyze_node_costs(node)

            if not current_costs:
                continue

            # Generate optimization recommendations
            node_optimizations = await self._optimize_node_costs(
                node, current_costs, strategy
            )

            optimizations.extend(node_optimizations)

        # Filter by savings threshold (threshold is a fraction, savings a percentage)
        significant_optimizations = [
            opt
            for opt in optimizations
            if opt.savings_percentage >= self.savings_threshold * 100
        ]

        # Store optimizations
        self.optimizations.extend(significant_optimizations)

        return significant_optimizations

    async def get_spot_recommendations(
        self, edge_nodes: Optional[List[str]] = None
    ) -> List[SpotInstanceRecommendation]:
        """Get spot instance recommendations.

        Args:
            edge_nodes: Specific edge nodes to analyze; ``None`` or an empty
                list means every node that has recorded metrics

        Returns:
            List of spot instance recommendations with positive savings
        """
        recommendations = []

        nodes_to_analyze = edge_nodes if edge_nodes else self._get_all_edge_nodes()

        for node in nodes_to_analyze:
            current_costs = self._analyze_node_costs(node)

            if not current_costs:
                continue

            # Check if spot instances would be beneficial
            spot_rec = await self._analyze_spot_opportunity(node, current_costs)

            if spot_rec and spot_rec.potential_savings > 0:
                recommendations.append(spot_rec)

        return recommendations

    async def get_reservation_recommendations(
        self, providers: Optional[List[CloudProvider]] = None
    ) -> List[ReservationRecommendation]:
        """Get reserved capacity recommendations.

        Args:
            providers: Specific providers to analyze; ``None`` or an empty
                list means all members of ``CloudProvider``

        Returns:
            List of reservation recommendations with positive savings
        """
        recommendations = []

        providers_to_analyze = providers if providers else list(CloudProvider)

        for provider in providers_to_analyze:
            # Analyze usage patterns for this provider
            usage_patterns = self._analyze_provider_usage(provider)

            for resource_type, usage in usage_patterns.items():
                # Check if reservation would be beneficial
                reservation_rec = await self._analyze_reservation_opportunity(
                    provider, resource_type, usage
                )

                if reservation_rec and reservation_rec.total_savings > 0:
                    recommendations.append(reservation_rec)

        return recommendations

    async def calculate_roi(
        self, optimization: CostOptimization, implementation_cost: float = 0.0
    ) -> Dict[str, Any]:
        """Calculate ROI for an optimization.

        ``optimization.estimated_savings`` is treated as a monthly figure.

        Args:
            optimization: Cost optimization to analyze
            implementation_cost: One-time implementation cost

        Returns:
            ROI analysis. ``payback_months`` is ``inf`` when there are no
            positive savings. ``roi_percentage`` is ``inf`` only when the
            change is free AND actually saves money; a free change without
            savings reports ``0.0`` rather than an infinite return.
        """
        monthly_savings = optimization.estimated_savings

        # Calculate payback period
        payback_months = (
            implementation_cost / monthly_savings
            if monthly_savings > 0
            else float("inf")
        )

        # Calculate 1-year ROI
        annual_savings = monthly_savings * 12
        if implementation_cost > 0:
            roi_percentage = (
                (annual_savings - implementation_cost) / implementation_cost * 100
            )
        elif annual_savings > 0:
            # Savings at no cost: unbounded return.
            roi_percentage = float("inf")
        else:
            # Nothing invested and nothing gained.
            roi_percentage = 0.0

        # Risk-adjusted ROI
        risk_multiplier = self._get_risk_multiplier(optimization.risk_level)
        risk_adjusted_roi = roi_percentage * risk_multiplier

        return {
            "optimization_id": optimization.optimization_id,
            "monthly_savings": monthly_savings,
            "annual_savings": annual_savings,
            "implementation_cost": implementation_cost,
            "payback_months": payback_months,
            "roi_percentage": roi_percentage,
            "risk_adjusted_roi": risk_adjusted_roi,
            "recommendation": self._get_roi_recommendation(
                roi_percentage, payback_months
            ),
        }

    async def get_cost_forecast(
        self, forecast_months: int = 12, include_optimizations: bool = True
    ) -> Dict[str, Any]:
        """Get cost forecast with and without optimizations.

        The baseline grows linearly at 5% per year from the current monthly
        spend. The optimized series subtracts the savings of the stored
        recommendations, counting each distinct recommendation once even if
        repeated analysis runs stored it several times.

        Args:
            forecast_months: Months to forecast
            include_optimizations: Include optimization impact

        Returns:
            Cost forecast
        """
        # Calculate current monthly spend
        current_monthly = self._calculate_current_monthly_spend()

        # Savings do not depend on the month, so compute them once.
        monthly_savings = (
            sum(opt.estimated_savings for opt in self._latest_optimizations())
            if include_optimizations
            else 0
        )

        # Project baseline costs
        baseline_forecast = []
        optimized_forecast = []

        for month in range(forecast_months):
            # Apply growth assumptions
            growth_factor = 1 + (0.05 * month / 12)  # 5% annual growth
            baseline_cost = current_monthly * growth_factor
            baseline_forecast.append(baseline_cost)

            # Apply optimizations if requested
            if include_optimizations:
                optimized_forecast.append(max(0, baseline_cost - monthly_savings))
            else:
                optimized_forecast.append(baseline_cost)

        total_baseline = sum(baseline_forecast)
        total_optimized = sum(optimized_forecast)
        total_savings = total_baseline - total_optimized

        return {
            "forecast_months": forecast_months,
            "current_monthly_spend": current_monthly,
            "baseline_forecast": baseline_forecast,
            "optimized_forecast": optimized_forecast if include_optimizations else None,
            "total_baseline_cost": total_baseline,
            "total_optimized_cost": total_optimized,
            "total_projected_savings": total_savings,
            "savings_percentage": (
                (total_savings / total_baseline * 100) if total_baseline > 0 else 0
            ),
        }

    def _latest_optimizations(self) -> List[CostOptimization]:
        """Return stored optimizations with repeated recommendations collapsed.

        Every run of ``optimize_costs`` appends its results to
        ``self.optimizations`` under fresh ids, so the same recommendation
        accumulates once per run. Entries are keyed by edge node, resource
        type and recommended instance type; the most recent entry per key
        wins. Entries whose recommended setup lacks those keys are kept
        individually, keyed by their id.
        """
        latest: Dict[Tuple[str, ...], CostOptimization] = {}
        for opt in self.optimizations:
            setup = opt.recommended_setup
            if "resource_type" in setup and "instance_type" in setup:
                key = (
                    opt.edge_node,
                    str(setup["resource_type"]),
                    str(setup["instance_type"]),
                )
            else:
                key = (opt.optimization_id,)
            latest[key] = opt
        return list(latest.values())

    async def _optimization_loop(self):
        """Background optimization loop.

        Sleeps for ``optimization_interval`` seconds, runs ``optimize_costs``
        and repeats until cancelled. Other errors are logged and the loop
        continues.
        """
        while True:
            try:
                await asyncio.sleep(self.optimization_interval)

                # Run automatic optimization
                optimizations = await self.optimize_costs()

                if optimizations:
                    self.logger.info(
                        f"Found {len(optimizations)} cost optimization opportunities"
                    )

            except asyncio.CancelledError:
                break
            except Exception as e:
                # logger.exception keeps the traceback, which a plain error() drops.
                self.logger.exception(f"Optimization loop error: {e}")

    def _initialize_pricing_data(self):
        """Initialize default pricing data for providers."""
        # Simplified pricing data - in production, this would come from APIs
        self.provider_pricing = {
            CloudProvider.AWS.value: {
                "cpu": {
                    "on_demand": 0.10,  # per vCPU hour
                    "spot": 0.03,
                    "reserved_1yr": 0.07,
                    "reserved_3yr": 0.05,
                },
                "memory": {
                    "on_demand": 0.01,  # per GB hour
                    "spot": 0.003,
                    "reserved_1yr": 0.007,
                    "reserved_3yr": 0.005,
                },
                "storage": {"on_demand": 0.10, "reserved": 0.08},  # per GB month
            },
            CloudProvider.GCP.value: {
                "cpu": {
                    "on_demand": 0.09,
                    "preemptible": 0.025,
                    "committed_1yr": 0.065,
                    "committed_3yr": 0.045,
                },
                "memory": {
                    "on_demand": 0.009,
                    "preemptible": 0.0025,
                    "committed_1yr": 0.0065,
                    "committed_3yr": 0.0045,
                },
            },
            CloudProvider.AZURE.value: {
                "cpu": {
                    "on_demand": 0.11,
                    "spot": 0.035,
                    "reserved_1yr": 0.075,
                    "reserved_3yr": 0.055,
                },
                "memory": {
                    "on_demand": 0.011,
                    "spot": 0.0035,
                    "reserved_1yr": 0.0075,
                    "reserved_3yr": 0.0055,
                },
            },
        }

    def _get_all_edge_nodes(self) -> List[str]:
        """Get all edge nodes from cost metrics.

        Nodes are returned once each, in the order they first appear in the
        metrics, so results are stable between runs.
        """
        return list(dict.fromkeys(metric.edge_node for metric in self.cost_metrics))

    def _analyze_node_costs(self, edge_node: str) -> Dict[str, Any]:
        """Analyze costs for a specific edge node.

        Returns:
            Empty dict when the node has no metrics; otherwise a summary with
            the total cost, a ``resource_type -> instance_type -> cost``
            breakdown, the number of metrics and the newest timestamp.
        """
        node_metrics = [m for m in self.cost_metrics if m.edge_node == edge_node]

        if not node_metrics:
            return {}

        # Group by resource type and instance type
        cost_breakdown = defaultdict(lambda: defaultdict(float))

        for metric in node_metrics:
            cost_breakdown[metric.resource_type][
                metric.instance_type.value
            ] += metric.total_cost

        # Calculate totals
        total_cost = sum(
            sum(instance_costs.values()) for instance_costs in cost_breakdown.values()
        )

        return {
            "edge_node": edge_node,
            "total_cost": total_cost,
            # Plain dicts at both levels so later lookups cannot create keys.
            "cost_breakdown": {
                resource: dict(costs) for resource, costs in cost_breakdown.items()
            },
            "metrics_count": len(node_metrics),
            "last_updated": max(m.timestamp for m in node_metrics),
        }

    async def _optimize_node_costs(
        self,
        edge_node: str,
        current_costs: Dict[str, Any],
        strategy: OptimizationStrategy,
    ) -> List[CostOptimization]:
        """Optimize costs for a specific node.

        Runs the spot, reserved and right-sizing checks for every resource
        type in ``current_costs["cost_breakdown"]`` and collects the results.
        """
        optimizations = []

        # Analyze each resource type
        for resource_type, instance_costs in current_costs["cost_breakdown"].items():
            # Check for spot instance opportunities
            spot_opt = await self._check_spot_optimization(
                edge_node, resource_type, instance_costs, strategy
            )
            if spot_opt:
                optimizations.append(spot_opt)

            # Check for reserved instance opportunities
            reserved_opt = await self._check_reserved_optimization(
                edge_node, resource_type, instance_costs, strategy
            )
            if reserved_opt:
                optimizations.append(reserved_opt)

            # Check for right-sizing opportunities
            rightsizing_opt = await self._check_rightsizing_optimization(
                edge_node, resource_type, instance_costs, strategy
            )
            if rightsizing_opt:
                optimizations.append(rightsizing_opt)

        return optimizations

    async def _check_spot_optimization(
        self,
        edge_node: str,
        resource_type: str,
        instance_costs: Dict[str, float],
        strategy: OptimizationStrategy,
    ) -> Optional[CostOptimization]:
        """Check for spot instance optimization opportunities.

        Returns:
            A recommendation to move on-demand spend to spot, or ``None``
            when there is no positive on-demand spend, the savings are below
            the threshold, or the strategy rejects the interruption risk.
        """
        on_demand_cost = instance_costs.get("on_demand", 0)

        # No (or non-positive) on-demand spend: nothing to convert.
        if on_demand_cost <= 0:
            return None

        # Get spot pricing
        spot_cost = on_demand_cost * 0.3  # Assume 70% savings
        potential_savings = on_demand_cost - spot_cost

        # Check if savings meet threshold
        savings_percentage = potential_savings / on_demand_cost * 100

        if savings_percentage < self.savings_threshold * 100:
            return None

        # Assess risk based on workload characteristics
        interruption_risk = self._assess_interruption_risk(edge_node, resource_type)

        # Strategy-based risk tolerance
        if strategy == OptimizationStrategy.RISK_AVERSE and interruption_risk > 0.3:
            return None

        if (
            strategy == OptimizationStrategy.PREDICTABLE_COST
            and interruption_risk > 0.1
        ):
            return None

        return CostOptimization(
            optimization_id=f"spot_{edge_node}_{resource_type}_{datetime.now().timestamp()}",
            edge_node=edge_node,
            current_setup={
                "instance_type": "on_demand",
                "cost": on_demand_cost,
                "resource_type": resource_type,
            },
            recommended_setup={
                "instance_type": "spot",
                "cost": spot_cost,
                "resource_type": resource_type,
                "interruption_risk": interruption_risk,
            },
            estimated_savings=potential_savings,
            savings_percentage=savings_percentage,
            confidence=0.8,
            implementation_effort="low",
            risk_level="medium" if interruption_risk > 0.2 else "low",
            reasoning=[
                f"Spot instances offer {savings_percentage:.1f}% cost savings",
                f"Interruption risk is {interruption_risk:.1%}",
                "Workload appears suitable for spot instances",
            ],
        )

    async def _check_reserved_optimization(
        self,
        edge_node: str,
        resource_type: str,
        instance_costs: Dict[str, float],
        strategy: OptimizationStrategy,
    ) -> Optional[CostOptimization]:
        """Check for reserved instance optimization opportunities.

        Returns:
            A 1-year reservation recommendation, or ``None`` when there is no
            positive on-demand spend, usage is not consistent enough, or the
            savings are below the threshold.
        """
        on_demand_cost = instance_costs.get("on_demand", 0)

        # No (or non-positive) on-demand spend: nothing to reserve.
        if on_demand_cost <= 0:
            return None

        # Calculate usage consistency
        usage_consistency = self._calculate_usage_consistency(edge_node, resource_type)

        # Reserved instances only make sense for consistent usage
        if usage_consistency < 0.7:
            return None

        # Calculate reserved cost (assume 30% savings for 1-year)
        reserved_cost = on_demand_cost * 0.7
        potential_savings = on_demand_cost - reserved_cost
        savings_percentage = potential_savings / on_demand_cost * 100

        if savings_percentage < self.savings_threshold * 100:
            return None

        return CostOptimization(
            optimization_id=f"reserved_{edge_node}_{resource_type}_{datetime.now().timestamp()}",
            edge_node=edge_node,
            current_setup={
                "instance_type": "on_demand",
                "cost": on_demand_cost,
                "resource_type": resource_type,
            },
            recommended_setup={
                "instance_type": "reserved_1yr",
                "cost": reserved_cost,
                "resource_type": resource_type,
                "commitment": "1 year",
            },
            estimated_savings=potential_savings,
            savings_percentage=savings_percentage,
            confidence=0.9,
            implementation_effort="low",
            risk_level="low",
            reasoning=[
                f"Reserved instances offer {savings_percentage:.1f}% cost savings",
                f"Usage consistency is {usage_consistency:.1%}",
                "1-year commitment recommended based on usage patterns",
            ],
        )

    async def _check_rightsizing_optimization(
        self,
        edge_node: str,
        resource_type: str,
        instance_costs: Dict[str, float],
        strategy: OptimizationStrategy,
    ) -> Optional[CostOptimization]:
        """Check for right-sizing optimization opportunities.

        Returns:
            A right-sizing recommendation, or ``None`` when the resource is
            well utilized, has no positive cost, or the savings are below the
            threshold.
        """
        # Analyze actual resource utilization
        utilization = self._get_resource_utilization(edge_node, resource_type)

        if utilization is None or utilization > 0.8:  # Well utilized
            return None

        current_cost = sum(instance_costs.values())

        # Zero-cost resources have nothing to save and would otherwise cause
        # a division by zero in the percentage below.
        if current_cost <= 0:
            return None

        # Calculate right-sized cost
        utilization_factor = max(utilization * 1.2, 0.5)  # 20% buffer, minimum 50%
        rightsized_cost = current_cost * utilization_factor
        potential_savings = current_cost - rightsized_cost
        savings_percentage = potential_savings / current_cost * 100

        if savings_percentage < self.savings_threshold * 100:
            return None

        # Risk assessment
        risk_level = "low" if utilization < 0.5 else "medium"

        return CostOptimization(
            optimization_id=f"rightsize_{edge_node}_{resource_type}_{datetime.now().timestamp()}",
            edge_node=edge_node,
            current_setup={
                "instance_type": "current",
                "cost": current_cost,
                "resource_type": resource_type,
                "utilization": utilization,
            },
            recommended_setup={
                "instance_type": "rightsized",
                "cost": rightsized_cost,
                "resource_type": resource_type,
                "target_utilization": utilization_factor,
            },
            estimated_savings=potential_savings,
            savings_percentage=savings_percentage,
            confidence=0.7,
            implementation_effort="medium",
            risk_level=risk_level,
            reasoning=[
                f"Current utilization is only {utilization:.1%}",
                f"Right-sizing can save {savings_percentage:.1f}%",
                "Recommend gradual capacity reduction with monitoring",
            ],
        )

    def _assess_interruption_risk(self, edge_node: str, resource_type: str) -> float:
        """Assess interruption risk for spot instances.

        Returns:
            Estimated risk as a fraction, capped at 0.5. ``edge_node`` is
            currently not used by the estimate.
        """
        # Simplified risk assessment
        # In production, this would analyze historical interruption data

        base_risk = 0.15  # 15% base interruption risk

        # Adjust based on resource type
        if resource_type == "gpu":
            base_risk *= 1.5  # GPUs have higher interruption risk
        elif resource_type == "memory":
            base_risk *= 0.8  # Memory instances more stable

        # Adjust based on time patterns
        if self._is_peak_usage_time():
            base_risk *= 1.3

        return min(base_risk, 0.5)  # Cap at 50%

    def _calculate_usage_consistency(self, edge_node: str, resource_type: str) -> float:
        """Calculate usage consistency for reserved instance evaluation.

        Usage hours are summed per calendar day and consistency is
        ``1 - coefficient of variation`` of those daily totals, floored at 0.

        Returns:
            A value in [0, 1]; 0.0 when fewer than seven distinct days of
            data exist or the mean daily usage is zero.
        """
        # Calculate daily usage
        daily_usage = defaultdict(float)
        for metric in self.cost_metrics:
            if metric.edge_node == edge_node and metric.resource_type == resource_type:
                daily_usage[metric.timestamp.date()] += metric.usage_hours

        # Need at least a week of data. Count days, not metrics: several
        # metrics recorded on one day would otherwise yield a single daily
        # total with zero variance and a spurious consistency of 1.0.
        if len(daily_usage) < 7:
            return 0.0

        usage_values = list(daily_usage.values())

        # Calculate coefficient of variation
        mean_usage = np.mean(usage_values)

        if mean_usage == 0:
            return 0.0

        cv = np.std(usage_values) / mean_usage

        # Lower CV = higher consistency; return a plain Python float
        return float(max(0.0, 1 - cv))

    def _get_resource_utilization(
        self, edge_node: str, resource_type: str
    ) -> Optional[float]:
        """Get average resource utilization.

        Returns:
            A simulated utilization in [0.3, 0.9] that is the same for a
            given node/resource pair on every call and in every process.
        """
        # This would integrate with monitoring data
        # For now, return simulated utilization

        import random

        # A private generator seeded from the string itself: this leaves the
        # global random state untouched and, unlike hash(), is not affected
        # by per-process string hash randomization.
        rng = random.Random(f"{edge_node}_{resource_type}")
        return rng.uniform(0.3, 0.9)

    def _is_peak_usage_time(self) -> bool:
        """Check if current local time is within peak usage hours."""
        hour = datetime.now().hour
        # Assume peak hours are 9 AM to 6 PM (hours 9 through 17 inclusive)
        return 9 <= hour < 18

    async def _analyze_spot_opportunity(
        self, edge_node: str, current_costs: Dict[str, Any]
    ) -> Optional[SpotInstanceRecommendation]:
        """Analyze spot instance opportunity for a node.

        Returns:
            A recommendation covering all on-demand spend of the node, or
            ``None`` when the node has no on-demand spend.
        """
        total_on_demand = sum(
            costs.get("on_demand", 0)
            for costs in current_costs["cost_breakdown"].values()
        )

        if total_on_demand == 0:
            return None

        # Calculate potential spot savings
        spot_cost = total_on_demand * 0.3  # 70% savings
        potential_savings = total_on_demand - spot_cost

        # Assess interruption risk
        interruption_risk = self._assess_interruption_risk(edge_node, "mixed")

        # Determine strategy
        if interruption_risk < 0.1:
            strategy = "full_spot"
        elif interruption_risk < 0.3:
            strategy = "mixed_spot_on_demand"
        else:
            strategy = "diversified_spot"

        # Create backup plan
        backup_plan = {
            "strategy": "auto_fallback",
            "fallback_instances": "on_demand",
            "max_interruptions_per_day": 2,
            "auto_restart": True,
        }

        return SpotInstanceRecommendation(
            edge_node=edge_node,
            current_on_demand_cost=total_on_demand,
            spot_cost=spot_cost,
            potential_savings=potential_savings,
            interruption_risk=interruption_risk,
            recommended_strategy=strategy,
            backup_plan=backup_plan,
        )

    def _analyze_provider_usage(
        self, provider: CloudProvider
    ) -> Dict[str, Dict[str, Any]]:
        """Analyze usage patterns for a provider.

        Returns:
            Per resource type, the summed usage hours and cost of all stored
            metrics for the provider. The ``consistency`` entry is
            initialized to 0 and not computed here.
        """
        provider_metrics = [m for m in self.cost_metrics if m.provider == provider]

        usage_patterns = defaultdict(
            lambda: {"total_hours": 0, "consistency": 0, "monthly_cost": 0}
        )

        for metric in provider_metrics:
            patterns = usage_patterns[metric.resource_type]
            patterns["total_hours"] += metric.usage_hours
            patterns["monthly_cost"] += metric.total_cost

        return dict(usage_patterns)

    async def _analyze_reservation_opportunity(
        self, provider: CloudProvider, resource_type: str, usage: Dict[str, Any]
    ) -> Optional[ReservationRecommendation]:
        """Analyze reservation opportunity for a resource.

        Args:
            provider: Provider the usage belongs to
            resource_type: Resource category being evaluated
            usage: Aggregate with ``total_hours`` and ``monthly_cost`` keys

        Returns:
            A 12-month reservation recommendation, or ``None`` when usage is
            below 500 hours or the reservation does not pay off within a year.
        """
        import math

        monthly_hours = usage["total_hours"]
        monthly_cost = usage["monthly_cost"]

        # Need significant usage to justify reservation
        if monthly_hours < 500:  # Less than ~70% of month
            return None

        # Calculate reservation costs (monthly_hours is at least 500 here)
        hourly_rate = monthly_cost / monthly_hours

        # 1-year reservation (30% savings)
        reserved_hourly = hourly_rate * 0.7
        upfront_cost = hourly_rate * monthly_hours * 0.3  # 30% upfront
        monthly_reserved_cost = reserved_hourly * monthly_hours

        monthly_saving = monthly_cost - monthly_reserved_cost
        total_savings = monthly_saving * 12
        breakeven_months = upfront_cost / monthly_saving if monthly_saving > 0 else 12

        if total_savings <= 0 or breakeven_months > 12:
            return None

        return ReservationRecommendation(
            resource_type=resource_type,
            provider=provider,
            commitment_length=12,
            upfront_cost=upfront_cost,
            monthly_cost=monthly_reserved_cost,
            on_demand_equivalent=monthly_cost,
            total_savings=total_savings,
            # Whole months needed to recover the upfront cost. Rounding first
            # absorbs float noise (0.9999999999999996 must count as 1 month,
            # not be truncated to 0).
            breakeven_months=math.ceil(round(breakeven_months, 6)),
            utilization_requirement=0.7,
        )

    def _get_risk_multiplier(self, risk_level: str) -> float:
        """Get risk multiplier for ROI calculations.

        Unknown risk levels fall back to the "medium" multiplier.
        """
        multipliers = {"low": 0.95, "medium": 0.85, "high": 0.7}
        return multipliers.get(risk_level, 0.85)

    def _get_roi_recommendation(
        self, roi_percentage: float, payback_months: float
    ) -> str:
        """Get ROI-based recommendation label from ROI and payback period."""
        if roi_percentage > 100 and payback_months < 6:
            return "Strongly Recommended"
        elif roi_percentage > 50 and payback_months < 12:
            return "Recommended"
        elif roi_percentage > 20 and payback_months < 18:
            return "Consider"
        else:
            return "Not Recommended"

    def _calculate_current_monthly_spend(self) -> float:
        """Calculate current monthly spend as the total cost of the last 30 days."""
        # Get last 30 days of metrics
        cutoff = datetime.now() - timedelta(days=30)
        recent_metrics = [m for m in self.cost_metrics if m.timestamp > cutoff]

        return sum(m.total_cost for m in recent_metrics)