kailash 0.8.3__py3-none-any.whl → 0.8.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. kailash/__init__.py +1 -7
  2. kailash/cli/__init__.py +11 -1
  3. kailash/cli/validation_audit.py +570 -0
  4. kailash/core/actors/supervisor.py +1 -1
  5. kailash/core/resilience/circuit_breaker.py +71 -1
  6. kailash/core/resilience/health_monitor.py +172 -0
  7. kailash/edge/compliance.py +33 -0
  8. kailash/edge/consistency.py +609 -0
  9. kailash/edge/coordination/__init__.py +30 -0
  10. kailash/edge/coordination/global_ordering.py +355 -0
  11. kailash/edge/coordination/leader_election.py +217 -0
  12. kailash/edge/coordination/partition_detector.py +296 -0
  13. kailash/edge/coordination/raft.py +485 -0
  14. kailash/edge/discovery.py +63 -1
  15. kailash/edge/migration/__init__.py +19 -0
  16. kailash/edge/migration/edge_migrator.py +832 -0
  17. kailash/edge/monitoring/__init__.py +21 -0
  18. kailash/edge/monitoring/edge_monitor.py +736 -0
  19. kailash/edge/prediction/__init__.py +10 -0
  20. kailash/edge/prediction/predictive_warmer.py +591 -0
  21. kailash/edge/resource/__init__.py +102 -0
  22. kailash/edge/resource/cloud_integration.py +796 -0
  23. kailash/edge/resource/cost_optimizer.py +949 -0
  24. kailash/edge/resource/docker_integration.py +919 -0
  25. kailash/edge/resource/kubernetes_integration.py +893 -0
  26. kailash/edge/resource/platform_integration.py +913 -0
  27. kailash/edge/resource/predictive_scaler.py +959 -0
  28. kailash/edge/resource/resource_analyzer.py +824 -0
  29. kailash/edge/resource/resource_pools.py +610 -0
  30. kailash/integrations/dataflow_edge.py +261 -0
  31. kailash/mcp_server/registry_integration.py +1 -1
  32. kailash/monitoring/__init__.py +18 -0
  33. kailash/monitoring/alerts.py +646 -0
  34. kailash/monitoring/metrics.py +677 -0
  35. kailash/nodes/__init__.py +2 -0
  36. kailash/nodes/ai/__init__.py +17 -0
  37. kailash/nodes/ai/a2a.py +1914 -43
  38. kailash/nodes/ai/a2a_backup.py +1807 -0
  39. kailash/nodes/ai/hybrid_search.py +972 -0
  40. kailash/nodes/ai/semantic_memory.py +558 -0
  41. kailash/nodes/ai/streaming_analytics.py +947 -0
  42. kailash/nodes/base.py +545 -0
  43. kailash/nodes/edge/__init__.py +36 -0
  44. kailash/nodes/edge/base.py +240 -0
  45. kailash/nodes/edge/cloud_node.py +710 -0
  46. kailash/nodes/edge/coordination.py +239 -0
  47. kailash/nodes/edge/docker_node.py +825 -0
  48. kailash/nodes/edge/edge_data.py +582 -0
  49. kailash/nodes/edge/edge_migration_node.py +392 -0
  50. kailash/nodes/edge/edge_monitoring_node.py +421 -0
  51. kailash/nodes/edge/edge_state.py +673 -0
  52. kailash/nodes/edge/edge_warming_node.py +393 -0
  53. kailash/nodes/edge/kubernetes_node.py +652 -0
  54. kailash/nodes/edge/platform_node.py +766 -0
  55. kailash/nodes/edge/resource_analyzer_node.py +378 -0
  56. kailash/nodes/edge/resource_optimizer_node.py +501 -0
  57. kailash/nodes/edge/resource_scaler_node.py +397 -0
  58. kailash/nodes/ports.py +676 -0
  59. kailash/runtime/local.py +344 -1
  60. kailash/runtime/validation/__init__.py +20 -0
  61. kailash/runtime/validation/connection_context.py +119 -0
  62. kailash/runtime/validation/enhanced_error_formatter.py +202 -0
  63. kailash/runtime/validation/error_categorizer.py +164 -0
  64. kailash/runtime/validation/metrics.py +380 -0
  65. kailash/runtime/validation/performance.py +615 -0
  66. kailash/runtime/validation/suggestion_engine.py +212 -0
  67. kailash/testing/fixtures.py +2 -2
  68. kailash/workflow/builder.py +234 -8
  69. kailash/workflow/contracts.py +418 -0
  70. kailash/workflow/edge_infrastructure.py +369 -0
  71. kailash/workflow/migration.py +3 -3
  72. kailash/workflow/type_inference.py +669 -0
  73. {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/METADATA +44 -27
  74. {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/RECORD +78 -28
  75. kailash/nexus/__init__.py +0 -21
  76. kailash/nexus/cli/__init__.py +0 -5
  77. kailash/nexus/cli/__main__.py +0 -6
  78. kailash/nexus/cli/main.py +0 -176
  79. kailash/nexus/factory.py +0 -413
  80. kailash/nexus/gateway.py +0 -545
  81. {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/WHEEL +0 -0
  82. {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/entry_points.txt +0 -0
  83. {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/licenses/LICENSE +0 -0
  84. {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/top_level.txt +0 -0
kailash/monitoring/alerts.py (added)
@@ -0,0 +1,646 @@
+ """
+ Alerting system for monitoring validation failures and security violations.
+
+ Provides configurable alerting rules, notification channels, and alert management
+ for critical events in the Kailash SDK validation system.
+ """
+
+ import json
+ import logging
+ import smtplib
+ import threading
+ import time
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass, field
+ from datetime import UTC, datetime, timedelta
+ from enum import Enum
+ from typing import Any, Callable, Dict, List, Optional, Union
+
+ import requests
+
+ from .metrics import MetricSeries, MetricsRegistry
+
+ logger = logging.getLogger(__name__)
+
+
+ class AlertSeverity(Enum):
+     """Alert severity levels."""
+
+     INFO = "info"
+     WARNING = "warning"
+     ERROR = "error"
+     CRITICAL = "critical"
+
+
+ class AlertStatus(Enum):
+     """Alert status."""
+
+     PENDING = "pending"
+     FIRING = "firing"
+     RESOLVED = "resolved"
+     SILENCED = "silenced"
+
+
+ @dataclass
+ class Alert:
+     """Alert instance."""
+
+     id: str
+     rule_name: str
+     severity: AlertSeverity
+     title: str
+     description: str
+     labels: Dict[str, str] = field(default_factory=dict)
+     annotations: Dict[str, str] = field(default_factory=dict)
+     status: AlertStatus = AlertStatus.PENDING
+     created_at: datetime = field(default_factory=lambda: datetime.now(UTC))
+     fired_at: Optional[datetime] = None
+     resolved_at: Optional[datetime] = None
+     last_notification: Optional[datetime] = None
+     notification_count: int = 0
+
+     def fire(self):
+         """Mark alert as firing."""
+         if self.status != AlertStatus.FIRING:
+             self.status = AlertStatus.FIRING
+             self.fired_at = datetime.now(UTC)
+
+     def resolve(self):
+         """Mark alert as resolved."""
+         if self.status == AlertStatus.FIRING:
+             self.status = AlertStatus.RESOLVED
+             self.resolved_at = datetime.now(UTC)
+
+     def silence(self):
+         """Silence the alert."""
+         self.status = AlertStatus.SILENCED
+
+     def should_notify(self, notification_interval: timedelta) -> bool:
+         """Check if alert should send notification."""
+         if self.status != AlertStatus.FIRING:
+             return False
+
+         if self.last_notification is None:
+             return True
+
+         return datetime.now(UTC) - self.last_notification >= notification_interval
+
+     def mark_notified(self):
+         """Mark that notification was sent."""
+         self.last_notification = datetime.now(UTC)
+         self.notification_count += 1
+
+
+ @dataclass
+ class AlertRule:
+     """Alert rule configuration."""
+
+     name: str
+     description: str
+     severity: AlertSeverity
+     metric_name: str
+     condition: str  # e.g., "> 10", "< 0.95", "== 0"
+     threshold: Union[int, float]
+     time_window: timedelta = timedelta(minutes=5)
+     evaluation_interval: timedelta = timedelta(minutes=1)
+     notification_interval: timedelta = timedelta(minutes=15)
+     labels: Dict[str, str] = field(default_factory=dict)
+     annotations: Dict[str, str] = field(default_factory=dict)
+     enabled: bool = True
+
+     def evaluate(self, metric_series: MetricSeries) -> bool:
+         """Evaluate if alert condition is met.
+
+         Args:
+             metric_series: Metric series to evaluate
+
+         Returns:
+             True if alert condition is met
+         """
+         if not self.enabled:
+             return False
+
+         # Get metric value over time window
+         if self.condition.startswith("rate"):
+             # Rate-based condition
+             value = metric_series.get_rate(self.time_window)
+         elif self.condition.startswith("avg"):
+             # Average-based condition
+             value = metric_series.get_average(self.time_window)
+         elif self.condition.startswith("max"):
+             # Maximum-based condition
+             value = metric_series.get_max(self.time_window)
+         else:
+             # Latest value condition
+             value = metric_series.get_latest_value()
+
+         if value is None:
+             return False
+
+         # Evaluate condition
+         if "> " in self.condition:
+             return value > self.threshold
+         elif "< " in self.condition:
+             return value < self.threshold
+         elif ">= " in self.condition:
+             return value >= self.threshold
+         elif "<= " in self.condition:
+             return value <= self.threshold
+         elif "== " in self.condition:
+             return value == self.threshold
+         elif "!= " in self.condition:
+             return value != self.threshold
+         else:
+             logger.warning(f"Unknown condition format: {self.condition}")
+             return False
+
+
+ class NotificationChannel(ABC):
+     """Base class for notification channels."""
+
+     @abstractmethod
+     def send_notification(self, alert: Alert, context: Dict[str, Any]) -> bool:
+         """Send notification for alert.
+
+         Args:
+             alert: Alert to send notification for
+             context: Additional context information
+
+         Returns:
+             True if notification was sent successfully
+         """
+         pass
+
+
+ class LogNotificationChannel(NotificationChannel):
+     """Log-based notification channel."""
+
+     def __init__(self, log_level: str = "ERROR"):
+         """Initialize log notification channel.
+
+         Args:
+             log_level: Log level for notifications
+         """
+         self.log_level = getattr(logging, log_level.upper())
+
+     def send_notification(self, alert: Alert, context: Dict[str, Any]) -> bool:
+         """Send notification via logging."""
+         message = (
+             f"ALERT [{alert.severity.value.upper()}] {alert.title}: {alert.description}"
+         )
+         logger.log(self.log_level, message)
+         return True
+
+
+ class EmailNotificationChannel(NotificationChannel):
+     """Email notification channel."""
+
+     def __init__(
+         self,
+         smtp_host: str,
+         smtp_port: int,
+         username: str,
+         password: str,
+         from_email: str,
+         to_emails: List[str],
+         use_tls: bool = True,
+     ):
+         """Initialize email notification channel.
+
+         Args:
+             smtp_host: SMTP server host
+             smtp_port: SMTP server port
+             username: SMTP username
+             password: SMTP password
+             from_email: From email address
+             to_emails: List of recipient email addresses
+             use_tls: Whether to use TLS
+         """
+         self.smtp_host = smtp_host
+         self.smtp_port = smtp_port
+         self.username = username
+         self.password = password
+         self.from_email = from_email
+         self.to_emails = to_emails
+         self.use_tls = use_tls
+
+     def send_notification(self, alert: Alert, context: Dict[str, Any]) -> bool:
+         """Send notification via email."""
+         try:
+             # Correct stdlib class names are MIMEMultipart/MIMEText
+             from email.mime.multipart import MIMEMultipart
+             from email.mime.text import MIMEText
+
+             msg = MIMEMultipart()
+             msg["From"] = self.from_email
+             msg["To"] = ", ".join(self.to_emails)
+             msg["Subject"] = f"[{alert.severity.value.upper()}] {alert.title}"
+
+             body = self._format_email_body(alert, context)
+             msg.attach(MIMEText(body, "html"))
+
+             server = smtplib.SMTP(self.smtp_host, self.smtp_port)
+             if self.use_tls:
+                 server.starttls()
+             server.login(self.username, self.password)
+             server.sendmail(self.from_email, self.to_emails, msg.as_string())
+             server.quit()
+
+             return True
+         except Exception as e:
+             logger.error(f"Failed to send email notification: {e}")
+             return False
+
+     def _format_email_body(self, alert: Alert, context: Dict[str, Any]) -> str:
+         """Format email body for alert."""
+         return f"""
+         <html>
+         <body>
+             <h2>Kailash SDK Alert: {alert.title}</h2>
+             <p><strong>Severity:</strong> {alert.severity.value.upper()}</p>
+             <p><strong>Status:</strong> {alert.status.value}</p>
+             <p><strong>Description:</strong> {alert.description}</p>
+             <p><strong>Created:</strong> {alert.created_at.isoformat()}</p>
+
+             <h3>Labels:</h3>
+             <ul>
+             {"".join(f"<li><strong>{k}:</strong> {v}</li>" for k, v in alert.labels.items())}
+             </ul>
+
+             <h3>Context:</h3>
+             <ul>
+             {"".join(f"<li><strong>{k}:</strong> {v}</li>" for k, v in context.items())}
+             </ul>
+         </body>
+         </html>
+         """
+
+
+ class SlackNotificationChannel(NotificationChannel):
+     """Slack notification channel."""
+
+     def __init__(self, webhook_url: str, channel: str = "#alerts"):
+         """Initialize Slack notification channel.
+
+         Args:
+             webhook_url: Slack webhook URL
+             channel: Slack channel to send alerts to
+         """
+         self.webhook_url = webhook_url
+         self.channel = channel
+
+     def send_notification(self, alert: Alert, context: Dict[str, Any]) -> bool:
+         """Send notification via Slack."""
+         try:
+             color_map = {
+                 AlertSeverity.INFO: "good",
+                 AlertSeverity.WARNING: "warning",
+                 AlertSeverity.ERROR: "danger",
+                 AlertSeverity.CRITICAL: "danger",
+             }
+
+             payload = {
+                 "channel": self.channel,
+                 "username": "Kailash SDK Monitor",
+                 "icon_emoji": ":warning:",
+                 "attachments": [
+                     {
+                         "color": color_map.get(alert.severity, "danger"),
+                         "title": f"{alert.severity.value.upper()}: {alert.title}",
+                         "text": alert.description,
+                         "fields": [
+                             {
+                                 "title": "Status",
+                                 "value": alert.status.value,
+                                 "short": True,
+                             },
+                             {
+                                 "title": "Created",
+                                 "value": alert.created_at.isoformat(),
+                                 "short": True,
+                             },
+                         ]
+                         + [
+                             {"title": k, "value": str(v), "short": True}
+                             for k, v in {**alert.labels, **context}.items()
+                         ],
+                         "ts": int(alert.created_at.timestamp()),
+                     }
+                 ],
+             }
+
+             response = requests.post(self.webhook_url, json=payload, timeout=10)
+             response.raise_for_status()
+             return True
+         except Exception as e:
+             logger.error(f"Failed to send Slack notification: {e}")
+             return False
+
+
+ class WebhookNotificationChannel(NotificationChannel):
+     """Generic webhook notification channel."""
+
+     def __init__(self, webhook_url: str, headers: Optional[Dict[str, str]] = None):
+         """Initialize webhook notification channel.
+
+         Args:
+             webhook_url: Webhook URL
+             headers: Optional HTTP headers
+         """
+         self.webhook_url = webhook_url
+         self.headers = headers or {}
+
+     def send_notification(self, alert: Alert, context: Dict[str, Any]) -> bool:
+         """Send notification via webhook."""
+         try:
+             payload = {
+                 "alert": {
+                     "id": alert.id,
+                     "rule_name": alert.rule_name,
+                     "severity": alert.severity.value,
+                     "status": alert.status.value,
+                     "title": alert.title,
+                     "description": alert.description,
+                     "labels": alert.labels,
+                     "annotations": alert.annotations,
+                     "created_at": alert.created_at.isoformat(),
+                     "fired_at": alert.fired_at.isoformat() if alert.fired_at else None,
+                 },
+                 "context": context,
+             }
+
+             response = requests.post(
+                 self.webhook_url, json=payload, headers=self.headers, timeout=10
+             )
+             response.raise_for_status()
+             return True
+         except Exception as e:
+             logger.error(f"Failed to send webhook notification: {e}")
+             return False
+
+
+ class AlertManager:
+     """Alert manager for handling alerting rules and notifications."""
+
+     def __init__(self, metrics_registry: MetricsRegistry):
+         """Initialize alert manager.
+
+         Args:
+             metrics_registry: Metrics registry to monitor
+         """
+         self.metrics_registry = metrics_registry
+         self.rules: Dict[str, AlertRule] = {}
+         self.alerts: Dict[str, Alert] = {}
+         self.notification_channels: List[NotificationChannel] = []
+         self._lock = threading.RLock()
+         self._running = False
+         self._thread: Optional[threading.Thread] = None
+
+     def add_rule(self, rule: AlertRule):
+         """Add an alerting rule.
+
+         Args:
+             rule: AlertRule to add
+         """
+         with self._lock:
+             self.rules[rule.name] = rule
+
+     def remove_rule(self, rule_name: str):
+         """Remove an alerting rule.
+
+         Args:
+             rule_name: Name of rule to remove
+         """
+         with self._lock:
+             if rule_name in self.rules:
+                 del self.rules[rule_name]
+
+     def add_notification_channel(self, channel: NotificationChannel):
+         """Add a notification channel.
+
+         Args:
+             channel: NotificationChannel to add
+         """
+         with self._lock:
+             self.notification_channels.append(channel)
+
+     def start(self):
+         """Start the alert manager."""
+         with self._lock:
+             if self._running:
+                 return
+
+             self._running = True
+             self._thread = threading.Thread(target=self._evaluation_loop, daemon=True)
+             self._thread.start()
+             logger.info("Alert manager started")
+
+     def stop(self):
+         """Stop the alert manager."""
+         with self._lock:
+             self._running = False
+             if self._thread:
+                 self._thread.join(timeout=5)
+             logger.info("Alert manager stopped")
+
+     def _evaluation_loop(self):
+         """Main evaluation loop for alert rules."""
+         while self._running:
+             try:
+                 self._evaluate_rules()
+                 self._process_notifications()
+                 time.sleep(10)  # Evaluate every 10 seconds
+             except Exception as e:
+                 logger.error(f"Error in alert evaluation loop: {e}")
+
+     def _evaluate_rules(self):
+         """Evaluate all alert rules."""
+         with self._lock:
+             for rule in self.rules.values():
+                 if not rule.enabled:
+                     continue
+
+                 try:
+                     # Find matching metrics
+                     for (
+                         collector_name,
+                         collector,
+                     ) in self.metrics_registry.get_all_collectors().items():
+                         metric_series = collector.get_metric(rule.metric_name)
+                         if metric_series:
+                             self._evaluate_rule(rule, metric_series, collector_name)
+                 except Exception as e:
+                     logger.error(f"Error evaluating rule {rule.name}: {e}")
+
+     def _evaluate_rule(
+         self, rule: AlertRule, metric_series: MetricSeries, collector_name: str
+     ):
+         """Evaluate a single rule against a metric series."""
+         alert_id = f"{rule.name}_{collector_name}"
+
+         # Check if condition is met
+         condition_met = rule.evaluate(metric_series)
+
+         if condition_met:
+             # Create or update alert
+             if alert_id not in self.alerts:
+                 alert = Alert(
+                     id=alert_id,
+                     rule_name=rule.name,
+                     severity=rule.severity,
+                     title=f"{rule.name} ({collector_name})",
+                     description=rule.description,
+                     labels={
+                         **rule.labels,
+                         "collector": collector_name,
+                         "metric": rule.metric_name,
+                     },
+                     annotations=rule.annotations,
+                 )
+                 self.alerts[alert_id] = alert
+
+             # Fire the alert
+             self.alerts[alert_id].fire()
+         else:
+             # Resolve alert if it exists and is firing
+             if (
+                 alert_id in self.alerts
+                 and self.alerts[alert_id].status == AlertStatus.FIRING
+             ):
+                 self.alerts[alert_id].resolve()
+
+     def _process_notifications(self):
+         """Process notifications for firing alerts."""
+         with self._lock:
+             for alert in self.alerts.values():
+                 if alert.status != AlertStatus.FIRING:
+                     continue
+
+                 rule = self.rules.get(alert.rule_name)
+                 if not rule:
+                     continue
+
+                 if alert.should_notify(rule.notification_interval):
+                     self._send_notifications(alert)
+
+     def _send_notifications(self, alert: Alert):
+         """Send notifications for an alert."""
+         context = {
+             "metric_value": self._get_current_metric_value(alert),
+             "notification_count": alert.notification_count + 1,
+             "time_since_created": str(datetime.now(UTC) - alert.created_at),
+         }
+
+         success = False
+         for channel in self.notification_channels:
+             try:
+                 if channel.send_notification(alert, context):
+                     success = True
+             except Exception as e:
+                 logger.error(
+                     f"Failed to send notification via {type(channel).__name__}: {e}"
+                 )
+
+         if success:
+             alert.mark_notified()
+
+     def _get_current_metric_value(self, alert: Alert) -> Optional[Union[int, float]]:
+         """Get current metric value for alert context."""
+         for collector in self.metrics_registry.get_all_collectors().values():
+             metric_series = collector.get_metric(alert.labels.get("metric"))
+             if metric_series:
+                 return metric_series.get_latest_value()
+         return None
+
+     def get_active_alerts(self) -> List[Alert]:
+         """Get all active (firing) alerts."""
+         with self._lock:
+             return [
+                 alert
+                 for alert in self.alerts.values()
+                 if alert.status == AlertStatus.FIRING
+             ]
+
+     def get_all_alerts(self) -> List[Alert]:
+         """Get all alerts."""
+         with self._lock:
+             return list(self.alerts.values())
+
+     def silence_alert(self, alert_id: str):
+         """Silence an alert.
+
+         Args:
+             alert_id: Alert ID to silence
+         """
+         with self._lock:
+             if alert_id in self.alerts:
+                 self.alerts[alert_id].silence()
+
+     def acknowledge_alert(self, alert_id: str):
+         """Acknowledge an alert (same as silence for now).
+
+         Args:
+             alert_id: Alert ID to acknowledge
+         """
+         self.silence_alert(alert_id)
+
+
+ def create_default_alert_rules() -> List[AlertRule]:
+     """Create default alert rules for common scenarios."""
+     return [
+         # Validation failure rate
+         AlertRule(
+             name="high_validation_failure_rate",
+             description="Validation failure rate is above 10%",
+             severity=AlertSeverity.ERROR,
+             metric_name="validation_failure",
+             condition="rate > 0.1",
+             threshold=0.1,
+             time_window=timedelta(minutes=5),
+             labels={"component": "validation"},
+         ),
+         # Security violations
+         AlertRule(
+             name="security_violations_detected",
+             description="Security violations detected",
+             severity=AlertSeverity.CRITICAL,
+             metric_name="security_violations_total",
+             condition="rate > 0",
+             threshold=0,
+             time_window=timedelta(minutes=1),
+             notification_interval=timedelta(minutes=5),
+             labels={"component": "security"},
+         ),
+         # High response time
+         AlertRule(
+             name="high_response_time",
+             description="Average response time is above 1 second",
+             severity=AlertSeverity.WARNING,
+             metric_name="response_time",
+             condition="avg > 1000",
+             threshold=1000,
+             time_window=timedelta(minutes=5),
+             labels={"component": "performance"},
+         ),
+         # Low cache hit rate
+         AlertRule(
+             name="low_cache_hit_rate",
+             description="Cache hit rate is below 80%",
+             severity=AlertSeverity.WARNING,
+             metric_name="validation_cache_hits",
+             condition="rate < 0.8",
+             threshold=0.8,
+             time_window=timedelta(minutes=10),
+             labels={"component": "cache"},
+         ),
+         # High memory usage
+         AlertRule(
+             name="high_memory_usage",
+             description="Memory usage is above 90%",
+             severity=AlertSeverity.ERROR,
+             metric_name="memory_usage",
+             condition="> 90",
+             threshold=90,
+             time_window=timedelta(minutes=2),
+             labels={"component": "system"},
+         ),
+     ]
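
For orientation, here is a minimal usage sketch of the alerting API added in this release, based only on the classes visible in the hunk above. It is a sketch under stated assumptions, not documented usage: the no-argument MetricsRegistry() constructor and the "request_duration_ms" metric name are guesses, since kailash/monitoring/metrics.py is not shown in this section; everything else mirrors the AlertManager, AlertRule, and LogNotificationChannel definitions above.

from datetime import timedelta

from kailash.monitoring.alerts import (
    AlertManager,
    AlertRule,
    AlertSeverity,
    LogNotificationChannel,
    create_default_alert_rules,
)
from kailash.monitoring.metrics import MetricsRegistry

registry = MetricsRegistry()  # assumption: metrics.py exposes a default constructor
manager = AlertManager(registry)

# Register the bundled rules (validation failures, security violations,
# response time, cache hit rate, memory usage).
for rule in create_default_alert_rules():
    manager.add_rule(rule)

# A custom rule: fire when the 5-minute average of a hypothetical
# "request_duration_ms" metric exceeds 500. The "rate"/"avg"/"max" prefix of
# `condition` selects the aggregation; the comparison is applied to `threshold`.
manager.add_rule(
    AlertRule(
        name="slow_requests",
        description="Average request duration above 500 ms",
        severity=AlertSeverity.WARNING,
        metric_name="request_duration_ms",
        condition="avg > 500",
        threshold=500,
        time_window=timedelta(minutes=5),
    )
)

manager.add_notification_channel(LogNotificationChannel(log_level="WARNING"))
manager.start()  # background thread re-evaluates rules roughly every 10 seconds

# ... later, inspect firing alerts and shut down ...
print([alert.title for alert in manager.get_active_alerts()])
manager.stop()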