kekkai-cli 1.1.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only.
- kekkai/cli.py +238 -36
- kekkai/dojo_import.py +9 -1
- kekkai/output.py +2 -3
- kekkai/report/unified.py +226 -0
- kekkai/triage/__init__.py +54 -1
- kekkai/triage/fix_screen.py +232 -0
- kekkai/triage/loader.py +196 -0
- kekkai/triage/screens.py +1 -0
- kekkai_cli-2.0.0.dist-info/METADATA +317 -0
- {kekkai_cli-1.1.0.dist-info → kekkai_cli-2.0.0.dist-info}/RECORD +13 -28
- {kekkai_cli-1.1.0.dist-info → kekkai_cli-2.0.0.dist-info}/entry_points.txt +0 -1
- {kekkai_cli-1.1.0.dist-info → kekkai_cli-2.0.0.dist-info}/top_level.txt +0 -1
- kekkai_cli-1.1.0.dist-info/METADATA +0 -359
- portal/__init__.py +0 -19
- portal/api.py +0 -155
- portal/auth.py +0 -103
- portal/enterprise/__init__.py +0 -45
- portal/enterprise/audit.py +0 -435
- portal/enterprise/licensing.py +0 -408
- portal/enterprise/rbac.py +0 -276
- portal/enterprise/saml.py +0 -595
- portal/ops/__init__.py +0 -53
- portal/ops/backup.py +0 -553
- portal/ops/log_shipper.py +0 -469
- portal/ops/monitoring.py +0 -517
- portal/ops/restore.py +0 -469
- portal/ops/secrets.py +0 -408
- portal/ops/upgrade.py +0 -591
- portal/tenants.py +0 -340
- portal/uploads.py +0 -259
- portal/web.py +0 -393
- {kekkai_cli-1.1.0.dist-info → kekkai_cli-2.0.0.dist-info}/WHEEL +0 -0
portal/ops/monitoring.py
DELETED
@@ -1,517 +0,0 @@

"""Monitoring and alerting system for Kekkai Portal.

Provides:
- Alert rules for auth/authz anomalies
- Alert rules for import failures
- Metric collection
- Integration with audit log system

ASVS 5.0 Requirements:
- V16.4.3: Send logs to separate system
- V16.3.2: Log failed authz
"""

from __future__ import annotations

import json
import logging
import threading
import time
from collections import defaultdict
from collections.abc import Callable
from dataclasses import dataclass, field
from datetime import UTC, datetime, timedelta
from enum import Enum
from typing import Any

logger = logging.getLogger(__name__)


class AlertSeverity(Enum):
    """Alert severity levels."""

    INFO = "info"
    WARNING = "warning"
    CRITICAL = "critical"


class AlertType(Enum):
    """Types of alerts."""

    AUTH_FAILURE_SPIKE = "auth_failure_spike"
    AUTH_BRUTE_FORCE = "auth_brute_force"
    AUTHZ_DENIAL = "authz_denial"
    CROSS_TENANT_ATTEMPT = "cross_tenant_attempt"
    IMPORT_FAILURE = "import_failure"
    BACKUP_FAILURE = "backup_failure"
    SYSTEM_ERROR = "system_error"
    SAML_REPLAY = "saml_replay"
    LICENSE_EXPIRED = "license_expired"


@dataclass
class AlertRule:
    """Definition of an alert rule."""

    name: str
    alert_type: AlertType
    severity: AlertSeverity
    threshold: int
    window_seconds: int
    description: str = ""
    enabled: bool = True
    cooldown_seconds: int = 300

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "name": self.name,
            "alert_type": self.alert_type.value,
            "severity": self.severity.value,
            "threshold": self.threshold,
            "window_seconds": self.window_seconds,
            "description": self.description,
            "enabled": self.enabled,
            "cooldown_seconds": self.cooldown_seconds,
        }


@dataclass
class Alert:
    """Represents a triggered alert."""

    rule_name: str
    alert_type: AlertType
    severity: AlertSeverity
    timestamp: datetime
    message: str
    details: dict[str, Any] = field(default_factory=dict)
    alert_id: str = ""

    def __post_init__(self) -> None:
        if not self.alert_id:
            import secrets

            self.alert_id = f"alert_{int(time.time())}_{secrets.token_hex(4)}"

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "alert_id": self.alert_id,
            "rule_name": self.rule_name,
            "alert_type": self.alert_type.value,
            "severity": self.severity.value,
            "timestamp": self.timestamp.isoformat(),
            "message": self.message,
            "details": self.details,
        }


@dataclass
class MonitoringConfig:
    """Configuration for monitoring service."""

    enabled: bool = True
    alert_handlers: list[Callable[[Alert], None]] = field(default_factory=list)
    metrics_retention_hours: int = 24
    check_interval_seconds: int = 60

    rules: list[AlertRule] = field(default_factory=list)

    def __post_init__(self) -> None:
        if not self.rules:
            self.rules = get_default_rules()


def get_default_rules() -> list[AlertRule]:
    """Get default alert rules."""
    return [
        AlertRule(
            name="auth_failure_spike",
            alert_type=AlertType.AUTH_FAILURE_SPIKE,
            severity=AlertSeverity.WARNING,
            threshold=10,
            window_seconds=300,
            description="Multiple authentication failures in short period",
        ),
        AlertRule(
            name="brute_force_detection",
            alert_type=AlertType.AUTH_BRUTE_FORCE,
            severity=AlertSeverity.CRITICAL,
            threshold=5,
            window_seconds=60,
            description="Potential brute force attack from single IP",
        ),
        AlertRule(
            name="authz_denial_alert",
            alert_type=AlertType.AUTHZ_DENIAL,
            severity=AlertSeverity.WARNING,
            threshold=5,
            window_seconds=300,
            description="Multiple authorization denials for user",
        ),
        AlertRule(
            name="cross_tenant_attempt",
            alert_type=AlertType.CROSS_TENANT_ATTEMPT,
            severity=AlertSeverity.CRITICAL,
            threshold=1,
            window_seconds=60,
            description="Cross-tenant access attempt detected",
        ),
        AlertRule(
            name="import_failure_alert",
            alert_type=AlertType.IMPORT_FAILURE,
            severity=AlertSeverity.WARNING,
            threshold=3,
            window_seconds=600,
            description="Multiple import failures",
        ),
        AlertRule(
            name="saml_replay_alert",
            alert_type=AlertType.SAML_REPLAY,
            severity=AlertSeverity.CRITICAL,
            threshold=1,
            window_seconds=60,
            description="SAML replay attack blocked",
        ),
        AlertRule(
            name="backup_failure_alert",
            alert_type=AlertType.BACKUP_FAILURE,
            severity=AlertSeverity.CRITICAL,
            threshold=1,
            window_seconds=3600,
            description="Backup job failed",
        ),
    ]


class MetricsCollector:
    """Collects and stores metrics for monitoring."""

    def __init__(self, retention_hours: int = 24) -> None:
        self._retention_hours = retention_hours
        self._metrics: dict[str, list[tuple[datetime, Any]]] = defaultdict(list)
        self._counters: dict[str, int] = defaultdict(int)
        self._lock = threading.Lock()

    def increment(
        self, metric_name: str, value: int = 1, labels: dict[str, str] | None = None
    ) -> None:
        """Increment a counter metric."""
        key = self._make_key(metric_name, labels)
        with self._lock:
            self._counters[key] += value
            self._metrics[key].append((datetime.now(UTC), value))

    def gauge(self, metric_name: str, value: float, labels: dict[str, str] | None = None) -> None:
        """Set a gauge metric value."""
        key = self._make_key(metric_name, labels)
        with self._lock:
            self._metrics[key].append((datetime.now(UTC), value))

    def get_count(
        self, metric_name: str, window_seconds: int, labels: dict[str, str] | None = None
    ) -> int:
        """Get count of events within window."""
        key = self._make_key(metric_name, labels)
        cutoff = datetime.now(UTC) - timedelta(seconds=window_seconds)

        with self._lock:
            values = self._metrics.get(key, [])
            return sum(v for ts, v in values if ts >= cutoff and isinstance(v, int))

    def get_events_in_window(
        self, metric_name: str, window_seconds: int, labels: dict[str, str] | None = None
    ) -> list[tuple[datetime, Any]]:
        """Get all events within window."""
        key = self._make_key(metric_name, labels)
        cutoff = datetime.now(UTC) - timedelta(seconds=window_seconds)

        with self._lock:
            values = self._metrics.get(key, [])
            return [(ts, v) for ts, v in values if ts >= cutoff]

    def cleanup_old_metrics(self) -> int:
        """Remove metrics older than retention period. Returns count removed."""
        cutoff = datetime.now(UTC) - timedelta(hours=self._retention_hours)
        removed = 0

        with self._lock:
            for key in list(self._metrics.keys()):
                original_len = len(self._metrics[key])
                self._metrics[key] = [(ts, v) for ts, v in self._metrics[key] if ts >= cutoff]
                removed += original_len - len(self._metrics[key])

        return removed

    def get_all_metrics(self) -> dict[str, Any]:
        """Get snapshot of all current metrics."""
        with self._lock:
            return {
                "counters": dict(self._counters),
                "metrics": {k: len(v) for k, v in self._metrics.items()},
            }

    def _make_key(self, metric_name: str, labels: dict[str, str] | None) -> str:
        """Create metric key from name and labels."""
        if not labels:
            return metric_name
        label_str = ",".join(f"{k}={v}" for k, v in sorted(labels.items()))
        return f"{metric_name}{{{label_str}}}"


class MonitoringService:
    """Main monitoring service for Kekkai Portal."""

    def __init__(self, config: MonitoringConfig) -> None:
        self._config = config
        self._metrics = MetricsCollector(config.metrics_retention_hours)
        self._last_alert_time: dict[str, datetime] = {}
        self._lock = threading.Lock()
        self._running = False
        self._check_thread: threading.Thread | None = None

    def start(self) -> None:
        """Start background monitoring."""
        if not self._config.enabled:
            return

        self._running = True
        self._check_thread = threading.Thread(target=self._check_loop, daemon=True)
        self._check_thread.start()
        logger.info("monitoring.started")

    def stop(self) -> None:
        """Stop background monitoring."""
        self._running = False
        if self._check_thread:
            self._check_thread.join(timeout=5)
        logger.info("monitoring.stopped")

    def record_auth_failure(self, client_ip: str, reason: str, user_id: str | None = None) -> None:
        """Record an authentication failure event."""
        self._metrics.increment("auth_failures", labels={"ip": client_ip})
        self._metrics.increment("auth_failures_total")

        self._check_rule_immediate(AlertType.AUTH_FAILURE_SPIKE)
        self._check_brute_force(client_ip)

    def record_authz_denial(
        self, user_id: str, tenant_id: str, permission: str, resource: str | None = None
    ) -> None:
        """Record an authorization denial event."""
        self._metrics.increment("authz_denials", labels={"user": user_id, "tenant": tenant_id})
        self._metrics.increment("authz_denials_total")

        self._check_rule_immediate(AlertType.AUTHZ_DENIAL, {"user_id": user_id})

    def record_cross_tenant_attempt(
        self, user_id: str, source_tenant: str, target_tenant: str
    ) -> None:
        """Record a cross-tenant access attempt."""
        self._metrics.increment(
            "cross_tenant_attempts",
            labels={"user": user_id, "source": source_tenant, "target": target_tenant},
        )

        self._trigger_alert(
            AlertRule(
                name="cross_tenant_attempt",
                alert_type=AlertType.CROSS_TENANT_ATTEMPT,
                severity=AlertSeverity.CRITICAL,
                threshold=1,
                window_seconds=60,
            ),
            f"Cross-tenant access attempt: user={user_id} from={source_tenant} to={target_tenant}",
            {"user_id": user_id, "source_tenant": source_tenant, "target_tenant": target_tenant},
        )

    def record_import_failure(self, tenant_id: str, reason: str) -> None:
        """Record an import failure event."""
        self._metrics.increment("import_failures", labels={"tenant": tenant_id})
        self._metrics.increment("import_failures_total")

        self._check_rule_immediate(AlertType.IMPORT_FAILURE, {"tenant_id": tenant_id})

    def record_saml_replay_blocked(self, assertion_id: str, client_ip: str) -> None:
        """Record a blocked SAML replay attempt."""
        self._metrics.increment("saml_replay_blocked", labels={"ip": client_ip})

        self._trigger_alert(
            AlertRule(
                name="saml_replay_alert",
                alert_type=AlertType.SAML_REPLAY,
                severity=AlertSeverity.CRITICAL,
                threshold=1,
                window_seconds=60,
            ),
            f"SAML replay attack blocked: assertion={assertion_id[:16]}... ip={client_ip}",
            {"assertion_id": assertion_id, "client_ip": client_ip},
        )

    def record_backup_failure(self, backup_id: str, error: str) -> None:
        """Record a backup failure event."""
        self._metrics.increment("backup_failures")

        self._trigger_alert(
            AlertRule(
                name="backup_failure_alert",
                alert_type=AlertType.BACKUP_FAILURE,
                severity=AlertSeverity.CRITICAL,
                threshold=1,
                window_seconds=3600,
            ),
            f"Backup failed: {backup_id}",
            {"backup_id": backup_id, "error": error},
        )

    def get_metrics(self) -> dict[str, Any]:
        """Get current metrics snapshot."""
        return self._metrics.get_all_metrics()

    def get_recent_alerts(self, limit: int = 100) -> list[dict[str, Any]]:
        """Get recent alerts (from log - placeholder for full implementation)."""
        return []

    def add_alert_handler(self, handler: Callable[[Alert], None]) -> None:
        """Add an alert handler callback."""
        self._config.alert_handlers.append(handler)

    def _check_loop(self) -> None:
        """Background loop to check alert rules."""
        while self._running:
            try:
                self._check_all_rules()
                self._metrics.cleanup_old_metrics()
            except Exception as e:
                logger.error("monitoring.check_error error=%s", str(e))

            time.sleep(self._config.check_interval_seconds)

    def _check_all_rules(self) -> None:
        """Check all configured alert rules."""
        for rule in self._config.rules:
            if not rule.enabled:
                continue
            self._check_rule(rule)

    def _check_rule(self, rule: AlertRule) -> None:
        """Check a single alert rule."""
        metric_name = self._alert_type_to_metric(rule.alert_type)
        if not metric_name:
            return

        count = self._metrics.get_count(metric_name, rule.window_seconds)
        if count >= rule.threshold:
            self._trigger_alert(
                rule, f"{rule.description}: {count} events in {rule.window_seconds}s"
            )

    def _check_rule_immediate(
        self, alert_type: AlertType, context: dict[str, Any] | None = None
    ) -> None:
        """Check rules immediately for a specific alert type."""
        for rule in self._config.rules:
            if rule.alert_type == alert_type and rule.enabled:
                self._check_rule(rule)

    def _check_brute_force(self, client_ip: str) -> None:
        """Check for brute force attack from specific IP."""
        count = self._metrics.get_count("auth_failures", 60, labels={"ip": client_ip})
        if count >= 5:
            rule = AlertRule(
                name="brute_force_detection",
                alert_type=AlertType.AUTH_BRUTE_FORCE,
                severity=AlertSeverity.CRITICAL,
                threshold=5,
                window_seconds=60,
            )
            self._trigger_alert(
                rule,
                f"Potential brute force attack from {client_ip}: {count} failures in 60s",
                {"client_ip": client_ip, "failure_count": count},
            )

    def _trigger_alert(
        self, rule: AlertRule, message: str, details: dict[str, Any] | None = None
    ) -> None:
        """Trigger an alert if not in cooldown."""
        with self._lock:
            last_time = self._last_alert_time.get(rule.name)
            now = datetime.now(UTC)

            if last_time and (now - last_time).total_seconds() < rule.cooldown_seconds:
                return

            self._last_alert_time[rule.name] = now

        alert = Alert(
            rule_name=rule.name,
            alert_type=rule.alert_type,
            severity=rule.severity,
            timestamp=now,
            message=message,
            details=details or {},
        )

        logger.warning(
            "alert.triggered rule=%s severity=%s message=%s",
            rule.name,
            rule.severity.value,
            message,
        )

        for handler in self._config.alert_handlers:
            try:
                handler(alert)
            except Exception as e:
                logger.error("alert.handler.error handler=%s error=%s", handler.__name__, str(e))

    def _alert_type_to_metric(self, alert_type: AlertType) -> str | None:
        """Map alert type to metric name."""
        mapping = {
            AlertType.AUTH_FAILURE_SPIKE: "auth_failures_total",
            AlertType.AUTHZ_DENIAL: "authz_denials_total",
            AlertType.IMPORT_FAILURE: "import_failures_total",
            AlertType.BACKUP_FAILURE: "backup_failures",
        }
        return mapping.get(alert_type)


def create_monitoring_service(
    enabled: bool = True,
    rules: list[AlertRule] | None = None,
) -> MonitoringService:
    """Create a configured MonitoringService instance."""
    config = MonitoringConfig(enabled=enabled)
    if rules:
        config.rules = rules
    return MonitoringService(config)


def log_alert_handler(alert: Alert) -> None:
    """Default alert handler that logs to file."""
    logger.warning("ALERT: %s", json.dumps(alert.to_dict()))


def webhook_alert_handler_factory(webhook_url: str) -> Callable[[Alert], None]:
    """Create a webhook alert handler."""
    import urllib.error
    import urllib.request

    def handler(alert: Alert) -> None:
        try:
            data = json.dumps(alert.to_dict()).encode("utf-8")
            req = urllib.request.Request(  # noqa: S310
                webhook_url,
                data=data,
                headers={"Content-Type": "application/json"},
                method="POST",
            )
            with urllib.request.urlopen(req, timeout=10) as resp:  # noqa: S310
                if resp.status >= 400:
                    logger.error("webhook.failed status=%d", resp.status)
        except urllib.error.URLError as e:
            logger.error("webhook.error url=%s error=%s", webhook_url, str(e))

    return handler
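
Note: the module above shipped in 1.1.0 and is removed in 2.0.0. For anyone migrating off it, the following is a minimal, illustrative sketch of how its public API wired together. It is reconstructed solely from the signatures shown above and is not taken from the kekkai documentation; the import path is assumed from the file location.

# Hypothetical usage of the removed 1.1.0 module.
from portal.ops.monitoring import (
    create_monitoring_service,
    log_alert_handler,
    webhook_alert_handler_factory,
)

# Build a service with the default rules and attach the two bundled handlers.
service = create_monitoring_service(enabled=True)
service.add_alert_handler(log_alert_handler)
service.add_alert_handler(webhook_alert_handler_factory("https://example.com/alerts"))

service.start()  # spawns the daemon thread that runs _check_loop

# Ten failures from one IP inside 60s would satisfy both the
# auth_failure_spike rule (10 in 300s) and the per-IP brute-force
# check (5 in 60s); each fires once, then its cooldown applies.
for _ in range(10):
    service.record_auth_failure(client_ip="203.0.113.7", reason="bad password")

print(service.get_metrics())
service.stop()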