proxilion 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. proxilion/__init__.py +136 -0
  2. proxilion/audit/__init__.py +133 -0
  3. proxilion/audit/base_exporters.py +527 -0
  4. proxilion/audit/compliance/__init__.py +130 -0
  5. proxilion/audit/compliance/base.py +457 -0
  6. proxilion/audit/compliance/eu_ai_act.py +603 -0
  7. proxilion/audit/compliance/iso27001.py +544 -0
  8. proxilion/audit/compliance/soc2.py +491 -0
  9. proxilion/audit/events.py +493 -0
  10. proxilion/audit/explainability.py +1173 -0
  11. proxilion/audit/exporters/__init__.py +58 -0
  12. proxilion/audit/exporters/aws_s3.py +636 -0
  13. proxilion/audit/exporters/azure_storage.py +608 -0
  14. proxilion/audit/exporters/cloud_base.py +468 -0
  15. proxilion/audit/exporters/gcp_storage.py +570 -0
  16. proxilion/audit/exporters/multi_exporter.py +498 -0
  17. proxilion/audit/hash_chain.py +652 -0
  18. proxilion/audit/logger.py +543 -0
  19. proxilion/caching/__init__.py +49 -0
  20. proxilion/caching/tool_cache.py +633 -0
  21. proxilion/context/__init__.py +73 -0
  22. proxilion/context/context_window.py +556 -0
  23. proxilion/context/message_history.py +505 -0
  24. proxilion/context/session.py +735 -0
  25. proxilion/contrib/__init__.py +51 -0
  26. proxilion/contrib/anthropic.py +609 -0
  27. proxilion/contrib/google.py +1012 -0
  28. proxilion/contrib/langchain.py +641 -0
  29. proxilion/contrib/mcp.py +893 -0
  30. proxilion/contrib/openai.py +646 -0
  31. proxilion/core.py +3058 -0
  32. proxilion/decorators.py +966 -0
  33. proxilion/engines/__init__.py +287 -0
  34. proxilion/engines/base.py +266 -0
  35. proxilion/engines/casbin_engine.py +412 -0
  36. proxilion/engines/opa_engine.py +493 -0
  37. proxilion/engines/simple.py +437 -0
  38. proxilion/exceptions.py +887 -0
  39. proxilion/guards/__init__.py +54 -0
  40. proxilion/guards/input_guard.py +522 -0
  41. proxilion/guards/output_guard.py +634 -0
  42. proxilion/observability/__init__.py +198 -0
  43. proxilion/observability/cost_tracker.py +866 -0
  44. proxilion/observability/hooks.py +683 -0
  45. proxilion/observability/metrics.py +798 -0
  46. proxilion/observability/session_cost_tracker.py +1063 -0
  47. proxilion/policies/__init__.py +67 -0
  48. proxilion/policies/base.py +304 -0
  49. proxilion/policies/builtin.py +486 -0
  50. proxilion/policies/registry.py +376 -0
  51. proxilion/providers/__init__.py +201 -0
  52. proxilion/providers/adapter.py +468 -0
  53. proxilion/providers/anthropic_adapter.py +330 -0
  54. proxilion/providers/gemini_adapter.py +391 -0
  55. proxilion/providers/openai_adapter.py +294 -0
  56. proxilion/py.typed +0 -0
  57. proxilion/resilience/__init__.py +81 -0
  58. proxilion/resilience/degradation.py +615 -0
  59. proxilion/resilience/fallback.py +555 -0
  60. proxilion/resilience/retry.py +554 -0
  61. proxilion/scheduling/__init__.py +57 -0
  62. proxilion/scheduling/priority_queue.py +419 -0
  63. proxilion/scheduling/scheduler.py +459 -0
  64. proxilion/security/__init__.py +244 -0
  65. proxilion/security/agent_trust.py +968 -0
  66. proxilion/security/behavioral_drift.py +794 -0
  67. proxilion/security/cascade_protection.py +869 -0
  68. proxilion/security/circuit_breaker.py +428 -0
  69. proxilion/security/cost_limiter.py +690 -0
  70. proxilion/security/idor_protection.py +460 -0
  71. proxilion/security/intent_capsule.py +849 -0
  72. proxilion/security/intent_validator.py +495 -0
  73. proxilion/security/memory_integrity.py +767 -0
  74. proxilion/security/rate_limiter.py +509 -0
  75. proxilion/security/scope_enforcer.py +680 -0
  76. proxilion/security/sequence_validator.py +636 -0
  77. proxilion/security/trust_boundaries.py +784 -0
  78. proxilion/streaming/__init__.py +70 -0
  79. proxilion/streaming/detector.py +761 -0
  80. proxilion/streaming/transformer.py +674 -0
  81. proxilion/timeouts/__init__.py +55 -0
  82. proxilion/timeouts/decorators.py +477 -0
  83. proxilion/timeouts/manager.py +545 -0
  84. proxilion/tools/__init__.py +69 -0
  85. proxilion/tools/decorators.py +493 -0
  86. proxilion/tools/registry.py +732 -0
  87. proxilion/types.py +339 -0
  88. proxilion/validation/__init__.py +93 -0
  89. proxilion/validation/pydantic_schema.py +351 -0
  90. proxilion/validation/schema.py +651 -0
  91. proxilion-0.0.1.dist-info/METADATA +872 -0
  92. proxilion-0.0.1.dist-info/RECORD +94 -0
  93. proxilion-0.0.1.dist-info/WHEEL +4 -0
  94. proxilion-0.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,798 @@
1
+ """
2
+ Real-Time Metrics and Alerts for Proxilion.
3
+
4
+ Provides observability into Proxilion's security operations with:
5
+ - Prometheus-compatible metrics export
6
+ - Real-time alerting via webhooks
7
+ - Security event aggregation
8
+ - Dashboard-ready data
9
+
10
+ Example:
11
+ >>> from proxilion.observability.metrics import (
12
+ ... MetricsCollector,
13
+ ... AlertManager,
14
+ ... PrometheusExporter,
15
+ ... )
16
+ >>>
17
+ >>> # Create collector
18
+ >>> collector = MetricsCollector()
19
+ >>>
20
+ >>> # Record security events
21
+ >>> collector.record_authorization(allowed=True, user="alice", resource="db")
22
+ >>> collector.record_guard_block(guard_type="input", pattern="injection")
23
+ >>> collector.record_rate_limit_hit(user="bob")
24
+ >>>
25
+ >>> # Get Prometheus metrics
26
+ >>> exporter = PrometheusExporter(collector)
27
+ >>> print(exporter.export())
28
+ >>>
29
+ >>> # Configure alerts
30
+ >>> alerts = AlertManager(webhook_url="https://hooks.slack.com/...")
31
+ >>> alerts.add_rule("high_block_rate", threshold=10, window_seconds=60)
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import json
37
+ import logging
38
+ import threading
39
+ import time
40
+ from collections import defaultdict, deque
41
+ from dataclasses import dataclass, field
42
+ from datetime import datetime, timezone
43
+ from enum import Enum
44
+ from typing import Any, Callable
45
+ from urllib.request import Request, urlopen
46
+ from urllib.error import URLError
47
+
48
+ logger = logging.getLogger(__name__)
49
+
50
+
51
class MetricType(Enum):
    """Kinds of metric distinguished by this module.

    Each member's value is the lowercase name of the metric type.
    """

    # Monotonically increasing value.
    COUNTER = "counter"

    # Value that can go up or down.
    GAUGE = "gauge"

    # Distribution of values.
    HISTOGRAM = "histogram"

    # Summary statistics.
    SUMMARY = "summary"
65
+
66
+
67
class EventType(Enum):
    """Categories of security event that can be recorded.

    Each member's value is the snake_case string used as the counter key
    and as the ``event_type`` field of a serialized event.
    """

    # Authorization decisions
    AUTHORIZATION_ALLOWED = "authorization_allowed"
    AUTHORIZATION_DENIED = "authorization_denied"

    # Guard blocks
    INPUT_GUARD_BLOCK = "input_guard_block"
    OUTPUT_GUARD_BLOCK = "output_guard_block"

    # Throttling and circuit breaking
    RATE_LIMIT_HIT = "rate_limit_hit"
    CIRCUIT_OPEN = "circuit_open"

    # Detected violations and anomalies
    IDOR_VIOLATION = "idor_violation"
    SEQUENCE_VIOLATION = "sequence_violation"
    INTENT_HIJACK = "intent_hijack"
    BEHAVIORAL_DRIFT = "behavioral_drift"
    KILL_SWITCH_ACTIVATED = "kill_switch_activated"
    CONTEXT_TAMPERING = "context_tampering"
    AGENT_TRUST_VIOLATION = "agent_trust_violation"
83
+
84
+
85
@dataclass
class SecurityEvent:
    """One recorded security-related occurrence.

    Only ``event_type`` and ``timestamp`` are required; the remaining
    fields default to ``None`` / an empty dict / a severity of 0.5.
    """

    # Category of the event.
    event_type: EventType
    # POSIX timestamp in seconds (rendered as UTC by ``to_dict``).
    timestamp: float
    user_id: str | None = None
    agent_id: str | None = None
    resource: str | None = None
    action: str | None = None
    # Free-form extra data attached by the recorder.
    details: dict[str, Any] = field(default_factory=dict)
    severity: float = 0.5  # 0.0 to 1.0

    def to_dict(self) -> dict[str, Any]:
        """Return the event as a plain dictionary.

        The result carries the enum's string value under ``event_type``
        and adds a ``datetime`` key holding the timestamp as an ISO-8601
        string in UTC. ``details`` is the same dict object held by the
        event, not a copy.
        """
        payload: dict[str, Any] = {
            "event_type": self.event_type.value,
            "timestamp": self.timestamp,
        }
        as_utc = datetime.fromtimestamp(self.timestamp, tz=timezone.utc)
        payload["datetime"] = as_utc.isoformat()
        for attr in ("user_id", "agent_id", "resource", "action", "details", "severity"):
            payload[attr] = getattr(self, attr)
        return payload
111
+
112
+
113
@dataclass
class MetricSample:
    """One observation of a named metric.

    Holds the metric name, the observed value, the time of observation
    and an optional set of string labels (empty by default).
    """

    name: str
    value: float
    timestamp: float
    # Each instance gets its own dict; never shared between samples.
    labels: dict[str, str] = field(default_factory=dict)
121
+
122
+
123
class MetricsCollector:
    """
    Collects security metrics from Proxilion operations.

    Keeps a bounded window of recent events together with counters,
    gauges and histograms, so callers can query both real-time and
    aggregated figures for monitoring and alerting. State is guarded by
    a re-entrant lock.

    Example:
        >>> collector = MetricsCollector()
        >>>
        >>> # Record events
        >>> collector.record_authorization(True, "alice", "database")
        >>> collector.record_guard_block("input", "prompt_injection")
        >>>
        >>> # Get stats
        >>> stats = collector.get_summary()
        >>> print(f"Total authorizations: {stats['total_authorizations']}")
    """

    # Bucket upper bounds used by record_histogram when the caller gives none.
    _DEFAULT_BUCKETS = (
        0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
    )

    # Bucket upper bounds for "authorization_latency_ms". The values recorded
    # there are milliseconds, so the default bounds (which top out at 10.0)
    # would push almost every sample into +Inf only.
    _LATENCY_MS_BUCKETS = (
        1.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0,
        1000.0, 2500.0, 5000.0, 10000.0,
    )

    def __init__(
        self,
        event_window_size: int = 10000,
        aggregation_window_seconds: float = 60.0,
    ) -> None:
        """
        Initialize the collector.

        Args:
            event_window_size: Maximum events to keep in memory; older
                events are dropped once the window is full.
            aggregation_window_seconds: Default window for rate calculations.
        """
        self._event_window_size = event_window_size
        self._aggregation_window = aggregation_window_seconds

        # Event storage (bounded; oldest entries fall off the left).
        self._events: deque[SecurityEvent] = deque(maxlen=event_window_size)

        # Counters: plain totals, plus totals keyed
        # dimension -> event type value -> label value.
        self._counters: dict[str, int] = defaultdict(int)
        self._counter_labels: dict[str, dict[str, dict[str, int]]] = defaultdict(
            lambda: defaultdict(lambda: defaultdict(int))
        )

        # Gauges
        self._gauges: dict[str, float] = {}

        # Histograms: per name, a list of (upper bound, cumulative count).
        self._histograms: dict[str, list[tuple[float, int]]] = {}
        self._histogram_sums: dict[str, float] = defaultdict(float)
        self._histogram_counts: dict[str, int] = defaultdict(int)

        # Event callbacks
        self._event_callbacks: list[Callable[[SecurityEvent], None]] = []

        self._lock = threading.RLock()
        self._start_time = time.time()

        logger.debug("MetricsCollector initialized")

    def record_event(self, event: SecurityEvent) -> None:
        """Store an event, bump its counters and notify callbacks.

        Exceptions raised by a callback are logged and do not stop the
        remaining callbacks or propagate to the caller.
        """
        with self._lock:
            self._events.append(event)

            kind = event.event_type.value
            self._counters[kind] += 1

            # Labeled counters are only kept for non-empty user/resource.
            if event.user_id:
                self._counter_labels["by_user"][kind][event.user_id] += 1
            if event.resource:
                self._counter_labels["by_resource"][kind][event.resource] += 1

            # Iterate over a snapshot so a callback that registers another
            # callback does not change the sequence being walked. The lock
            # is re-entrant, so callbacks may call back into the collector.
            for callback in tuple(self._event_callbacks):
                try:
                    callback(event)
                except Exception as e:
                    logger.error(f"Event callback error: {e}")

    def record_authorization(
        self,
        allowed: bool,
        user: str | None = None,
        resource: str | None = None,
        action: str | None = None,
        latency_ms: float | None = None,
    ) -> None:
        """Record an authorization decision.

        Args:
            allowed: Whether the request was allowed.
            user: User the decision applies to.
            resource: Resource that was requested.
            action: Action that was requested.
            latency_ms: Decision latency in milliseconds. When given
                (including 0.0) it is stored in the event details and
                added to the ``authorization_latency_ms`` histogram.
        """
        event_type = (
            EventType.AUTHORIZATION_ALLOWED if allowed else EventType.AUTHORIZATION_DENIED
        )
        # Compare against None: a measured latency of 0.0 is still a sample.
        has_latency = latency_ms is not None

        self.record_event(SecurityEvent(
            event_type=event_type,
            timestamp=time.time(),
            user_id=user,
            resource=resource,
            action=action,
            details={"latency_ms": latency_ms} if has_latency else {},
            severity=0.0 if allowed else 0.5,
        ))

        if has_latency:
            self.record_histogram(
                "authorization_latency_ms",
                latency_ms,
                buckets=list(self._LATENCY_MS_BUCKETS),
            )

    def record_guard_block(
        self,
        guard_type: str,
        pattern: str,
        risk_score: float = 0.0,
        user: str | None = None,
    ) -> None:
        """Record a guard block.

        ``guard_type == "input"`` is recorded as an input guard block;
        any other value is recorded as an output guard block. The risk
        score is used as the event severity.
        """
        event_type = (
            EventType.INPUT_GUARD_BLOCK if guard_type == "input" else EventType.OUTPUT_GUARD_BLOCK
        )

        self.record_event(SecurityEvent(
            event_type=event_type,
            timestamp=time.time(),
            user_id=user,
            details={"pattern": pattern, "risk_score": risk_score},
            severity=risk_score,
        ))

    def record_rate_limit_hit(
        self,
        user: str | None = None,
        limit_type: str = "requests",
    ) -> None:
        """Record a rate limit hit (severity 0.4)."""
        self.record_event(SecurityEvent(
            event_type=EventType.RATE_LIMIT_HIT,
            timestamp=time.time(),
            user_id=user,
            details={"limit_type": limit_type},
            severity=0.4,
        ))

    def record_circuit_open(
        self,
        circuit_name: str,
        failure_count: int = 0,
    ) -> None:
        """Record a circuit breaker opening (severity 0.6)."""
        self.record_event(SecurityEvent(
            event_type=EventType.CIRCUIT_OPEN,
            timestamp=time.time(),
            details={"circuit_name": circuit_name, "failure_count": failure_count},
            severity=0.6,
        ))

    def record_idor_violation(
        self,
        user: str,
        resource_type: str,
        object_id: str,
    ) -> None:
        """Record an IDOR violation (severity 0.8).

        The resource type is stored as the event's ``resource``.
        """
        self.record_event(SecurityEvent(
            event_type=EventType.IDOR_VIOLATION,
            timestamp=time.time(),
            user_id=user,
            resource=resource_type,
            details={"object_id": object_id},
            severity=0.8,
        ))

    def record_sequence_violation(
        self,
        user: str,
        rule_name: str,
        tool_name: str,
    ) -> None:
        """Record a sequence violation (severity 0.7)."""
        self.record_event(SecurityEvent(
            event_type=EventType.SEQUENCE_VIOLATION,
            timestamp=time.time(),
            user_id=user,
            details={"rule_name": rule_name, "tool_name": tool_name},
            severity=0.7,
        ))

    def record_intent_hijack(
        self,
        user: str | None,
        agent: str | None,
        original_intent: str,
        detected_intent: str,
        confidence: float,
    ) -> None:
        """Record an intent hijack detection.

        The detection confidence is used as the event severity.
        """
        self.record_event(SecurityEvent(
            event_type=EventType.INTENT_HIJACK,
            timestamp=time.time(),
            user_id=user,
            agent_id=agent,
            details={
                "original_intent": original_intent,
                "detected_intent": detected_intent,
                "confidence": confidence,
            },
            severity=confidence,
        ))

    def record_behavioral_drift(
        self,
        agent: str,
        severity: float,
        drifting_metrics: list[str],
    ) -> None:
        """Record behavioral drift detection for an agent."""
        self.record_event(SecurityEvent(
            event_type=EventType.BEHAVIORAL_DRIFT,
            timestamp=time.time(),
            agent_id=agent,
            details={"drifting_metrics": drifting_metrics},
            severity=severity,
        ))

    def record_kill_switch(
        self,
        reason: str,
        triggered_by: str,
    ) -> None:
        """Record kill switch activation (severity 1.0)."""
        self.record_event(SecurityEvent(
            event_type=EventType.KILL_SWITCH_ACTIVATED,
            timestamp=time.time(),
            details={"reason": reason, "triggered_by": triggered_by},
            severity=1.0,
        ))

    def record_histogram(
        self,
        name: str,
        value: float,
        buckets: list[float] | None = None,
    ) -> None:
        """Record a histogram value.

        Args:
            name: Histogram name.
            value: Observed value.
            buckets: Bucket upper bounds. Only used the first time a
                given ``name`` is recorded; later calls reuse the bounds
                already stored for that name.
        """
        if buckets is None:
            buckets = list(self._DEFAULT_BUCKETS)

        with self._lock:
            if name not in self._histograms:
                self._histograms[name] = [(bound, 0) for bound in buckets]

            # Counts are cumulative: every bucket whose bound is >= value
            # is incremented.
            self._histograms[name] = [
                (bound, (seen + 1) if value <= bound else seen)
                for bound, seen in self._histograms[name]
            ]

            self._histogram_sums[name] += value
            self._histogram_counts[name] += 1

    def set_gauge(self, name: str, value: float) -> None:
        """Set a gauge value."""
        with self._lock:
            self._gauges[name] = value

    def increment_counter(self, name: str, value: int = 1) -> None:
        """Increment a counter by ``value`` (default 1)."""
        with self._lock:
            self._counters[name] += value

    def on_event(self, callback: Callable[[SecurityEvent], None]) -> None:
        """Register a callback invoked with every recorded event."""
        self._event_callbacks.append(callback)

    def get_counter(self, name: str) -> int:
        """Get a counter value (0 if the counter was never incremented)."""
        with self._lock:
            return self._counters.get(name, 0)

    def get_gauge(self, name: str) -> float | None:
        """Get a gauge value, or None if it was never set."""
        with self._lock:
            return self._gauges.get(name)

    def get_rate(self, event_type: EventType, window_seconds: float | None = None) -> float:
        """Get event rate (events per second).

        Counts events of ``event_type`` still held in the event window
        whose timestamp falls within the last ``window_seconds`` and
        divides by the window length. A missing or zero window falls
        back to the collector's aggregation window.
        """
        window = window_seconds or self._aggregation_window
        now = time.time()
        cutoff = now - window

        with self._lock:
            count = sum(
                1 for e in self._events
                if e.event_type == event_type and e.timestamp > cutoff
            )

        return count / window

    def get_recent_events(
        self,
        event_type: EventType | None = None,
        limit: int = 100,
    ) -> list[SecurityEvent]:
        """Get recent events, optionally filtered by type.

        Args:
            event_type: Only return events of this type; None for all.
            limit: Maximum number of events to return (most recent last).
                A limit of zero or less returns an empty list.
        """
        # Guard first: events[-0:] would be the whole list, not nothing.
        if limit <= 0:
            return []

        with self._lock:
            if event_type is not None:
                events = [e for e in self._events if e.event_type == event_type]
            else:
                events = list(self._events)

        return events[-limit:]

    def get_summary(self) -> dict[str, Any]:
        """Get a summary of all metrics.

        Returns:
            A dict with uptime, the number of events currently held,
            authorization totals and denial rate, per-type event rates
            (per minute) over the aggregation window, and copies of the
            gauge and counter maps.
        """
        now = time.time()
        uptime = now - self._start_time

        with self._lock:
            # Calculate rates
            window = self._aggregation_window
            cutoff = now - window

            event_counts: dict[str, int] = defaultdict(int)
            for e in self._events:
                if e.timestamp > cutoff:
                    event_counts[e.event_type.value] += 1

            # Total counts
            total_auth_allowed = self._counters.get(EventType.AUTHORIZATION_ALLOWED.value, 0)
            total_auth_denied = self._counters.get(EventType.AUTHORIZATION_DENIED.value, 0)
            total_authorizations = total_auth_allowed + total_auth_denied

            return {
                "uptime_seconds": uptime,
                "total_events": len(self._events),
                "total_authorizations": total_authorizations,
                "total_allowed": total_auth_allowed,
                "total_denied": total_auth_denied,
                # max(1, ...) avoids dividing by zero before any decision.
                "denial_rate": total_auth_denied / max(1, total_authorizations),
                "recent_events_per_minute": {
                    k: v * 60 / window for k, v in event_counts.items()
                },
                "gauges": dict(self._gauges),
                "counters": dict(self._counters),
            }
464
+
465
+
466
class AlertRule:
    """Threshold rule that may fire at most once per cooldown period."""

    def __init__(
        self,
        name: str,
        event_type: EventType | None = None,
        threshold: float = 1.0,
        window_seconds: float = 60.0,
        severity: str = "warning",
        cooldown_seconds: float = 300.0,
    ) -> None:
        """
        Initialize the rule.

        Args:
            name: Rule name.
            event_type: Event type to monitor (None for custom metric).
            threshold: Threshold for triggering.
            window_seconds: Window for rate calculation.
            severity: Alert severity (info, warning, critical).
            cooldown_seconds: Minimum time between alerts.
        """
        # time.time() of the most recent firing; 0 until the rule first fires.
        self._last_triggered: float = 0

        self.name = name
        self.severity = severity
        self.event_type = event_type
        self.threshold = threshold
        self.window_seconds = window_seconds
        self.cooldown_seconds = cooldown_seconds

    def can_trigger(self) -> bool:
        """Return True when more than the cooldown has passed since the last firing."""
        since_last = time.time() - self._last_triggered
        return since_last > self.cooldown_seconds

    def mark_triggered(self) -> None:
        """Remember the current time as the moment the rule last fired."""
        self._last_triggered = time.time()
505
+
506
+
507
@dataclass
class Alert:
    """Notification produced when an alert rule fires.

    ``timestamp`` defaults to the current time in UTC and ``details``
    to a fresh empty dict.
    """

    rule_name: str
    severity: str
    message: str
    # Observed value and the threshold it was compared against.
    value: float
    threshold: float
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    details: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the alert as a plain dictionary.

        The timestamp is rendered as an ISO-8601 string; every other
        field is passed through unchanged.
        """
        serialized: dict[str, Any] = {}
        for attr in ("rule_name", "severity", "message", "value", "threshold"):
            serialized[attr] = getattr(self, attr)
        serialized["timestamp"] = self.timestamp.isoformat()
        serialized["details"] = self.details
        return serialized
530
+
531
+
532
class AlertManager:
    """
    Manages alerting based on security metrics.

    Rules are evaluated on demand by ``check``. Only rules that name an
    ``event_type`` are evaluated there; a rule created without one is
    stored but never fires. Fired alerts are kept in a bounded history
    (the most recent 1000), logged, posted to the webhook when one is
    configured, and passed to registered callbacks.

    Example:
        >>> alerts = AlertManager(webhook_url="https://hooks.slack.com/...")
        >>>
        >>> # Add rules
        >>> alerts.add_rule(
        ...     name="high_denial_rate",
        ...     event_type=EventType.AUTHORIZATION_DENIED,
        ...     threshold=10,
        ...     window_seconds=60,
        ...     severity="warning",
        ... )
        >>>
        >>> # Process events
        >>> alerts.check(collector)
    """

    def __init__(
        self,
        webhook_url: str | None = None,
        webhook_headers: dict[str, str] | None = None,
    ) -> None:
        """
        Initialize the alert manager.

        Args:
            webhook_url: URL to send alerts to.
            webhook_headers: Extra HTTP headers for webhook requests.
                They are merged over a default JSON ``Content-Type``,
                so supplying e.g. only an ``Authorization`` header does
                not drop the content type of the JSON payload.
        """
        self._webhook_url = webhook_url
        # Caller-supplied headers come last so they can override the default.
        self._webhook_headers = {
            "Content-Type": "application/json",
            **(webhook_headers or {}),
        }

        self._rules: dict[str, AlertRule] = {}
        self._alert_history: deque[Alert] = deque(maxlen=1000)
        self._alert_callbacks: list[Callable[[Alert], None]] = []

        self._lock = threading.RLock()

    def add_rule(
        self,
        name: str,
        event_type: EventType | None = None,
        threshold: float = 1.0,
        window_seconds: float = 60.0,
        severity: str = "warning",
        cooldown_seconds: float = 300.0,
    ) -> AlertRule:
        """Add an alert rule, replacing any existing rule with the same name.

        Returns:
            The newly created rule.
        """
        rule = AlertRule(
            name=name,
            event_type=event_type,
            threshold=threshold,
            window_seconds=window_seconds,
            severity=severity,
            cooldown_seconds=cooldown_seconds,
        )

        with self._lock:
            self._rules[name] = rule

        return rule

    def remove_rule(self, name: str) -> bool:
        """Remove an alert rule.

        Returns:
            True if a rule with that name existed, False otherwise.
        """
        with self._lock:
            if name in self._rules:
                del self._rules[name]
                return True
            return False

    def check(self, collector: MetricsCollector) -> list[Alert]:
        """
        Check all rules against current metrics.

        For each rule with an event type that is not in cooldown, the
        collector's rate for that event type (converted to events per
        minute) is compared with the rule threshold; a rate at or above
        the threshold produces an alert and starts the rule's cooldown.

        Args:
            collector: MetricsCollector to check.

        Returns:
            List of triggered alerts.
        """
        triggered: list[Alert] = []

        with self._lock:
            for rule in self._rules.values():
                if not rule.can_trigger():
                    continue

                if rule.event_type is None:
                    # No event type: nothing to measure a rate for.
                    continue

                # Rate-based rule
                rate = collector.get_rate(rule.event_type, rule.window_seconds)
                rate_per_minute = rate * 60

                if rate_per_minute >= rule.threshold:
                    triggered.append(Alert(
                        rule_name=rule.name,
                        severity=rule.severity,
                        message=(
                            f"{rule.event_type.value} rate ({rate_per_minute:.1f}/min) "
                            f"exceeds threshold ({rule.threshold}/min)"
                        ),
                        value=rate_per_minute,
                        threshold=rule.threshold,
                        details={
                            "event_type": rule.event_type.value,
                            "window_seconds": rule.window_seconds,
                        },
                    ))
                    rule.mark_triggered()

        # Deliver after releasing the lock: the webhook call can block for
        # up to its timeout and should not stall rule management meanwhile.
        for alert in triggered:
            self._process_alert(alert)

        return triggered

    def _process_alert(self, alert: Alert) -> None:
        """Record, log and deliver a triggered alert.

        Exceptions raised by a callback are logged and do not stop the
        remaining callbacks.
        """
        with self._lock:
            self._alert_history.append(alert)

        logger.warning(f"ALERT [{alert.severity.upper()}] {alert.rule_name}: {alert.message}")

        # Send webhook
        if self._webhook_url:
            self._send_webhook(alert)

        # Notify callbacks
        for callback in self._alert_callbacks:
            try:
                callback(alert)
            except Exception as e:
                logger.error(f"Alert callback error: {e}")

    def _send_webhook(self, alert: Alert) -> bool:
        """POST the alert as JSON to the configured webhook.

        Returns:
            True if the endpoint answered with a 2xx status, False on
            any other status or on any error (errors are logged, never
            raised).
        """
        try:
            payload = json.dumps(alert.to_dict()).encode()
            request = Request(
                self._webhook_url,
                data=payload,
                headers=self._webhook_headers,
                method="POST",
            )

            with urlopen(request, timeout=10) as response:
                # Any 2xx is success; endpoints may answer 202/204.
                return 200 <= response.status < 300

        except URLError as e:
            # Connection problems and HTTP error statuses raised by urlopen.
            logger.error(f"Webhook error: {e}")
            return False
        except Exception as e:
            # Anything else, e.g. a payload that cannot be serialized.
            logger.error(f"Webhook error: {e}")
            return False

    def on_alert(self, callback: Callable[[Alert], None]) -> None:
        """Register a callback invoked with every triggered alert."""
        self._alert_callbacks.append(callback)

    def get_recent_alerts(self, limit: int = 50) -> list[Alert]:
        """Get the most recent alerts, oldest first.

        Args:
            limit: Maximum number of alerts to return. A limit of zero
                or less returns an empty list.
        """
        # Guard first: history[-0:] would be the whole history, not nothing.
        if limit <= 0:
            return []

        with self._lock:
            return list(self._alert_history)[-limit:]
695
+
696
+
697
class PrometheusExporter:
    """
    Exports metrics in Prometheus format.

    Example:
        >>> exporter = PrometheusExporter(collector)
        >>> metrics_text = exporter.export()
        >>>
        >>> # Serve via HTTP (e.g., with Flask)
        >>> @app.route('/metrics')
        ... def metrics():
        ...     return exporter.export(), 200, {'Content-Type': 'text/plain'}
    """

    def __init__(
        self,
        collector: MetricsCollector,
        namespace: str = "proxilion",
    ) -> None:
        """
        Initialize the exporter.

        Args:
            collector: MetricsCollector to export.
            namespace: Metric namespace prefix.
        """
        self._collector = collector
        self._namespace = namespace

    def export(self) -> str:
        """Export all metrics in Prometheus format.

        Emits, in order: a comment header, one ``<ns>_events_total``
        counter sample per event type, every gauge set on the collector,
        the uptime and denial-rate gauges, and every histogram with its
        buckets, ``+Inf`` bucket, sum and count.

        Returns:
            The metrics text, with a newline after the last sample.
        """
        ns = self._namespace
        lines: list[str] = [
            "# Proxilion Security Metrics",
            f"# Generated at {datetime.now(timezone.utc).isoformat()}",
            "",
        ]

        # Export counters. HELP/TYPE are written once for the whole metric
        # family; repeating them per label set is rejected by parsers.
        events_metric = f"{ns}_events_total"
        lines.append(f"# HELP {events_metric} Total security events by type")
        lines.append(f"# TYPE {events_metric} counter")
        for event_type in EventType:
            count = self._collector.get_counter(event_type.value)
            labels = f'{{event_type="{event_type.value}"}}'
            lines.append(f"{events_metric}{labels} {count}")
        lines.append("")

        # Export gauges
        summary = self._collector.get_summary()
        for gauge_name, value in summary.get("gauges", {}).items():
            name = f"{ns}_{gauge_name}"
            lines.append(f"# HELP {name} {gauge_name}")
            lines.append(f"# TYPE {name} gauge")
            lines.append(f"{name} {value}")
            lines.append("")

        # Export summary stats
        lines.append(f"# HELP {ns}_uptime_seconds Uptime in seconds")
        lines.append(f"# TYPE {ns}_uptime_seconds gauge")
        lines.append(f"{ns}_uptime_seconds {summary['uptime_seconds']:.2f}")
        lines.append("")

        lines.append(f"# HELP {ns}_denial_rate Authorization denial rate")
        lines.append(f"# TYPE {ns}_denial_rate gauge")
        lines.append(f"{ns}_denial_rate {summary['denial_rate']:.4f}")
        lines.append("")

        # Export histograms. Iterate over a snapshot so a histogram created
        # by another thread mid-export cannot change the dict being walked.
        for hist_name, buckets in list(self._collector._histograms.items()):
            name = f"{ns}_{hist_name}"
            total = self._collector._histogram_counts.get(hist_name, 0)
            value_sum = self._collector._histogram_sums.get(hist_name, 0)

            lines.append(f"# HELP {name} {hist_name}")
            lines.append(f"# TYPE {name} histogram")

            for bucket_le, count in buckets:
                lines.append(f'{name}_bucket{{le="{bucket_le}"}} {count}')

            # The +Inf bucket always equals the total observation count.
            lines.append(f'{name}_bucket{{le="+Inf"}} {total}')
            lines.append(f"{name}_sum {value_sum:.6f}")
            lines.append(f"{name}_count {total}")
            lines.append("")

        return "\n".join(lines)
782
+
783
+
784
+ # Convenience exports
785
# Public names of this module (what ``import *`` exposes).
__all__ = [
    # Collector, alerting and export classes
    "MetricsCollector",
    "AlertManager",
    "AlertRule",
    "PrometheusExporter",
    # Plain data containers
    "SecurityEvent",
    "Alert",
    "MetricSample",
    # Enumerations
    "EventType",
    "MetricType",
]