proxilion-0.0.1-py3-none-any.whl
This diff shows the contents of a publicly available package version as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
- proxilion/__init__.py +136 -0
- proxilion/audit/__init__.py +133 -0
- proxilion/audit/base_exporters.py +527 -0
- proxilion/audit/compliance/__init__.py +130 -0
- proxilion/audit/compliance/base.py +457 -0
- proxilion/audit/compliance/eu_ai_act.py +603 -0
- proxilion/audit/compliance/iso27001.py +544 -0
- proxilion/audit/compliance/soc2.py +491 -0
- proxilion/audit/events.py +493 -0
- proxilion/audit/explainability.py +1173 -0
- proxilion/audit/exporters/__init__.py +58 -0
- proxilion/audit/exporters/aws_s3.py +636 -0
- proxilion/audit/exporters/azure_storage.py +608 -0
- proxilion/audit/exporters/cloud_base.py +468 -0
- proxilion/audit/exporters/gcp_storage.py +570 -0
- proxilion/audit/exporters/multi_exporter.py +498 -0
- proxilion/audit/hash_chain.py +652 -0
- proxilion/audit/logger.py +543 -0
- proxilion/caching/__init__.py +49 -0
- proxilion/caching/tool_cache.py +633 -0
- proxilion/context/__init__.py +73 -0
- proxilion/context/context_window.py +556 -0
- proxilion/context/message_history.py +505 -0
- proxilion/context/session.py +735 -0
- proxilion/contrib/__init__.py +51 -0
- proxilion/contrib/anthropic.py +609 -0
- proxilion/contrib/google.py +1012 -0
- proxilion/contrib/langchain.py +641 -0
- proxilion/contrib/mcp.py +893 -0
- proxilion/contrib/openai.py +646 -0
- proxilion/core.py +3058 -0
- proxilion/decorators.py +966 -0
- proxilion/engines/__init__.py +287 -0
- proxilion/engines/base.py +266 -0
- proxilion/engines/casbin_engine.py +412 -0
- proxilion/engines/opa_engine.py +493 -0
- proxilion/engines/simple.py +437 -0
- proxilion/exceptions.py +887 -0
- proxilion/guards/__init__.py +54 -0
- proxilion/guards/input_guard.py +522 -0
- proxilion/guards/output_guard.py +634 -0
- proxilion/observability/__init__.py +198 -0
- proxilion/observability/cost_tracker.py +866 -0
- proxilion/observability/hooks.py +683 -0
- proxilion/observability/metrics.py +798 -0
- proxilion/observability/session_cost_tracker.py +1063 -0
- proxilion/policies/__init__.py +67 -0
- proxilion/policies/base.py +304 -0
- proxilion/policies/builtin.py +486 -0
- proxilion/policies/registry.py +376 -0
- proxilion/providers/__init__.py +201 -0
- proxilion/providers/adapter.py +468 -0
- proxilion/providers/anthropic_adapter.py +330 -0
- proxilion/providers/gemini_adapter.py +391 -0
- proxilion/providers/openai_adapter.py +294 -0
- proxilion/py.typed +0 -0
- proxilion/resilience/__init__.py +81 -0
- proxilion/resilience/degradation.py +615 -0
- proxilion/resilience/fallback.py +555 -0
- proxilion/resilience/retry.py +554 -0
- proxilion/scheduling/__init__.py +57 -0
- proxilion/scheduling/priority_queue.py +419 -0
- proxilion/scheduling/scheduler.py +459 -0
- proxilion/security/__init__.py +244 -0
- proxilion/security/agent_trust.py +968 -0
- proxilion/security/behavioral_drift.py +794 -0
- proxilion/security/cascade_protection.py +869 -0
- proxilion/security/circuit_breaker.py +428 -0
- proxilion/security/cost_limiter.py +690 -0
- proxilion/security/idor_protection.py +460 -0
- proxilion/security/intent_capsule.py +849 -0
- proxilion/security/intent_validator.py +495 -0
- proxilion/security/memory_integrity.py +767 -0
- proxilion/security/rate_limiter.py +509 -0
- proxilion/security/scope_enforcer.py +680 -0
- proxilion/security/sequence_validator.py +636 -0
- proxilion/security/trust_boundaries.py +784 -0
- proxilion/streaming/__init__.py +70 -0
- proxilion/streaming/detector.py +761 -0
- proxilion/streaming/transformer.py +674 -0
- proxilion/timeouts/__init__.py +55 -0
- proxilion/timeouts/decorators.py +477 -0
- proxilion/timeouts/manager.py +545 -0
- proxilion/tools/__init__.py +69 -0
- proxilion/tools/decorators.py +493 -0
- proxilion/tools/registry.py +732 -0
- proxilion/types.py +339 -0
- proxilion/validation/__init__.py +93 -0
- proxilion/validation/pydantic_schema.py +351 -0
- proxilion/validation/schema.py +651 -0
- proxilion-0.0.1.dist-info/METADATA +872 -0
- proxilion-0.0.1.dist-info/RECORD +94 -0
- proxilion-0.0.1.dist-info/WHEEL +4 -0
- proxilion-0.0.1.dist-info/licenses/LICENSE +21 -0
proxilion/observability/metrics.py

@@ -0,0 +1,798 @@
"""
Real-Time Metrics and Alerts for Proxilion.

Provides observability into Proxilion's security operations with:
- Prometheus-compatible metrics export
- Real-time alerting via webhooks
- Security event aggregation
- Dashboard-ready data

Example:
    >>> from proxilion.observability.metrics import (
    ...     MetricsCollector,
    ...     AlertManager,
    ...     PrometheusExporter,
    ... )
    >>>
    >>> # Create collector
    >>> collector = MetricsCollector()
    >>>
    >>> # Record security events
    >>> collector.record_authorization(allowed=True, user="alice", resource="db")
    >>> collector.record_guard_block(guard_type="input", pattern="injection")
    >>> collector.record_rate_limit_hit(user="bob")
    >>>
    >>> # Get Prometheus metrics
    >>> exporter = PrometheusExporter(collector)
    >>> print(exporter.export())
    >>>
    >>> # Configure alerts
    >>> alerts = AlertManager(webhook_url="https://hooks.slack.com/...")
    >>> alerts.add_rule("high_block_rate", threshold=10, window_seconds=60)
"""

from __future__ import annotations

import json
import logging
import threading
import time
from collections import defaultdict, deque
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Callable
from urllib.request import Request, urlopen
from urllib.error import URLError

logger = logging.getLogger(__name__)


class MetricType(Enum):
    """Types of metrics."""

    COUNTER = "counter"
    """Monotonically increasing value."""

    GAUGE = "gauge"
    """Value that can go up or down."""

    HISTOGRAM = "histogram"
    """Distribution of values."""

    SUMMARY = "summary"
    """Summary statistics."""


class EventType(Enum):
    """Types of security events."""

    AUTHORIZATION_ALLOWED = "authorization_allowed"
    AUTHORIZATION_DENIED = "authorization_denied"
    INPUT_GUARD_BLOCK = "input_guard_block"
    OUTPUT_GUARD_BLOCK = "output_guard_block"
    RATE_LIMIT_HIT = "rate_limit_hit"
    CIRCUIT_OPEN = "circuit_open"
    IDOR_VIOLATION = "idor_violation"
    SEQUENCE_VIOLATION = "sequence_violation"
    INTENT_HIJACK = "intent_hijack"
    BEHAVIORAL_DRIFT = "behavioral_drift"
    KILL_SWITCH_ACTIVATED = "kill_switch_activated"
    CONTEXT_TAMPERING = "context_tampering"
    AGENT_TRUST_VIOLATION = "agent_trust_violation"


@dataclass
class SecurityEvent:
    """A security-related event."""

    event_type: EventType
    timestamp: float
    user_id: str | None = None
    agent_id: str | None = None
    resource: str | None = None
    action: str | None = None
    details: dict[str, Any] = field(default_factory=dict)
    severity: float = 0.5  # 0.0 to 1.0

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "event_type": self.event_type.value,
            "timestamp": self.timestamp,
            "datetime": datetime.fromtimestamp(self.timestamp, tz=timezone.utc).isoformat(),
            "user_id": self.user_id,
            "agent_id": self.agent_id,
            "resource": self.resource,
            "action": self.action,
            "details": self.details,
            "severity": self.severity,
        }


@dataclass
class MetricSample:
    """A single metric sample."""

    name: str
    value: float
    timestamp: float
    labels: dict[str, str] = field(default_factory=dict)


class MetricsCollector:
    """
    Collects security metrics from Proxilion operations.

    Provides both real-time and aggregated metrics for monitoring
    and alerting.

    Example:
        >>> collector = MetricsCollector()
        >>>
        >>> # Record events
        >>> collector.record_authorization(True, "alice", "database")
        >>> collector.record_guard_block("input", "prompt_injection")
        >>>
        >>> # Get stats
        >>> stats = collector.get_summary()
        >>> print(f"Total authorizations: {stats['total_authorizations']}")
    """

    def __init__(
        self,
        event_window_size: int = 10000,
        aggregation_window_seconds: float = 60.0,
    ) -> None:
        """
        Initialize the collector.

        Args:
            event_window_size: Maximum events to keep in memory.
            aggregation_window_seconds: Window for rate calculations.
        """
        self._event_window_size = event_window_size
        self._aggregation_window = aggregation_window_seconds

        # Event storage
        self._events: deque[SecurityEvent] = deque(maxlen=event_window_size)

        # Counters
        self._counters: dict[str, int] = defaultdict(int)
        self._counter_labels: dict[str, dict[str, dict[str, int]]] = defaultdict(
            lambda: defaultdict(lambda: defaultdict(int))
        )

        # Gauges
        self._gauges: dict[str, float] = {}

        # Histograms (bucket counts)
        self._histograms: dict[str, list[tuple[float, int]]] = {}
        self._histogram_sums: dict[str, float] = defaultdict(float)
        self._histogram_counts: dict[str, int] = defaultdict(int)

        # Event callbacks
        self._event_callbacks: list[Callable[[SecurityEvent], None]] = []

        self._lock = threading.RLock()
        self._start_time = time.time()

        logger.debug("MetricsCollector initialized")

    def record_event(self, event: SecurityEvent) -> None:
        """Record a security event."""
        with self._lock:
            self._events.append(event)

            # Update counters
            self._counters[event.event_type.value] += 1

            # Labeled counters
            if event.user_id:
                self._counter_labels["by_user"][event.event_type.value][event.user_id] += 1
            if event.resource:
                self._counter_labels["by_resource"][event.event_type.value][event.resource] += 1

        # Notify callbacks
        for callback in self._event_callbacks:
            try:
                callback(event)
            except Exception as e:
                logger.error(f"Event callback error: {e}")

    def record_authorization(
        self,
        allowed: bool,
        user: str | None = None,
        resource: str | None = None,
        action: str | None = None,
        latency_ms: float | None = None,
    ) -> None:
        """Record an authorization decision."""
        event_type = EventType.AUTHORIZATION_ALLOWED if allowed else EventType.AUTHORIZATION_DENIED

        self.record_event(SecurityEvent(
            event_type=event_type,
            timestamp=time.time(),
            user_id=user,
            resource=resource,
            action=action,
            details={"latency_ms": latency_ms} if latency_ms else {},
            severity=0.0 if allowed else 0.5,
        ))

        if latency_ms:
            self.record_histogram("authorization_latency_ms", latency_ms)

    def record_guard_block(
        self,
        guard_type: str,
        pattern: str,
        risk_score: float = 0.0,
        user: str | None = None,
    ) -> None:
        """Record a guard block."""
        event_type = EventType.INPUT_GUARD_BLOCK if guard_type == "input" else EventType.OUTPUT_GUARD_BLOCK

        self.record_event(SecurityEvent(
            event_type=event_type,
            timestamp=time.time(),
            user_id=user,
            details={"pattern": pattern, "risk_score": risk_score},
            severity=risk_score,
        ))

    def record_rate_limit_hit(
        self,
        user: str | None = None,
        limit_type: str = "requests",
    ) -> None:
        """Record a rate limit hit."""
        self.record_event(SecurityEvent(
            event_type=EventType.RATE_LIMIT_HIT,
            timestamp=time.time(),
            user_id=user,
            details={"limit_type": limit_type},
            severity=0.4,
        ))

    def record_circuit_open(
        self,
        circuit_name: str,
        failure_count: int = 0,
    ) -> None:
        """Record a circuit breaker opening."""
        self.record_event(SecurityEvent(
            event_type=EventType.CIRCUIT_OPEN,
            timestamp=time.time(),
            details={"circuit_name": circuit_name, "failure_count": failure_count},
            severity=0.6,
        ))

    def record_idor_violation(
        self,
        user: str,
        resource_type: str,
        object_id: str,
    ) -> None:
        """Record an IDOR violation."""
        self.record_event(SecurityEvent(
            event_type=EventType.IDOR_VIOLATION,
            timestamp=time.time(),
            user_id=user,
            resource=resource_type,
            details={"object_id": object_id},
            severity=0.8,
        ))

    def record_sequence_violation(
        self,
        user: str,
        rule_name: str,
        tool_name: str,
    ) -> None:
        """Record a sequence violation."""
        self.record_event(SecurityEvent(
            event_type=EventType.SEQUENCE_VIOLATION,
            timestamp=time.time(),
            user_id=user,
            details={"rule_name": rule_name, "tool_name": tool_name},
            severity=0.7,
        ))

    def record_intent_hijack(
        self,
        user: str | None,
        agent: str | None,
        original_intent: str,
        detected_intent: str,
        confidence: float,
    ) -> None:
        """Record an intent hijack detection."""
        self.record_event(SecurityEvent(
            event_type=EventType.INTENT_HIJACK,
            timestamp=time.time(),
            user_id=user,
            agent_id=agent,
            details={
                "original_intent": original_intent,
                "detected_intent": detected_intent,
                "confidence": confidence,
            },
            severity=confidence,
        ))

    def record_behavioral_drift(
        self,
        agent: str,
        severity: float,
        drifting_metrics: list[str],
    ) -> None:
        """Record behavioral drift detection."""
        self.record_event(SecurityEvent(
            event_type=EventType.BEHAVIORAL_DRIFT,
            timestamp=time.time(),
            agent_id=agent,
            details={"drifting_metrics": drifting_metrics},
            severity=severity,
        ))

    def record_kill_switch(
        self,
        reason: str,
        triggered_by: str,
    ) -> None:
        """Record kill switch activation."""
        self.record_event(SecurityEvent(
            event_type=EventType.KILL_SWITCH_ACTIVATED,
            timestamp=time.time(),
            details={"reason": reason, "triggered_by": triggered_by},
            severity=1.0,
        ))

    def record_histogram(
        self,
        name: str,
        value: float,
        buckets: list[float] | None = None,
    ) -> None:
        """Record a histogram value."""
        if buckets is None:
            buckets = [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]

        with self._lock:
            if name not in self._histograms:
                self._histograms[name] = [(b, 0) for b in buckets]

            # Increment bucket counts
            new_buckets = []
            for bucket_le, count in self._histograms[name]:
                if value <= bucket_le:
                    new_buckets.append((bucket_le, count + 1))
                else:
                    new_buckets.append((bucket_le, count))
            self._histograms[name] = new_buckets

            self._histogram_sums[name] += value
            self._histogram_counts[name] += 1

    def set_gauge(self, name: str, value: float) -> None:
        """Set a gauge value."""
        with self._lock:
            self._gauges[name] = value

    def increment_counter(self, name: str, value: int = 1) -> None:
        """Increment a counter."""
        with self._lock:
            self._counters[name] += value

    def on_event(self, callback: Callable[[SecurityEvent], None]) -> None:
        """Register a callback for events."""
        self._event_callbacks.append(callback)

    def get_counter(self, name: str) -> int:
        """Get a counter value."""
        with self._lock:
            return self._counters.get(name, 0)

    def get_gauge(self, name: str) -> float | None:
        """Get a gauge value."""
        with self._lock:
            return self._gauges.get(name)

    def get_rate(self, event_type: EventType, window_seconds: float | None = None) -> float:
        """Get event rate (events per second)."""
        window = window_seconds or self._aggregation_window
        now = time.time()
        cutoff = now - window

        with self._lock:
            count = sum(
                1 for e in self._events
                if e.event_type == event_type and e.timestamp > cutoff
            )

        return count / window

    def get_recent_events(
        self,
        event_type: EventType | None = None,
        limit: int = 100,
    ) -> list[SecurityEvent]:
        """Get recent events, optionally filtered by type."""
        with self._lock:
            if event_type:
                events = [e for e in self._events if e.event_type == event_type]
            else:
                events = list(self._events)

        return events[-limit:]

    def get_summary(self) -> dict[str, Any]:
        """Get a summary of all metrics."""
        now = time.time()
        uptime = now - self._start_time

        with self._lock:
            # Calculate rates
            window = self._aggregation_window
            cutoff = now - window

            recent_events = [e for e in self._events if e.timestamp > cutoff]
            event_counts = defaultdict(int)
            for e in recent_events:
                event_counts[e.event_type.value] += 1

            # Total counts
            total_auth_allowed = self._counters.get(EventType.AUTHORIZATION_ALLOWED.value, 0)
            total_auth_denied = self._counters.get(EventType.AUTHORIZATION_DENIED.value, 0)
            total_authorizations = total_auth_allowed + total_auth_denied

            return {
                "uptime_seconds": uptime,
                "total_events": len(self._events),
                "total_authorizations": total_authorizations,
                "total_allowed": total_auth_allowed,
                "total_denied": total_auth_denied,
                "denial_rate": total_auth_denied / max(1, total_authorizations),
                "recent_events_per_minute": {
                    k: v * 60 / window for k, v in event_counts.items()
                },
                "gauges": dict(self._gauges),
                "counters": dict(self._counters),
            }


class AlertRule:
    """A rule for triggering alerts."""

    def __init__(
        self,
        name: str,
        event_type: EventType | None = None,
        threshold: float = 1.0,
        window_seconds: float = 60.0,
        severity: str = "warning",
        cooldown_seconds: float = 300.0,
    ) -> None:
        """
        Initialize the rule.

        Args:
            name: Rule name.
            event_type: Event type to monitor (None for custom metric).
            threshold: Threshold for triggering.
            window_seconds: Window for rate calculation.
            severity: Alert severity (info, warning, critical).
            cooldown_seconds: Minimum time between alerts.
        """
        self.name = name
        self.event_type = event_type
        self.threshold = threshold
        self.window_seconds = window_seconds
        self.severity = severity
        self.cooldown_seconds = cooldown_seconds

        self._last_triggered: float = 0

    def can_trigger(self) -> bool:
        """Check if rule can trigger (respects cooldown)."""
        return time.time() - self._last_triggered > self.cooldown_seconds

    def mark_triggered(self) -> None:
        """Mark rule as triggered."""
        self._last_triggered = time.time()


@dataclass
class Alert:
    """An alert notification."""

    rule_name: str
    severity: str
    message: str
    value: float
    threshold: float
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    details: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "rule_name": self.rule_name,
            "severity": self.severity,
            "message": self.message,
            "value": self.value,
            "threshold": self.threshold,
            "timestamp": self.timestamp.isoformat(),
            "details": self.details,
        }


class AlertManager:
    """
    Manages alerting based on security metrics.

    Example:
        >>> alerts = AlertManager(webhook_url="https://hooks.slack.com/...")
        >>>
        >>> # Add rules
        >>> alerts.add_rule(
        ...     name="high_denial_rate",
        ...     event_type=EventType.AUTHORIZATION_DENIED,
        ...     threshold=10,
        ...     window_seconds=60,
        ...     severity="warning",
        ... )
        >>>
        >>> # Process events
        >>> alerts.check(collector)
    """

    def __init__(
        self,
        webhook_url: str | None = None,
        webhook_headers: dict[str, str] | None = None,
    ) -> None:
        """
        Initialize the alert manager.

        Args:
            webhook_url: URL to send alerts to.
            webhook_headers: HTTP headers for webhook requests.
        """
        self._webhook_url = webhook_url
        self._webhook_headers = webhook_headers or {"Content-Type": "application/json"}

        self._rules: dict[str, AlertRule] = {}
        self._alert_history: deque[Alert] = deque(maxlen=1000)
        self._alert_callbacks: list[Callable[[Alert], None]] = []

        self._lock = threading.RLock()

    def add_rule(
        self,
        name: str,
        event_type: EventType | None = None,
        threshold: float = 1.0,
        window_seconds: float = 60.0,
        severity: str = "warning",
        cooldown_seconds: float = 300.0,
    ) -> AlertRule:
        """Add an alert rule."""
        rule = AlertRule(
            name=name,
            event_type=event_type,
            threshold=threshold,
            window_seconds=window_seconds,
            severity=severity,
            cooldown_seconds=cooldown_seconds,
        )

        with self._lock:
            self._rules[name] = rule

        return rule

    def remove_rule(self, name: str) -> bool:
        """Remove an alert rule."""
        with self._lock:
            if name in self._rules:
                del self._rules[name]
                return True
            return False

    def check(self, collector: MetricsCollector) -> list[Alert]:
        """
        Check all rules against current metrics.

        Args:
            collector: MetricsCollector to check.

        Returns:
            List of triggered alerts.
        """
        triggered: list[Alert] = []

        with self._lock:
            for rule in self._rules.values():
                if not rule.can_trigger():
                    continue

                if rule.event_type:
                    # Rate-based rule
                    rate = collector.get_rate(rule.event_type, rule.window_seconds)
                    rate_per_minute = rate * 60

                    if rate_per_minute >= rule.threshold:
                        alert = Alert(
                            rule_name=rule.name,
                            severity=rule.severity,
                            message=f"{rule.event_type.value} rate ({rate_per_minute:.1f}/min) exceeds threshold ({rule.threshold}/min)",
                            value=rate_per_minute,
                            threshold=rule.threshold,
                            details={
                                "event_type": rule.event_type.value,
                                "window_seconds": rule.window_seconds,
                            },
                        )
                        triggered.append(alert)
                        rule.mark_triggered()

        # Process triggered alerts
        for alert in triggered:
            self._process_alert(alert)

        return triggered

    def _process_alert(self, alert: Alert) -> None:
        """Process a triggered alert."""
        with self._lock:
            self._alert_history.append(alert)

        logger.warning(f"ALERT [{alert.severity.upper()}] {alert.rule_name}: {alert.message}")

        # Send webhook
        if self._webhook_url:
            self._send_webhook(alert)

        # Notify callbacks
        for callback in self._alert_callbacks:
            try:
                callback(alert)
            except Exception as e:
                logger.error(f"Alert callback error: {e}")

    def _send_webhook(self, alert: Alert) -> bool:
        """Send alert to webhook."""
        try:
            payload = json.dumps(alert.to_dict()).encode()
            request = Request(
                self._webhook_url,
                data=payload,
                headers=self._webhook_headers,
                method="POST",
            )

            with urlopen(request, timeout=10) as response:
                return response.status == 200

        except URLError as e:
            logger.error(f"Webhook error: {e}")
            return False
        except Exception as e:
            logger.error(f"Webhook error: {e}")
            return False

    def on_alert(self, callback: Callable[[Alert], None]) -> None:
        """Register a callback for alerts."""
        self._alert_callbacks.append(callback)

    def get_recent_alerts(self, limit: int = 50) -> list[Alert]:
        """Get recent alerts."""
        with self._lock:
            return list(self._alert_history)[-limit:]


class PrometheusExporter:
    """
    Exports metrics in Prometheus format.

    Example:
        >>> exporter = PrometheusExporter(collector)
        >>> metrics_text = exporter.export()
        >>>
        >>> # Serve via HTTP (e.g., with Flask)
        >>> @app.route('/metrics')
        ... def metrics():
        ...     return exporter.export(), 200, {'Content-Type': 'text/plain'}
    """

    def __init__(
        self,
        collector: MetricsCollector,
        namespace: str = "proxilion",
    ) -> None:
        """
        Initialize the exporter.

        Args:
            collector: MetricsCollector to export.
            namespace: Metric namespace prefix.
        """
        self._collector = collector
        self._namespace = namespace

    def export(self) -> str:
        """Export all metrics in Prometheus format."""
        lines: list[str] = []

        # Add header
        lines.append("# Proxilion Security Metrics")
        lines.append(f"# Generated at {datetime.now(timezone.utc).isoformat()}")
        lines.append("")

        # Export counters: a single HELP/TYPE pair for the metric name, then one
        # labeled sample per event type (Prometheus parsers reject duplicate
        # HELP/TYPE lines for the same metric).
        name = f"{self._namespace}_events_total"
        lines.append(f"# HELP {name} Total security events by type")
        lines.append(f"# TYPE {name} counter")
        for event_type in EventType:
            count = self._collector.get_counter(event_type.value)
            labels = f'{{event_type="{event_type.value}"}}'
            lines.append(f"{name}{labels} {count}")
        lines.append("")

        # Export gauges
        summary = self._collector.get_summary()
        gauges = summary.get("gauges", {})
        for gauge_name, value in gauges.items():
            name = f"{self._namespace}_{gauge_name}"
            lines.append(f"# HELP {name} {gauge_name}")
            lines.append(f"# TYPE {name} gauge")
            lines.append(f"{name} {value}")
            lines.append("")

        # Export summary stats
        lines.append(f"# HELP {self._namespace}_uptime_seconds Uptime in seconds")
        lines.append(f"# TYPE {self._namespace}_uptime_seconds gauge")
        lines.append(f"{self._namespace}_uptime_seconds {summary['uptime_seconds']:.2f}")
        lines.append("")

        lines.append(f"# HELP {self._namespace}_denial_rate Authorization denial rate")
        lines.append(f"# TYPE {self._namespace}_denial_rate gauge")
        lines.append(f"{self._namespace}_denial_rate {summary['denial_rate']:.4f}")
        lines.append("")

        # Export histograms
        for hist_name, buckets in self._collector._histograms.items():
            name = f"{self._namespace}_{hist_name}"
            lines.append(f"# HELP {name} {hist_name}")
            lines.append(f"# TYPE {name} histogram")

            for bucket_le, count in buckets:
                lines.append(f'{name}_bucket{{le="{bucket_le}"}} {count}')

            lines.append(f'{name}_bucket{{le="+Inf"}} {self._collector._histogram_counts.get(hist_name, 0)}')
            lines.append(f"{name}_sum {self._collector._histogram_sums.get(hist_name, 0):.6f}")
            lines.append(f"{name}_count {self._collector._histogram_counts.get(hist_name, 0)}")
            lines.append("")

        return "\n".join(lines)


# Convenience exports
__all__ = [
    # Core classes
    "MetricsCollector",
    "AlertManager",
    "AlertRule",
    "PrometheusExporter",
    # Data classes
    "SecurityEvent",
    "Alert",
    "MetricSample",
    # Enums
    "EventType",
    "MetricType",
]
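
Taken together, the module forms a small observability pipeline: MetricsCollector records events, AlertManager polls it with check() and fires rate-based rules, and PrometheusExporter renders counters, gauges, and histograms for scraping. The sketch below shows one way the three might be wired together; it relies only on the APIs visible in the diff above, and the /metrics endpoint on port 9000, the 15-second polling loop, and the MetricsHandler class are illustrative choices, not part of the package.

# Illustrative wiring of MetricsCollector, AlertManager, and PrometheusExporter.
# The HTTP endpoint and the background check loop are sketch-level assumptions,
# not proxilion APIs.
import threading
import time
from http.server import BaseHTTPRequestHandler, HTTPServer

from proxilion.observability.metrics import (
    AlertManager,
    EventType,
    MetricsCollector,
    PrometheusExporter,
)

collector = MetricsCollector()
exporter = PrometheusExporter(collector)

alerts = AlertManager()  # no webhook configured: alerts go to callbacks and logs
alerts.add_rule(
    name="high_denial_rate",
    event_type=EventType.AUTHORIZATION_DENIED,
    threshold=10,          # alert at >= 10 denials per minute
    window_seconds=60,
    severity="warning",
)
alerts.on_alert(lambda alert: print(f"[{alert.severity}] {alert.message}"))


def check_loop() -> None:
    # Evaluate alert rules against the collector every 15 seconds.
    while True:
        alerts.check(collector)
        time.sleep(15)


class MetricsHandler(BaseHTTPRequestHandler):
    # Serve the Prometheus text exposition at /metrics.
    def do_GET(self) -> None:
        if self.path != "/metrics":
            self.send_error(404)
            return
        body = exporter.export().encode()
        self.send_response(200)
        self.send_header("Content-Type", "text/plain; version=0.0.4")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)


if __name__ == "__main__":
    threading.Thread(target=check_loop, daemon=True).start()
    # Simulate some traffic so the exporter has data to show.
    collector.record_authorization(allowed=True, user="alice", resource="db")
    collector.record_authorization(allowed=False, user="mallory", resource="db")
    HTTPServer(("0.0.0.0", 9000), MetricsHandler).serve_forever()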