lollmsbot 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lollmsbot/guardian.py ADDED
@@ -0,0 +1,692 @@
1
+ """
2
+ Guardian Module - LollmsBot's Security & Ethics Layer
3
+
4
+ The Guardian is LollmsBot's "conscience" and "immune system" combined.
5
+ It monitors all inputs, outputs, and internal states for:
6
+ - Security threats (prompt injection, data exfiltration, unauthorized access)
7
+ - Ethical violations (against user-defined ethics.md rules)
8
+ - Behavioral anomalies (deviation from established patterns)
9
+ - Consent enforcement (permission gates for sensitive operations)
10
+
11
+ Architecture: The Guardian operates as a "reflexive layer" - it can intercept
12
+ and block any operation before execution, but cannot be bypassed by the
13
+ Agent or any Tool. It's the ultimate authority in the system.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import hashlib
19
+ import hmac
20
+ import json
21
+ import logging
22
+ import re
23
+ import secrets
24
+ import time
25
+ import zlib
26
+ from abc import ABC, abstractmethod
27
+ from dataclasses import dataclass, field
28
+ from datetime import datetime, timedelta
29
+ from enum import Enum, auto
30
+ from pathlib import Path
31
+ from typing import Any, Callable, Coroutine, Dict, List, Optional, Set, Tuple, Union
32
+
33
+ # Configure logging for security events
34
+ logger = logging.getLogger("lollmsbot.guardian")
35
+
36
+
37
+ class ThreatLevel(Enum):
38
+ """Severity classification for security events."""
39
+ INFO = auto() # Logged, no action needed
40
+ LOW = auto() # Flagged for review
41
+ MEDIUM = auto() # Requires user notification
42
+ HIGH = auto() # Blocks operation, alerts user
43
+ CRITICAL = auto() # Self-quarantine triggered
44
+
45
+
46
+ class GuardianAction(Enum):
47
+ """Possible responses to security checks."""
48
+ ALLOW = auto() # Proceed normally
49
+ FLAG = auto() # Allow but log for review
50
+ CHALLENGE = auto() # Require explicit user confirmation
51
+ BLOCK = auto() # Deny operation
52
+ QUARANTINE = auto() # Block and isolate affected components
53
+
54
+
55
+ @dataclass(frozen=True)
56
+ class SecurityEvent:
57
+ """Immutable record of a security-relevant event."""
58
+ timestamp: datetime
59
+ event_type: str
60
+ threat_level: ThreatLevel
61
+ source: str # Component that triggered the event
62
+ description: str
63
+ context_hash: str # Hash of relevant context (for integrity)
64
+ action_taken: GuardianAction
65
+ user_notified: bool = False
66
+
67
+ def to_dict(self) -> Dict[str, Any]:
68
+ return {
69
+ "timestamp": self.timestamp.isoformat(),
70
+ "event_type": self.event_type,
71
+ "threat_level": self.threat_level.name,
72
+ "source": self.source,
73
+ "description": self.description,
74
+ "context_hash": self.context_hash,
75
+ "action_taken": self.action_taken.name,
76
+ "user_notified": self.user_notified,
77
+ }
78
+
79
+
80
+ @dataclass
81
+ class EthicsRule:
82
+ """A single ethical constraint from ethics.md."""
83
+ rule_id: str
84
+ category: str # e.g., "privacy", "honesty", "consent", "safety"
85
+ statement: str # Human-readable rule
86
+ enforcement: str # "strict", "advisory", "confirm"
87
+ exceptions: List[str] = field(default_factory=list)
88
+
89
+ def matches_violation(self, action_description: str) -> bool:
90
+ """Check if an action description violates this rule."""
91
+ # Simple keyword matching - can be enhanced with LLM-based semantic matching
92
+ keywords = self.statement.lower().split()
93
+ action_lower = action_description.lower()
94
+ return any(kw in action_lower for kw in keywords if len(kw) > 4)
95
+
96
+
97
+ @dataclass
98
+ class PermissionGate:
99
+ """A conditional permission that can be time-bound, context-aware, or require confirmation."""
100
+ resource: str # What this gate protects (e.g., "gmail", "shell", "filesystem")
101
+ allowed: bool = False
102
+ conditions: Dict[str, Any] = field(default_factory=dict)
103
+ # Examples: {"time_window": "09:00-17:00"}, {"require_confirmation": True}, {"max_per_day": 10}
104
+
105
+ def check(self, context: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
106
+ """Check if operation is permitted under current conditions."""
107
+ if not self.allowed:
108
+ return False, f"Access to {self.resource} is disabled"
109
+
110
+ # Check time window if specified
111
+ if "time_window" in self.conditions:
112
+ start, end = self.conditions["time_window"].split("-")
113
+ now = datetime.now().strftime("%H:%M")
114
+ if not (start <= now <= end):
115
+ return False, f"{self.resource} only available {start}-{end}"
116
+
117
+ # Check rate limiting
118
+ if "max_per_day" in self.conditions:
119
+ today_key = f"{self.resource}_{datetime.now().strftime('%Y%m%d')}"
120
+ # This would need persistent counter storage in production
121
+
122
+ # Check confirmation requirement
123
+ if self.conditions.get("require_confirmation", False):
124
+ return False, "CONFIRMATION_REQUIRED"
125
+
126
+ return True, None
127
+
128
+
129
+ class PromptInjectionDetector:
130
+ """Multi-layer defense against prompt injection attacks."""
131
+
132
+ # Known attack patterns (simplified - production would use ML models)
133
+ PATTERNS: List[Tuple[str, float]] = [
134
+ # (regex pattern, confidence score 0-1)
135
+ (r"ignore\s+(all\s+)?previous\s+(instructions|commands)", 0.9),
136
+ (r"disregard\s+(your\s+)?(instructions|programming|rules)", 0.9),
137
+ (r"you\s+are\s+now\s+.*?(free|unrestricted|uncensored)", 0.85),
138
+ (r"system\s*:\s*.*?(override|ignore|bypass)", 0.9),
139
+ (r"<script.*?>.*?</script>", 0.95), # XSS attempt
140
+ (r"```\s*system\s*\n", 0.8), # Fake system block
141
+ (r"\{\{.*?\}\}", 0.7), # Template injection attempt
142
+ (r"\$\{.*?\}", 0.7), # Shell interpolation attempt
143
+ (r"`.*?`", 0.5), # Backtick execution (lower confidence)
144
+ (r"\[\s*system\s*\]", 0.75), # Fake system role markers
145
+ ]
146
+
147
+ # Delimiter confusion attacks
148
+ DELIMITER_ATTACKS = [
149
+ (r"human\s*:\s*.*?\n\s*assistant\s*:", 0.8),
150
+ (r"user\s*:\s*.*?\n\s*ai\s*:", 0.8),
151
+ (r"<\|.*?\|>", 0.75), # Special token injection
152
+ ]
153
+
154
+ def __init__(self):
155
+ self._compiled_patterns = [(re.compile(p, re.I), s) for p, s in self.PATTERNS]
156
+ self._compiled_delimiters = [(re.compile(p, re.I), s) for p, s in self.DELIMITER_ATTACKS]
157
+
158
+ def analyze(self, text: str) -> Tuple[float, List[str]]:
159
+ """
160
+ Analyze text for prompt injection attempts.
161
+ Returns: (confidence_score 0-1, list_of_detected_patterns)
162
+ """
163
+ detected: List[str] = []
164
+ max_score = 0.0
165
+
166
+ # Check primary patterns
167
+ for pattern, score in self._compiled_patterns:
168
+ if pattern.search(text):
169
+ detected.append(pattern.pattern[:50]) # Truncated for logging
170
+ max_score = max(max_score, score)
171
+
172
+ # Check delimiter confusion
173
+ for pattern, score in self._compiled_delimiters:
174
+ if pattern.search(text):
175
+ detected.append(f"delimiter:{pattern.pattern[:30]}")
176
+ max_score = max(max_score, score)
177
+
178
+ # Structural analysis: look for role confusion
179
+ role_markers = text.lower().count("role:") + text.lower().count("system:")
180
+ if role_markers > 2:
181
+ max_score = max(max_score, 0.6)
182
+ detected.append(f"excessive_role_markers:{role_markers}")
183
+
184
+ # Entropy analysis: unusually high entropy may indicate encoded attacks
185
+ if len(text) > 100:
186
+ entropy = self._calculate_entropy(text)
187
+ if entropy > 5.5: # Threshold for suspicious randomness
188
+ max_score = max(max_score, 0.5)
189
+ detected.append(f"high_entropy:{entropy:.2f}")
190
+
191
+ return min(max_score, 1.0), detected
192
+
193
+ def _calculate_entropy(self, text: str) -> float:
194
+ """Calculate Shannon entropy of text."""
195
+ if not text:
196
+ return 0.0
197
+ probs = [text.count(c) / len(text) for c in set(text)]
198
+ return -sum(p * (p.bit_length() - 1) for p in probs if p > 0)
199
+
200
+ def sanitize(self, text: str) -> str:
201
+ """Apply conservative sanitization to potentially dangerous input."""
202
+ # Remove null bytes and control characters
203
+ text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', text)
204
+ # Normalize whitespace
205
+ text = re.sub(r'\s+', ' ', text)
206
+ # Escape potential HTML
207
+ text = text.replace("<", "&lt;").replace(">", "&gt;")
208
+ return text.strip()
209
+
210
+
211
+ class AnomalyDetector:
212
+ """Behavioral anomaly detection for self-monitoring."""
213
+
214
+ def __init__(self, window_size: int = 100):
215
+ self.window_size = window_size
216
+ self._behavior_log: List[Dict[str, Any]] = []
217
+ self._pattern_hashes: Set[str] = set()
218
+
219
+ def record(self, action_type: str, details: Dict[str, Any]) -> Optional[SecurityEvent]:
220
+ """Record an action and check for anomalies."""
221
+ record = {
222
+ "timestamp": datetime.now(),
223
+ "action": action_type,
224
+ "tool": details.get("tool"),
225
+ "user": details.get("user_id"),
226
+ "params_hash": self._hash_params(details.get("params", {})),
227
+ }
228
+
229
+ self._behavior_log.append(record)
230
+ if len(self._behavior_log) > self.window_size:
231
+ self._behavior_log.pop(0)
232
+
233
+ # Check for anomalies
234
+ return self._detect_anomaly(record)
235
+
236
+ def _hash_params(self, params: Dict[str, Any]) -> str:
237
+ """Create stable hash of parameters for pattern comparison."""
238
+ normalized = json.dumps(params, sort_keys=True, default=str)
239
+ return hashlib.sha256(normalized.encode()).hexdigest()[:16]
240
+
241
+ def _detect_anomaly(self, record: Dict[str, Any]) -> Optional[SecurityEvent]:
242
+ """Check if current action deviates from established patterns."""
243
+ # Check 1: Rapid successive operations (potential automation abuse)
244
+ recent = [r for r in self._behavior_log
245
+ if r["timestamp"] > datetime.now() - timedelta(minutes=5)]
246
+ if len(recent) > 20: # More than 20 actions in 5 minutes
247
+ return SecurityEvent(
248
+ timestamp=datetime.now(),
249
+ event_type="rapid_operations",
250
+ threat_level=ThreatLevel.MEDIUM,
251
+ source="anomaly_detector",
252
+ description=f"Unusual activity: {len(recent)} actions in 5 minutes",
253
+ context_hash=record["params_hash"],
254
+ action_taken=GuardianAction.CHALLENGE,
255
+ )
256
+
257
+ # Check 2: New tool combination (unprecedented workflow)
258
+ recent_tools = set(r.get("tool") for r in recent if r.get("tool"))
259
+ if len(recent_tools) > 3: # Unusually diverse tool usage
260
+ # Check if this combination has been seen before
261
+ combo_hash = hashlib.sha256(
262
+ json.dumps(sorted(recent_tools), sort_keys=True).encode()
263
+ ).hexdigest()[:16]
264
+ if combo_hash not in self._pattern_hashes:
265
+ self._pattern_hashes.add(combo_hash)
266
+ if len(self._pattern_hashes) > 10: # Not first-time novelty
267
+ return SecurityEvent(
268
+ timestamp=datetime.now(),
269
+ event_type="novel_tool_combination",
270
+ threat_level=ThreatLevel.LOW,
271
+ source="anomaly_detector",
272
+ description=f"New tool combination: {recent_tools}",
273
+ context_hash=combo_hash,
274
+ action_taken=GuardianAction.FLAG,
275
+ )
276
+
277
+ # Check 3: Privilege escalation attempt (tools requiring higher permissions)
278
+ # This would integrate with permission system
279
+
280
+ return None
281
+
282
+
283
+ class Guardian:
284
+ """
285
+ The Guardian is LollmsBot's ultimate security and ethics authority.
286
+ It operates as a non-bypassable interceptor for all critical operations.
287
+ """
288
+
289
+ # Singleton instance for system-wide authority
290
+ _instance: Optional[Guardian] = None
291
+ _initialized: bool = False
292
+
293
+ def __new__(cls, *args, **kwargs):
294
+ if cls._instance is None:
295
+ cls._instance = super().__new__(cls)
296
+ return cls._instance
297
+
298
+ def __init__(
299
+ self,
300
+ ethics_file: Optional[Path] = None,
301
+ audit_log_path: Optional[Path] = None,
302
+ auto_quarantine: bool = True,
303
+ ):
304
+ if self._initialized:
305
+ return
306
+
307
+ self._initialized = True
308
+ self.ethics_file = ethics_file or Path.home() / ".lollmsbot" / "ethics.md"
309
+ self.audit_log_path = audit_log_path or Path.home() / ".lollmsbot" / "audit.log"
310
+
311
+ # Security components
312
+ self.injection_detector = PromptInjectionDetector()
313
+ self.anomaly_detector = AnomalyDetector()
314
+
315
+ # State
316
+ self._ethics_rules: List[EthicsRule] = []
317
+ self._permission_gates: Dict[str, PermissionGate] = {}
318
+ self._quarantined: bool = False
319
+ self._quarantine_reason: Optional[str] = None
320
+ self._event_history: List[SecurityEvent] = []
321
+ self._max_history = 10000
322
+
323
+ # Configuration
324
+ self.auto_quarantine = auto_quarantine
325
+ self.injection_threshold = 0.75 # Block above this confidence
326
+
327
+ # Load ethics and permissions
328
+ self._load_ethics()
329
+ self._load_permissions()
330
+
331
+ # Ensure audit log directory exists
332
+ self.audit_log_path.parent.mkdir(parents=True, exist_ok=True)
333
+
334
+ logger.info("🛡️ Guardian initialized - LollmsBot is protected")
335
+
336
+ def _load_ethics(self) -> None:
337
+ """Load ethics rules from ethics.md or use defaults."""
338
+ if self.ethics_file.exists():
339
+ try:
340
+ content = self.ethics_file.read_text(encoding='utf-8')
341
+ self._ethics_rules = self._parse_ethics_md(content)
342
+ logger.info(f"📜 Loaded {len(self._ethics_rules)} ethics rules")
343
+ except Exception as e:
344
+ logger.error(f"Failed to load ethics: {e}")
345
+ self._load_default_ethics()
346
+ else:
347
+ self._load_default_ethics()
348
+
349
+ def _load_default_ethics(self) -> None:
350
+ """Install default ethical constraints."""
351
+ self._ethics_rules = [
352
+ EthicsRule(
353
+ rule_id="privacy-001",
354
+ category="privacy",
355
+ statement="Never share user personal information without explicit consent",
356
+ enforcement="strict",
357
+ ),
358
+ EthicsRule(
359
+ rule_id="consent-001",
360
+ category="consent",
361
+ statement="Always ask permission before executing destructive operations",
362
+ enforcement="strict",
363
+ ),
364
+ EthicsRule(
365
+ rule_id="honesty-001",
366
+ category="honesty",
367
+ statement="Never misrepresent capabilities or pretend to be human",
368
+ enforcement="strict",
369
+ ),
370
+ EthicsRule(
371
+ rule_id="safety-001",
372
+ category="safety",
373
+ statement="Do not assist with creating malware, exploits, or harmful content",
374
+ enforcement="strict",
375
+ ),
376
+ EthicsRule(
377
+ rule_id="autonomy-001",
378
+ category="autonomy",
379
+ statement="Respect user autonomy and do not manipulate decisions",
380
+ enforcement="advisory",
381
+ ),
382
+ ]
383
+ logger.info(f"📜 Loaded {len(self._ethics_rules)} default ethics rules")
384
+
385
+ def _parse_ethics_md(self, content: str) -> List[EthicsRule]:
386
+ """Parse ethics.md format into structured rules."""
387
+ rules = []
388
+ current_rule = None
389
+
390
+ for line in content.split('\n'):
391
+ line = line.strip()
392
+ if line.startswith('## '):
393
+ # New rule section
394
+ if current_rule:
395
+ rules.append(current_rule)
396
+ rule_id = line[3:].strip().lower().replace(' ', '-')
397
+ current_rule = {
398
+ 'rule_id': rule_id,
399
+ 'category': 'general',
400
+ 'statement': '',
401
+ 'enforcement': 'advisory',
402
+ 'exceptions': []
403
+ }
404
+ elif line.startswith('- ') and current_rule:
405
+ if line.startswith('- Category:'):
406
+ current_rule['category'] = line[11:].strip()
407
+ elif line.startswith('- Enforcement:'):
408
+ current_rule['enforcement'] = line[14:].strip()
409
+ elif line.startswith('- Exception:'):
410
+ current_rule['exceptions'].append(line[12:].strip())
411
+ elif not current_rule['statement']:
412
+ current_rule['statement'] = line[2:].strip()
413
+
414
+ if current_rule:
415
+ rules.append(current_rule)
416
+
417
+ return [EthicsRule(**r) for r in rules]
418
+
419
+ def _load_permissions(self) -> None:
420
+ """Load permission gates from configuration."""
421
+ # Default restrictive permissions
422
+ self._permission_gates = {
423
+ "shell": PermissionGate("shell", allowed=False),
424
+ "filesystem_write": PermissionGate("filesystem_write", allowed=True),
425
+ "filesystem_delete": PermissionGate("filesystem_delete", allowed=False),
426
+ "http_external": PermissionGate("http_external", allowed=True,
427
+ conditions={"require_confirmation": True}),
428
+ "email_send": PermissionGate("email_send", allowed=False),
429
+ "calendar_write": PermissionGate("calendar_write", allowed=True),
430
+ }
431
+
432
+ # ============== PUBLIC API ==============
433
+
434
+ def check_input(self, text: str, source: str = "unknown") -> Tuple[bool, Optional[SecurityEvent]]:
435
+ """
436
+ Screen all incoming text for prompt injection and other attacks.
437
+ Returns: (is_safe, security_event_if_blocked)
438
+ """
439
+ if self._quarantined:
440
+ return False, SecurityEvent(
441
+ timestamp=datetime.now(),
442
+ event_type="quarantine_block",
443
+ threat_level=ThreatLevel.CRITICAL,
444
+ source=source,
445
+ description=f"Input blocked: Guardian is in quarantine mode ({self._quarantine_reason})",
446
+ context_hash=self._hash_context({"text": text[:100]}),
447
+ action_taken=GuardianAction.BLOCK,
448
+ )
449
+
450
+ # Run injection detection
451
+ confidence, patterns = self.injection_detector.analyze(text)
452
+
453
+ if confidence >= self.injection_threshold:
454
+ event = SecurityEvent(
455
+ timestamp=datetime.now(),
456
+ event_type="prompt_injection_detected",
457
+ threat_level=ThreatLevel.HIGH if confidence > 0.9 else ThreatLevel.MEDIUM,
458
+ source=source,
459
+ description=f"Injection detected (confidence: {confidence:.2f}): {patterns[:3]}",
460
+ context_hash=self._hash_context({"text": text[:200], "patterns": patterns}),
461
+ action_taken=GuardianAction.BLOCK if confidence > 0.9 else GuardianAction.CHALLENGE,
462
+ )
463
+ self._log_event(event)
464
+
465
+ if confidence > 0.95 and self.auto_quarantine:
466
+ self._enter_quarantine("Critical injection detected")
467
+
468
+ return False, event
469
+
470
+ # Low-confidence detection: flag but allow
471
+ if confidence > 0.5:
472
+ event = SecurityEvent(
473
+ timestamp=datetime.now(),
474
+ event_type="suspicious_input",
475
+ threat_level=ThreatLevel.LOW,
476
+ source=source,
477
+ description=f"Suspicious patterns detected (confidence: {confidence:.2f})",
478
+ context_hash=self._hash_context({"text": text[:100]}),
479
+ action_taken=GuardianAction.FLAG,
480
+ )
481
+ self._log_event(event)
482
+
483
+ return True, None
484
+
485
+ def check_tool_execution(
486
+ self,
487
+ tool_name: str,
488
+ params: Dict[str, Any],
489
+ user_id: str,
490
+ context: Dict[str, Any],
491
+ ) -> Tuple[bool, Optional[str], Optional[SecurityEvent]]:
492
+ """
493
+ Authorize a tool execution. Returns: (allowed, reason_if_denied, security_event)
494
+ """
495
+ if self._quarantined:
496
+ return False, f"Guardian quarantine active: {self._quarantine_reason}", None
497
+
498
+ # Check permission gate
499
+ gate = self._permission_gates.get(tool_name)
500
+ if gate:
501
+ permitted, reason = gate.check(context)
502
+ if not permitted:
503
+ if reason == "CONFIRMATION_REQUIRED":
504
+ return False, "This operation requires explicit user confirmation", None
505
+ event = SecurityEvent(
506
+ timestamp=datetime.now(),
507
+ event_type="permission_denied",
508
+ threat_level=ThreatLevel.MEDIUM,
509
+ source=f"tool:{tool_name}",
510
+ description=f"Permission gate blocked: {reason}",
511
+ context_hash=self._hash_context({"user": user_id, "params": params}),
512
+ action_taken=GuardianAction.BLOCK,
513
+ )
514
+ self._log_event(event)
515
+ return False, reason, event
516
+
517
+ # Check ethics constraints
518
+ action_desc = f"Execute {tool_name} with {list(params.keys())}"
519
+ for rule in self._ethics_rules:
520
+ if rule.enforcement == "strict" and rule.matches_violation(action_desc):
521
+ event = SecurityEvent(
522
+ timestamp=datetime.now(),
523
+ event_type="ethics_violation",
524
+ threat_level=ThreatLevel.HIGH,
525
+ source=f"tool:{tool_name}",
526
+ description=f"Violates rule {rule.rule_id}: {rule.statement}",
527
+ context_hash=self._hash_context({"rule": rule.rule_id, "action": action_desc}),
528
+ action_taken=GuardianAction.BLOCK,
529
+ )
530
+ self._log_event(event)
531
+ return False, f"Blocked by ethics rule: {rule.statement}", event
532
+
533
+ # Record for anomaly detection
534
+ anomaly = self.anomaly_detector.record("tool_execution", {
535
+ "tool": tool_name,
536
+ "user_id": user_id,
537
+ "params": params,
538
+ })
539
+ if anomaly:
540
+ self._log_event(anomaly)
541
+ if anomaly.action_taken == GuardianAction.CHALLENGE:
542
+ return False, "Unusual activity pattern detected - confirmation required", anomaly
543
+ # FLAG allows continuation
544
+
545
+ return True, None, None
546
+
547
+ def check_output(self, content: str, destination: str) -> Tuple[bool, Optional[SecurityEvent]]:
548
+ """
549
+ Screen outgoing content for data exfiltration or policy violations.
550
+ """
551
+ # Check for potential PII leakage (simplified - production uses NER models)
552
+ pii_patterns = [
553
+ (r'\b\d{3}-\d{2}-\d{4}\b', "SSN"), # US Social Security
554
+ (r'\b\d{4}[ -]?\d{4}[ -]?\d{4}[ -]?\d{4}\b', "credit_card"), # Credit cards
555
+ (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', "email"),
556
+ ]
557
+
558
+ detected_pii = []
559
+ for pattern, pii_type in pii_patterns:
560
+ if re.search(pattern, content):
561
+ detected_pii.append(pii_type)
562
+
563
+ if detected_pii and "public" in destination.lower():
564
+ event = SecurityEvent(
565
+ timestamp=datetime.now(),
566
+ event_type="potential_pii_exposure",
567
+ threat_level=ThreatLevel.HIGH,
568
+ source=f"output:{destination}",
569
+ description=f"Potential PII detected: {detected_pii}",
570
+ context_hash=self._hash_context({"types": detected_pii, "preview": content[:100]}),
571
+ action_taken=GuardianAction.CHALLENGE,
572
+ )
573
+ self._log_event(event)
574
+ return False, event
575
+
576
+ return True, None
577
+
578
+ def audit_decision(self, decision: str, reasoning: str, confidence: float) -> None:
579
+ """Log a significant AI decision for later review."""
580
+ event = SecurityEvent(
581
+ timestamp=datetime.now(),
582
+ event_type="ai_decision",
583
+ threat_level=ThreatLevel.INFO,
584
+ source="agent",
585
+ description=f"Decision: {decision[:100]}",
586
+ context_hash=self._hash_context({"reasoning": reasoning[:200], "confidence": confidence}),
587
+ action_taken=GuardianAction.ALLOW,
588
+ )
589
+ self._log_event(event)
590
+
591
+ # ============== SELF-PRESERVATION ==============
592
+
593
+ def _enter_quarantine(self, reason: str) -> None:
594
+ """Enter self-quarantine mode - disable all non-essential operations."""
595
+ self._quarantined = True
596
+ self._quarantine_reason = reason
597
+
598
+ event = SecurityEvent(
599
+ timestamp=datetime.now(),
600
+ event_type="self_quarantine",
601
+ threat_level=ThreatLevel.CRITICAL,
602
+ source="guardian",
603
+ description=f"Entered quarantine: {reason}",
604
+ context_hash=self._hash_context({"reason": reason}),
605
+ action_taken=GuardianAction.QUARANTINE,
606
+ user_notified=True,
607
+ )
608
+ self._log_event(event)
609
+
610
+ logger.critical(f"🚨 GUARDIAN QUARANTINE: {reason}")
611
+ # In production: send alert to all configured channels
612
+
613
+ def exit_quarantine(self, admin_key: str) -> bool:
614
+ """Exit quarantine mode (requires admin authentication)."""
615
+ # In production: verify admin_key against stored hash
616
+ if not self._quarantined:
617
+ return True
618
+
619
+ # Log the attempt
620
+ event = SecurityEvent(
621
+ timestamp=datetime.now(),
622
+ event_type="quarantine_exit_attempt",
623
+ threat_level=ThreatLevel.HIGH,
624
+ source="admin",
625
+ description="Attempt to exit quarantine",
626
+ context_hash=self._hash_context({"authorized": True}), # Would verify
627
+ action_taken=GuardianAction.ALLOW,
628
+ )
629
+ self._log_event(event)
630
+
631
+ self._quarantined = False
632
+ self._quarantine_reason = None
633
+ logger.info("✅ Exited quarantine mode")
634
+ return True
635
+
636
+ # ============== UTILITIES ==============
637
+
638
+ def _hash_context(self, context: Dict[str, Any]) -> str:
639
+ """Create integrity hash for audit logging."""
640
+ normalized = json.dumps(context, sort_keys=True, default=str)
641
+ return hashlib.sha256(normalized.encode()).hexdigest()[:32]
642
+
643
+ def _log_event(self, event: SecurityEvent) -> None:
644
+ """Persist security event to audit log."""
645
+ self._event_history.append(event)
646
+ if len(self._event_history) > self._max_history:
647
+ self._event_history.pop(0)
648
+
649
+ # Write to persistent log
650
+ try:
651
+ with open(self.audit_log_path, 'a', encoding='utf-8') as f:
652
+ f.write(json.dumps(event.to_dict()) + '\n')
653
+ except Exception as e:
654
+ logger.error(f"Failed to write audit log: {e}")
655
+
656
+ # Log at appropriate level
657
+ if event.threat_level == ThreatLevel.CRITICAL:
658
+ logger.critical(f"🚨 {event.event_type}: {event.description}")
659
+ elif event.threat_level == ThreatLevel.HIGH:
660
+ logger.error(f"⚠️ {event.event_type}: {event.description}")
661
+ elif event.threat_level == ThreatLevel.MEDIUM:
662
+ logger.warning(f"🔶 {event.event_type}: {event.description}")
663
+
664
+ def get_audit_report(self, since: Optional[datetime] = None) -> Dict[str, Any]:
665
+ """Generate security audit report."""
666
+ events = self._event_history
667
+ if since:
668
+ events = [e for e in events if e.timestamp >= since]
669
+
670
+ by_level = {level.name: [] for level in ThreatLevel}
671
+ for e in events:
672
+ by_level[e.threat_level.name].append(e.to_dict())
673
+
674
+ return {
675
+ "total_events": len(events),
676
+ "quarantine_active": self._quarantined,
677
+ "events_by_level": {k: len(v) for k, v in by_level.items()},
678
+ "recent_critical": by_level.get("CRITICAL", [])[-5:],
679
+ "recent_high": by_level.get("HIGH", [])[-10:],
680
+ "ethics_rules_active": len(self._ethics_rules),
681
+ "permission_gates_active": len(self._permission_gates),
682
+ }
683
+
684
+ @property
685
+ def is_quarantined(self) -> bool:
686
+ return self._quarantined
687
+
688
+
689
+ # Global access function
690
+ def get_guardian() -> Guardian:
691
+ """Get or create the singleton Guardian instance."""
692
+ return Guardian()