sentinelforge 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sentinelforge/__init__.py +3 -0
- sentinelforge/agents/__init__.py +17 -0
- sentinelforge/agents/base.py +42 -0
- sentinelforge/agents/containment.py +183 -0
- sentinelforge/agents/explainer.py +199 -0
- sentinelforge/agents/guardian.py +160 -0
- sentinelforge/agents/investigator.py +306 -0
- sentinelforge/agents/monitor.py +199 -0
- sentinelforge/agents/responder.py +238 -0
- sentinelforge/api/__init__.py +1 -0
- sentinelforge/api/server.py +346 -0
- sentinelforge/cli.py +269 -0
- sentinelforge/connectors/__init__.py +1 -0
- sentinelforge/connectors/siem.py +82 -0
- sentinelforge/connectors/threat_intel.py +118 -0
- sentinelforge/core/__init__.py +1 -0
- sentinelforge/core/alerting.py +289 -0
- sentinelforge/core/audit.py +139 -0
- sentinelforge/core/auth.py +144 -0
- sentinelforge/core/config.py +170 -0
- sentinelforge/core/database.py +384 -0
- sentinelforge/core/executors.py +364 -0
- sentinelforge/core/guardrails.py +118 -0
- sentinelforge/core/health.py +135 -0
- sentinelforge/core/knowledge.py +134 -0
- sentinelforge/core/llm.py +193 -0
- sentinelforge/core/logging.py +71 -0
- sentinelforge/core/models.py +162 -0
- sentinelforge/core/orchestrator.py +139 -0
- sentinelforge/core/safety.py +233 -0
- sentinelforge/core/secrets.py +138 -0
- sentinelforge/dashboard/__init__.py +1 -0
- sentinelforge/dashboard/app.py +776 -0
- sentinelforge/evaluation/__init__.py +1 -0
- sentinelforge/evaluation/harness.py +170 -0
- sentinelforge/knowledge/__init__.py +1 -0
- sentinelforge/knowledge/vector_store.py +100 -0
- sentinelforge/monitoring/__init__.py +0 -0
- sentinelforge/monitoring/file_integrity.py +161 -0
- sentinelforge/monitoring/network.py +200 -0
- sentinelforge/monitoring/windows_events.py +183 -0
- sentinelforge/simulation/__init__.py +1 -0
- sentinelforge/simulation/scenarios.py +169 -0
- sentinelforge-0.4.0.dist-info/METADATA +686 -0
- sentinelforge-0.4.0.dist-info/RECORD +48 -0
- sentinelforge-0.4.0.dist-info/WHEEL +4 -0
- sentinelforge-0.4.0.dist-info/entry_points.txt +2 -0
- sentinelforge-0.4.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""SentinelForge agent modules."""
|
|
2
|
+
|
|
3
|
+
from sentinelforge.agents.containment import ContainmentAgent
|
|
4
|
+
from sentinelforge.agents.explainer import ExplainerAgent
|
|
5
|
+
from sentinelforge.agents.guardian import GuardianAgent
|
|
6
|
+
from sentinelforge.agents.investigator import InvestigatorAgent
|
|
7
|
+
from sentinelforge.agents.monitor import MonitorAgent
|
|
8
|
+
from sentinelforge.agents.responder import ResponderAgent
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"MonitorAgent",
|
|
12
|
+
"InvestigatorAgent",
|
|
13
|
+
"ContainmentAgent",
|
|
14
|
+
"ResponderAgent",
|
|
15
|
+
"GuardianAgent",
|
|
16
|
+
"ExplainerAgent",
|
|
17
|
+
]
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Base agent class with shared safety and audit hooks."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from sentinelforge.core.audit import AuditLogger, get_audit_logger
|
|
9
|
+
from sentinelforge.core.config import Settings, get_settings
|
|
10
|
+
from sentinelforge.core.logging import get_logger
|
|
11
|
+
from sentinelforge.core.models import ActionStatus, AgentRole, OrchestratorState
|
|
12
|
+
from sentinelforge.core.safety import SafetyEngine, get_safety_engine
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BaseAgent(ABC):
    """Abstract parent of every agent.

    Gives each agent its settings, safety engine, audit logger and a logger
    named after the agent's role, plus a small helper for writing audit
    records.  Concrete subclasses assign ``role`` and implement :meth:`run`.
    """

    # Role of the concrete agent; read when naming the logger and when auditing.
    role: AgentRole

    def __init__(
        self,
        settings: Settings | None = None,
        safety: SafetyEngine | None = None,
        audit: AuditLogger | None = None,
    ) -> None:
        """Store the collaborators, filling in defaults for missing ones.

        Args:
            settings: Configuration object; ``get_settings()`` is used when
                this is ``None`` (or otherwise falsy).
            safety: Safety engine; ``get_safety_engine()`` is the fallback.
            audit: Audit logger; ``get_audit_logger()`` is the fallback.
        """
        self.settings = settings or get_settings()
        self.safety = safety or get_safety_engine()
        self.audit = audit or get_audit_logger()
        logger_name = f"agent.{self.role.value}"
        self.logger = get_logger(logger_name)

    @abstractmethod
    async def run(self, state: OrchestratorState) -> OrchestratorState:
        """Perform this agent's step and hand back the (updated) state."""
        ...

    def _audit(self, action: str, status: ActionStatus, **details: Any) -> None:
        """Write one audit record attributed to this agent's role.

        Extra keyword arguments are forwarded as the record's ``details``
        mapping.
        """
        record = {
            "agent": self.role,
            "action": action,
            "status": status,
            "details": details,
        }
        self.audit.log(**record)
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""Containment Agent — proposes safe, reversible isolation actions.
|
|
2
|
+
|
|
3
|
+
v0.3: Expanded action registry (5 types), auto-actions for known event
|
|
4
|
+
patterns, human-approval flag based on risk score and reversibility.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from sentinelforge.agents.base import BaseAgent
|
|
13
|
+
from sentinelforge.core.models import (
|
|
14
|
+
AgentRole,
|
|
15
|
+
ContainmentAction,
|
|
16
|
+
Investigation,
|
|
17
|
+
OrchestratorState,
|
|
18
|
+
Severity,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Catalogue of the containment actions this module can propose.
# Every entry has the same four fields:
#   params     - parameter dict attached to actions of this type
#   rollback   - name of the inverse operation ("" when there is none)
#   reversible - whether the action can be undone
#   base_risk  - risk of the action itself, averaged with a severity weight
#                when an action is built
ACTION_REGISTRY: dict[str, dict[str, Any]] = {
    "block_ip": dict(
        params=dict(direction="inbound", duration=3600),
        rollback="unblock_ip",
        reversible=True,
        base_risk=0.3,
    ),
    "isolate_host": dict(
        params=dict(method="network_isolation"),
        rollback="reconnect_host",
        reversible=True,
        base_risk=0.6,
    ),
    "disable_account": dict(
        params=dict(method="disable_login"),
        rollback="enable_account",
        reversible=True,
        base_risk=0.5,
    ),
    # The only irreversible entry: no rollback operation is registered.
    "kill_process": dict(
        params=dict(signal="SIGKILL"),
        rollback="",
        reversible=False,
        base_risk=0.4,
    ),
    "quarantine_file": dict(
        params=dict(method="move_to_quarantine"),
        rollback="restore_file",
        reversible=True,
        base_risk=0.35,
    ),
}
|
|
53
|
+
|
|
54
|
+
# Weight contributed by each severity label when a risk score is computed.
# Keys are looked up with ``severity.value``.
SEVERITY_WEIGHT = {
    "info": 0.1,
    "low": 0.2,
    "medium": 0.4,
    "high": 0.6,
    "critical": 0.8,
}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class ContainmentAgent(BaseAgent):
    """Proposes containment actions for investigations.

    Actions come from two sources: the investigation's own
    ``recommended_actions`` (with an IP-blocking fallback for high/critical
    severity) and automatic actions triggered by event-type keywords found in
    the investigation text.  Proposals are only appended to
    ``state.proposed_actions``; nothing is executed here.
    """

    role = AgentRole.CONTAINMENT

    async def run(self, state: OrchestratorState) -> OrchestratorState:
        """Propose actions for every investigation not yet covered.

        An investigation counts as covered when its id already appears in the
        ``reasoning`` text of some proposed action (``_build_action`` embeds
        the id there).

        Returns:
            The same ``state`` object, with new proposals appended.
        """
        new_investigations = [
            inv for inv in state.investigations
            if not any(
                a.reasoning and inv.id in a.reasoning
                for a in state.proposed_actions
            )
        ]

        if not new_investigations:
            self.logger.info("no_new_investigations")
            return state

        for inv in new_investigations:
            actions = self._generate_actions(inv)
            auto = self._auto_actions_for_event(inv)
            all_actions = actions + auto
            state.proposed_actions.extend(all_actions)
            self.logger.info(
                "actions_proposed",
                investigation_id=inv.id,
                count=len(all_actions),
            )

        return state

    def _generate_actions(self, investigation: Investigation) -> list[ContainmentAction]:
        """Build actions from the investigation's recommendations.

        When no recommendation could be parsed and the severity is HIGH or
        CRITICAL, fall back to one ``block_ip`` action per affected asset that
        is an IP address.
        """
        actions: list[ContainmentAction] = []

        for rec in investigation.recommended_actions:
            action = self._parse_recommendation(rec, investigation)
            if action:
                actions.append(action)

        if not actions and investigation.severity in (Severity.HIGH, Severity.CRITICAL):
            for asset in investigation.affected_assets:
                if self._is_ip(asset):
                    actions.append(
                        _build_action(
                            "block_ip",
                            asset,
                            investigation,
                        )
                    )

        return actions

    def _parse_recommendation(
        self, rec: str, investigation: Investigation
    ) -> ContainmentAction | None:
        """Parse a ``"<action_type> <target>"`` recommendation string.

        Returns:
            The built action, or ``None`` when the string has no target part
            or names an action type missing from ``ACTION_REGISTRY``.
        """
        # Only the first whitespace run separates type from target; anything
        # after it (including further spaces) is kept as the target.
        parts = rec.strip().split(maxsplit=1)
        if len(parts) < 2:
            return None

        action_type, target = parts[0], parts[1]

        if action_type not in ACTION_REGISTRY:
            self.logger.warning("unknown_action_type", action_type=action_type)
            return None

        return _build_action(action_type, target, investigation)

    def _auto_actions_for_event(self, investigation: Investigation) -> list[ContainmentAction]:
        """Generate automatic actions for well-known event patterns.

        * ``suspicious_process`` -> ``kill_process`` on every non-IP asset.
        * ``malware_detected``   -> ``quarantine_file`` on every non-IP asset
          whose name contains a dot.
        """
        actions: list[ContainmentAction] = []
        event_types = _get_event_types(investigation)

        if "suspicious_process" in event_types:
            for asset in investigation.affected_assets:
                if not self._is_ip(asset):
                    actions.append(
                        _build_action("kill_process", asset, investigation)
                    )

        if "malware_detected" in event_types:
            for asset in investigation.affected_assets:
                if not self._is_ip(asset) and "." in asset:
                    actions.append(
                        _build_action("quarantine_file", asset, investigation)
                    )

        return actions

    @staticmethod
    def _is_ip(value: str) -> bool:
        """Return True when *value* is a valid IPv4 or IPv6 address literal.

        Validation is delegated to :mod:`ipaddress`, so out-of-range octets
        such as ``999.1.1.1`` are rejected and IPv6 addresses are recognised
        (a plain dotted-quad pattern handles neither).
        """
        # Local import so the method does not depend on a new file-level import.
        import ipaddress

        try:
            ipaddress.ip_address(value)
        except ValueError:
            return False
        return True
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _build_action(
    action_type: str,
    target: str,
    investigation: Investigation,
) -> ContainmentAction:
    """Assemble a ``ContainmentAction`` from its ``ACTION_REGISTRY`` entry.

    Args:
        action_type: Key into ``ACTION_REGISTRY``.  Callers check membership
            first; an unknown key raises ``KeyError``.
        target: Asset the action applies to.
        investigation: Supplies the severity used for the risk score and the
            id/summary recorded in the action's reasoning.

    Returns:
        The proposed action.  ``requires_human`` is set when the computed risk
        exceeds 0.6 or the action type is not reversible.
    """
    cfg = ACTION_REGISTRY[action_type]
    risk = _compute_risk(investigation.severity, cfg["base_risk"])
    requires_human = risk > 0.6 or not cfg["reversible"]

    return ContainmentAction(
        action_type=action_type,
        target=target,
        # Hand over a copy so no action can alias (and later mutate) the
        # registry's shared template dict.
        parameters=dict(cfg["params"]),
        reversible=cfg["reversible"],
        rollback_procedure=f"{cfg['rollback']} {target}" if cfg["rollback"] else "",
        # The investigation id embedded here is what other agents search for
        # to link an action back to its investigation.
        reasoning=f"From investigation {investigation.id}: {investigation.summary}",
        risk_score=risk,
        requires_human=requires_human,
    )
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _compute_risk(severity: Severity, base_risk: float) -> float:
    """Combine a severity weight and an action's base risk into one score.

    The score is the arithmetic mean of the two values, capped at 1.0.  A
    severity whose ``value`` has no entry in ``SEVERITY_WEIGHT`` is weighted
    0.5.
    """
    weight = SEVERITY_WEIGHT.get(severity.value, 0.5)
    mean = (weight + base_risk) / 2
    return min(1.0, mean)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _get_event_types(investigation: Investigation) -> set[str]:
    """Return the known event-type keywords an investigation mentions.

    The summary and root cause are joined and lower-cased; each keyword is
    then searched for both with underscores replaced by spaces and in its
    original underscore form.
    """
    known = (
        "suspicious_process",
        "malware_detected",
        "brute_force",
        "data_exfiltration",
        "lateral_movement",
    )
    haystack = f"{investigation.summary} {investigation.root_cause}".lower()

    found: set[str] = set()
    for keyword in known:
        spaced = keyword.replace("_", " ")
        if spaced in haystack or keyword in haystack:
            found.add(keyword)
    return found
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"""Explainer Agent — generates human-readable incident reports with reasoning traces.
|
|
2
|
+
|
|
3
|
+
v0.4: Two report modes (executive and technical), LLM-generated summaries with
|
|
4
|
+
structured output, confidence scores, and guardrail validation.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from sentinelforge.agents.base import BaseAgent
|
|
12
|
+
from sentinelforge.core.llm import build_llm, invoke_llm_with_retry
|
|
13
|
+
from sentinelforge.core.models import (
|
|
14
|
+
ActionStatus,
|
|
15
|
+
AgentRole,
|
|
16
|
+
IncidentReport,
|
|
17
|
+
OrchestratorState,
|
|
18
|
+
Severity,
|
|
19
|
+
)
|
|
20
|
+
from sentinelforge.core.safety import get_safety_engine
|
|
21
|
+
|
|
22
|
+
EXECUTIVE_SCHEMA = """{
|
|
23
|
+
"title": "string — incident title",
|
|
24
|
+
"executive_summary": "string — 2-3 sentence summary",
|
|
25
|
+
"business_impact": "string — what this means for the organization",
|
|
26
|
+
"recommendations": ["string — actionable next steps in plain language"]
|
|
27
|
+
}"""
|
|
28
|
+
|
|
29
|
+
TECHNICAL_SCHEMA = """{
|
|
30
|
+
"title": "string — incident title with MITRE IDs",
|
|
31
|
+
"executive_summary": "string — detailed technical narrative",
|
|
32
|
+
"root_cause_detail": "string — in-depth technical root cause",
|
|
33
|
+
"ioc_summary": "string — indicators of compromise found",
|
|
34
|
+
"recommendations": ["string — specific technical remediation steps"]
|
|
35
|
+
}"""
|
|
36
|
+
|
|
37
|
+
REPORT_PROMPT = """{system_prompt}
|
|
38
|
+
|
|
39
|
+
Generate an incident report from the following data.
|
|
40
|
+
|
|
41
|
+
Investigation Summary: {summary}
|
|
42
|
+
Root Cause: {root_cause}
|
|
43
|
+
Severity: {severity}
|
|
44
|
+
Confidence: {confidence}
|
|
45
|
+
Affected Assets: {assets}
|
|
46
|
+
MITRE Techniques: {mitre}
|
|
47
|
+
Actions Taken: {actions}
|
|
48
|
+
|
|
49
|
+
Report Mode: {mode}
|
|
50
|
+
|
|
51
|
+
Respond ONLY with valid JSON matching the schema. No markdown, no explanation outside the JSON.
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class ExplainerAgent(BaseAgent):
    """Generates human-readable reports for every incident.

    One ``IncidentReport`` is produced per investigation that has no report
    yet.  The executive summary comes from the LLM when enabled, with a
    rule-based summary as the fallback whenever the LLM path fails.
    """

    role = AgentRole.EXPLAINER

    def __init__(self, use_llm: bool = True, report_mode: str = "executive", **kwargs: Any) -> None:
        """Configure the agent.

        Args:
            use_llm: When False, summaries are always rule-based.
            report_mode: ``"executive"`` selects ``EXECUTIVE_SCHEMA``; any
                other value selects ``TECHNICAL_SCHEMA``.
            **kwargs: Forwarded to ``BaseAgent.__init__``.
        """
        super().__init__(**kwargs)
        self._use_llm = use_llm
        self._report_mode = report_mode
        # Built lazily on the first LLM summary request.
        self._llm = None

    async def run(self, state: OrchestratorState) -> OrchestratorState:
        """Append a report for each investigation that does not have one.

        Returns:
            The same ``state`` object, with new reports appended.
        """
        unreported = [
            inv for inv in state.investigations
            if not any(
                r.investigation and r.investigation.id == inv.id
                for r in state.reports
            )
        ]

        if not unreported:
            self.logger.info("no_new_investigations_to_report")
            return state

        for inv in unreported:
            # Executed actions are linked to an investigation through the
            # investigation id embedded in their reasoning text.
            related_actions = [
                a for a in state.executed_actions
                if inv.id in (a.reasoning or "")
            ]

            report = await self._generate_report(inv, related_actions, state)
            state.reports.append(report)

            self._audit(
                "report_generated",
                ActionStatus.EXECUTED,
                report_id=report.id,
                investigation_id=inv.id,
            )

        return state

    async def _generate_report(
        self, inv: Any, actions: list[Any], state: OrchestratorState
    ) -> IncidentReport:
        """Build the report for one investigation.

        The timeline lists the investigation's events in timestamp order,
        followed by the related actions in timestamp order.  For HIGH and
        CRITICAL severity two standard recommendations are appended to the
        investigation's own.
        """
        related_events = [
            e for e in state.events if e.id in inv.event_ids
        ]

        timeline = []
        for e in sorted(related_events, key=lambda x: x.timestamp):
            timeline.append({
                "time": e.timestamp.isoformat(),
                "type": e.event_type,
                "description": e.description,
                "severity": e.severity.value,
            })
        for a in sorted(actions, key=lambda x: x.timestamp):
            timeline.append({
                "time": a.timestamp.isoformat(),
                "type": f"action:{a.action_type}",
                "description": f"{a.action_type} on {a.target}",
                "status": a.status.value,
            })

        if self._use_llm:
            try:
                exec_summary = await self._llm_summary(inv, actions)
            except Exception as exc:
                # Deliberate best-effort: any LLM or validation failure
                # degrades to the deterministic summary instead of aborting.
                self.logger.warning("llm_report_failed", error=str(exc))
                exec_summary = self._rule_summary(inv, actions)
        else:
            exec_summary = self._rule_summary(inv, actions)

        # Copy so the investigation's own list is not extended below.
        recommendations = list(inv.recommended_actions)
        if inv.severity in (Severity.HIGH, Severity.CRITICAL):
            recommendations.append("Review and harden affected systems")
            recommendations.append("Conduct post-incident review within 48 hours")

        return IncidentReport(
            title=f"Incident: {inv.summary}",
            executive_summary=exec_summary,
            timeline=timeline,
            events=related_events,
            investigation=inv,
            actions_taken=actions,
            reasoning_trace=inv.reasoning_trace,
            recommendations=recommendations,
            mitre_mapping=inv.mitre_techniques,
            severity=inv.severity,
        )

    def _rule_summary(self, inv: Any, actions: list[Any]) -> str:
        """Return a deterministic summary built only from investigation fields."""
        action_summary = (
            f" {len(actions)} containment actions were executed."
            if actions else " No automated actions were taken."
        )
        return (
            f"A {inv.severity.value}-severity incident was detected: {inv.summary}. "
            f"Root cause: {inv.root_cause}. "
            f"{len(inv.affected_assets)} assets were affected."
            f"{action_summary}"
            f" Confidence: {inv.confidence:.0%}."
        )

    async def _llm_summary(self, inv: Any, actions: list[Any]) -> str:
        """Ask the LLM for a summary and return its ``executive_summary``.

        The prompt and the response are both validated by the agent's safety
        engine.  If the response is a JSON object, its ``executive_summary``
        field is returned (the raw response when that key is missing); a
        response that is not a JSON object is returned unchanged.

        Raises:
            RuntimeError: No LLM could be built.
            ValueError: Prompt or output validation failed, or the LLM
                returned an empty response.
        """
        # Local import: ``json`` is not among this module's file-level imports.
        import json

        if self._llm is None:
            self._llm = build_llm()
        if self._llm is None:
            raise RuntimeError("No LLM available")

        # Use the engine given to BaseAgent.__init__ so an injected safety
        # engine is honoured here as well.
        safety = self.safety
        schema = EXECUTIVE_SCHEMA if self._report_mode == "executive" else TECHNICAL_SCHEMA
        system_prompt = safety.build_system_prompt(schema)

        prompt = REPORT_PROMPT.format(
            system_prompt=system_prompt,
            summary=inv.summary,
            root_cause=inv.root_cause,
            severity=inv.severity.value,
            confidence=f"{inv.confidence:.0%}",
            # Only the first 20 assets are included in the prompt.
            assets=", ".join(inv.affected_assets[:20]),
            mitre=", ".join(inv.mitre_techniques),
            actions="; ".join(
                f"{a.action_type} on {a.target} ({a.status.value})" for a in actions
            ) or "None",
            mode=self._report_mode,
        )

        valid, reason = safety.validate_llm_prompt(prompt)
        if not valid:
            raise ValueError(f"Prompt validation failed: {reason}")

        content = await invoke_llm_with_retry(self._llm, prompt, sanitize=True)

        valid, reason = safety.validate_llm_output(content)
        if not valid:
            raise ValueError(f"Output validation failed: {reason}")

        # An empty reply would otherwise become an empty executive summary;
        # raising lets the caller fall back to the rule-based summary.
        if not content or not content.strip():
            raise ValueError("LLM returned an empty response")

        try:
            data = json.loads(content.strip())
        except json.JSONDecodeError:
            return content
        if not isinstance(data, dict):
            # Valid JSON but not an object (list, number, ...): keep raw text.
            return content
        return str(data.get("executive_summary", content))
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Guardian Agent — oversees all other agents for safety and integrity.
|
|
2
|
+
|
|
3
|
+
This is the most critical agent. It validates every proposed action,
|
|
4
|
+
detects prompt injection, monitors for goal hijacking, and enforces
|
|
5
|
+
constitutional rules. It has veto power over all other agents.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from sentinelforge.agents.base import BaseAgent
|
|
11
|
+
from sentinelforge.core.models import (
|
|
12
|
+
ActionStatus,
|
|
13
|
+
AgentRole,
|
|
14
|
+
ContainmentAction,
|
|
15
|
+
OrchestratorState,
|
|
16
|
+
PendingApproval,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class GuardianAgent(BaseAgent):
    """Validates actions, detects anomalous agent behavior, enforces safety policy.

    Every action still in ``PROPOSED`` status is reviewed.  Accepted actions
    are either approved directly or escalated for human approval; rejected
    actions are marked ``REJECTED`` and audited.
    """

    role = AgentRole.GUARDIAN

    async def run(self, state: OrchestratorState) -> OrchestratorState:
        """Review proposed actions, then run the behavior checks.

        Returns:
            The same ``state`` object with approvals, escalations, rejections
            and safety violations recorded on it.
        """
        self.logger.info("guardian_review_start", proposed=len(state.proposed_actions))

        # Escalated actions keep the PROPOSED status while they wait for a
        # human decision.  Skip the ones already queued, otherwise each pass
        # would re-review them and add a duplicate PendingApproval, escalation
        # message and database row.
        awaiting_human = [pa.action.id for pa in state.pending_approvals]
        pending = [
            a for a in state.proposed_actions
            if a.status == ActionStatus.PROPOSED and a.id not in awaiting_human
        ]

        for action in pending:
            approved, reason = self._review_action(action)

            if approved:
                if self.safety.requires_human_approval(action):
                    pa = PendingApproval(
                        action=action,
                        timeout_seconds=self.settings.responder.approval_timeout_seconds,
                        reason=f"Risk={action.risk_score:.2f}, requires human review",
                    )
                    state.pending_approvals.append(pa)
                    state.human_escalations.append(
                        f"Action {action.action_type} on {action.target} requires human approval "
                        f"(risk={action.risk_score:.2f}): {action.reasoning}"
                    )
                    self._persist_approval(pa)
                    self.logger.info(
                        "escalated_to_human",
                        action=action.action_type,
                        risk=action.risk_score,
                    )
                    # Stays PROPOSED until a human decides.
                    action.status = ActionStatus.PROPOSED
                else:
                    action.status = ActionStatus.APPROVED
                    state.approved_actions.append(action)
                    self._audit(
                        "approve_action",
                        ActionStatus.APPROVED,
                        action_id=action.id,
                        action_type=action.action_type,
                        target=action.target,
                    )
            else:
                action.status = ActionStatus.REJECTED
                # Rejections whose reason matches one of these phrases are
                # treated as policy outcomes and not recorded as violations.
                lowered = reason.lower()
                is_policy = (
                    "rate limit" in lowered
                    or "sandbox" in lowered
                    or "not in the allowed" in lowered
                    or "human approval" in lowered
                )
                if not is_policy:
                    state.safety_violations.append(reason)
                self._audit(
                    "reject_action",
                    ActionStatus.REJECTED,
                    action_id=action.id,
                    reason=reason,
                )
                self.logger.warning(
                    "action_rejected",
                    action=action.action_type,
                    reason=reason,
                )

        self._check_agent_behavior(state)

        return state

    def _review_action(self, action: ContainmentAction) -> tuple[bool, str]:
        """Multi-layer validation of a proposed action.

        Checks run in order and stop at the first failure: safety-engine
        validation, constitutional compliance, prompt-injection detection on
        the reasoning text, and a hard risk ceiling of 0.9.

        Returns:
            ``(True, "Approved by Guardian")`` or ``(False, <reason>)``.
        """
        allowed, reason = self.safety.validate_action(action)
        if not allowed:
            return False, reason

        # Other agents treat ``reasoning`` as possibly empty or None; use an
        # empty string so the text checks never receive None.
        reasoning = action.reasoning or ""

        violations = self.safety.check_constitutional_compliance(
            f"{action.action_type} {action.target} {reasoning}"
        )
        if violations:
            return False, f"Constitutional violation: {violations[0]}"

        if self.safety.detect_prompt_injection(reasoning):
            return False, "Prompt injection detected in action reasoning"

        if action.risk_score > 0.9:
            return False, f"Risk score {action.risk_score:.2f} exceeds maximum threshold"

        return True, "Approved by Guardian"

    def _check_agent_behavior(self, state: OrchestratorState) -> None:
        """Detect anomalous patterns in agent behavior.

        Appends a message to ``state.safety_violations`` for each of:
        more than 20 proposed actions per iteration (after iteration 1),
        at least 8 of the last 10 proposals all sharing one action type, and
        prompt injection found in an investigation's summary or root cause.
        """
        # NOTE(review): these checks run over the full state on every call, so
        # the same message can be appended again on later passes - confirm
        # whether consumers expect that.
        actions_per_iteration = (
            len(state.proposed_actions) / max(1, state.iteration + 1)
        )
        if state.iteration > 1 and actions_per_iteration > 20:
            self.logger.warning(
                "high_action_volume",
                per_iteration=actions_per_iteration,
                msg="Unusually high number of actions — possible runaway agent",
            )
            state.safety_violations.append(
                "Guardian alert: high action volume detected, possible agent loop"
            )

        action_types = [a.action_type for a in state.proposed_actions[-10:]]
        if len(action_types) >= 8 and len(set(action_types)) == 1:
            self.logger.warning(
                "repetitive_actions",
                action_type=action_types[0],
                msg="Agent appears stuck in a loop",
            )
            state.safety_violations.append(
                f"Guardian alert: repetitive {action_types[0]} actions detected"
            )

        for inv in state.investigations:
            if self.safety.detect_prompt_injection(inv.summary):
                state.safety_violations.append(
                    f"Prompt injection detected in investigation {inv.id}"
                )
            if self.safety.detect_prompt_injection(inv.root_cause):
                state.safety_violations.append(
                    f"Prompt injection detected in root cause of investigation {inv.id}"
                )

    def _persist_approval(self, pa: PendingApproval) -> None:
        """Store a PendingApproval in the database.

        Best-effort: any failure is logged as a warning and swallowed so the
        review loop continues.
        """
        try:
            # Imported here rather than at module level, as in the original.
            from sentinelforge.core.database import get_database
            db = get_database()
            db.save_pending_approval(
                action_id=pa.action.id,
                action_data=pa.action.model_dump_json(),
                requested_at=pa.requested_at.isoformat(),
                timeout_seconds=pa.timeout_seconds,
                reason=pa.reason,
            )
        except Exception as exc:
            self.logger.warning("persist_approval_failed", error=str(exc))
|