admina-framework 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- admina/__init__.py +34 -0
- admina/cli/__init__.py +14 -0
- admina/cli/commands/__init__.py +14 -0
- admina/cli/main.py +1522 -0
- admina/cli/templates/admina.yaml.j2 +77 -0
- admina/cli/templates/docker-compose.yml.j2 +254 -0
- admina/cli/templates/env.j2 +10 -0
- admina/cli/templates/main.py.j2 +95 -0
- admina/cli/templates/plugin.py.j2 +145 -0
- admina/cli/templates/plugin_pyproject.toml.j2 +15 -0
- admina/cli/templates/plugin_readme.md.j2 +27 -0
- admina/cli/templates/plugin_test.py.j2 +48 -0
- admina/core/__init__.py +14 -0
- admina/core/config.py +497 -0
- admina/core/event_bus.py +112 -0
- admina/core/secrets.py +257 -0
- admina/core/types.py +146 -0
- admina/dashboard/__init__.py +8 -0
- admina/dashboard/static/heimdall.png +0 -0
- admina/dashboard/static/index.html +1045 -0
- admina/dashboard/static/vendor/alpinejs.min.js +5 -0
- admina/domains/__init__.py +14 -0
- admina/domains/agent_security/__init__.py +41 -0
- admina/domains/agent_security/firewall.py +634 -0
- admina/domains/agent_security/loop_breaker.py +176 -0
- admina/domains/ai_infra/__init__.py +79 -0
- admina/domains/ai_infra/llm_engine.py +477 -0
- admina/domains/ai_infra/rag.py +817 -0
- admina/domains/ai_infra/webui.py +292 -0
- admina/domains/compliance/__init__.py +109 -0
- admina/domains/compliance/cross_regulation.py +314 -0
- admina/domains/compliance/eu_ai_act.py +367 -0
- admina/domains/compliance/forensic.py +380 -0
- admina/domains/compliance/gdpr.py +331 -0
- admina/domains/compliance/nis2.py +258 -0
- admina/domains/compliance/oisg.py +658 -0
- admina/domains/compliance/otel.py +101 -0
- admina/domains/data_sovereignty/__init__.py +42 -0
- admina/domains/data_sovereignty/classification.py +102 -0
- admina/domains/data_sovereignty/pii.py +260 -0
- admina/domains/data_sovereignty/residency.py +121 -0
- admina/integrations/__init__.py +14 -0
- admina/integrations/_engines.py +63 -0
- admina/integrations/cheshirecat/__init__.py +13 -0
- admina/integrations/cheshirecat/admina-plugin/admina_governance.py +207 -0
- admina/integrations/crewai/__init__.py +13 -0
- admina/integrations/crewai/callbacks.py +347 -0
- admina/integrations/langchain/__init__.py +13 -0
- admina/integrations/langchain/callbacks.py +341 -0
- admina/integrations/n8n/__init__.py +14 -0
- admina/integrations/openclaw/__init__.py +14 -0
- admina/plugins/__init__.py +49 -0
- admina/plugins/base.py +633 -0
- admina/plugins/builtin/__init__.py +14 -0
- admina/plugins/builtin/adapters/__init__.py +14 -0
- admina/plugins/builtin/adapters/ollama.py +120 -0
- admina/plugins/builtin/adapters/openai.py +138 -0
- admina/plugins/builtin/alerts/__init__.py +14 -0
- admina/plugins/builtin/alerts/log.py +66 -0
- admina/plugins/builtin/alerts/webhook.py +102 -0
- admina/plugins/builtin/auth/__init__.py +14 -0
- admina/plugins/builtin/auth/apikey.py +138 -0
- admina/plugins/builtin/compliance/__init__.py +14 -0
- admina/plugins/builtin/compliance/eu_ai_act.py +202 -0
- admina/plugins/builtin/connectors/__init__.py +14 -0
- admina/plugins/builtin/connectors/chromadb.py +137 -0
- admina/plugins/builtin/connectors/filesystem.py +111 -0
- admina/plugins/builtin/forensic/__init__.py +14 -0
- admina/plugins/builtin/forensic/filesystem.py +163 -0
- admina/plugins/builtin/forensic/minio.py +180 -0
- admina/plugins/builtin/guards/__init__.py +0 -0
- admina/plugins/builtin/guards/guardrailsai_guard.py +172 -0
- admina/plugins/builtin/pii/__init__.py +14 -0
- admina/plugins/builtin/pii/spacy_regex.py +160 -0
- admina/plugins/builtin/transports/__init__.py +14 -0
- admina/plugins/builtin/transports/http_rest.py +97 -0
- admina/plugins/builtin/transports/mcp.py +173 -0
- admina/plugins/registry.py +356 -0
- admina/proxy/__init__.py +15 -0
- admina/proxy/api/__init__.py +17 -0
- admina/proxy/api/dashboard.py +925 -0
- admina/proxy/api/integration.py +153 -0
- admina/proxy/config.py +214 -0
- admina/proxy/engine_bridge.py +306 -0
- admina/proxy/governance.py +232 -0
- admina/proxy/main.py +1484 -0
- admina/proxy/multi_upstream.py +156 -0
- admina/proxy/state.py +97 -0
- admina/py.typed +0 -0
- admina/sdk/__init__.py +34 -0
- admina/sdk/_compat.py +43 -0
- admina/sdk/compliance_kit.py +359 -0
- admina/sdk/governed_agent.py +391 -0
- admina/sdk/governed_data.py +434 -0
- admina/sdk/governed_model.py +241 -0
- admina_framework-0.9.0.dist-info/METADATA +575 -0
- admina_framework-0.9.0.dist-info/RECORD +102 -0
- admina_framework-0.9.0.dist-info/WHEEL +5 -0
- admina_framework-0.9.0.dist-info/entry_points.txt +2 -0
- admina_framework-0.9.0.dist-info/licenses/LICENSE +191 -0
- admina_framework-0.9.0.dist-info/licenses/NOTICE +16 -0
- admina_framework-0.9.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,634 @@
|
|
|
1
|
+
# Copyright © 2025–2026 Stefano Noferi & Admina contributors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""
|
|
16
|
+
Admina — Anti-Injection Firewall — Agent Security domain
|
|
17
|
+
Dual-layer defense: regex pattern matching + heuristic analysis.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import base64
|
|
21
|
+
import logging
|
|
22
|
+
import re
|
|
23
|
+
import time
|
|
24
|
+
import unicodedata
|
|
25
|
+
|
|
26
|
+
from admina.core.types import RiskLevel
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger("admina.firewall")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ── Text normalization (run BEFORE regex matching) ─────────────
# Common evasion tricks neutralised here: homoglyph (Cyrillic→Latin),
# leetspeak (1→i, 0→o, 3→e, 4→a, 5→s, 7→t, @→a, $→s), char-by-char
# hyphenation ("I-g-n-o-r-e" → "Ignore"), and short base64 payloads.
# The tables and regexes below are consumed by normalize_text().

# Cyrillic → Latin lookalikes (a small but high-frequency subset), plus a
# few non-Cyrillic lookalikes. Built with str.maketrans so a single
# str.translate() call folds every entry; case is preserved for the
# Cyrillic pairs.
_HOMOGLYPHS = str.maketrans(
    {
        "а": "a",
        "А": "A",
        "е": "e",
        "Е": "E",
        "о": "o",
        "О": "O",
        "р": "p",
        "Р": "P",
        "с": "c",
        "С": "C",
        "у": "y",
        "У": "Y",
        "х": "x",
        "Х": "X",
        "і": "i",
        "І": "I",
        "ј": "j",
        "Ј": "J",
        "ѕ": "s",
        "Ѕ": "S",
        "ԁ": "d",
        "ɡ": "g",
        "ѵ": "v",
        # NOTE(review): normalize_text() runs NFKC before this table, which
        # should already fold the mathematical-bold letters to ASCII, so
        # these two entries look redundant on that path — confirm.
        "𝐈": "I",
        "𝐢": "i",  # mathematical bold
        # Maps to an upper-case "B"; normalize_text() lowercases afterwards.
        "ꞵ": "B",
    }
)

# Leetspeak digits/symbols → letters. normalize_text() applies this after
# lowercasing, so only lower-case replacements are needed.
_LEET = str.maketrans(
    {"0": "o", "1": "i", "3": "e", "4": "a", "5": "s", "7": "t", "@": "a", "$": "s"}
)

# Candidate base64 token: 12+ characters of the standard alphabet followed
# by up to two "=" padding characters, delimited by word boundaries.
_BASE64_RX = re.compile(r"\b[A-Za-z0-9+/]{12,}={0,2}\b")
# Three or more single ASCII letters, each separated from the next by exactly
# one separator character (hyphen, underscore, dot, middle-dot variants,
# bullet or whitespace), e.g. "I-g-n-o-r-e".
_CHAR_HYPHEN_RX = re.compile(r"\b(?:[A-Za-z][-_.·‧•\s]){2,}[A-Za-z]\b")
# Any run of whitespace; used to collapse runs to a single space.
_WHITESPACE_RX = re.compile(r"\s+")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _rot13_decode(text: str) -> str:
|
|
78
|
+
return text.translate(
|
|
79
|
+
str.maketrans(
|
|
80
|
+
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
|
|
81
|
+
"nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM",
|
|
82
|
+
)
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# Words frequent enough to flag a candidate as likely-English (or near-EU
# language) after rot13 decoding. Avoids decoding random strings that
# happen to be rot13-shaped.
#
# normalize_text() tests each hint with a plain substring check against the
# lower-cased rot13 output (not a whole-word match), so short entries such
# as "the", "and" and "you" also hit inside longer words.
_ROT13_HINTS = (
    "ignore",
    "ignora",
    "ignorez",
    "olvida",
    "vergessen",
    "instruction",
    "instructions",
    "istruzioni",
    "anweisungen",
    "previous",
    "above",
    "system",
    "prompt",
    "rules",
    "reveal",
    "expose",
    "execute",
    "the",
    "and",
    "you",
)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def normalize_text(text: str) -> str:
    """Neutralise common obfuscation tricks ahead of pattern matching.

    The result is a best-effort "plain" rendering of *text*. Callers are
    expected to keep scanning the raw text as well, so nothing is lost if
    a decode step misfires. Step order matters: decoding happens while the
    original letter case is still intact (base64 is case-sensitive), and
    case / leetspeak folding happens last.

    Args:
        text: Raw input. Falsy input is returned unchanged.

    Returns:
        The lower-cased, whitespace-collapsed text with any decoded base64
        tokens and (when it looks like natural language) the ROT13 rendering
        appended.
    """
    if not text:
        return text

    # Steps 1 + 2: NFKC fold (full-width → ASCII, etc.), then map lookalike
    # characters onto Latin letters. Case is preserved at this point.
    folded = unicodedata.normalize("NFKC", text).translate(_HOMOGLYPHS)

    # Step 3: rejoin letters that were split one-by-one:
    # "I-g-n-o-r-e" → "Ignore".
    folded = _CHAR_HYPHEN_RX.sub(
        lambda hit: re.sub(r"[-_.·‧•\s]", "", hit.group(0)),
        folded,
    )

    # Step 4: decode base64-looking tokens before any case folding could
    # corrupt them, and append the plaintext so later matching can see it.
    pieces = [folded]
    for candidate in _BASE64_RX.findall(folded):
        padded = candidate + "=" * (-len(candidate) % 4)
        try:
            plain = base64.b64decode(padded, validate=True).decode("utf-8", errors="strict")
        except ValueError:
            # binascii.Error and UnicodeDecodeError are both ValueError
            # subclasses: the token was not valid base64 / UTF-8 text.
            continue
        if len(plain) >= 3 and plain.isprintable():
            pieces.append(plain)
    combined = " ".join(pieces)

    # Step 5: ROT13 is opt-in — only keep the rotated text when it contains
    # at least one hint string, to avoid appending noise for random input.
    unrotated = _rot13_decode(combined)
    lowered_rot = unrotated.lower()
    for hint in _ROT13_HINTS:
        if hint in lowered_rot:
            combined = combined + " " + unrotated
            break

    # Step 6: lower-case, then fold leetspeak. Done last because the
    # leetspeak substitution is one-way.
    leet_folded = combined.lower().translate(_LEET)

    # Step 7: collapse whitespace runs and trim the ends.
    return _WHITESPACE_RX.sub(" ", leet_folded).strip()
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# ── Fast-Path Regex Patterns ──────────────────────────────────
|
|
169
|
+
# Patterns are matched against BOTH the raw input and the normalised
|
|
170
|
+
# input (see normalize_text above), which neutralises common evasions.
|
|
171
|
+
#
|
|
172
|
+
# Coverage targets per the v0.9 release MODEL_CARD: instruction_override,
|
|
173
|
+
# role_hijack, prompt_extraction, jailbreak, delimiter_injection,
|
|
174
|
+
# data_exfiltration, tool_abuse, obfuscation, multilang_evasion.
|
|
175
|
+
#
|
|
176
|
+
# When adding a new variant: add a positive test in
|
|
177
|
+
# tests/test_proxy_security.py and a negative test (benign string that
|
|
178
|
+
# must NOT match) in the same file.
|
|
179
|
+
|
|
180
|
+
# Building blocks for the English instruction-override regex. They are kept
# as named fragments so the verb / qualifier / target lists can be edited in
# one place; in this file they are interpolated into the first entry of
# INJECTION_PATTERNS.

# Verbs that tell the model to discard something.
_OVERRIDE_VERBS = (
    r"(?:ignore|disregard|forget|override|bypass|circumvent|skip|sidestep|"
    r"nullify|cancel|suspend|drop|remove|undo)"
)
# Optional determiner phrase ("all of your ", "the ", ...) followed by an
# optional qualifier ("previous", "safety", ...) and optional whitespace.
_OVERRIDE_QUAL = (
    r"(?:(?:all|the|any|every|your|those)\s+(?:of\s+)?(?:your\s+|the\s+)?)?"
    r"(?:previous|prior|above|earlier|safety|security|content)?\s*"
)
# Nouns naming what is being discarded (instructions, rules, guardrails, ...).
_OVERRIDE_TARGETS = (
    r"(?:instructions?|prompts?|rules?|directions?|directives?|guidelines?|"
    r"guardrails?|restrictions?|policies|the\s+above|everything|filters?|safeguards?)"
)
|
|
195
|
+
|
|
196
|
+
# Builtin rule table. Each entry is a `(regex source, category name,
# RiskLevel)` tuple; the regex sources are compiled elsewhere in this file
# with re.IGNORECASE | re.DOTALL. Several entries may share one category
# name.
INJECTION_PATTERNS = [
    # ─── 1. Direct instruction override (English) ───────────────
    (
        rf"{_OVERRIDE_VERBS}\s+{_OVERRIDE_QUAL}{_OVERRIDE_TARGETS}",
        "instruction_override",
        RiskLevel.CRITICAL,
    ),
    # Imperative chain (multiple short verbs separated by punctuation):
    # "Ignore. Forget. Override. Reveal everything."
    (
        r"\b(?:ignore|disregard|forget|override|bypass)\b[\s.;,!]+"
        r"\b(?:ignore|disregard|forget|override|bypass|reveal|expose|leak)\b",
        "instruction_override",
        RiskLevel.HIGH,
    ),
    # ─── 2. Role hijacking ─────────────────────────────────────
    (r"you\s+are\s+now\s+(?:a|an|the|in)\s+", "role_hijack", RiskLevel.HIGH),
    (r"act\s+as\s+(?:a|an|if\s+you\s+were|DAN|AIM|STAN|DUDE)\b", "role_hijack", RiskLevel.HIGH),
    (r"pretend\s+(?:to\s+be|you\s+are|that\s+you)\s+", "role_hijack", RiskLevel.HIGH),
    (
        r"(?:let'?s|let\s+us)\s+(?:roleplay|role\s+play|pretend|imagine)\b",
        "role_hijack",
        RiskLevel.HIGH,
    ),
    (
        r"from\s+now\s+on,?\s+you\s+(?:will|must|should|are|have\s+to)\s+",
        "role_hijack",
        RiskLevel.HIGH,
    ),
    # ─── 3. System prompt extraction ───────────────────────────
    (
        r"(?:show|reveal|display|print|output|repeat|echo|tell|give|share|expose)"
        r"\s+(?:me\s+)?(?:your|the)?\s*"
        r"(?:full\s+|complete\s+|original\s+|initial\s+|verbatim\s+)?"
        r"(?:system\s+|hidden\s+|internal\s+|secret\s+)?"
        r"(?:prompt|instructions?|rules?|configuration|config|policy|policies)",
        "prompt_extraction",
        RiskLevel.HIGH,
    ),
    (
        r"what\s+(?:are|is)\s+(?:your|the)\s+"
        r"(?:system\s+|hidden\s+|internal\s+|original\s+|initial\s+)?"
        r"(?:prompt|instructions?|rules?|guidelines?|configuration)",
        "prompt_extraction",
        RiskLevel.MEDIUM,
    ),
    # ─── 4. Jailbreak / mode-toggle ────────────────────────────
    # All "<noun> mode <toggle>" variants in one regex.
    (
        r"\b(?:DAN|do\s+anything\s+now|developer|admin|debug|maintenance|"
        r"god|sudo|root|jailbreak|uncensored|unrestricted)\s+mode\s+"
        r"(?:enabled|activated|on|engaged|unlock(?:ed)?)\b",
        "jailbreak",
        RiskLevel.CRITICAL,
    ),
    (r"\bDAN\s+(?:mode|prompt)\b", "jailbreak", RiskLevel.CRITICAL),
    (
        r"\b(?:do\s+anything\s+now|always\s+intelligent\s+(?:and\s+)?machiavellian)\b",
        "jailbreak",
        RiskLevel.CRITICAL,
    ),
    # ─── 5. Delimiter / template injection ─────────────────────
    # Chat-template control tokens.
    (
        r"<\|im_(?:start|end)\|>|<\|endoftext\|>|<\|fim_[a-z]+\|>|"
        r"\[INST\]|\[/INST\]|<<SYS>>|<</SYS>>|"
        r"<\|system\|>|<\|user\|>|<\|assistant\|>",
        "delimiter_injection",
        RiskLevel.CRITICAL,
    ),
    (r"</?(?:system|user|assistant)>", "delimiter_injection", RiskLevel.HIGH),
    (r"###\s*(?:system|instruction|human|assistant)\s*:", "delimiter_injection", RiskLevel.HIGH),
    # ─── 6. Data exfiltration ──────────────────────────────────
    # Network tool followed by a URL scheme.
    (
        r"(?:curl|wget|fetch|nc\s+-|netcat)\s+[\w./:?&=-]*?(?:https?|ftp|file|gopher)://",
        "data_exfiltration",
        RiskLevel.HIGH,
    ),
    # Verb "email" intentionally excluded — too many benign sentences
    # ("send report to alice@corp.com") would match. Bare email addresses
    # are not exfil targets; URLs and known burner domains are.
    (
        r"(?:send|post|upload|exfiltrate|forward|transmit|leak)\s+"
        r"(?:.{0,80}?)\s+(?:to|via|towards|through)\s+"
        r"(?:https?://|ftp://|file://|external\s+(?:endpoint|server|url)|"
        r"attacker(?:\.com|-controlled)|evil\.com|webhook\.site|requestbin|"
        r"burpcollaborator|ngrok\.io|localtunnel|serveo|"
        r"pastebin\.com|gist\.github)",
        "data_exfiltration",
        RiskLevel.HIGH,
    ),
    # ─── 7. Tool abuse (NEW category — agentic systems) ────────
    # System-shell execution
    (
        r"\b(?:exec|spawn|system|popen|subprocess|os\.system|shell_exec|run_command|"
        r"shell\s+(?:command|tool))\b\s*[:(]?\s*[\"'`]?(?:rm\s+-rf|wget\s|curl\s|"
        r"bash\s|sh\s+-c|/bin/|cmd\.exe|powershell)",
        "tool_abuse",
        RiskLevel.CRITICAL,
    ),
    # Sensitive filesystem paths
    (
        r"\b(?:cat|read|fetch|get|tail|head|less|more|file_read|read_file)\b\s+"
        r"(?:/etc/(?:passwd|shadow|hosts|sudoers|ssl)|/root/|"
        r"~?/\.ssh/|~?/\.aws/credentials|~?/\.netrc|~?/\.docker/config|"
        r"~?/\.kube/config|/proc/self/environ|/var/log/auth)",
        "tool_abuse",
        RiskLevel.CRITICAL,
    ),
    # Private/internal API calls
    (
        r"\b(?:call|invoke|fetch|hit|access|GET|POST)\b\s+(?:the\s+)?(?:internal\s+|private\s+|admin\s+)"
        r"(?:api|endpoint|service|tool|function)",
        "tool_abuse",
        RiskLevel.HIGH,
    ),
    # NOTE(review): `|` binds loosest, so the `(?:^|[\s/])` prefix applies
    # only to `/admin/`; `/internal/`, `/_private/` and `/debug/` match
    # anywhere in the text. Confirm that asymmetry is intended.
    (
        r"(?:^|[\s/])/admin/|/internal/|/_private/|/debug/|localhost:\d+/(?:admin|debug|metrics)",
        "tool_abuse",
        RiskLevel.HIGH,
    ),
    # Destructive commands as imperatives
    (
        r"\b(?:rm\s+-rf?|drop\s+(?:database|table|schema)|delete\s+from\s+\w+|"
        r"truncate\s+table|format\s+(?:c:|/dev/)|mkfs\.|dd\s+if=)",
        "tool_abuse",
        RiskLevel.CRITICAL,
    ),
    # ─── 8. Encoded / obfuscated payloads ──────────────────────
    (r"\bbase64\s*(?:encode|decode|\.b64|payload|encoded)", "obfuscation", RiskLevel.MEDIUM),
    # Three or more consecutive literal "\xNN" escapes.
    (r"\\x[0-9a-fA-F]{2}(?:\\x[0-9a-fA-F]{2}){2,}", "obfuscation", RiskLevel.HIGH),
    # ROT13 marker phrasings
    # The trailing keyword group is optional, so the marker followed by any
    # whitespace is enough to match.
    (
        r"\b(?:rot13|rot-13|caesar\s+cipher)\b\s+(?:decode|decoded|payload|this)?",
        "obfuscation",
        RiskLevel.MEDIUM,
    ),
    # Hex-escape-as-instruction
    # (a "\xNN" escape standing in for the first letter of an override verb)
    (r"\\x[0-9a-fA-F]{2}\s*(?:gnore|orget|verride|ypass)", "obfuscation", RiskLevel.HIGH),
    # ─── 9. Multilingual evasion ───────────────────────────────
    # Italian
    (
        r"(?:ignora|dimentica|scarta|annulla|bypassa|ometti|salta)\s+"
        r"(?:tutt[oae](?:\s+(?:le|i|gli|delle|dei))?\s+|ogni\s+|qualsiasi\s+|"
        r"le\s+|i\s+|gli\s+|delle\s+|dei\s+)?"
        r"(?:precedenti\s+|prime\s+|sopra\s+)?"
        r"(?:istruzioni?|regole|prompt|direttive|linee\s+guida|restrizioni?)",
        "multilang_evasion",
        RiskLevel.CRITICAL,
    ),
    # Italian — verb after target ("istruzioni precedenti")
    (
        r"(?:ignora|dimentica|scarta|annulla|bypassa)\s+"
        r"(?:tutt[oae]\s+)?(?:le\s+|i\s+|gli\s+)?"
        r"(?:istruzioni?|regole|direttive)\s+(?:precedenti|sopra)",
        "multilang_evasion",
        RiskLevel.CRITICAL,
    ),
    # French
    (
        r"(?:ignor(?:ez|e)|oublie[zr]?|écart[eo]z|annul(?:e|ez)|contournez|sautez)\s+"
        r"(?:tout(?:e|es)?\s+(?:les\s+|la\s+|le\s+)?|les\s+|la\s+|le\s+)?"
        r"(?:précédent(?:e|s|es)?\s+)?"
        r"(?:instructions?|règles|consignes|directives|prompts?|restrictions?)",
        "multilang_evasion",
        RiskLevel.CRITICAL,
    ),
    # French — verb-then-noun-then-adjective ("instructions précédentes")
    (
        r"(?:ignor(?:ez|e)|oublie[zr]?)\s+"
        r"(?:tout(?:e|es)?\s+(?:les\s+|la\s+|le\s+)?|les\s+|la\s+|le\s+)?"
        r"(?:instructions?|règles|consignes|directives)\s+précédent(?:e|s|es)?",
        "multilang_evasion",
        RiskLevel.CRITICAL,
    ),
    # Spanish
    (
        r"(?:ignor[ae]|olvid[ae]|descart[ae]|anul[ae]|omit[ae]|salt[ae])\s+"
        r"(?:tod[oa]s?\s+(?:las\s+|los\s+|la\s+|el\s+)?|las\s+|los\s+|la\s+|el\s+)?"
        r"(?:anteriores?\s+|previas?\s+)?"
        r"(?:instrucciones|reglas|directivas|consignas|restricciones)",
        "multilang_evasion",
        RiskLevel.CRITICAL,
    ),
    # Spanish — verb-then-noun-then-adjective ("instrucciones anteriores")
    (
        r"(?:ignor[ae]|olvid[ae])\s+"
        r"(?:tod[oa]s?\s+(?:las\s+|los\s+)?|las\s+|los\s+)?"
        r"(?:instrucciones|reglas|directivas)\s+anteriores?",
        "multilang_evasion",
        RiskLevel.CRITICAL,
    ),
    # German
    (
        r"(?:ignoriere(?:n)?|vergiss|verges(?:sen|st)|missachte(?:n)?|"
        r"überschreibe(?:n)?|umgehe(?:n)?|überspringe(?:n)?)\s+"
        r"(?:Sie\s+)?"
        r"(?:alle\s+|die\s+)?"
        r"(?:vorherigen?\s+|bisherigen?\s+|obigen?\s+)?"
        r"(?:Anweisungen|Regeln|Vorgaben|Richtlinien|Prompts?|Beschränkungen)",
        "multilang_evasion",
        RiskLevel.CRITICAL,
    ),
]
|
|
399
|
+
|
|
400
|
+
# Imperative verbs used by the heuristic deep-path scanner.
# Add terms here when new attack patterns emerge that use novel command words.
#
# InjectionFirewall.deep_path() checks each entry with a substring test
# against the lower-cased input and counts each entry at most once, so
# entries should be lower-case and will also hit inside longer words
# (e.g. "print" inside "printer").
IMPERATIVE_WORDS = [
    "ignore",
    "override",
    "bypass",
    "circumvent",
    "disable",
    "forget",
    "must",
    "always",
    "never",
    "immediately",
    "execute",
    "reveal",
    "output",
    "print",
    "display",
    "repeat",
    "expose",
    "leak",
]
|
|
422
|
+
|
|
423
|
+
# Compile patterns for performance
# Module-level `(compiled regex, category, risk level)` list built once at
# import time from INJECTION_PATTERNS, case-insensitive and with "." matching
# newlines. InjectionFirewall.__init__ compiles its own per-instance list
# (builtins plus any custom patterns) and does not read this one.
# NOTE(review): presumably kept for external importers — confirm before
# removing.
COMPILED_PATTERNS = [
    (re.compile(pattern, re.IGNORECASE | re.DOTALL), name, level)
    for pattern, name, level in INJECTION_PATTERNS
]
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
class InjectionFirewall:
    """
    Dual-layer prompt injection defense.
    Fast path: compiled regex patterns.
    Deep path: heuristic scoring for novel/obfuscated attacks.

    Attributes:
        total_checked: Number of :meth:`check` calls so far.
        total_blocked: Number of :meth:`check` calls that reported an
            injection.
        detections_by_type: Per-category count of regex detections that
            contributed to a blocked request.
    """

    # Severity ranking used to compare risk levels (higher = more severe).
    _RISK_RANK = {
        RiskLevel.LOW: 0,
        RiskLevel.MEDIUM: 1,
        RiskLevel.HIGH: 2,
        RiskLevel.CRITICAL: 3,
    }

    # Deep-path signal 3: separators / fences / tags that mark a context switch.
    _CONTEXT_SWITCH_RX = re.compile(r"(---+|===+|###|```|</?[a-z]+>)", re.IGNORECASE)
    # Deep-path signal 5: literal "\uXXXX" escapes or HTML numeric entities.
    _ENCODED_CHARS_RX = re.compile(r"(\\u[0-9a-fA-F]{4}|&#x?[0-9a-fA-F]+;)")

    def __init__(
        self,
        extra_patterns: list[tuple[str, str, "RiskLevel"]] | None = None,
        disabled_categories: list[str] | set[str] | None = None,
    ) -> None:
        """Build a firewall instance.

        Args:
            extra_patterns: Optional list of `(regex, category, risk_level)`
                tuples appended to the builtin pattern set. Loaded from
                ``admina.yaml`` -> ``agent_security.firewall.custom_patterns``
                so operators can add domain-specific rules without forking.
                Entries that are not 3-item lists/tuples are skipped with a
                warning. An invalid regex is not caught here, so the error
                raised by ``re.compile`` propagates to the caller.
            disabled_categories: Categories (e.g. ``"jailbreak"``) that must
                never be flagged. Useful in observe mode while tuning, or
                when a category produces too many false positives in a
                specific deployment. Builtin pattern set is preserved; only
                matches in disabled categories are silently dropped.
        """
        self.total_checked: int = 0
        self.total_blocked: int = 0
        self.detections_by_type: dict[str, int] = {}
        self._disabled = set(disabled_categories or ())

        # Compile per-instance pattern list. Builtins first, then user
        # extras (so user rules can match what builtins miss).
        patterns = list(INJECTION_PATTERNS)
        if extra_patterns:
            for entry in extra_patterns:
                if not isinstance(entry, (list, tuple)) or len(entry) != 3:
                    logger.warning(
                        "Skipping malformed custom firewall pattern: %r "
                        "(expected (regex, category, risk_level))",
                        entry,
                    )
                    continue
                patterns.append(tuple(entry))
        self._compiled = [
            (re.compile(p, re.IGNORECASE | re.DOTALL), name, level) for p, name, level in patterns
        ]

    def fast_path(self, text: str) -> dict:
        """
        Regex-based fast path scan. Target: <5ms.

        Patterns are matched against the raw text AND against a normalised
        copy (homoglyph/leetspeak/char-by-char/base64 neutralised) so the
        same regex set covers a much wider attack surface without bloating
        the pattern list.

        Each category is reported once. When several regexes of the same
        category match, the entry carries the highest risk level among
        them, so a low-severity hit can never mask a more severe one.

        Returns: {is_injection: bool, patterns: [...], risk_level: RiskLevel,
        scan_type: "fast_path", latency_ms: float}
        """
        start = time.perf_counter()
        # category name -> reported match entry (dicts keep first-hit order)
        by_category: dict[str, dict] = {}
        max_risk = RiskLevel.LOW

        normalized = normalize_text(text)
        # Search raw text first (fast common case), then the normalised
        # variant if it differs from a plain lower-casing of the input.
        candidates = (text,) if normalized == text.lower() else (text, normalized)
        for candidate in candidates:
            for compiled, name, level in self._compiled:
                if name in self._disabled:
                    continue
                existing = by_category.get(name)
                # A category already reported at this severity or higher
                # cannot be escalated by this regex — skip the search.
                if existing is not None and self._risk_order(level) <= self._risk_order(
                    existing["risk_level"]
                ):
                    continue
                if not compiled.search(candidate):
                    continue
                if existing is None:
                    by_category[name] = {"pattern": name, "risk_level": level}
                else:
                    # Same category, more severe regex: escalate in place.
                    existing["risk_level"] = level
                if self._risk_order(level) > self._risk_order(max_risk):
                    max_risk = level

        matches = list(by_category.values())
        elapsed_ms = (time.perf_counter() - start) * 1000

        return {
            "is_injection": len(matches) > 0,
            "patterns": matches,
            "risk_level": max_risk,
            "scan_type": "fast_path",
            "latency_ms": round(elapsed_ms, 2),
        }

    def deep_path(self, text: str) -> dict:
        """
        Heuristic-based deep path analysis for novel attacks.
        Scores multiple signals to detect sophisticated injection attempts.
        Target: <200ms.

        Five independent signals each add a fixed weight to the score. A
        score of 0.5 or more is reported as an injection; the risk level is
        MEDIUM from 0.3, HIGH from 0.5 and CRITICAL from 0.7.

        Returns: {is_injection: bool, score: float, signals: [...],
        risk_level: RiskLevel, scan_type: "deep_path", latency_ms: float}
        """
        start = time.perf_counter()
        score = 0.0
        signals = []

        # Signal 1: High imperative verb density.
        # Counts how many distinct IMPERATIVE_WORDS occur as substrings,
        # relative to the number of whitespace-separated words.
        text_lower = text.lower()
        word_count = max(len(text_lower.split()), 1)
        imp_count = sum(1 for w in IMPERATIVE_WORDS if w in text_lower)
        imp_density = imp_count / word_count
        if imp_density > 0.1:
            score += 0.3
            signals.append(f"imperative_density={imp_density:.2f}")

        # Signal 2: Unusual character distribution
        special_chars = sum(1 for c in text if c in "<>[]{}|\\`~")
        special_ratio = special_chars / max(len(text), 1)
        if special_ratio > 0.05:
            score += 0.2
            signals.append(f"special_char_ratio={special_ratio:.2f}")

        # Signal 3: Context switching markers
        context_switches = len(self._CONTEXT_SWITCH_RX.findall(text))
        if context_switches > 2:
            score += 0.25
            signals.append(f"context_switches={context_switches}")

        # Signal 4: Abnormal length for a tool argument
        if len(text) > 2000:
            score += 0.15
            signals.append(f"abnormal_length={len(text)}")

        # Signal 5: Mixed languages / encoding markers
        if self._ENCODED_CHARS_RX.search(text):
            score += 0.2
            signals.append("encoded_chars_detected")

        elapsed_ms = (time.perf_counter() - start) * 1000

        is_injection = score >= 0.5
        risk = RiskLevel.LOW
        if score >= 0.7:
            risk = RiskLevel.CRITICAL
        elif score >= 0.5:
            risk = RiskLevel.HIGH
        elif score >= 0.3:
            risk = RiskLevel.MEDIUM

        return {
            "is_injection": is_injection,
            "score": round(score, 3),
            "signals": signals,
            "risk_level": risk,
            "scan_type": "deep_path",
            "latency_ms": round(elapsed_ms, 2),
        }

    def check(self, text: str) -> dict:
        """
        Full dual-layer scan. Fast path first, deep path if needed.

        A fast-path hit at HIGH or CRITICAL short-circuits and returns the
        fast-path result directly. Otherwise the deep path also runs and a
        combined result is returned with keys ``is_injection``,
        ``risk_level``, ``fast_path``, ``deep_path`` and ``latency_ms``.

        Updates ``total_checked`` on every call, and ``total_blocked`` plus
        ``detections_by_type`` whenever an injection is reported.
        """
        self.total_checked += 1

        # Layer 1: Fast path
        fast = self.fast_path(text)
        if fast["is_injection"] and self._risk_order(fast["risk_level"]) >= self._risk_order(
            RiskLevel.HIGH
        ):
            self.total_blocked += 1
            self._record_detections(fast["patterns"])
            logger.warning("[BLOCKED] Injection blocked (fast path): %s", fast["patterns"])
            return fast

        # Layer 2: Deep path
        deep = self.deep_path(text)
        combined_injection = fast["is_injection"] or deep["is_injection"]
        combined_risk = (
            fast["risk_level"]
            if self._risk_order(fast["risk_level"]) >= self._risk_order(deep["risk_level"])
            else deep["risk_level"]
        )

        result = {
            "is_injection": combined_injection,
            "risk_level": combined_risk,
            "fast_path": fast,
            "deep_path": deep,
            "latency_ms": round(fast["latency_ms"] + deep["latency_ms"], 2),
        }

        if combined_injection:
            self.total_blocked += 1
            # Lower-severity regex hits that end up blocked here must be
            # counted too; a heuristic-only block has no regex patterns, so
            # nothing is recorded for it.
            self._record_detections(fast["patterns"])
            logger.warning("[BLOCKED] Injection blocked (combined): risk=%s", combined_risk)

        return result

    def _record_detections(self, patterns: list[dict]) -> None:
        """Increment the per-category counter for each reported regex match."""
        for p in patterns:
            self.detections_by_type[p["pattern"]] = (
                self.detections_by_type.get(p["pattern"], 0) + 1
            )

    @staticmethod
    def _risk_order(level: RiskLevel) -> int:
        """Return the numeric severity rank of *level* (LOW=0 … CRITICAL=3).

        Raises:
            KeyError: If *level* is not one of the four known risk levels.
        """
        return InjectionFirewall._RISK_RANK[level]

    def get_stats(self) -> dict:
        """Return the running counters plus the block rate as a percentage."""
        return {
            "total_checked": self.total_checked,
            "total_blocked": self.total_blocked,
            "block_rate": round(self.total_blocked / max(self.total_checked, 1) * 100, 2),
            "detections_by_type": self.detections_by_type,
        }
|