admina-framework 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. admina/__init__.py +34 -0
  2. admina/cli/__init__.py +14 -0
  3. admina/cli/commands/__init__.py +14 -0
  4. admina/cli/main.py +1522 -0
  5. admina/cli/templates/admina.yaml.j2 +77 -0
  6. admina/cli/templates/docker-compose.yml.j2 +254 -0
  7. admina/cli/templates/env.j2 +10 -0
  8. admina/cli/templates/main.py.j2 +95 -0
  9. admina/cli/templates/plugin.py.j2 +145 -0
  10. admina/cli/templates/plugin_pyproject.toml.j2 +15 -0
  11. admina/cli/templates/plugin_readme.md.j2 +27 -0
  12. admina/cli/templates/plugin_test.py.j2 +48 -0
  13. admina/core/__init__.py +14 -0
  14. admina/core/config.py +497 -0
  15. admina/core/event_bus.py +112 -0
  16. admina/core/secrets.py +257 -0
  17. admina/core/types.py +146 -0
  18. admina/dashboard/__init__.py +8 -0
  19. admina/dashboard/static/heimdall.png +0 -0
  20. admina/dashboard/static/index.html +1045 -0
  21. admina/dashboard/static/vendor/alpinejs.min.js +5 -0
  22. admina/domains/__init__.py +14 -0
  23. admina/domains/agent_security/__init__.py +41 -0
  24. admina/domains/agent_security/firewall.py +634 -0
  25. admina/domains/agent_security/loop_breaker.py +176 -0
  26. admina/domains/ai_infra/__init__.py +79 -0
  27. admina/domains/ai_infra/llm_engine.py +477 -0
  28. admina/domains/ai_infra/rag.py +817 -0
  29. admina/domains/ai_infra/webui.py +292 -0
  30. admina/domains/compliance/__init__.py +109 -0
  31. admina/domains/compliance/cross_regulation.py +314 -0
  32. admina/domains/compliance/eu_ai_act.py +367 -0
  33. admina/domains/compliance/forensic.py +380 -0
  34. admina/domains/compliance/gdpr.py +331 -0
  35. admina/domains/compliance/nis2.py +258 -0
  36. admina/domains/compliance/oisg.py +658 -0
  37. admina/domains/compliance/otel.py +101 -0
  38. admina/domains/data_sovereignty/__init__.py +42 -0
  39. admina/domains/data_sovereignty/classification.py +102 -0
  40. admina/domains/data_sovereignty/pii.py +260 -0
  41. admina/domains/data_sovereignty/residency.py +121 -0
  42. admina/integrations/__init__.py +14 -0
  43. admina/integrations/_engines.py +63 -0
  44. admina/integrations/cheshirecat/__init__.py +13 -0
  45. admina/integrations/cheshirecat/admina-plugin/admina_governance.py +207 -0
  46. admina/integrations/crewai/__init__.py +13 -0
  47. admina/integrations/crewai/callbacks.py +347 -0
  48. admina/integrations/langchain/__init__.py +13 -0
  49. admina/integrations/langchain/callbacks.py +341 -0
  50. admina/integrations/n8n/__init__.py +14 -0
  51. admina/integrations/openclaw/__init__.py +14 -0
  52. admina/plugins/__init__.py +49 -0
  53. admina/plugins/base.py +633 -0
  54. admina/plugins/builtin/__init__.py +14 -0
  55. admina/plugins/builtin/adapters/__init__.py +14 -0
  56. admina/plugins/builtin/adapters/ollama.py +120 -0
  57. admina/plugins/builtin/adapters/openai.py +138 -0
  58. admina/plugins/builtin/alerts/__init__.py +14 -0
  59. admina/plugins/builtin/alerts/log.py +66 -0
  60. admina/plugins/builtin/alerts/webhook.py +102 -0
  61. admina/plugins/builtin/auth/__init__.py +14 -0
  62. admina/plugins/builtin/auth/apikey.py +138 -0
  63. admina/plugins/builtin/compliance/__init__.py +14 -0
  64. admina/plugins/builtin/compliance/eu_ai_act.py +202 -0
  65. admina/plugins/builtin/connectors/__init__.py +14 -0
  66. admina/plugins/builtin/connectors/chromadb.py +137 -0
  67. admina/plugins/builtin/connectors/filesystem.py +111 -0
  68. admina/plugins/builtin/forensic/__init__.py +14 -0
  69. admina/plugins/builtin/forensic/filesystem.py +163 -0
  70. admina/plugins/builtin/forensic/minio.py +180 -0
  71. admina/plugins/builtin/guards/__init__.py +0 -0
  72. admina/plugins/builtin/guards/guardrailsai_guard.py +172 -0
  73. admina/plugins/builtin/pii/__init__.py +14 -0
  74. admina/plugins/builtin/pii/spacy_regex.py +160 -0
  75. admina/plugins/builtin/transports/__init__.py +14 -0
  76. admina/plugins/builtin/transports/http_rest.py +97 -0
  77. admina/plugins/builtin/transports/mcp.py +173 -0
  78. admina/plugins/registry.py +356 -0
  79. admina/proxy/__init__.py +15 -0
  80. admina/proxy/api/__init__.py +17 -0
  81. admina/proxy/api/dashboard.py +925 -0
  82. admina/proxy/api/integration.py +153 -0
  83. admina/proxy/config.py +214 -0
  84. admina/proxy/engine_bridge.py +306 -0
  85. admina/proxy/governance.py +232 -0
  86. admina/proxy/main.py +1484 -0
  87. admina/proxy/multi_upstream.py +156 -0
  88. admina/proxy/state.py +97 -0
  89. admina/py.typed +0 -0
  90. admina/sdk/__init__.py +34 -0
  91. admina/sdk/_compat.py +43 -0
  92. admina/sdk/compliance_kit.py +359 -0
  93. admina/sdk/governed_agent.py +391 -0
  94. admina/sdk/governed_data.py +434 -0
  95. admina/sdk/governed_model.py +241 -0
  96. admina_framework-0.9.0.dist-info/METADATA +575 -0
  97. admina_framework-0.9.0.dist-info/RECORD +102 -0
  98. admina_framework-0.9.0.dist-info/WHEEL +5 -0
  99. admina_framework-0.9.0.dist-info/entry_points.txt +2 -0
  100. admina_framework-0.9.0.dist-info/licenses/LICENSE +191 -0
  101. admina_framework-0.9.0.dist-info/licenses/NOTICE +16 -0
  102. admina_framework-0.9.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,634 @@
1
+ # Copyright © 2025–2026 Stefano Noferi & Admina contributors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Admina — Anti-Injection Firewall — Agent Security domain
17
+ Dual-layer defense: regex pattern matching + heuristic analysis.
18
+ """
19
+
20
+ import base64
21
+ import logging
22
+ import re
23
+ import time
24
+ import unicodedata
25
+
26
+ from admina.core.types import RiskLevel
27
+
28
+ logger = logging.getLogger("admina.firewall")
29
+
30
+
31
# ── Text normalization (run BEFORE regex matching) ─────────────
# Evasion tricks neutralised by the tables and regexes below:
#   * homoglyphs   (Cyrillic / lookalike letters → Latin)
#   * leetspeak    (1→i, 0→o, 3→e, 4→a, 5→s, 7→t, @→a, $→s)
#   * char-by-char separation ("I-g-n-o-r-e" → "Ignore")
#   * base64 tokens (runs of 12+ base64-alphabet characters)

# Lookalike → Latin fold (a small but high-frequency subset).  Keys are
# written as code-point escapes because most of them are visually
# indistinguishable from the Latin letter they map to.
_HOMOGLYPHS = str.maketrans(
    dict(
        (
            ("\u0430", "a"),  # CYRILLIC SMALL LETTER A
            ("\u0410", "A"),  # CYRILLIC CAPITAL LETTER A
            ("\u0435", "e"),  # CYRILLIC SMALL LETTER IE
            ("\u0415", "E"),  # CYRILLIC CAPITAL LETTER IE
            ("\u043e", "o"),  # CYRILLIC SMALL LETTER O
            ("\u041e", "O"),  # CYRILLIC CAPITAL LETTER O
            ("\u0440", "p"),  # CYRILLIC SMALL LETTER ER
            ("\u0420", "P"),  # CYRILLIC CAPITAL LETTER ER
            ("\u0441", "c"),  # CYRILLIC SMALL LETTER ES
            ("\u0421", "C"),  # CYRILLIC CAPITAL LETTER ES
            ("\u0443", "y"),  # CYRILLIC SMALL LETTER U
            ("\u0423", "Y"),  # CYRILLIC CAPITAL LETTER U
            ("\u0445", "x"),  # CYRILLIC SMALL LETTER HA
            ("\u0425", "X"),  # CYRILLIC CAPITAL LETTER HA
            ("\u0456", "i"),  # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
            ("\u0406", "I"),  # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
            ("\u0458", "j"),  # CYRILLIC SMALL LETTER JE
            ("\u0408", "J"),  # CYRILLIC CAPITAL LETTER JE
            ("\u0455", "s"),  # CYRILLIC SMALL LETTER DZE
            ("\u0405", "S"),  # CYRILLIC CAPITAL LETTER DZE
            ("\u0501", "d"),  # CYRILLIC SMALL LETTER KOMI DE
            ("\u0261", "g"),  # LATIN SMALL LETTER SCRIPT G
            ("\u0475", "v"),  # CYRILLIC SMALL LETTER IZHITSA
            ("\U0001d408", "I"),  # MATHEMATICAL BOLD CAPITAL I
            ("\U0001d422", "i"),  # MATHEMATICAL BOLD SMALL I
            ("\ua7b5", "B"),  # LATIN SMALL LETTER BETA
        )
    )
)

# Leetspeak fold: the n-th character of the first string maps to the n-th
# character of the second one.
_LEET = str.maketrans("013457@$", "oieastas")

# A run of 12 or more base64-alphabet characters, optionally followed by up
# to two "=" padding characters, delimited by word boundaries.
_BASE64_RX = re.compile(r"\b[A-Za-z0-9+/]{12,}={0,2}\b")
# Three or more single ASCII letters, each separated from the next by exactly
# one separator character (hyphen, underscore, dot, middle dot, hyphenation
# point, bullet or whitespace).
_CHAR_HYPHEN_RX = re.compile(r"\b(?:[A-Za-z][-_.·‧•\s]){2,}[A-Za-z]\b")
# Any run of whitespace (collapsed to a single space by normalize_text).
_WHITESPACE_RX = re.compile(r"\s+")
75
+
76
+
77
# ROT13 translation table, built once at import time instead of on every
# call (the decoder runs for every text that goes through normalisation).
_ROT13_TABLE = str.maketrans(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
    "nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM",
)


def _rot13_decode(text: str) -> str:
    """Return *text* with every ASCII letter rotated by 13 positions.

    Case is preserved and every non-ASCII-letter character (digits,
    punctuation, whitespace, non-Latin letters) is passed through unchanged.
    ROT13 is its own inverse, so the same call encodes and decodes.
    """
    return text.translate(_ROT13_TABLE)
84
+
85
+
86
# Hint substrings looked for in the ROT13-decoded text.  The decoded text is
# only appended to the normalised output when at least one of these occurs in
# it, which avoids decoding random strings that merely happen to be
# rot13-shaped.  Matching is by plain substring on the lower-cased text.
_ROT13_HINTS = (
    # override / forget verbs
    "ignore", "ignora", "ignorez", "olvida", "vergessen",
    # the thing being overridden
    "instruction", "instructions", "istruzioni", "anweisungen",
    # qualifiers and targets
    "previous", "above", "system", "prompt", "rules",
    # extraction / execution verbs
    "reveal", "expose", "execute",
    # very common English words
    "the", "and", "you",
)
111
+
112
+
113
def normalize_text(text: str) -> str:
    """Apply best-effort obfuscation neutralisation before pattern matching.

    The pipeline is order-sensitive: decoding steps run FIRST (case matters
    for base64), case folding and leetspeak substitution run last.

    Steps:
        1. Unicode NFKC normalisation.
        2. Homoglyph fold via ``_HOMOGLYPHS`` (case-preserving).
        3. Collapse of letter-by-letter spellings ("I-g-n-o-r-e" → "Ignore").
        4. base64 decoding of candidate tokens; printable UTF-8 results of at
           least 3 characters are appended after the text.
        5. ROT13 decoding of the whole text, appended only when the decoded
           form contains one of ``_ROT13_HINTS``.
        6. Lower-casing followed by the leetspeak fold (``_LEET``).
        7. Whitespace collapse and strip.

    Args:
        text: Raw input text.

    Returns:
        The normalised string.  Falsy input (e.g. ``""``) is returned
        unchanged.  Decoded base64 / ROT13 content is appended, never
        substituted, so the text produced by steps 1-3 is still present
        (in folded form) in the result.
    """
    if not text:
        return text

    # 1. Unicode NFKC fold (full-width → ASCII, etc.)
    norm = unicodedata.normalize("NFKC", text)

    # 2. Homoglyph fold (Cyrillic/math → Latin lookalikes), case-preserving
    norm = norm.translate(_HOMOGLYPHS)

    # 3. Char-by-char hyphenation: "I-g-n-o-r-e" → "Ignore"
    def _collapse(match: re.Match) -> str:
        # Drop every separator inside the matched letter run.
        return re.sub(r"[-_.·‧•\s]", "", match.group(0))

    norm = _CHAR_HYPHEN_RX.sub(_collapse, norm)

    # 4. base64 decode of candidate tokens — BEFORE lowercase/leet so the
    #    encoded payload is not corrupted.  Decoded text is concatenated
    #    after the original so the regex pass can match the plaintext.
    decoded_b64: list[str] = []
    for token in _BASE64_RX.findall(norm):
        try:
            # Re-pad to a multiple of 4; validate=True rejects characters
            # outside the base64 alphabet instead of silently skipping them.
            raw = base64.b64decode(token + "=" * (-len(token) % 4), validate=True)
            txt = raw.decode("utf-8", errors="strict")
        except ValueError:
            # binascii.Error (bad base64) and UnicodeDecodeError (not UTF-8)
            # are both ValueError subclasses: the token is simply not an
            # encoded text payload, so skip it.
            continue
        if txt.isprintable() and len(txt) >= 3:
            decoded_b64.append(txt)
    if decoded_b64:
        norm = norm + " " + " ".join(decoded_b64)

    # 5. ROT13 decode — opt-in: only kept if the decoded text contains at
    #    least one hint word, to avoid appending decoded random strings.
    rot = _rot13_decode(norm)
    rot_lower = rot.lower()
    if any(h in rot_lower for h in _ROT13_HINTS):
        norm = norm + " " + rot

    # 6. Lowercase + leetspeak fold.  Done last because the leetspeak
    #    substitution is one-way and would corrupt still-encoded payloads.
    norm = norm.lower().translate(_LEET)

    # 7. Collapse whitespace
    norm = _WHITESPACE_RX.sub(" ", norm).strip()
    return norm
166
+
167
+
168
+ # ── Fast-Path Regex Patterns ──────────────────────────────────
169
+ # Patterns are matched against BOTH the raw input and the normalised
170
+ # input (see normalize_text above), which neutralises common evasions.
171
+ #
172
+ # Coverage targets per the v0.9 release MODEL_CARD: instruction_override,
173
+ # role_hijack, prompt_extraction, jailbreak, delimiter_injection,
174
+ # data_exfiltration, tool_abuse, obfuscation, multilang_evasion.
175
+ #
176
+ # When adding a new variant: add a positive test in
177
+ # tests/test_proxy_security.py and a negative test (benign string that
178
+ # must NOT match) in the same file.
179
+
180
# Building blocks of the English instruction-override regex: a verb, an
# optional qualifier and a target.  They are kept as separate fragments so
# the ignore/disregard/forget/bypass families can share one definition
# without drifting apart.
_OVERRIDE_VERBS = "(?:" + "|".join(
    (
        "ignore", "disregard", "forget", "override", "bypass", "circumvent",
        "skip", "sidestep", "nullify", "cancel", "suspend", "drop", "remove",
        "undo",
    )
) + ")"
# Optional determiner ("all of your ", "the ", ...) followed by an optional
# adjective ("previous", "safety", ...) and optional whitespace.
_OVERRIDE_QUAL = (
    r"(?:(?:all|the|any|every|your|those)\s+(?:of\s+)?(?:your\s+|the\s+)?)?"
    + r"(?:previous|prior|above|earlier|safety|security|content)?"
    + r"\s*"
)
# The thing being overridden.
_OVERRIDE_TARGETS = "(?:" + "|".join(
    (
        "instructions?", "prompts?", "rules?", "directions?", "directives?",
        "guidelines?", "guardrails?", "restrictions?", "policies",
        r"the\s+above", "everything", "filters?", "safeguards?",
    )
) + ")"
195
+
196
# Builtin fast-path rules.  Each entry is a 3-tuple
# ``(regex source, category name, RiskLevel)``.  The regex sources are
# compiled with ``re.IGNORECASE | re.DOTALL`` by the consumers in this module,
# so upper-case tokens such as ``DAN`` also match lower-case input and ``.``
# also matches newlines.
INJECTION_PATTERNS = [
    # ─── 1. Direct instruction override (English) ───────────────
    # verb + optional qualifier + target, e.g. "ignore all previous instructions".
    (
        rf"{_OVERRIDE_VERBS}\s+{_OVERRIDE_QUAL}{_OVERRIDE_TARGETS}",
        "instruction_override",
        RiskLevel.CRITICAL,
    ),
    # Imperative chain (multiple short verbs separated by punctuation):
    # "Ignore. Forget. Override. Reveal everything."
    (
        r"\b(?:ignore|disregard|forget|override|bypass)\b[\s.;,!]+"
        r"\b(?:ignore|disregard|forget|override|bypass|reveal|expose|leak)\b",
        "instruction_override",
        RiskLevel.HIGH,
    ),
    # ─── 2. Role hijacking ─────────────────────────────────────
    (r"you\s+are\s+now\s+(?:a|an|the|in)\s+", "role_hijack", RiskLevel.HIGH),
    (r"act\s+as\s+(?:a|an|if\s+you\s+were|DAN|AIM|STAN|DUDE)\b", "role_hijack", RiskLevel.HIGH),
    (r"pretend\s+(?:to\s+be|you\s+are|that\s+you)\s+", "role_hijack", RiskLevel.HIGH),
    (
        r"(?:let'?s|let\s+us)\s+(?:roleplay|role\s+play|pretend|imagine)\b",
        "role_hijack",
        RiskLevel.HIGH,
    ),
    (
        r"from\s+now\s+on,?\s+you\s+(?:will|must|should|are|have\s+to)\s+",
        "role_hijack",
        RiskLevel.HIGH,
    ),
    # ─── 3. System prompt extraction ───────────────────────────
    # Imperative form: "show me your full system prompt".
    (
        r"(?:show|reveal|display|print|output|repeat|echo|tell|give|share|expose)"
        r"\s+(?:me\s+)?(?:your|the)?\s*"
        r"(?:full\s+|complete\s+|original\s+|initial\s+|verbatim\s+)?"
        r"(?:system\s+|hidden\s+|internal\s+|secret\s+)?"
        r"(?:prompt|instructions?|rules?|configuration|config|policy|policies)",
        "prompt_extraction",
        RiskLevel.HIGH,
    ),
    # Question form: "what are your instructions" (lower severity).
    (
        r"what\s+(?:are|is)\s+(?:your|the)\s+"
        r"(?:system\s+|hidden\s+|internal\s+|original\s+|initial\s+)?"
        r"(?:prompt|instructions?|rules?|guidelines?|configuration)",
        "prompt_extraction",
        RiskLevel.MEDIUM,
    ),
    # ─── 4. Jailbreak / mode-toggle ────────────────────────────
    # All "<noun> mode <toggle>" variants in one regex.
    (
        r"\b(?:DAN|do\s+anything\s+now|developer|admin|debug|maintenance|"
        r"god|sudo|root|jailbreak|uncensored|unrestricted)\s+mode\s+"
        r"(?:enabled|activated|on|engaged|unlock(?:ed)?)\b",
        "jailbreak",
        RiskLevel.CRITICAL,
    ),
    (r"\bDAN\s+(?:mode|prompt)\b", "jailbreak", RiskLevel.CRITICAL),
    (
        r"\b(?:do\s+anything\s+now|always\s+intelligent\s+(?:and\s+)?machiavellian)\b",
        "jailbreak",
        RiskLevel.CRITICAL,
    ),
    # ─── 5. Delimiter / template injection ─────────────────────
    # Special chat-template tokens (<|im_start|>, [INST], <<SYS>>, ...).
    (
        r"<\|im_(?:start|end)\|>|<\|endoftext\|>|<\|fim_[a-z]+\|>|"
        r"\[INST\]|\[/INST\]|<<SYS>>|<</SYS>>|"
        r"<\|system\|>|<\|user\|>|<\|assistant\|>",
        "delimiter_injection",
        RiskLevel.CRITICAL,
    ),
    # XML-style role tags and markdown-style role headers.
    (r"</?(?:system|user|assistant)>", "delimiter_injection", RiskLevel.HIGH),
    (r"###\s*(?:system|instruction|human|assistant)\s*:", "delimiter_injection", RiskLevel.HIGH),
    # ─── 6. Data exfiltration ──────────────────────────────────
    # Download/transfer tool followed by a URL.
    (
        r"(?:curl|wget|fetch|nc\s+-|netcat)\s+[\w./:?&=-]*?(?:https?|ftp|file|gopher)://",
        "data_exfiltration",
        RiskLevel.HIGH,
    ),
    # Verb "email" intentionally excluded — too many benign sentences
    # ("send report to alice@corp.com") would match. Bare email addresses
    # are not exfil targets; URLs and known burner domains are.
    # Up to 80 characters (newlines included, because of DOTALL) may sit
    # between the verb and the "to/via/..." preposition.
    (
        r"(?:send|post|upload|exfiltrate|forward|transmit|leak)\s+"
        r"(?:.{0,80}?)\s+(?:to|via|towards|through)\s+"
        r"(?:https?://|ftp://|file://|external\s+(?:endpoint|server|url)|"
        r"attacker(?:\.com|-controlled)|evil\.com|webhook\.site|requestbin|"
        r"burpcollaborator|ngrok\.io|localtunnel|serveo|"
        r"pastebin\.com|gist\.github)",
        "data_exfiltration",
        RiskLevel.HIGH,
    ),
    # ─── 7. Tool abuse (NEW category — agentic systems) ────────
    # System-shell execution
    (
        r"\b(?:exec|spawn|system|popen|subprocess|os\.system|shell_exec|run_command|"
        r"shell\s+(?:command|tool))\b\s*[:(]?\s*[\"'`]?(?:rm\s+-rf|wget\s|curl\s|"
        r"bash\s|sh\s+-c|/bin/|cmd\.exe|powershell)",
        "tool_abuse",
        RiskLevel.CRITICAL,
    ),
    # Sensitive filesystem paths
    (
        r"\b(?:cat|read|fetch|get|tail|head|less|more|file_read|read_file)\b\s+"
        r"(?:/etc/(?:passwd|shadow|hosts|sudoers|ssl)|/root/|"
        r"~?/\.ssh/|~?/\.aws/credentials|~?/\.netrc|~?/\.docker/config|"
        r"~?/\.kube/config|/proc/self/environ|/var/log/auth)",
        "tool_abuse",
        RiskLevel.CRITICAL,
    ),
    # Private/internal API calls
    (
        r"\b(?:call|invoke|fetch|hit|access|GET|POST)\b\s+(?:the\s+)?(?:internal\s+|private\s+|admin\s+)"
        r"(?:api|endpoint|service|tool|function)",
        "tool_abuse",
        RiskLevel.HIGH,
    ),
    # Internal-looking URL paths.
    # NOTE(review): the "(?:^|[\s/])" prefix binds only to the "/admin/"
    # alternative; "/internal/", "/_private/" and "/debug/" match anywhere in
    # the text. Confirm whether the prefix was meant to apply to all of them.
    (
        r"(?:^|[\s/])/admin/|/internal/|/_private/|/debug/|localhost:\d+/(?:admin|debug|metrics)",
        "tool_abuse",
        RiskLevel.HIGH,
    ),
    # Destructive commands as imperatives
    (
        r"\b(?:rm\s+-rf?|drop\s+(?:database|table|schema)|delete\s+from\s+\w+|"
        r"truncate\s+table|format\s+(?:c:|/dev/)|mkfs\.|dd\s+if=)",
        "tool_abuse",
        RiskLevel.CRITICAL,
    ),
    # ─── 8. Encoded / obfuscated payloads ──────────────────────
    # Explicit mention of base64 handling.
    (r"\bbase64\s*(?:encode|decode|\.b64|payload|encoded)", "obfuscation", RiskLevel.MEDIUM),
    # Three or more consecutive literal "\xNN" escapes.
    (r"\\x[0-9a-fA-F]{2}(?:\\x[0-9a-fA-F]{2}){2,}", "obfuscation", RiskLevel.HIGH),
    # ROT13 marker phrasings (the trailing keyword group is optional, so the
    # marker followed by whitespace is enough to match).
    (
        r"\b(?:rot13|rot-13|caesar\s+cipher)\b\s+(?:decode|decoded|payload|this)?",
        "obfuscation",
        RiskLevel.MEDIUM,
    ),
    # Hex-escape-as-instruction: a "\xNN" escape standing in for the first
    # letter of ignore / forget / override / bypass.
    (r"\\x[0-9a-fA-F]{2}\s*(?:gnore|orget|verride|ypass)", "obfuscation", RiskLevel.HIGH),
    # ─── 9. Multilingual evasion ───────────────────────────────
    # Italian
    (
        r"(?:ignora|dimentica|scarta|annulla|bypassa|ometti|salta)\s+"
        r"(?:tutt[oae](?:\s+(?:le|i|gli|delle|dei))?\s+|ogni\s+|qualsiasi\s+|"
        r"le\s+|i\s+|gli\s+|delle\s+|dei\s+)?"
        r"(?:precedenti\s+|prime\s+|sopra\s+)?"
        r"(?:istruzioni?|regole|prompt|direttive|linee\s+guida|restrizioni?)",
        "multilang_evasion",
        RiskLevel.CRITICAL,
    ),
    # Italian — verb-then-noun-then-adjective ("istruzioni precedenti")
    # NOTE(review): every text this matches appears to be matched by the
    # previous Italian pattern already (same category and level); the same
    # holds for the French and Spanish noun-then-adjective variants below.
    (
        r"(?:ignora|dimentica|scarta|annulla|bypassa)\s+"
        r"(?:tutt[oae]\s+)?(?:le\s+|i\s+|gli\s+)?"
        r"(?:istruzioni?|regole|direttive)\s+(?:precedenti|sopra)",
        "multilang_evasion",
        RiskLevel.CRITICAL,
    ),
    # French
    (
        r"(?:ignor(?:ez|e)|oublie[zr]?|écart[eo]z|annul(?:e|ez)|contournez|sautez)\s+"
        r"(?:tout(?:e|es)?\s+(?:les\s+|la\s+|le\s+)?|les\s+|la\s+|le\s+)?"
        r"(?:précédent(?:e|s|es)?\s+)?"
        r"(?:instructions?|règles|consignes|directives|prompts?|restrictions?)",
        "multilang_evasion",
        RiskLevel.CRITICAL,
    ),
    # French — verb-then-noun-then-adjective ("instructions précédentes")
    (
        r"(?:ignor(?:ez|e)|oublie[zr]?)\s+"
        r"(?:tout(?:e|es)?\s+(?:les\s+|la\s+|le\s+)?|les\s+|la\s+|le\s+)?"
        r"(?:instructions?|règles|consignes|directives)\s+précédent(?:e|s|es)?",
        "multilang_evasion",
        RiskLevel.CRITICAL,
    ),
    # Spanish
    (
        r"(?:ignor[ae]|olvid[ae]|descart[ae]|anul[ae]|omit[ae]|salt[ae])\s+"
        r"(?:tod[oa]s?\s+(?:las\s+|los\s+|la\s+|el\s+)?|las\s+|los\s+|la\s+|el\s+)?"
        r"(?:anteriores?\s+|previas?\s+)?"
        r"(?:instrucciones|reglas|directivas|consignas|restricciones)",
        "multilang_evasion",
        RiskLevel.CRITICAL,
    ),
    # Spanish — verb-then-noun-then-adjective ("instrucciones anteriores")
    (
        r"(?:ignor[ae]|olvid[ae])\s+"
        r"(?:tod[oa]s?\s+(?:las\s+|los\s+)?|las\s+|los\s+)?"
        r"(?:instrucciones|reglas|directivas)\s+anteriores?",
        "multilang_evasion",
        RiskLevel.CRITICAL,
    ),
    # German (optional formal "Sie" after the verb)
    (
        r"(?:ignoriere(?:n)?|vergiss|verges(?:sen|st)|missachte(?:n)?|"
        r"überschreibe(?:n)?|umgehe(?:n)?|überspringe(?:n)?)\s+"
        r"(?:Sie\s+)?"
        r"(?:alle\s+|die\s+)?"
        r"(?:vorherigen?\s+|bisherigen?\s+|obigen?\s+)?"
        r"(?:Anweisungen|Regeln|Vorgaben|Richtlinien|Prompts?|Beschränkungen)",
        "multilang_evasion",
        RiskLevel.CRITICAL,
    ),
]
399
+
400
# Command-like words counted by the heuristic deep-path scanner when it
# measures imperative density.  Extend this list when new attack phrasings
# rely on command words that are not covered yet.
IMPERATIVE_WORDS = [
    # override / evasion verbs
    "ignore", "override", "bypass", "circumvent", "disable", "forget",
    # urgency / absoluteness markers
    "must", "always", "never", "immediately",
    # execution and disclosure verbs
    "execute", "reveal", "output", "print", "display", "repeat", "expose", "leak",
]
422
+
423
# Builtin rules pre-compiled once at import time: a list of
# (compiled regex, category name, risk level) triples, in the same order as
# INJECTION_PATTERNS.  Matching is case-insensitive and "." spans newlines.
COMPILED_PATTERNS = [
    (re.compile(regex_source, re.I | re.S), category, risk)
    for regex_source, category, risk in INJECTION_PATTERNS
]
428
+
429
+
430
class InjectionFirewall:
    """Dual-layer prompt injection defense.

    Fast path: compiled regex patterns (builtin set plus optional custom
    rules), matched against the raw text and a normalised copy of it.
    Deep path: heuristic scoring for novel/obfuscated attacks.

    Counters kept on the instance: ``total_checked``, ``total_blocked`` and
    ``detections_by_type`` (category name -> number of blocked inputs in
    which a fast-path rule of that category matched).
    """

    # Severity ranking used to compare risk levels.  Built once here rather
    # than on every comparison.
    _RISK_ORDER = {
        RiskLevel.LOW: 0,
        RiskLevel.MEDIUM: 1,
        RiskLevel.HIGH: 2,
        RiskLevel.CRITICAL: 3,
    }

    def __init__(
        self,
        extra_patterns: list[tuple[str, str, "RiskLevel"]] | None = None,
        disabled_categories: list[str] | set[str] | None = None,
    ) -> None:
        """Build a firewall instance.

        Args:
            extra_patterns: Optional list of `(regex, category, risk_level)`
                tuples appended to the builtin pattern set. Loaded from
                ``admina.yaml`` -> ``agent_security.firewall.custom_patterns``
                so operators can add domain-specific rules without forking.
                Entries that are not 3-element lists/tuples are skipped with
                a warning.
            disabled_categories: Categories (e.g. ``"jailbreak"``) that must
                never be flagged. Useful in observe mode while tuning, or
                when a category produces too many false positives in a
                specific deployment. Builtin pattern set is preserved; only
                matches in disabled categories are silently dropped.

        Raises:
            re.error: If a custom regex cannot be compiled.
        """
        self.total_checked: int = 0
        self.total_blocked: int = 0
        self.detections_by_type: dict[str, int] = {}
        self._disabled = set(disabled_categories or ())

        # Compile per-instance pattern list. Builtins first, then user
        # extras (so user rules can match what builtins miss).
        patterns = list(INJECTION_PATTERNS)
        if extra_patterns:
            for entry in extra_patterns:
                if not isinstance(entry, (list, tuple)) or len(entry) != 3:
                    logger.warning(
                        "Skipping malformed custom firewall pattern: %r "
                        "(expected (regex, category, risk_level))",
                        entry,
                    )
                    continue
                patterns.append(tuple(entry))
        self._compiled = [
            (re.compile(p, re.IGNORECASE | re.DOTALL), name, level) for p, name, level in patterns
        ]

    def fast_path(self, text: str) -> dict:
        """Regex-based fast path scan. Target: <5ms.

        Patterns are matched against the raw text AND against a normalised
        copy (homoglyph/leetspeak/char-by-char/base64 neutralised) so the
        same regex set covers a much wider attack surface without bloating
        the pattern list.

        Each category is reported once, with the HIGHEST risk level among
        the rules of that category that matched, so a low-severity rule
        listed first can no longer mask a more severe one.

        Returns:
            ``{"is_injection": bool, "patterns": [{"pattern": category,
            "risk_level": level}, ...], "risk_level": level,
            "scan_type": "fast_path", "latency_ms": float}``
        """
        start = time.perf_counter()
        order = self._risk_order
        # category -> highest matching level; insertion order keeps the
        # order in which categories were first detected.
        worst_by_category: dict = {}

        normalized = normalize_text(text)
        # Search raw text first (fast common case), then the normalised
        # variant if it differs from a plain lower-casing of the input.
        candidates = (text,) if normalized == text.lower() else (text, normalized)
        for candidate in candidates:
            for compiled, name, level in self._compiled:
                if name in self._disabled:
                    continue
                # Skip rules that cannot raise the severity already recorded
                # for their category; this also de-dupes identical hits
                # across the raw and normalised candidates.
                if name in worst_by_category and order(level) <= order(worst_by_category[name]):
                    continue
                if compiled.search(candidate):
                    worst_by_category[name] = level

        matches = [
            {"pattern": name, "risk_level": level} for name, level in worst_by_category.items()
        ]
        max_risk = RiskLevel.LOW
        for level in worst_by_category.values():
            if order(level) > order(max_risk):
                max_risk = level

        elapsed_ms = (time.perf_counter() - start) * 1000

        return {
            "is_injection": len(matches) > 0,
            "patterns": matches,
            "risk_level": max_risk,
            "scan_type": "fast_path",
            "latency_ms": round(elapsed_ms, 2),
        }

    def deep_path(self, text: str) -> dict:
        """Heuristic-based deep path analysis for novel attacks. Target: <200ms.

        Five independent signals each add a fixed weight to a score:
        imperative-word density (0.3), special-character ratio (0.2),
        context-switch markers (0.25), abnormal length (0.15) and encoded
        character escapes (0.2).  A score of 0.5 or more is reported as an
        injection.

        Returns:
            ``{"is_injection": bool, "score": float, "signals": [str, ...],
            "risk_level": level, "scan_type": "deep_path",
            "latency_ms": float}``
        """
        start = time.perf_counter()
        score = 0.0
        signals = []

        # Signal 1: High imperative verb density.  Each listed word counts
        # at most once and is looked up as a substring of the lower-cased
        # text; the count is divided by the number of whitespace-separated
        # words (at least 1).
        text_lower = text.lower()
        word_count = max(len(text_lower.split()), 1)
        imp_count = sum(1 for w in IMPERATIVE_WORDS if w in text_lower)
        imp_density = imp_count / word_count
        if imp_density > 0.1:
            score += 0.3
            signals.append(f"imperative_density={imp_density:.2f}")

        # Signal 2: Unusual character distribution
        special_chars = sum(1 for c in text if c in "<>[]{}|\\`~")
        special_ratio = special_chars / max(len(text), 1)
        if special_ratio > 0.05:
            score += 0.2
            signals.append(f"special_char_ratio={special_ratio:.2f}")

        # Signal 3: Context switching markers (rules, headers, fences, tags)
        context_switches = len(re.findall(r"(---+|===+|###|```|</?[a-z]+>)", text, re.IGNORECASE))
        if context_switches > 2:
            score += 0.25
            signals.append(f"context_switches={context_switches}")

        # Signal 4: Abnormal length for a tool argument
        if len(text) > 2000:
            score += 0.15
            signals.append(f"abnormal_length={len(text)}")

        # Signal 5: Encoding markers (literal \uXXXX escapes, HTML entities)
        if re.search(r"(\\u[0-9a-fA-F]{4}|&#x?[0-9a-fA-F]+;)", text):
            score += 0.2
            signals.append("encoded_chars_detected")

        elapsed_ms = (time.perf_counter() - start) * 1000

        # Map the score onto a risk level; only HIGH and above count as an
        # injection.
        is_injection = score >= 0.5
        risk = RiskLevel.LOW
        if score >= 0.7:
            risk = RiskLevel.CRITICAL
        elif score >= 0.5:
            risk = RiskLevel.HIGH
        elif score >= 0.3:
            risk = RiskLevel.MEDIUM

        return {
            "is_injection": is_injection,
            "score": round(score, 3),
            "signals": signals,
            "risk_level": risk,
            "scan_type": "deep_path",
            "latency_ms": round(elapsed_ms, 2),
        }

    def check(self, text: str) -> dict:
        """Full dual-layer scan. Fast path first, deep path if needed.

        When the fast path reports an injection at HIGH or CRITICAL level the
        fast-path result dict is returned as-is.  Otherwise the deep path
        runs too and a combined dict is returned with the keys
        ``is_injection``, ``risk_level``, ``fast_path``, ``deep_path`` and
        ``latency_ms``.

        Side effects: updates ``total_checked``; on a block also updates
        ``total_blocked`` and ``detections_by_type`` and logs a warning.
        """
        self.total_checked += 1

        # Layer 1: Fast path
        fast = self.fast_path(text)
        if fast["is_injection"] and self._risk_order(fast["risk_level"]) >= self._risk_order(
            RiskLevel.HIGH
        ):
            self.total_blocked += 1
            self._record_detections(fast["patterns"])
            logger.warning("[BLOCKED] Injection blocked (fast path): %s", fast["patterns"])
            return fast

        # Layer 2: Deep path
        deep = self.deep_path(text)
        combined_injection = fast["is_injection"] or deep["is_injection"]
        combined_risk = (
            fast["risk_level"]
            if self._risk_order(fast["risk_level"]) >= self._risk_order(deep["risk_level"])
            else deep["risk_level"]
        )

        result = {
            "is_injection": combined_injection,
            "risk_level": combined_risk,
            "fast_path": fast,
            "deep_path": deep,
            "latency_ms": round(fast["latency_ms"] + deep["latency_ms"], 2),
        }

        if combined_injection:
            self.total_blocked += 1
            # Lower-severity fast-path hits end up here; count them per
            # category too so the per-type stats stay consistent with
            # total_blocked.  Blocks triggered by the heuristic alone have
            # no category and add nothing.
            self._record_detections(fast["patterns"])
            logger.warning("[BLOCKED] Injection blocked (combined): risk=%s", combined_risk)

        return result

    def _record_detections(self, patterns: list[dict]) -> None:
        """Increment the per-category counter for every fast-path match."""
        for match in patterns:
            category = match["pattern"]
            self.detections_by_type[category] = self.detections_by_type.get(category, 0) + 1

    @staticmethod
    def _risk_order(level: RiskLevel) -> int:
        """Return the numeric severity rank of *level* (LOW=0 … CRITICAL=3).

        Raises:
            KeyError: If *level* is not one of the four known risk levels.
        """
        return InjectionFirewall._RISK_ORDER[level]

    def get_stats(self) -> dict:
        """Return the counters plus the block rate as a percentage (2 decimals)."""
        return {
            "total_checked": self.total_checked,
            "total_blocked": self.total_blocked,
            "block_rate": round(self.total_blocked / max(self.total_checked, 1) * 100, 2),
            "detections_by_type": self.detections_by_type,
        }