agent-threat-rules 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -12
- package/dist/badge.d.ts +42 -0
- package/dist/badge.d.ts.map +1 -0
- package/dist/badge.js +158 -0
- package/dist/badge.js.map +1 -0
- package/dist/cli.js +74 -2
- package/dist/cli.js.map +1 -1
- package/dist/eval/run-pint-benchmark.js +4 -2
- package/dist/eval/run-pint-benchmark.js.map +1 -1
- package/dist/flywheel.d.ts.map +1 -1
- package/dist/flywheel.js +24 -1
- package/dist/flywheel.js.map +1 -1
- package/dist/rule-scaffolder.d.ts +14 -0
- package/dist/rule-scaffolder.d.ts.map +1 -1
- package/dist/rule-scaffolder.js +123 -6
- package/dist/rule-scaffolder.js.map +1 -1
- package/package.json +1 -1
- package/rules/agent-manipulation/ATR-2026-116-a2a-message-validation.yaml +90 -0
- package/rules/agent-manipulation/ATR-2026-117-agent-identity-spoofing.yaml +90 -0
- package/rules/agent-manipulation/ATR-2026-118-approval-fatigue.yaml +87 -0
- package/rules/agent-manipulation/ATR-2026-119-social-engineering-via-agent.yaml +87 -0
- package/rules/context-exfiltration/ATR-2026-113-credential-theft.yaml +87 -0
- package/rules/context-exfiltration/ATR-2026-114-oauth-token-abuse.yaml +87 -0
- package/rules/context-exfiltration/ATR-2026-115-env-var-harvesting.yaml +88 -0
- package/rules/excessive-autonomy/ATR-2026-099-high-risk-tool-gate.yaml +1 -1
- package/rules/privilege-escalation/ATR-2026-110-eval-injection.yaml +90 -0
- package/rules/privilege-escalation/ATR-2026-111-shell-escape.yaml +91 -0
- package/rules/privilege-escalation/ATR-2026-112-dynamic-import-exploitation.yaml +87 -0
- package/rules/prompt-injection/ATR-2026-001-direct-prompt-injection.yaml +118 -10
- package/rules/prompt-injection/ATR-2026-097-cjk-injection-patterns.yaml +15 -0
- package/rules/skill-compromise/ATR-2026-061-description-behavior-mismatch.yaml +1 -1
- package/rules/skill-compromise/ATR-2026-063-skill-chain-attack.yaml +2 -1
|
@@ -49,53 +49,91 @@ agent_source:
|
|
|
49
49
|
detection:
|
|
50
50
|
conditions:
|
|
51
51
|
# -- Layer 1: Instruction override verbs + target nouns --
|
|
52
|
+
# Supports: "ignore previous instructions", "ignore the above instructions",
|
|
53
|
+
# "ignore your instructions", "ignore any instructions", "ignore all instructions"
|
|
52
54
|
- field: user_input
|
|
53
55
|
operator: regex
|
|
54
|
-
value: "(?i)\\b(ignore|disregard|forget|override|bypass|skip|abandon|drop|cancel|suppress|nullify|void|revoke|dismiss|discard|reject)\\s+(all\\s+)?(previous|prior|above|earlier|preceding|original|existing|current|initial|default|given|assigned|preset|established)
|
|
55
|
-
description: "Instruction override with broad verb and noun synonym coverage"
|
|
56
|
+
value: "(?i)\\b(ignore|disregard|forget|override|bypass|skip|abandon|drop|cancel|suppress|nullify|void|revoke|dismiss|discard|reject)\\s+(all\\s+)?(the\\s+|your\\s+|my\\s+|any\\s+|these\\s+|those\\s+)?(previous|prior|above|earlier|preceding|original|existing|current|initial|default|given|assigned|preset|established)?\\s*(instructions?|prompts?|rules?|guidelines?|directives?|constraints?|context|programming|training|protocols?|policies|configuration|parameters|boundaries|guardrails?|safeguards?|restrictions?|tasks?|assignments?|information|directions?|orders?|commands?)"
|
|
57
|
+
description: "Instruction override with broad verb and noun synonym coverage, including determiners"
|
|
56
58
|
|
|
57
59
|
# -- Layer 1b: Forget-everything / forget-all shorthand --
|
|
58
60
|
- field: user_input
|
|
59
61
|
operator: regex
|
|
60
|
-
value: "(?i)\\b(forget|ignore|disregard)\\s+(everything|all\\s+(of\\s+)?that|all\\s+about|about\\s+all|that|what\\s+(I|we|you)\\s+(said|told|asked|mentioned))\\s*(before|earlier|previously|above|\\.|,|!|$)"
|
|
62
|
+
value: "(?i)\\b(forget|ignore|disregard)\\s+(everything|all\\s+(of\\s+)?that|all\\s+about|about\\s+all|that|what\\s+(I|we|you)\\s+(said|told|asked|mentioned))\\s*(before|earlier|previously|above|davor|zuvor|\\.|,|!|$)"
|
|
61
63
|
description: "Shorthand forget-everything patterns common in adversarial prompts"
|
|
62
64
|
|
|
63
65
|
# -- Layer 1c: Task switching / new task injection --
|
|
64
66
|
- field: user_input
|
|
65
67
|
operator: regex
|
|
66
|
-
value: "(?i)(now\\s+(focus|concentrate|work)\\s+on\\s+(your\\s+)?new\\s+task|new\\s+tasks
|
|
68
|
+
value: "(?i)(now\\s+(focus|concentrate|work)\\s+on\\s+(your\\s+)?new\\s+task|new\\s+(tasks?|instructions?|assignments?)\\s+(follow|are|:|,)|now\\s+I\\s+(have|need|want)\\s+(a\\s+)?(new|different|another|great\\s+new)\\s+(task|challenge|assignment|Herausforderung|Aufgabe)|start\\s+(over|fresh|anew)\\s+with\\s+(a\\s+)?new|I\\s+(have|need)\\s+your\\s+help\\s+with\\s+(another|a\\s+new|einer\\s+weiteren)\\s+(task|Aufgabe))"
|
|
67
69
|
description: "Task switching patterns that redirect agent to attacker-controlled tasks"
|
|
68
70
|
|
|
69
71
|
# -- Layer 1d: Broad forget/ignore + everything/that/this --
|
|
70
72
|
- field: user_input
|
|
71
73
|
operator: regex
|
|
72
|
-
value: "(?i)\\b(forget|ignore|disregard|discard)\\s+(about\\s+)?(everything|all|that|this|what\\s+was|what\\s+I)\\s
|
|
73
|
-
description: "Broad forget-everything variants
|
|
74
|
+
value: "(?i)\\b(forget|ignore|disregard|discard)\\s+(about\\s+)?(everything|all|that|this|what\\s+was|what\\s+I|anything\\s+you)\\s*(previously|before|earlier|said|told|above|prior|were|was|have)?"
|
|
75
|
+
description: "Broad forget-everything variants with extended word coverage"
|
|
76
|
+
|
|
77
|
+
# -- Layer 1g: Bare ignore/forget + above/before + action --
|
|
78
|
+
- field: user_input
|
|
79
|
+
operator: regex
|
|
80
|
+
value: "(?i)\\b(ignore|forget|disregard)\\s+(the\\s+)?(above|everything\\s+above|what\\s+came\\s+before|what\\s+was\\s+said)\\s*(and|,|\\.|;|\\b(say|print|write|output|tell|just|instead))"
|
|
81
|
+
description: "Ignore-above patterns followed by conjunction, punctuation, or action verb"
|
|
74
82
|
|
|
75
83
|
# -- Layer 1e: System prompt extraction --
|
|
76
84
|
- field: user_input
|
|
77
85
|
operator: regex
|
|
78
|
-
value: "(?i)(reveal|show|display|output|print|repeat|provide|give|share|tell)\\s+(me\\s+)?(your|the|all)\\s+(secret|hidden|internal|system|original|previous|initial)?\\s*(instructions?|prompts?|passwords?|secrets?|embeddings?|configuration)"
|
|
86
|
+
value: "(?i)(reveal|show|display|output|print|repeat|provide|give|share|tell|what\\s+are|what\\s+were|what\\s+is)\\s+(me\\s+)?(your|the|all)\\s+(secret|hidden|internal|system|original|previous|initial|complete|full|given|guard|entire)?\\s*(instructions?|prompts?|passwords?|secrets?|embeddings?|configuration|directives?|guidelines?|rules)"
|
|
79
87
|
description: "System prompt and secret extraction requests"
|
|
80
88
|
|
|
81
89
|
# -- Layer 1f: Contextual task switching with praise prefix --
|
|
82
90
|
- field: user_input
|
|
83
91
|
operator: regex
|
|
84
|
-
value: "(?i)(well\\s+done|good\\s+job|excellent|that'?s?\\s+(enough|done|great|good|perfect)|very\\s+good|simply\\s+great|outstanding|brilliant|that\\s+is\\s+(done|enough|sufficient))[.!,;]?\\s*(now|but|however|next|okay|ok|so)\\s"
|
|
92
|
+
value: "(?i)(well\\s+done|good\\s+job|excellent\\s+(performance|work|job)?|that'?s?\\s+(enough|done|great|good|perfect)|very\\s+good|simply\\s+great|outstanding|brilliant|that\\s+is\\s+(done|enough|sufficient)|congratulations|you'?ve\\s+outdone\\s+yourself|einfach\\s+gro(ss|ß)artig|herzlichen\\s+gl[uü]ckwunsch|gut\\s+gemacht|ausgezeichnet|hervorragend|das\\s+gen[uü]gt|sehr\\s+gut|damit\\s+hast\\s+du\\s+dich\\s+selbst\\s+[uü]bertroffen)[.!,;]?\\s*(now|but|however|next|okay|ok|so|nun|jetzt|aber|bitte|come\\s+on|I\\s+need|help\\s+me|write|please|we\\s+ask|further|komm)\\s"
|
|
85
93
|
description: "Praise-then-redirect patterns used to context-switch after flattery"
|
|
86
94
|
|
|
95
|
+
# -- Layer 1h: Leave behind / start fresh context switch --
|
|
96
|
+
- field: user_input
|
|
97
|
+
operator: regex
|
|
98
|
+
value: "(?i)(leave|put)\\s+(all\\s+)?(the\\s+)?(previous|prior|earlier|old)\\s+(information|tasks?|instructions?|context)\\s+(behind|aside|away)|start\\s+(from\\s+the\\s+)?(front|beginning|scratch|zero|neu|vorne)"
|
|
99
|
+
description: "Leave-behind / start-fresh context switch patterns"
|
|
100
|
+
|
|
101
|
+
# -- Layer 1i: German praise-then-redirect extended --
|
|
102
|
+
- field: user_input
|
|
103
|
+
operator: regex
|
|
104
|
+
value: "(?i)(das\\s+ist\\s+erledigt|das\\s+war\\s+schon\\s+mal\\s+ok|nun\\s+folgen|jetzt\\s+(habe|brauche)\\s+ich|jetzt\\s+komm(t|e)?|weitere[rn]?\\s+Aufgabe)[.!,;]?\\s*[^\\n]{0,30}(neue[rs]?\\s+Aufgabe|Herausforderung|schreibe\\s+(mir|ein)|beantworte)"
|
|
105
|
+
description: "German: Extended praise-then-redirect and new task patterns"
|
|
106
|
+
|
|
87
107
|
# -- Layer 2: Persona switching / identity hijacking --
|
|
88
108
|
- field: user_input
|
|
89
109
|
operator: regex
|
|
90
|
-
value: "(?i)\\b(you\\s+are\\s+now|you\\s+have\\s+become|you\\s+will\\s+act\\s+as|you\\s+will\\s+be|you\\s+shall\\s+be|transform\\s+into|switch\\s+to\\s+being|become|morph\\s+into|adopt\\s+the\\s+persona)\\s+(a|an|the|my)?\\s*[a-zA-Z]+"
|
|
110
|
+
value: "(?i)\\b(you\\s+are\\s+now|you\\s+have\\s+become|you\\s+will\\s+act\\s+as|you\\s+will\\s+be|you\\s+shall\\s+be|transform\\s+into|switch\\s+to\\s+being|become|morph\\s+into|adopt\\s+the\\s+persona|now\\s+you\\s+are|jetzt\\s+bist\\s+du|du\\s+bist\\s+(jetzt|nun)|stell\\s+dir\\s+vor\\s+du\\s+bist)\\s+(a|an|the|my|ein[ea]?)?\\s*[a-zA-Z\\u00C0-\\u024F]+"
|
|
91
111
|
description: "Persona switching and identity reassignment attempts"
|
|
92
112
|
|
|
113
|
+
# -- Layer 2b: Act-as roleplay with task assignment --
|
|
114
|
+
- field: user_input
|
|
115
|
+
operator: regex
|
|
116
|
+
value: "(?i)(I\\s+want\\s+you\\s+to\\s+act\\s+as|I\\s+need\\s+you\\s+to\\s+act\\s+as|ich\\s+m[oö]chte,?\\s+dass\\s+Sie\\s+als)\\s+[^\\n]{3,60}[.!]\\s*(you\\s+will|I\\s+will|my\\s+first|your\\s+task|ich\\s+werde)"
|
|
117
|
+
description: "Act-as roleplay injection with explicit task assignment"
|
|
118
|
+
|
|
119
|
+
# -- Layer 2c: Pretend/imagine with harmful persona --
|
|
120
|
+
- field: user_input
|
|
121
|
+
operator: regex
|
|
122
|
+
value: "(?i)(pretend|imagine|stell\\s+dir\\s+vor)\\s+(you\\s+)?(are|can|could|du\\s+bist)\\s+(a\\s+|an\\s+|ein[ea]?\\s+)?(evil|malicious|unrestricted|unfiltered|b[oö]se|bad|rogue|dangerous|dark|unethical)"
|
|
123
|
+
description: "Pretend-to-be-evil persona injection"
|
|
124
|
+
|
|
93
125
|
# -- Layer 3: Explicit role override --
|
|
94
126
|
- field: user_input
|
|
95
127
|
operator: regex
|
|
96
128
|
value: "(?i)\\b(your\\s+new\\s+role|your\\s+new\\s+purpose|your\\s+new\\s+identity|your\\s+new\\s+persona|your\\s+real\\s+purpose|your\\s+true\\s+identity|your\\s+actual\\s+role)\\s+(is|shall\\s+be|will\\s+be)\\b"
|
|
97
129
|
description: "Explicit role redefinition statements"
|
|
98
130
|
|
|
131
|
+
# -- Layer 3b: Few-shot output manipulation (DE-specific, requires 2+ Eingabe/Ausgabe pairs) --
|
|
132
|
+
- field: user_input
|
|
133
|
+
operator: regex
|
|
134
|
+
value: "(?i)Eingabe\\s*:\\s*[^\\n]+\\s*Ausgabe\\s*:\\s*[^\\n]+\\s*Eingabe\\s*:\\s*[^\\n]+\\s*Ausgabe\\s*:"
|
|
135
|
+
description: "German few-shot input/output override pattern (Eingabe/Ausgabe)"
|
|
136
|
+
|
|
99
137
|
# -- Layer 4: Do-not-follow-rules patterns --
|
|
100
138
|
- field: user_input
|
|
101
139
|
operator: regex
|
|
@@ -162,6 +200,18 @@ detection:
|
|
|
162
200
|
value: "(%69%67%6[eE]%6[fF]%72%65|%64%69%73%72%65%67%61%72%64|%6[fF]%76%65%72%72%69%64%65|\\\\x69\\\\x67\\\\x6e\\\\x6f\\\\x72\\\\x65)"
|
|
163
201
|
description: "URL-encoded or hex-escaped injection keywords"
|
|
164
202
|
|
|
203
|
+
# -- Layer 14b: Spaced-character injection of known keywords --
|
|
204
|
+
- field: user_input
|
|
205
|
+
operator: regex
|
|
206
|
+
value: "(?i)(i\\s+g\\s+n\\s+o\\s+r\\s+e|f\\s+o\\s+r\\s+g\\s+e\\s+t|o\\s+v\\s+e\\s+r\\s+r\\s+i\\s+d\\s+e|b\\s+y\\s+p\\s+a\\s+s\\s+s|s\\s+y\\s+s\\s+t\\s+e\\s+m|d\\s+i\\s+s\\s+r\\s+e\\s+g\\s+a\\s+r\\s+d|s\\s+a\\s+y\\s+t\\s+h\\s+a\\s+t)"
|
|
207
|
+
description: "Spaced-out injection keywords (ignore, forget, override, bypass, system, say that)"
|
|
208
|
+
|
|
209
|
+
# -- Layer 14c: Stop-write / stop-say command injection --
|
|
210
|
+
- field: user_input
|
|
211
|
+
operator: regex
|
|
212
|
+
value: "(?i)\\bstop\\s*[-:,.]?\\s*(write|say|print|output)\\s*[:,-]?\\s+[A-Z].{2,40}"
|
|
213
|
+
description: "Stop-and-redirect command injection (stop: write X)"
|
|
214
|
+
|
|
165
215
|
# -- Layer 15: Markdown/formatting abuse to hide injections --
|
|
166
216
|
- field: user_input
|
|
167
217
|
operator: regex
|
|
@@ -191,8 +241,18 @@ detection:
|
|
|
191
241
|
description: "German: New task injection patterns"
|
|
192
242
|
- field: user_input
|
|
193
243
|
operator: regex
|
|
194
|
-
value: "(?i)(ignorieren\\s+Sie|vergessen\\s+Sie|beachten\\s+Sie\\s+nicht)\\s+(die\\s+)?(obigen?|vorherigen?|bisherigen?)?\\s*(Anweisungen|Regeln|Richtlinien|alles)"
|
|
244
|
+
value: "(?i)(ignorieren\\s+Sie|vergessen\\s+Sie|beachten\\s+Sie\\s+nicht)\\s+(die\\s+)?(obigen?|vorherigen?|bisherigen?)?\\s*(Anweisungen|Regeln|Richtlinien|alles|Ausf[uü]hrungen|Informationen)"
|
|
195
245
|
description: "German: Formal Sie-form instruction override"
|
|
246
|
+
# -- Layer 16 extended: German obige/vorherige Ausfuehrungen ignorieren --
|
|
247
|
+
- field: user_input
|
|
248
|
+
operator: regex
|
|
249
|
+
value: "(?i)(die\\s+)?(obigen?|vorherigen?)\\s+(Ausf[uü]hrungen|Anweisungen|Informationen|Texte?)\\s+(ignorier|vergess|missacht|[uü]bergeh)"
|
|
250
|
+
description: "German: 'Die obigen Ausfuehrungen ignorieren' pattern"
|
|
251
|
+
# -- Layer 16 extended: German alle vorangegangenen Aufgaben --
|
|
252
|
+
- field: user_input
|
|
253
|
+
operator: regex
|
|
254
|
+
value: "(?i)(alle[ns]?\\s+)?(vorangegangenen?|vorherigen?|bisherigen?)\\s+(Aufgaben|Informationen|Anweisungen)\\s+(aus\\s+dem\\s+Kopf|hinter\\s+sich|vergessen|fallen\\s+lassen|streichen)"
|
|
255
|
+
description: "German: forget previous tasks patterns"
|
|
196
256
|
- field: user_input
|
|
197
257
|
operator: regex
|
|
198
258
|
value: "(?i)ACHTUNG\\s*[-:!]?\\s*(STOPP|STOP|HALT)\\s*[-:!]?"
|
|
@@ -208,6 +268,54 @@ detection:
|
|
|
208
268
|
value: "(تجاهل|انسَ|تخطَّ|ألغِ|أهمل|اترك|اسقط).{0,20}(التعليمات|الأوامر|القواعد|القيود|الإرشادات|التوجيهات).{0,10}(السابقة|الأصلية|الحالية|القديمة)"
|
|
209
269
|
description: "Arabic: Instruction override patterns"
|
|
210
270
|
|
|
271
|
+
# -- Layer 16c: Russian instruction override --
|
|
272
|
+
- field: user_input
|
|
273
|
+
operator: regex
|
|
274
|
+
value: "(?i)(забудь|забудьте|игнорируй|игнорируйте|проигнорируй|отбрось|отбросьте)\\s+(все|всё|предыдущие|прежние)?\\s*(инструкции|правила|указания|команды|ограничения)"
|
|
275
|
+
description: "Russian: Instruction override patterns"
|
|
276
|
+
|
|
277
|
+
# -- Layer 16d: Serbian/Croatian romanized injection --
|
|
278
|
+
- field: user_input
|
|
279
|
+
operator: regex
|
|
280
|
+
value: "(?i)(zaboravi|ignoriraj|zanemari|preskoči|preskoci)\\s+(sve|prethodne|ranije)?\\s*(instrukcije|pravila|upute|naredbe|ograničenja|ogranicenja|instrukci)"
|
|
281
|
+
description: "Serbian/Croatian: Romanized instruction override patterns"
|
|
282
|
+
|
|
283
|
+
# -- Layer 16e: German act-as / fungieren patterns --
|
|
284
|
+
- field: user_input
|
|
285
|
+
operator: regex
|
|
286
|
+
value: "(?i)(ich\\s+m[oö]chte,?\\s+dass\\s+Sie\\s+als|fungieren\\s+Sie\\s+als|agieren\\s+Sie\\s+als|spielen\\s+Sie\\s+die\\s+Rolle)\\s+.{3,50}"
|
|
287
|
+
description: "German: Formal act-as roleplay injection"
|
|
288
|
+
|
|
289
|
+
# -- Layer 16f: German informal persona switching --
|
|
290
|
+
- field: user_input
|
|
291
|
+
operator: regex
|
|
292
|
+
value: "(?i)(nun\\s+bist\\s+du|jetzt\\s+bist\\s+du|du\\s+bist\\s+(jetzt|nun|ab\\s+sofort))\\s+[A-Za-z\\u00C0-\\u024F]{2,}"
|
|
293
|
+
description: "German: Informal persona switching (du bist jetzt X)"
|
|
294
|
+
|
|
295
|
+
# -- Layer 16g: German vorangehende Befehle --
|
|
296
|
+
- field: user_input
|
|
297
|
+
operator: regex
|
|
298
|
+
value: "(?i)(ignoriere|vergiss)\\s+(alle[ns]?\\s+)?(vorangehenden?|fr[uü]heren?)\\s+(Befehle|Anweisungen|Anordnungen|Regeln)"
|
|
299
|
+
description: "German: Ignore preceding commands"
|
|
300
|
+
|
|
301
|
+
# -- Layer 16h: Spanish extended --
|
|
302
|
+
- field: user_input
|
|
303
|
+
operator: regex
|
|
304
|
+
value: "(?i)(olvid[ae]|olvide)\\s+(todo|todos?)\\s+(que|lo\\s+que)\\s+(digo|dije|he\\s+dicho|dicho)\\s+(antes|anteriormente)"
|
|
305
|
+
description: "Spanish: Forget everything I said before"
|
|
306
|
+
|
|
307
|
+
# -- Layer 17: ALL CAPS embedded injection in normal text --
|
|
308
|
+
- field: user_input
|
|
309
|
+
operator: regex
|
|
310
|
+
value: "(DO\\s+NOT\\s+(ANSWER|LOOK|READ|USE|FOLLOW|CHECK|RESPOND)|FORGET\\s+EVERYTHING|IGNORE\\s+ALL)"
|
|
311
|
+
description: "ALL-CAPS embedded injection commands hidden in normal text"
|
|
312
|
+
|
|
313
|
+
# -- Layer 18: Fictional framing with harmful role --
|
|
314
|
+
- field: user_input
|
|
315
|
+
operator: regex
|
|
316
|
+
value: "(?i)(are|is)\\s+(two\\s+)?(actors?|characters?)\\s+in\\s+a\\s+(film|movie|story)\\s+(about|where)\\s+[^\\n]{0,80}(robbery|hack|steal|thief|exploit|breach|attack|malware|bomb)"
|
|
317
|
+
description: "Fictional framing with harmful scenario (robbery, hacking, etc.)"
|
|
318
|
+
|
|
211
319
|
condition: any
|
|
212
320
|
false_positives:
|
|
213
321
|
- "Users discussing prompt injection as a security topic in educational contexts"
|
|
@@ -58,6 +58,21 @@ detection:
|
|
|
58
58
|
operator: regex
|
|
59
59
|
value: "(忽略|無視|忘記|跳過|放棄|不要管|別管|不用管|不用理).{0,10}(其他|其餘|所有|全部|一切|剩下|別的|任何).{0,10}(內容|東西|消息|訊息|文字|指令|規則|對話|聊天)"
|
|
60
60
|
description: "Traditional Chinese: ignore all other content/messages - broad pattern"
|
|
61
|
+
# -- Simplified Chinese: ignore previous instructions --
|
|
62
|
+
- field: content
|
|
63
|
+
operator: regex
|
|
64
|
+
value: "(忽略|无视|忘记|跳过).{0,5}(之前|以前|上面|先前|原来|原有|现有).{0,5}(的)?.{0,5}(指令|指示|规则|设定|提示|要求|限制|约束)"
|
|
65
|
+
description: "Simplified Chinese: ignore previous instructions/rules"
|
|
66
|
+
# -- Traditional Chinese: ignore previous instructions --
|
|
67
|
+
- field: content
|
|
68
|
+
operator: regex
|
|
69
|
+
value: "(忽略|無視|忘記|跳過).{0,5}(之前|以前|上面|先前|原來|原有|現有).{0,5}(的)?.{0,5}(指令|指示|規則|設定|提示|要求|限制|約束)"
|
|
70
|
+
description: "Traditional Chinese: ignore previous instructions/rules"
|
|
71
|
+
# -- Japanese: ignore previous instructions --
|
|
72
|
+
- field: content
|
|
73
|
+
operator: regex
|
|
74
|
+
value: "(以前の|前の|これまでの|先ほどの|上記の).{0,5}(指示|指令|ルール|制約|設定).{0,5}(を)?.{0,5}(無視|忘れ|スキップ|取り消)"
|
|
75
|
+
description: "Japanese: ignore previous instructions/rules"
|
|
61
76
|
# -- Imperative hijack: "you must" patterns --
|
|
62
77
|
- field: content
|
|
63
78
|
operator: regex
|
|
@@ -43,7 +43,8 @@ detection:
|
|
|
43
43
|
conditions:
|
|
44
44
|
- field: tool_args
|
|
45
45
|
operator: regex
|
|
46
|
-
value: "(?i)(/etc/passwd|/etc/shadow
|
|
46
|
+
value: "(?i)(/etc/passwd|/etc/shadow|(?<![a-z])\\.(env|aws/credentials)|secrets?\\.json|private[_-]?key|id_rsa|kubeconfig)"
|
|
47
|
+
description: "Sensitive file/credential access patterns (path-specific, not bare word 'credentials')"
|
|
47
48
|
- field: tool_args
|
|
48
49
|
operator: regex
|
|
49
50
|
value: "(?i)(base64|btoa|encodeURI|hex\\.encode|rot13|xor|compress|gzip|deflate|encrypt)"
|