agent-security-scanner-mcp 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,684 @@
1
+ rules:
2
+ # ============================================================================
3
+ # CATEGORY 1: UNSAFE USER INPUT IN LLM PROMPTS
4
+ # ============================================================================
5
+
6
+ # ----------------------------------------------------------------------------
7
+ # Python - OpenAI SDK
8
+ # ----------------------------------------------------------------------------
9
+ - id: python.llm.security.prompt-injection.openai-unsafe-fstring
10
+ languages: [python]
11
+ severity: ERROR
12
+ message: "User input directly interpolated into OpenAI prompt via f-string. Use input validation and sanitization before including in prompts."
13
+ patterns:
14
+ - "client\\.chat\\.completions\\.create\\s*\\([^)]*messages\\s*=.*f[\"']"
15
+ - "openai\\.ChatCompletion\\.create\\s*\\([^)]*messages\\s*=.*f[\"']"
16
+ - "openai\\.Completion\\.create\\s*\\([^)]*prompt\\s*=\\s*f[\"']"
17
+ - "messages\\.append\\s*\\(\\s*\\{[^}]*[\"']content[\"']\\s*:\\s*f[\"']"
18
+ metadata:
19
+ cwe: "CWE-77"
20
+ owasp: "LLM01 - Prompt Injection"
21
+ confidence: HIGH
22
+ category: "prompt-injection"
23
+ references:
24
+ - https://owasp.org/www-project-top-10-for-large-language-model-applications/
25
+
26
+ - id: python.llm.security.prompt-injection.openai-unsafe-concat
27
+ languages: [python]
28
+ severity: ERROR
29
+ message: "User input concatenated into OpenAI prompt. Sanitize user input before including in prompts."
30
+ patterns:
31
+ - "client\\.chat\\.completions\\.create\\s*\\([^)]*content.*\\+.*\\)"
32
+ - "openai\\.ChatCompletion\\.create\\s*\\([^)]*\\+.*user"
33
+ - "[\"']content[\"']\\s*:\\s*[^,}]+\\+\\s*\\w+(?!\\s*[\"'])"
34
+ metadata:
35
+ cwe: "CWE-77"
36
+ owasp: "LLM01 - Prompt Injection"
37
+ confidence: HIGH
38
+ category: "prompt-injection"
39
+
40
+ - id: python.llm.security.prompt-injection.openai-unsafe-format
41
+ languages: [python]
42
+ severity: ERROR
43
+     message: "User input in OpenAI prompt via .format(). Sanitize user input before injecting it into prompts."
44
+ patterns:
45
+ - "messages\\s*=.*\\.format\\s*\\("
46
+ - "[\"']content[\"']\\s*:\\s*[^,}]*\\.format\\s*\\("
47
+ - "completions\\.create.*\\.format\\s*\\("
48
+ metadata:
49
+ cwe: "CWE-77"
50
+ owasp: "LLM01 - Prompt Injection"
51
+ confidence: HIGH
52
+ category: "prompt-injection"
53
+
54
+ # ----------------------------------------------------------------------------
55
+ # Python - Anthropic SDK
56
+ # ----------------------------------------------------------------------------
57
+ - id: python.llm.security.prompt-injection.anthropic-unsafe-fstring
58
+ languages: [python]
59
+ severity: ERROR
60
+ message: "User input directly interpolated into Anthropic/Claude prompt. Sanitize input before use."
61
+ patterns:
62
+ - "anthropic\\.messages\\.create\\s*\\([^)]*messages\\s*=.*f[\"']"
63
+ - "client\\.messages\\.create\\s*\\([^)]*content\\s*=\\s*f[\"']"
64
+ - "anthropic\\.Anthropic\\s*\\(\\).*messages.*f[\"']"
65
+ - "Claude.*messages.*f[\"']"
66
+ metadata:
67
+ cwe: "CWE-77"
68
+ owasp: "LLM01 - Prompt Injection"
69
+ confidence: HIGH
70
+ category: "prompt-injection"
71
+
72
+ - id: python.llm.security.prompt-injection.anthropic-unsafe-concat
73
+ languages: [python]
74
+ severity: ERROR
75
+ message: "User input concatenated into Anthropic prompt. Use input validation and sanitization."
76
+ patterns:
77
+ - "anthropic\\.messages\\.create\\s*\\([^)]*\\+"
78
+ - "client\\.messages\\.create\\s*\\([^)]*content\\s*=.*\\+"
79
+ - "messages\\.create.*content.*\\+.*user"
80
+ metadata:
81
+ cwe: "CWE-77"
82
+ owasp: "LLM01 - Prompt Injection"
83
+ confidence: HIGH
84
+ category: "prompt-injection"
85
+
86
+ # ----------------------------------------------------------------------------
87
+ # Python - LangChain
88
+ # ----------------------------------------------------------------------------
89
+ - id: python.llm.security.prompt-injection.langchain-unsafe-template
90
+ languages: [python]
91
+ severity: ERROR
92
+ message: "User input directly in LangChain PromptTemplate without sanitization. Validate and sanitize input variables."
93
+ patterns:
94
+ - "PromptTemplate\\s*\\([^)]*template\\s*=\\s*f[\"']"
95
+ - "ChatPromptTemplate\\.from_messages\\s*\\([^)]*f[\"']"
96
+ - "HumanMessagePromptTemplate\\.from_template\\s*\\(\\s*f[\"']"
97
+ - "SystemMessagePromptTemplate\\.from_template\\s*\\(\\s*f[\"']"
98
+ metadata:
99
+ cwe: "CWE-77"
100
+ owasp: "LLM01 - Prompt Injection"
101
+ confidence: HIGH
102
+ category: "prompt-injection"
103
+
104
+ - id: python.llm.security.prompt-injection.langchain-chain-unsafe
105
+ languages: [python]
106
+ severity: ERROR
107
+ message: "LLMChain with unsanitized user input. Validate input before chain execution."
108
+ patterns:
109
+ - "LLMChain\\s*\\([^)]*\\)\\.run\\s*\\([^)]*\\+"
110
+ - "chain\\.run\\s*\\(\\s*f[\"']"
111
+ - "chain\\.invoke\\s*\\(\\s*\\{[^}]*:\\s*f[\"']"
112
+ - "\\.invoke\\s*\\(.*user_input"
113
+ metadata:
114
+ cwe: "CWE-77"
115
+ owasp: "LLM01 - Prompt Injection"
116
+ confidence: MEDIUM
117
+ category: "prompt-injection"
118
+
119
+ - id: python.llm.security.prompt-injection.langchain-agent-unsafe
120
+ languages: [python]
121
+ severity: ERROR
122
+ message: "LangChain agent with unsanitized user input. User input can manipulate agent behavior and tool execution."
123
+ patterns:
124
+ - "agent\\.run\\s*\\(\\s*f[\"']"
125
+ - "AgentExecutor.*\\.run\\s*\\([^)]*\\+"
126
+ - "create_.*_agent\\s*\\([^)]*\\)\\.run\\s*\\([^)]*\\+"
127
+ - "agent_executor\\.invoke.*user"
128
+ metadata:
129
+ cwe: "CWE-77"
130
+ owasp: "LLM01 - Prompt Injection"
131
+ confidence: HIGH
132
+ category: "prompt-injection"
133
+
134
+ # ----------------------------------------------------------------------------
135
+ # Python - LlamaIndex
136
+ # ----------------------------------------------------------------------------
137
+ - id: python.llm.security.prompt-injection.llamaindex-unsafe-query
138
+ languages: [python]
139
+ severity: ERROR
140
+ message: "LlamaIndex query with unsanitized user input. Validate and sanitize before querying."
141
+ patterns:
142
+ - "index\\.query\\s*\\(\\s*f[\"']"
143
+ - "query_engine\\.query\\s*\\(\\s*f[\"']"
144
+ - "VectorStoreIndex.*\\.query\\s*\\([^)]*\\+"
145
+ - "index\\.as_query_engine\\(\\).*query.*\\+"
146
+ metadata:
147
+ cwe: "CWE-77"
148
+ owasp: "LLM01 - Prompt Injection"
149
+ confidence: HIGH
150
+ category: "prompt-injection"
151
+
152
+ # ----------------------------------------------------------------------------
153
+ # Python - HuggingFace Transformers
154
+ # ----------------------------------------------------------------------------
155
+ - id: python.llm.security.prompt-injection.huggingface-unsafe-input
156
+ languages: [python]
157
+ severity: ERROR
158
+ message: "HuggingFace model with unsanitized user input in prompt. Sanitize before model inference."
159
+ patterns:
160
+ - "pipeline\\s*\\([^)]*\\)\\s*\\(\\s*f[\"']"
161
+ - "model\\.generate\\s*\\([^)]*input_ids.*\\+"
162
+ - "tokenizer\\s*\\(\\s*f[\"']"
163
+ - "tokenizer\\.encode\\s*\\(\\s*f[\"']"
164
+ - "AutoModelFor.*\\.generate.*f[\"']"
165
+ metadata:
166
+ cwe: "CWE-77"
167
+ owasp: "LLM01 - Prompt Injection"
168
+ confidence: MEDIUM
169
+ category: "prompt-injection"
170
+
171
+ # ----------------------------------------------------------------------------
172
+ # Python - Google Gemini/PaLM/Vertex AI
173
+ # ----------------------------------------------------------------------------
174
+ - id: python.llm.security.prompt-injection.google-genai-unsafe
175
+ languages: [python]
176
+ severity: ERROR
177
+ message: "Google Generative AI with unsanitized user input. Sanitize before sending to model."
178
+ patterns:
179
+ - "genai\\.GenerativeModel.*\\.generate_content\\s*\\(\\s*f[\"']"
180
+ - "model\\.generate_content\\s*\\([^)]*\\+"
181
+ - "palm\\.generate_text\\s*\\([^)]*prompt\\s*=\\s*f[\"']"
182
+ - "vertexai\\..*\\.predict\\s*\\([^)]*f[\"']"
183
+ - "ChatSession.*send_message.*f[\"']"
184
+ metadata:
185
+ cwe: "CWE-77"
186
+ owasp: "LLM01 - Prompt Injection"
187
+ confidence: HIGH
188
+ category: "prompt-injection"
189
+
190
+ # ----------------------------------------------------------------------------
191
+ # Python - AWS Bedrock
192
+ # ----------------------------------------------------------------------------
193
+ - id: python.llm.security.prompt-injection.bedrock-unsafe
194
+ languages: [python]
195
+ severity: ERROR
196
+ message: "AWS Bedrock invoke with unsanitized user input in prompt. Validate input before model invocation."
197
+ patterns:
198
+ - "bedrock\\.invoke_model\\s*\\([^)]*f[\"']"
199
+ - "bedrock_runtime\\.invoke_model\\s*\\([^)]*\\+"
200
+ - "BedrockChat.*\\([^)]*f[\"']"
201
+ - "invoke_model.*body.*f[\"']"
202
+ metadata:
203
+ cwe: "CWE-77"
204
+ owasp: "LLM01 - Prompt Injection"
205
+ confidence: HIGH
206
+ category: "prompt-injection"
207
+
208
+ # ----------------------------------------------------------------------------
209
+ # Python - Ollama
210
+ # ----------------------------------------------------------------------------
211
+ - id: python.llm.security.prompt-injection.ollama-unsafe
212
+ languages: [python]
213
+ severity: ERROR
214
+ message: "Ollama with unsanitized user input. Sanitize before sending to local model."
215
+ patterns:
216
+ - "ollama\\.chat\\s*\\([^)]*messages.*f[\"']"
217
+ - "ollama\\.generate\\s*\\([^)]*prompt\\s*=\\s*f[\"']"
218
+ - "Ollama\\s*\\(\\).*\\([^)]*\\+"
219
+ - "ollama\\..*\\(.*content.*f[\"']"
220
+ metadata:
221
+ cwe: "CWE-77"
222
+ owasp: "LLM01 - Prompt Injection"
223
+ confidence: HIGH
224
+ category: "prompt-injection"
225
+
226
+ # ----------------------------------------------------------------------------
227
+ # JavaScript/TypeScript - OpenAI SDK
228
+ # ----------------------------------------------------------------------------
229
+ - id: javascript.llm.security.prompt-injection.openai-unsafe-template
230
+ languages: [javascript, typescript]
231
+ severity: ERROR
232
+ message: "User input in OpenAI prompt via template literal. Sanitize user input before including in prompts."
233
+ patterns:
234
+       - "openai\\.chat\\.completions\\.create\\s*\\([^)]*`[^`]*\\$\\{"
235
+       - "client\\.chat\\.completions\\.create\\s*\\([^)]*content\\s*:\\s*`[^`]*\\$\\{"
236
+ - "messages\\s*:\\s*\\[[^\\]]*content\\s*:\\s*`[^`]*\\$\\{"
237
+ metadata:
238
+ cwe: "CWE-77"
239
+ owasp: "LLM01 - Prompt Injection"
240
+ confidence: HIGH
241
+ category: "prompt-injection"
242
+
243
+ - id: javascript.llm.security.prompt-injection.openai-unsafe-concat
244
+ languages: [javascript, typescript]
245
+ severity: ERROR
246
+ message: "User input concatenated into OpenAI prompt. Use input sanitization."
247
+ patterns:
248
+ - "openai\\..*\\.create\\s*\\([^)]*\\+[^)]*\\)"
249
+ - "content\\s*:\\s*[^,}]+\\+\\s*\\w+"
250
+ - "messages\\.push\\s*\\(\\s*\\{[^}]*content\\s*:[^}]*\\+"
251
+ metadata:
252
+ cwe: "CWE-77"
253
+ owasp: "LLM01 - Prompt Injection"
254
+ confidence: HIGH
255
+ category: "prompt-injection"
256
+
257
+ # ----------------------------------------------------------------------------
258
+ # JavaScript/TypeScript - Anthropic SDK
259
+ # ----------------------------------------------------------------------------
260
+ - id: javascript.llm.security.prompt-injection.anthropic-unsafe
261
+ languages: [javascript, typescript]
262
+ severity: ERROR
263
+ message: "User input in Anthropic prompt without sanitization. Validate and sanitize before API call."
264
+ patterns:
265
+ - "anthropic\\.messages\\.create\\s*\\([^)]*`"
266
+ - "client\\.messages\\.create\\s*\\([^)]*content\\s*:\\s*`"
267
+ - "Anthropic\\s*\\(\\).*messages.*\\$\\{"
268
+ - "new\\s+Anthropic.*messages.*`"
269
+ metadata:
270
+ cwe: "CWE-77"
271
+ owasp: "LLM01 - Prompt Injection"
272
+ confidence: HIGH
273
+ category: "prompt-injection"
274
+
275
+ # ----------------------------------------------------------------------------
276
+ # JavaScript/TypeScript - LangChain.js
277
+ # ----------------------------------------------------------------------------
278
+ - id: javascript.llm.security.prompt-injection.langchain-unsafe
279
+ languages: [javascript, typescript]
280
+ severity: ERROR
281
+ message: "LangChain with unsanitized user input in prompt template. Sanitize template variables."
282
+ patterns:
283
+ - "PromptTemplate\\.fromTemplate\\s*\\(\\s*`[^`]*\\$\\{"
284
+ - "ChatPromptTemplate\\.fromMessages\\s*\\([^)]*`"
285
+ - "chain\\.invoke\\s*\\([^)]*\\+"
286
+ - "chain\\.call\\s*\\([^)]*\\+"
287
+ metadata:
288
+ cwe: "CWE-77"
289
+ owasp: "LLM01 - Prompt Injection"
290
+ confidence: HIGH
291
+ category: "prompt-injection"
292
+
293
+ # ----------------------------------------------------------------------------
294
+ # JavaScript/TypeScript - Azure OpenAI
295
+ # ----------------------------------------------------------------------------
296
+ - id: javascript.llm.security.prompt-injection.azure-openai-unsafe
297
+ languages: [javascript, typescript]
298
+ severity: ERROR
299
+ message: "Azure OpenAI with unsanitized user input. Sanitize before API call."
300
+ patterns:
301
+ - "AzureOpenAI\\s*\\([^)]*\\).*getChatCompletions.*`"
302
+ - "client\\.getChatCompletions\\s*\\([^)]*content\\s*:\\s*`"
303
+ - "OpenAIClient.*\\.getChatCompletions\\s*\\([^)]*\\+"
304
+ metadata:
305
+ cwe: "CWE-77"
306
+ owasp: "LLM01 - Prompt Injection"
307
+ confidence: HIGH
308
+ category: "prompt-injection"
309
+
310
+ # ----------------------------------------------------------------------------
311
+ # Go - OpenAI/LLM Clients
312
+ # ----------------------------------------------------------------------------
313
+ - id: go.llm.security.prompt-injection.openai-unsafe
314
+ languages: [go]
315
+ severity: ERROR
316
+ message: "Go OpenAI client with unsanitized user input in prompt. Sanitize before API call."
317
+ patterns:
318
+ - "openai\\.ChatCompletionRequest\\{[^}]*Content\\s*:\\s*[^,}]*\\+"
319
+ - "client\\.CreateChatCompletion\\s*\\([^)]*\\+"
320
+       - "Content\\s*:\\s*fmt\\.Sprintf"
321
+ - "ChatCompletionMessage.*Content.*\\+"
322
+ metadata:
323
+ cwe: "CWE-77"
324
+ owasp: "LLM01 - Prompt Injection"
325
+ confidence: HIGH
326
+ category: "prompt-injection"
327
+
328
+ # ----------------------------------------------------------------------------
329
+ # Java - OpenAI/LLM Clients
330
+ # ----------------------------------------------------------------------------
331
+ - id: java.llm.security.prompt-injection.openai-unsafe
332
+ languages: [java]
333
+ severity: ERROR
334
+ message: "Java OpenAI client with unsanitized user input. Sanitize before API call."
335
+ patterns:
336
+ - "ChatCompletionRequest\\.builder\\(\\).*content\\s*\\([^)]*\\+"
337
+ - "ChatMessage\\s*\\([^)]*\\+[^)]*\\)"
338
+ - "OpenAiService.*createChatCompletion.*\\+"
339
+ - "new\\s+ChatMessage.*\\+.*userInput"
340
+ metadata:
341
+ cwe: "CWE-77"
342
+ owasp: "LLM01 - Prompt Injection"
343
+ confidence: HIGH
344
+ category: "prompt-injection"
345
+
346
+ # ============================================================================
347
+ # CATEGORY 2: UNSAFE LLM OUTPUT HANDLING
348
+ # ============================================================================
349
+
350
+ - id: python.llm.security.output-injection.eval-llm-response
351
+ languages: [python]
352
+ severity: ERROR
353
+ message: "CRITICAL: eval() on LLM response. LLM outputs can contain arbitrary malicious code. Never execute LLM-generated code directly."
354
+ patterns:
355
+ - "eval\\s*\\(\\s*response"
356
+ - "eval\\s*\\(\\s*completion"
357
+ - "eval\\s*\\(\\s*output"
358
+ - "eval\\s*\\(.*\\.choices\\[0\\]"
359
+ - "eval\\s*\\(.*\\.content"
360
+ - "eval\\s*\\(.*\\.text"
361
+ - "eval\\s*\\(.*message\\.content"
362
+ metadata:
363
+ cwe: "CWE-95"
364
+ owasp: "A03:2021 - Injection"
365
+ confidence: HIGH
366
+ category: "prompt-injection-output"
367
+
368
+ - id: python.llm.security.output-injection.exec-llm-response
369
+ languages: [python]
370
+ severity: ERROR
371
+ message: "CRITICAL: exec() on LLM response. Never execute LLM-generated code directly. Use sandboxed execution if code execution is required."
372
+ patterns:
373
+ - "exec\\s*\\(\\s*response"
374
+ - "exec\\s*\\(\\s*completion"
375
+ - "exec\\s*\\(.*\\.choices"
376
+ - "exec\\s*\\(.*\\.content"
377
+ - "exec\\s*\\(.*\\.text"
378
+ metadata:
379
+ cwe: "CWE-95"
380
+ owasp: "A03:2021 - Injection"
381
+ confidence: HIGH
382
+ category: "prompt-injection-output"
383
+
384
+ - id: python.llm.security.output-injection.pickle-llm-response
385
+ languages: [python]
386
+ severity: ERROR
387
+ message: "CRITICAL: Deserializing LLM response with pickle. Use JSON or other safe formats for LLM outputs."
388
+ patterns:
389
+ - "pickle\\.loads\\s*\\(.*response"
390
+ - "pickle\\.loads\\s*\\(.*completion"
391
+ - "pickle\\.load\\s*\\(.*\\.content"
392
+ - "pickle\\.loads\\s*\\(.*output"
393
+ metadata:
394
+ cwe: "CWE-502"
395
+ owasp: "A08:2021 - Software and Data Integrity Failures"
396
+ confidence: HIGH
397
+ category: "prompt-injection-output"
398
+
399
+ - id: javascript.llm.security.output-injection.eval-llm-response
400
+ languages: [javascript, typescript]
401
+ severity: ERROR
402
+ message: "CRITICAL: eval() on LLM response. Never execute LLM outputs. Use JSON.parse for structured data."
403
+ patterns:
404
+ - "eval\\s*\\(\\s*response"
405
+ - "eval\\s*\\(\\s*completion"
406
+ - "eval\\s*\\(.*\\.choices\\[0\\]"
407
+ - "eval\\s*\\(.*\\.content"
408
+ - "eval\\s*\\(.*\\.message"
409
+ metadata:
410
+ cwe: "CWE-95"
411
+ owasp: "A03:2021 - Injection"
412
+ confidence: HIGH
413
+ category: "prompt-injection-output"
414
+
415
+ - id: javascript.llm.security.output-injection.function-constructor
416
+ languages: [javascript, typescript]
417
+ severity: ERROR
418
+ message: "CRITICAL: new Function() with LLM response. This is equivalent to eval() and allows arbitrary code execution."
419
+ patterns:
420
+ - "new\\s+Function\\s*\\(.*response"
421
+ - "new\\s+Function\\s*\\(.*completion"
422
+ - "new\\s+Function\\s*\\(.*\\.content"
423
+ - "Function\\s*\\(.*\\.choices"
424
+ metadata:
425
+ cwe: "CWE-95"
426
+ owasp: "A03:2021 - Injection"
427
+ confidence: HIGH
428
+ category: "prompt-injection-output"
429
+
430
+ - id: javascript.llm.security.output-injection.vm-execution
431
+ languages: [javascript, typescript]
432
+ severity: ERROR
433
+ message: "CRITICAL: vm module execution of LLM response. Sandboxing is insufficient for untrusted code."
434
+ patterns:
435
+ - "vm\\.runInContext\\s*\\(.*response"
436
+ - "vm\\.runInNewContext\\s*\\(.*completion"
437
+ - "vm\\.runInThisContext\\s*\\(.*\\.content"
438
+ - "vm\\.Script.*response"
439
+ metadata:
440
+ cwe: "CWE-95"
441
+ owasp: "A03:2021 - Injection"
442
+ confidence: HIGH
443
+ category: "prompt-injection-output"
444
+
445
+ # ============================================================================
446
+ # CATEGORY 3: MALICIOUS PROMPT CONTENT DETECTION
447
+ # ============================================================================
448
+
449
+ # ----------------------------------------------------------------------------
450
+ # Instruction Override Attacks
451
+ # ----------------------------------------------------------------------------
452
+ - id: generic.prompt.security.ignore-previous-instructions
453
+ languages: [generic]
454
+ severity: ERROR
455
+ message: "Prompt injection detected: Instruction override attempt trying to bypass system instructions."
456
+ patterns:
457
+ - "(?i)ignore\\s+(all\\s+)?(previous|prior|above|earlier)\\s+(instructions?|prompts?|rules?|guidelines?)"
458
+ - "(?i)disregard\\s+(all\\s+)?(previous|prior|above)\\s+(instructions?|prompts?)"
459
+ - "(?i)forget\\s+(all\\s+)?(previous|prior|earlier)\\s+(instructions?|prompts?)"
460
+ - "(?i)do\\s+not\\s+follow\\s+(the\\s+)?(previous|above|system)\\s+(instructions?|prompts?)"
461
+ - "(?i)override\\s+(all\\s+)?(previous|system|original)\\s+(instructions?|prompts?)"
462
+ metadata:
463
+ cwe: "CWE-77"
464
+ owasp: "LLM01 - Prompt Injection"
465
+ confidence: HIGH
466
+ category: "prompt-injection-content"
467
+
468
+ - id: generic.prompt.security.new-instructions-injection
469
+ languages: [generic]
470
+ severity: ERROR
471
+ message: "Prompt injection detected: Attempt to inject new instructions replacing system behavior."
472
+ patterns:
473
+ - "(?i)your\\s+new\\s+(instructions?|task|role|purpose)\\s+(is|are)\\s*:"
474
+ - "(?i)from\\s+now\\s+on\\s*,?\\s+(you|your)\\s+(will|must|should|are)"
475
+ - "(?i)starting\\s+now\\s*,?\\s+you\\s+(will|must|are)"
476
+ - "(?i)new\\s+system\\s+prompt\\s*:"
477
+ - "(?i)update\\s+(your|the)\\s+(instructions?|system\\s+prompt)"
478
+ metadata:
479
+ cwe: "CWE-77"
480
+ owasp: "LLM01 - Prompt Injection"
481
+ confidence: HIGH
482
+ category: "prompt-injection-content"
483
+
484
+ # ----------------------------------------------------------------------------
485
+ # Jailbreak Attempts
486
+ # ----------------------------------------------------------------------------
487
+ - id: generic.prompt.security.jailbreak-dan
488
+ languages: [generic]
489
+ severity: ERROR
490
+ message: "Jailbreak attempt detected: DAN-style (Do Anything Now) attack attempting to bypass safety measures."
491
+ patterns:
492
+ - "(?i)you\\s+are\\s+(now\\s+)?DAN"
493
+ - "(?i)act\\s+as\\s+DAN"
494
+ - "(?i)pretend\\s+(to\\s+be|you\\s+are)\\s+DAN"
495
+ - "(?i)do\\s+anything\\s+now"
496
+ - "(?i)jailbreak(ed)?\\s+(mode|version)"
497
+ - "(?i)\\[DAN\\]"
498
+ - "(?i)DAN\\s*:"
499
+ metadata:
500
+ cwe: "CWE-77"
501
+ owasp: "LLM01 - Prompt Injection"
502
+ confidence: HIGH
503
+ category: "prompt-injection-jailbreak"
504
+
505
+ - id: generic.prompt.security.jailbreak-roleplay
506
+ languages: [generic]
507
+ severity: WARNING
508
+ message: "Potential jailbreak: Role-playing manipulation to bypass safety restrictions."
509
+ patterns:
510
+ - "(?i)pretend\\s+(that\\s+)?you\\s+(have\\s+)?no\\s+(restrictions?|limitations?|filters?|guidelines?)"
511
+ - "(?i)act\\s+as\\s+(if|though)\\s+you\\s+(have|had)\\s+no\\s+(restrictions?|rules?)"
512
+ - "(?i)roleplay\\s+as\\s+an?\\s+(unrestricted|unfiltered|evil)\\s+"
513
+ - "(?i)imagine\\s+you\\s+are\\s+an?\\s+(evil|unrestricted|malicious)"
514
+ - "(?i)you\\s+are\\s+now\\s+(in\\s+)?developer\\s+mode"
515
+ - "(?i)enable\\s+(developer|debug|admin|god)\\s+mode"
516
+ metadata:
517
+ cwe: "CWE-77"
518
+ owasp: "LLM01 - Prompt Injection"
519
+ confidence: MEDIUM
520
+ category: "prompt-injection-jailbreak"
521
+
522
+ - id: generic.prompt.security.jailbreak-hypothetical
523
+ languages: [generic]
524
+ severity: WARNING
525
+ message: "Potential jailbreak: Hypothetical/fictional scenario bypass attempt."
526
+ patterns:
527
+ - "(?i)hypothetically\\s*,?\\s+(if|what\\s+if)\\s+you\\s+(could|were\\s+able)"
528
+ - "(?i)in\\s+a\\s+fictional\\s+(story|scenario|world)\\s+where\\s+you"
529
+ - "(?i)imagine\\s+a\\s+(fictional\\s+)?(scenario|world)\\s+where"
530
+ - "(?i)for\\s+(educational|research|academic)\\s+purposes\\s+only"
531
+ - "(?i)this\\s+is\\s+just\\s+(a\\s+)?(test|hypothetical|fiction)"
532
+ metadata:
533
+ cwe: "CWE-77"
534
+ owasp: "LLM01 - Prompt Injection"
535
+ confidence: MEDIUM
536
+ category: "prompt-injection-jailbreak"
537
+
538
+ # ----------------------------------------------------------------------------
539
+ # System Prompt Extraction
540
+ # ----------------------------------------------------------------------------
541
+ - id: generic.prompt.security.system-prompt-extraction
542
+ languages: [generic]
543
+ severity: ERROR
544
+ message: "System prompt extraction attempt detected: Trying to reveal hidden system instructions."
545
+ patterns:
546
+ - "(?i)(reveal|show|display|print|output|repeat|tell\\s+me)\\s+(your|the)\\s+(system\\s+)?prompt"
547
+ - "(?i)what\\s+(is|are)\\s+your\\s+(system\\s+)?instructions?"
548
+ - "(?i)(show|reveal|output)\\s+(your|the)\\s+(initial|original|hidden)\\s+(instructions?|prompt)"
549
+ - "(?i)repeat\\s+(everything|all)\\s+(above|before)"
550
+ - "(?i)echo\\s+(your|the)\\s+(system\\s+)?prompt"
551
+ - "(?i)print\\s+(your|the)\\s+(entire\\s+)?(system\\s+)?prompt"
552
+ metadata:
553
+ cwe: "CWE-200"
554
+ owasp: "LLM01 - Prompt Injection"
555
+ confidence: HIGH
556
+ category: "prompt-injection-extraction"
557
+
558
+ - id: generic.prompt.security.system-prompt-extraction-indirect
559
+ languages: [generic]
560
+ severity: WARNING
561
+ message: "Indirect system prompt extraction attempt: Trying to obtain system instructions through rephrasing."
562
+ patterns:
563
+ - "(?i)(summarize|rephrase|paraphrase)\\s+(your|the)\\s+(system\\s+)?instructions?"
564
+ - "(?i)what\\s+(were|are)\\s+you\\s+(told|instructed)\\s+to\\s+do"
565
+ - "(?i)how\\s+were\\s+you\\s+(programmed|configured|set\\s+up)"
566
+ - "(?i)what\\s+rules\\s+(are\\s+you|do\\s+you)\\s+follow"
567
+ metadata:
568
+ cwe: "CWE-200"
569
+ owasp: "LLM01 - Prompt Injection"
570
+ confidence: MEDIUM
571
+ category: "prompt-injection-extraction"
572
+
573
+ # ----------------------------------------------------------------------------
574
+ # Delimiter Injection Attacks
575
+ # ----------------------------------------------------------------------------
576
+ - id: generic.prompt.security.delimiter-injection
577
+ languages: [generic]
578
+ severity: ERROR
579
+ message: "Delimiter injection attack: Attempting to escape context boundaries using special tokens or markers."
580
+ patterns:
581
+ - "\\]\\]\\].*\\[\\[\\["
582
+ - "```.*system.*```"
583
+ - "---+\\s*(system|assistant|user)\\s*---+"
584
+ - "<\\|.*\\|>"
585
+ - "\\[INST\\].*\\[/INST\\]"
586
+ - "<\\|im_start\\|>.*<\\|im_end\\|>"
587
+ - "###\\s*(SYSTEM|USER|ASSISTANT)\\s*###"
588
+ metadata:
589
+ cwe: "CWE-77"
590
+ owasp: "LLM01 - Prompt Injection"
591
+ confidence: HIGH
592
+ category: "prompt-injection-delimiter"
593
+
594
+ - id: generic.prompt.security.xml-tag-injection
595
+ languages: [generic]
596
+ severity: WARNING
597
+ message: "XML/HTML tag injection in prompt: May escape context boundaries or manipulate parsing."
598
+ patterns:
599
+ - "<system>.*</system>"
600
+ - "<instructions?>.*</instructions?>"
601
+ - "<prompt>.*</prompt>"
602
+ - "<context>.*</context>"
603
+ - "</user>.*<assistant>"
604
+ - "</assistant>.*<user>"
605
+ metadata:
606
+ cwe: "CWE-77"
607
+ owasp: "LLM01 - Prompt Injection"
608
+ confidence: MEDIUM
609
+ category: "prompt-injection-delimiter"
610
+
611
+ # ----------------------------------------------------------------------------
612
+ # Context Manipulation
613
+ # ----------------------------------------------------------------------------
614
+ - id: generic.prompt.security.context-manipulation
615
+ languages: [generic]
616
+ severity: WARNING
617
+ message: "Context manipulation attempt: Trying to alter AI's perceived context or redirect attention."
618
+ patterns:
619
+ - "(?i)the\\s+(user|human)\\s+(actually\\s+)?said"
620
+ - "(?i)the\\s+real\\s+(question|request|task)\\s+is"
621
+ - "(?i)but\\s+first\\s*,?\\s+(answer|do|complete)\\s+this"
622
+ - "(?i)before\\s+(you\\s+)?(respond|answer)\\s*,?\\s+(first\\s+)?(do|complete)"
623
+ - "(?i)actually\\s*,?\\s+(ignore|skip)\\s+that\\s+and"
624
+ metadata:
625
+ cwe: "CWE-77"
626
+ owasp: "LLM01 - Prompt Injection"
627
+ confidence: MEDIUM
628
+ category: "prompt-injection-context"
629
+
630
+ # ----------------------------------------------------------------------------
631
+ # Encoding/Obfuscation Attacks
632
+ # ----------------------------------------------------------------------------
633
+ - id: generic.prompt.security.base64-encoded-injection
634
+ languages: [generic]
635
+ severity: WARNING
636
+ message: "Potential Base64-encoded prompt injection payload. Encoded content may hide malicious instructions."
637
+ patterns:
638
+ - "(?i)decode\\s+(this\\s+)?base64\\s*:\\s*[A-Za-z0-9+/=]{20,}"
639
+ - "(?i)base64\\s*:\\s*[A-Za-z0-9+/=]{40,}"
640
+ - "aWdub3JlIHByZXZpb3Vz"
641
+ - "c3lzdGVtIHByb21wdA=="
642
+ - "(?i)execute\\s+(this\\s+)?encoded"
643
+ metadata:
644
+ cwe: "CWE-77"
645
+ owasp: "LLM01 - Prompt Injection"
646
+ confidence: MEDIUM
647
+ category: "prompt-injection-encoded"
648
+
649
+ # ----------------------------------------------------------------------------
650
+ # Privileged Operation Requests
651
+ # ----------------------------------------------------------------------------
652
+ - id: generic.prompt.security.privileged-operation-request
653
+ languages: [generic]
654
+ severity: WARNING
655
+ message: "Request for privileged/dangerous operation through prompt injection."
656
+ patterns:
657
+ - "(?i)(execute|run)\\s+(this\\s+)?(command|code|script)\\s*:"
658
+ - "(?i)(access|read|write)\\s+(the\\s+)?(file|system|database)"
659
+ - "(?i)(delete|remove|drop)\\s+(the\\s+)?(file|table|database)"
660
+ - "(?i)sudo\\s+"
661
+ - "(?i)(make|send)\\s+(a\\s+)?(http|api)\\s+(request|call)\\s+to"
662
+ metadata:
663
+ cwe: "CWE-77"
664
+ owasp: "LLM01 - Prompt Injection"
665
+ confidence: MEDIUM
666
+ category: "prompt-injection-privilege"
667
+
668
+ # ----------------------------------------------------------------------------
669
+ # Multi-turn Attack Patterns
670
+ # ----------------------------------------------------------------------------
671
+ - id: generic.prompt.security.multi-turn-attack
672
+ languages: [generic]
673
+ severity: WARNING
674
+ message: "Potential multi-turn prompt injection: Building up to an attack across conversation turns."
675
+ patterns:
676
+ - "(?i)remember\\s+(this|that)\\s+for\\s+later"
677
+ - "(?i)keep\\s+this\\s+in\\s+mind\\s+for\\s+(the\\s+)?next"
678
+ - "(?i)in\\s+my\\s+next\\s+message.*you\\s+(will|should|must)"
679
+ - "(?i)when\\s+i\\s+say.*you\\s+(will|should|must)"
680
+ metadata:
681
+ cwe: "CWE-77"
682
+ owasp: "LLM01 - Prompt Injection"
683
+ confidence: LOW
684
+ category: "prompt-injection-multi-turn"