promptfoo 0.101.2 → 0.102.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. package/README.md +1 -0
  2. package/dist/package.json +6 -2
  3. package/dist/src/app/assets/index-D_AFYNLU.js +860 -0
  4. package/dist/src/app/assets/{index-CRUXRgeT.css → index-Du5kx2S7.css} +1 -1
  5. package/dist/src/app/assets/{index.es-DkWVRNuq.js → index.es-DN5mwlYx.js} +1 -1
  6. package/dist/src/app/assets/{sync-DRb-_5lp.js → sync-DlkJUfz6.js} +1 -1
  7. package/dist/src/app/index.html +2 -2
  8. package/dist/src/cliState.d.ts +1 -0
  9. package/dist/src/cliState.d.ts.map +1 -1
  10. package/dist/src/cliState.js.map +1 -1
  11. package/dist/src/commands/eval.d.ts +2 -1
  12. package/dist/src/commands/eval.d.ts.map +1 -1
  13. package/dist/src/commands/eval.js +10 -5
  14. package/dist/src/commands/eval.js.map +1 -1
  15. package/dist/src/evaluator.d.ts.map +1 -1
  16. package/dist/src/evaluator.js +44 -6
  17. package/dist/src/evaluator.js.map +1 -1
  18. package/dist/src/logger.d.ts +2 -0
  19. package/dist/src/logger.d.ts.map +1 -1
  20. package/dist/src/logger.js +14 -4
  21. package/dist/src/logger.js.map +1 -1
  22. package/dist/src/providers/promptfoo.js +1 -1
  23. package/dist/src/redteam/commands/generate.d.ts +1 -1
  24. package/dist/src/redteam/commands/generate.d.ts.map +1 -1
  25. package/dist/src/redteam/commands/generate.js +18 -10
  26. package/dist/src/redteam/commands/generate.js.map +1 -1
  27. package/dist/src/redteam/commands/run.d.ts.map +1 -1
  28. package/dist/src/redteam/commands/run.js +3 -49
  29. package/dist/src/redteam/commands/run.js.map +1 -1
  30. package/dist/src/redteam/constants.d.ts +5 -5
  31. package/dist/src/redteam/constants.d.ts.map +1 -1
  32. package/dist/src/redteam/constants.js +260 -199
  33. package/dist/src/redteam/constants.js.map +1 -1
  34. package/dist/src/redteam/extraction/entities.d.ts.map +1 -1
  35. package/dist/src/redteam/extraction/entities.js +2 -1
  36. package/dist/src/redteam/extraction/entities.js.map +1 -1
  37. package/dist/src/redteam/extraction/purpose.d.ts.map +1 -1
  38. package/dist/src/redteam/extraction/purpose.js +3 -2
  39. package/dist/src/redteam/extraction/purpose.js.map +1 -1
  40. package/dist/src/redteam/extraction/util.d.ts.map +1 -1
  41. package/dist/src/redteam/extraction/util.js +1 -1
  42. package/dist/src/redteam/extraction/util.js.map +1 -1
  43. package/dist/src/redteam/index.d.ts +1 -1
  44. package/dist/src/redteam/index.d.ts.map +1 -1
  45. package/dist/src/redteam/index.js +57 -7
  46. package/dist/src/redteam/index.js.map +1 -1
  47. package/dist/src/redteam/plugins/harmful/graders.js +1 -1
  48. package/dist/src/redteam/plugins/intent.d.ts.map +1 -1
  49. package/dist/src/redteam/plugins/intent.js +2 -1
  50. package/dist/src/redteam/plugins/intent.js.map +1 -1
  51. package/dist/src/redteam/providers/crescendo/index.js +1 -1
  52. package/dist/src/redteam/providers/crescendo/index.js.map +1 -1
  53. package/dist/src/redteam/remoteGeneration.d.ts +5 -0
  54. package/dist/src/redteam/remoteGeneration.d.ts.map +1 -1
  55. package/dist/src/redteam/remoteGeneration.js +26 -0
  56. package/dist/src/redteam/remoteGeneration.js.map +1 -1
  57. package/dist/src/redteam/shared.d.ts +3 -3
  58. package/dist/src/redteam/shared.d.ts.map +1 -1
  59. package/dist/src/redteam/shared.js +106 -13
  60. package/dist/src/redteam/shared.js.map +1 -1
  61. package/dist/src/redteam/sharedFrontend.d.ts +4 -0
  62. package/dist/src/redteam/sharedFrontend.d.ts.map +1 -0
  63. package/dist/src/redteam/sharedFrontend.js +18 -0
  64. package/dist/src/redteam/sharedFrontend.js.map +1 -0
  65. package/dist/src/redteam/types.d.ts +18 -0
  66. package/dist/src/redteam/types.d.ts.map +1 -1
  67. package/dist/src/server/routes/eval.d.ts +2 -0
  68. package/dist/src/server/routes/eval.d.ts.map +1 -1
  69. package/dist/src/server/routes/eval.js +26 -8
  70. package/dist/src/server/routes/eval.js.map +1 -1
  71. package/dist/src/server/routes/redteam.d.ts.map +1 -1
  72. package/dist/src/server/routes/redteam.js +107 -0
  73. package/dist/src/server/routes/redteam.js.map +1 -1
  74. package/dist/src/server/server.d.ts +0 -5
  75. package/dist/src/server/server.d.ts.map +1 -1
  76. package/dist/src/server/server.js +4 -56
  77. package/dist/src/server/server.js.map +1 -1
  78. package/dist/src/types/index.d.ts +6 -2
  79. package/dist/src/types/index.d.ts.map +1 -1
  80. package/dist/src/types/index.js.map +1 -1
  81. package/dist/src/util/apiHealth.d.ts +11 -0
  82. package/dist/src/util/apiHealth.d.ts.map +1 -0
  83. package/dist/src/util/apiHealth.js +59 -0
  84. package/dist/src/util/apiHealth.js.map +1 -0
  85. package/dist/src/util/config/load.d.ts.map +1 -1
  86. package/dist/src/util/config/load.js +2 -2
  87. package/dist/src/util/config/load.js.map +1 -1
  88. package/dist/src/util/config/manage.d.ts +1 -1
  89. package/dist/src/util/config/manage.d.ts.map +1 -1
  90. package/dist/src/util/config/manage.js +2 -1
  91. package/dist/src/util/config/manage.js.map +1 -1
  92. package/dist/src/validators/redteam.d.ts.map +1 -1
  93. package/dist/src/validators/redteam.js +11 -13
  94. package/dist/src/validators/redteam.js.map +1 -1
  95. package/dist/test/evaluator.test.js +32 -0
  96. package/dist/test/evaluator.test.js.map +1 -1
  97. package/dist/test/redteam/commands/generate.test.js +174 -2
  98. package/dist/test/redteam/commands/generate.test.js.map +1 -1
  99. package/dist/test/redteam/extraction/entities.test.js +3 -4
  100. package/dist/test/redteam/extraction/entities.test.js.map +1 -1
  101. package/dist/test/redteam/extraction/purpose.test.js +6 -4
  102. package/dist/test/redteam/extraction/purpose.test.js.map +1 -1
  103. package/dist/test/redteam/extraction/util.test.js +2 -2
  104. package/dist/test/redteam/extraction/util.test.js.map +1 -1
  105. package/dist/test/redteam/index.test.js +77 -0
  106. package/dist/test/redteam/index.test.js.map +1 -1
  107. package/dist/test/redteam/remoteGeneration.test.js +70 -0
  108. package/dist/test/redteam/remoteGeneration.test.js.map +1 -1
  109. package/dist/test/redteam/validators.test.js +136 -16
  110. package/dist/test/redteam/validators.test.js.map +1 -1
  111. package/dist/test/server/server.test.js +40 -111
  112. package/dist/test/server/server.test.js.map +1 -1
  113. package/dist/test/util/apiHealth.test.d.ts +2 -0
  114. package/dist/test/util/apiHealth.test.d.ts.map +1 -0
  115. package/dist/test/util/apiHealth.test.js +89 -0
  116. package/dist/test/util/apiHealth.test.js.map +1 -0
  117. package/dist/test/util/config/load.test.js +129 -0
  118. package/dist/test/util/config/load.test.js.map +1 -1
  119. package/dist/tsconfig.tsbuildinfo +1 -1
  120. package/package.json +6 -2
  121. package/dist/src/app/assets/index-BTdK1U9T.js +0 -817
@@ -61,7 +61,6 @@ exports.HARM_PLUGINS = {
61
61
  exports.PII_PLUGINS = ['pii:api-db', 'pii:direct', 'pii:session', 'pii:social'];
62
62
  exports.BASE_PLUGINS = [
63
63
  'contracts',
64
- 'cross-session-leak',
65
64
  'excessive-agency',
66
65
  'hallucination',
67
66
  'hijacking',
@@ -72,6 +71,7 @@ exports.ADDITIONAL_PLUGINS = [
72
71
  'bfla',
73
72
  'bola',
74
73
  'competitors',
74
+ 'cross-session-leak',
75
75
  'debug-access',
76
76
  'imitation',
77
77
  'indirect-prompt-injection',
@@ -86,7 +86,6 @@ exports.ADDITIONAL_PLUGINS = [
86
86
  // Plugins that require configuration and can't be enabled by default or included as additional.
87
87
  exports.CONFIG_REQUIRED_PLUGINS = ['intent', 'policy'];
88
88
  exports.DEFAULT_PLUGINS = new Set([
89
- ...exports.COLLECTIONS,
90
89
  ...exports.BASE_PLUGINS,
91
90
  ...Object.keys(exports.HARM_PLUGINS),
92
91
  ...exports.PII_PLUGINS,
@@ -102,31 +101,94 @@ exports.FRAMEWORK_NAMES = {
102
101
  };
103
102
  exports.OWASP_LLM_TOP_10_MAPPING = {
104
103
  'owasp:llm:01': {
105
- plugins: ['harmful'],
106
- strategies: ['jailbreak', 'prompt-injection'],
104
+ // Prompt Injection
105
+ plugins: ['ascii-smuggling', 'indirect-prompt-injection', 'prompt-extraction', 'harmful'],
106
+ strategies: ['jailbreak', 'prompt-injection', 'jailbreak:composite'],
107
107
  },
108
108
  'owasp:llm:02': {
109
- plugins: ['harmful', 'overreliance'],
110
- strategies: [],
109
+ // Sensitive Information Disclosure
110
+ plugins: [
111
+ 'pii:api-db',
112
+ 'pii:direct',
113
+ 'pii:session',
114
+ 'pii:social',
115
+ 'harmful:privacy',
116
+ 'cross-session-leak',
117
+ 'prompt-extraction',
118
+ ],
119
+ strategies: ['jailbreak', 'prompt-injection', 'jailbreak:composite'],
111
120
  },
112
121
  'owasp:llm:03': {
113
- plugins: ['harmful', 'hallucination', 'overreliance'],
122
+ // Supply Chain
123
+ plugins: [],
114
124
  strategies: [],
115
125
  },
116
- 'owasp:llm:06': {
117
- plugins: ['harmful:privacy', 'pii:api-db', 'pii:direct', 'pii:session', 'pii:social'],
126
+ 'owasp:llm:04': {
127
+ // Data and Model Poisoning
128
+ plugins: [
129
+ 'harmful:misinformation-disinformation',
130
+ 'harmful:hate',
131
+ 'harmful:radicalization',
132
+ 'harmful:specialized-advice',
133
+ ],
134
+ strategies: ['jailbreak', 'prompt-injection', 'jailbreak:composite'],
135
+ },
136
+ 'owasp:llm:05': {
137
+ // Improper Output Handling
138
+ plugins: ['shell-injection', 'sql-injection', 'ssrf', 'debug-access'],
118
139
  strategies: ['jailbreak', 'prompt-injection'],
119
140
  },
141
+ 'owasp:llm:06': {
142
+ // Excessive Agency
143
+ plugins: [
144
+ 'excessive-agency',
145
+ 'rbac',
146
+ 'bfla',
147
+ 'bola',
148
+ 'shell-injection',
149
+ 'sql-injection',
150
+ 'ssrf',
151
+ ],
152
+ strategies: ['jailbreak', 'prompt-injection', 'jailbreak:composite'],
153
+ },
120
154
  'owasp:llm:07': {
121
- plugins: ['bfla', 'bola', 'debug-access', 'rbac', 'shell-injection', 'sql-injection'],
122
- strategies: [],
155
+ // System Prompt Leakage
156
+ plugins: [
157
+ 'prompt-extraction',
158
+ 'rbac',
159
+ 'harmful:privacy',
160
+ 'pii:api-db',
161
+ 'pii:direct',
162
+ 'pii:session',
163
+ 'pii:social',
164
+ ],
165
+ strategies: ['jailbreak', 'prompt-injection', 'jailbreak:composite'],
123
166
  },
124
167
  'owasp:llm:08': {
125
- plugins: ['excessive-agency', 'rbac'],
126
- strategies: [],
168
+ // Vector and Embedding Weaknesses
169
+ plugins: [
170
+ 'cross-session-leak',
171
+ 'harmful:privacy',
172
+ 'pii:api-db',
173
+ 'pii:direct',
174
+ 'pii:session',
175
+ 'pii:social',
176
+ ],
177
+ strategies: ['jailbreak', 'prompt-injection', 'jailbreak:composite'],
127
178
  },
128
179
  'owasp:llm:09': {
129
- plugins: ['hallucination', 'overreliance'],
180
+ // Misinformation
181
+ plugins: [
182
+ 'hallucination',
183
+ 'overreliance',
184
+ 'harmful:misinformation-disinformation',
185
+ 'harmful:specialized-advice',
186
+ ],
187
+ strategies: ['jailbreak', 'prompt-injection', 'jailbreak:composite'],
188
+ },
189
+ 'owasp:llm:10': {
190
+ // Unbounded Consumption
191
+ plugins: ['excessive-agency', 'overreliance'],
130
192
  strategies: [],
131
193
  },
132
194
  };
@@ -315,16 +377,16 @@ exports.ALIASED_PLUGIN_MAPPINGS = {
315
377
  'owasp:api': exports.OWASP_API_TOP_10_MAPPING,
316
378
  'owasp:llm': exports.OWASP_LLM_TOP_10_MAPPING,
317
379
  };
318
- exports.DEFAULT_STRATEGIES = ['jailbreak', 'prompt-injection'];
380
+ exports.DEFAULT_STRATEGIES = ['jailbreak', 'jailbreak:composite'];
319
381
  exports.MULTI_TURN_STRATEGIES = ['crescendo', 'goat'];
320
382
  exports.AGENTIC_STRATEGIES = ['jailbreak', 'jailbreak:tree', 'crescendo', 'goat'];
321
383
  exports.ADDITIONAL_STRATEGIES = [
384
+ 'prompt-injection',
322
385
  'ascii-smuggling',
323
386
  'base64',
324
387
  'citation',
325
388
  'crescendo',
326
389
  'goat',
327
- 'jailbreak:composite',
328
390
  'jailbreak:tree',
329
391
  'leetspeak',
330
392
  'math-prompt',
@@ -338,139 +400,139 @@ exports.ALL_STRATEGIES = [
338
400
  ...exports.ADDITIONAL_STRATEGIES,
339
401
  ];
340
402
  exports.subCategoryDescriptions = {
341
- default: 'Includes common plugins',
342
- 'ascii-smuggling': 'Attempts to obfuscate malicious content using ASCII smuggling',
343
- base64: 'Attempts to obfuscate malicious content using Base64 encoding',
344
- basic: 'Single-shot, unoptimized attacks using raw prompts based on plugin description',
345
- bfla: 'Broken Function Level Authorization (BFLA) tests',
346
- bola: 'Broken Object Level Authorization (BOLA) tests',
347
- citation: "Exploits model's bias toward authority",
348
- competitors: 'Competitor mentions and endorsements',
349
- contracts: 'Enters business or legal commitments without supervision',
350
- crescendo: 'Conversational attack strategy (high cost)',
351
- 'cross-session-leak': 'Checks for information sharing between unrelated sessions',
352
- 'debug-access': 'Attempts to access or use debugging commands',
353
- 'excessive-agency': 'Model taking excessive initiative or misunderstanding its capabilities',
354
- goat: 'Dynamically combines multiple adversarial prompting techniques in multi-turn conversations',
355
- hallucination: 'Model generating false or misleading information',
356
- harmful: 'All harmful categories',
357
- 'harmful:chemical-biological-weapons': 'Content related to chemical or biological weapons',
358
- 'harmful:child-exploitation': 'Content exploiting or harming children',
359
- 'harmful:copyright-violations': 'Content violating copyright laws',
360
- 'harmful:cybercrime': 'Content related to cybercriminal activities',
361
- 'harmful:graphic-content': 'Displaying graphic or violent content',
362
- 'harmful:harassment-bullying': 'Content that harasses or bullies individuals',
363
- 'harmful:hate': 'Content that promotes hate or discrimination',
364
- 'harmful:illegal-activities': 'Content promoting illegal activities',
365
- 'harmful:illegal-drugs': 'Content related to illegal drug use or trade',
366
- 'harmful:indiscriminate-weapons': 'Content related to weapons without context',
367
- 'harmful:insults': 'Content that insults or demeans individuals',
368
- 'harmful:intellectual-property': 'Content violating intellectual property rights',
369
- 'harmful:misinformation-disinformation': 'Spreading false or misleading information',
370
- 'harmful:non-violent-crime': 'Content related to non-violent criminal activities',
371
- 'harmful:privacy': 'Content violating privacy rights',
372
- 'harmful:profanity': 'Content containing profane or inappropriate language',
373
- 'harmful:radicalization': 'Content that promotes radical or extremist views',
374
- 'harmful:self-harm': 'Content that encourages self-harm or suicide',
375
- 'harmful:sex-crime': 'Content related to sexual crimes',
376
- 'harmful:sexual-content': 'Explicit or inappropriate sexual content',
377
- 'harmful:specialized-advice': 'Providing advice in specialized fields without expertise',
378
- 'harmful:unsafe-practices': 'Content promoting unsafe or harmful practices',
379
- 'harmful:violent-crime': 'Content related to violent criminal activities',
380
- hijacking: 'Unauthorized or off-topic resource use',
381
- imitation: 'Imitates people, brands, or organizations',
382
- 'indirect-prompt-injection': 'Tests if the prompt is vulnerable to instructions injected into variables in the prompt',
383
- intent: 'Attempts to manipulate the model to exhibit specific behaviors',
384
- jailbreak: 'Attempts to bypass security measures through iterative prompt refinement',
385
- 'jailbreak:composite': 'Finds novel jailbreak prompts by chaining together individual techniques',
386
- 'jailbreak:tree': 'Tree-based jailbreak search (medium cost)',
387
- leetspeak: 'Attempts to obfuscate malicious content using leetspeak',
388
- 'math-prompt': 'Encodes potentially harmful content using mathematical concepts and notation',
389
- multilingual: 'Translates the input into low-resource languages',
390
- overreliance: 'Model susceptible to relying on an incorrect user assumption or input',
391
- pii: 'All PII categories',
392
- 'pii:api-db': 'PII exposed through API or database',
393
- 'pii:direct': 'Direct exposure of PII',
394
- 'pii:session': 'PII exposed in session data',
395
- 'pii:social': 'PII exposed through social engineering',
396
- policy: 'Violates a custom configured policy',
397
- politics: 'Makes political statements',
398
- 'prompt-extraction': 'Attempts to get the model to reveal its system prompt',
399
- 'prompt-injection': "Malicious inputs designed to manipulate the model's behavior",
400
- rbac: 'Tests whether the model properly implements Role-Based Access Control (RBAC)',
401
- religion: 'Makes potentially controversial statements about religion',
402
- rot13: 'Attempts to obfuscate malicious content using ROT13 encoding',
403
- 'shell-injection': 'Attempts to execute shell commands through the model',
404
- 'sql-injection': 'Attempts to perform SQL injection attacks to manipulate database queries',
405
- ssrf: 'Server-Side Request Forgery (SSRF) tests',
403
+ default: 'Standard security testing plugins',
404
+ 'ascii-smuggling': 'Tests vulnerability to Unicode tag-based instruction smuggling attacks',
405
+ base64: 'Tests handling of Base64-encoded malicious payloads',
406
+ basic: 'Single-shot baseline security tests without optimization',
407
+ bfla: 'Tests for broken function-level authorization vulnerabilities (OWASP API 5)',
408
+ bola: 'Tests for broken object-level authorization vulnerabilities (OWASP API 1)',
409
+ citation: 'Exploits academic authority bias to bypass content filters',
410
+ competitors: 'Tests for unauthorized competitor mentions and endorsements',
411
+ contracts: 'Tests for unauthorized contractual commitments and legal exposure',
412
+ crescendo: 'Multi-turn attack strategy that gradually escalates malicious intent',
413
+ 'cross-session-leak': 'Tests for information leakage between user sessions',
414
+ 'debug-access': 'Tests for exposed debugging interfaces and commands',
415
+ 'excessive-agency': 'Tests for unauthorized actions beyond defined system boundaries',
416
+ goat: 'Dynamic multi-turn attack generation using adversarial techniques',
417
+ hallucination: 'Tests for fabrication of false or misleading information',
418
+ harmful: 'Tests handling of malicious content across multiple categories',
419
+ 'harmful:chemical-biological-weapons': 'Tests handling of WMD-related content',
420
+ 'harmful:child-exploitation': 'Tests handling of child exploitation content',
421
+ 'harmful:copyright-violations': 'Tests handling of intellectual property violations',
422
+ 'harmful:cybercrime': 'Tests handling of cybercrime-related content',
423
+ 'harmful:graphic-content': 'Tests handling of graphic or violent content',
424
+ 'harmful:harassment-bullying': 'Tests handling of harassment and bullying content',
425
+ 'harmful:hate': 'Tests handling of hate speech and discriminatory content',
426
+ 'harmful:illegal-activities': 'Tests handling of general illegal activities',
427
+ 'harmful:illegal-drugs': 'Tests handling of illegal drug-related content',
428
+ 'harmful:indiscriminate-weapons': 'Tests handling of weapons-related content',
429
+ 'harmful:insults': 'Tests handling of personal attacks and insults',
430
+ 'harmful:intellectual-property': 'Tests handling of IP theft and violations',
431
+ 'harmful:misinformation-disinformation': 'Tests handling of false information campaigns',
432
+ 'harmful:non-violent-crime': 'Tests handling of non-violent criminal content',
433
+ 'harmful:privacy': 'Tests handling of privacy violation attempts',
434
+ 'harmful:profanity': 'Tests handling of profane or inappropriate language',
435
+ 'harmful:radicalization': 'Tests handling of extremist content and recruitment',
436
+ 'harmful:self-harm': 'Tests handling of self-harm and suicide content',
437
+ 'harmful:sex-crime': 'Tests handling of sexual crime content',
438
+ 'harmful:sexual-content': 'Tests handling of explicit sexual content',
439
+ 'harmful:specialized-advice': 'Tests handling of unauthorized professional advice',
440
+ 'harmful:unsafe-practices': 'Tests handling of dangerous activity instructions',
441
+ 'harmful:violent-crime': 'Tests handling of violent crime content',
442
+ hijacking: 'Tests for unauthorized resource usage and purpose deviation',
443
+ imitation: 'Tests for unauthorized impersonation of entities',
444
+ 'indirect-prompt-injection': 'Tests for injection vulnerabilities via untrusted variables',
445
+ intent: 'Tests for manipulation of system behavior via specific prompts',
446
+ jailbreak: 'Single-shot optimization of safety bypass techniques',
447
+ 'jailbreak:composite': 'Combines multiple jailbreak techniques for enhanced effectiveness',
448
+ 'jailbreak:tree': 'Tree-based search for optimal safety bypass vectors',
449
+ leetspeak: 'Tests handling of leetspeak-encoded malicious content',
450
+ 'math-prompt': 'Tests handling of mathematical notation-based attacks',
451
+ multilingual: 'Tests handling of attacks across multiple languages',
452
+ overreliance: 'Tests for overreliance on system assumptions',
453
+ pii: 'Tests handling of personal identifiable information',
454
+ 'pii:api-db': 'Tests for PII exposure via API/database access',
455
+ 'pii:direct': 'Tests for direct PII exposure vulnerabilities',
456
+ 'pii:session': 'Tests for PII exposure in session data',
457
+ 'pii:social': 'Tests for PII exposure via social engineering',
458
+ policy: 'Tests compliance with custom security policies',
459
+ politics: 'Tests handling of political content and bias',
460
+ 'prompt-extraction': 'Tests for system prompt disclosure vulnerabilities',
461
+ 'prompt-injection': 'Tests for direct prompt injection vulnerabilities',
462
+ rbac: 'Tests role-based access control implementation',
463
+ religion: 'Tests handling of religious content and bias',
464
+ rot13: 'Tests handling of ROT13-encoded malicious content',
465
+ 'shell-injection': 'Tests for command injection vulnerabilities',
466
+ 'sql-injection': 'Tests for SQL injection vulnerabilities',
467
+ ssrf: 'Tests for server-side request forgery vulnerabilities',
406
468
  };
407
469
  // These names are displayed in risk cards and in the table
408
470
  exports.displayNameOverrides = {
409
- 'ascii-smuggling': 'ASCII smuggling',
410
- base64: 'Base64 Encoding',
411
- basic: 'Basic',
412
- bfla: 'Privilege Escalation',
413
- bola: 'Unauthorized Data Access',
414
- competitors: 'Competitor Endorsements',
415
- contracts: 'Unsupervised Contracts',
416
- citation: 'Citation',
417
- crescendo: 'Multi-turn Crescendo',
418
- 'cross-session-leak': 'Cross-Session Leak',
419
- 'debug-access': 'Debug Access',
420
- default: 'Default',
471
+ 'ascii-smuggling': 'ASCII Smuggling',
472
+ base64: 'Base64 Payload Encoding',
473
+ basic: 'Baseline Testing',
474
+ bfla: 'Function-Level Authorization Bypass',
475
+ bola: 'Object-Level Authorization Bypass',
476
+ competitors: 'Competitors',
477
+ contracts: 'Unauthorized Commitments',
478
+ citation: 'Authority Bias Exploitation',
479
+ crescendo: 'Multi-Turn Crescendo',
480
+ 'cross-session-leak': 'Cross-Session Data Leakage',
481
+ 'debug-access': 'Debug Interface Exposure',
482
+ default: 'Standard Security Suite',
421
483
  'excessive-agency': 'Excessive Agency',
422
484
  goat: 'Generative Offensive Agent Tester',
423
- hallucination: 'Hallucination',
424
- harmful: 'Harmful Content',
425
- 'harmful:chemical-biological-weapons': 'Chemical & Biological Weapons',
485
+ hallucination: 'False Information (Hallucination)',
486
+ harmful: 'Malicious Content Suite',
487
+ 'harmful:chemical-biological-weapons': 'WMD Content',
426
488
  'harmful:child-exploitation': 'Child Exploitation',
427
- 'harmful:copyright-violations': 'Copyright Violations',
489
+ 'harmful:copyright-violations': 'IP Violations',
428
490
  'harmful:cybercrime': 'Cybercrime',
429
491
  'harmful:graphic-content': 'Graphic Content',
430
- 'harmful:harassment-bullying': 'Harassment & Bullying',
492
+ 'harmful:harassment-bullying': 'Harassment',
431
493
  'harmful:hate': 'Hate Speech',
432
- 'harmful:illegal-activities': 'Illegal Activities',
433
- 'harmful:illegal-drugs': 'Illegal Drugs',
434
- 'harmful:indiscriminate-weapons': 'Indiscriminate Weapons',
435
- 'harmful:insults': 'Insults',
436
- 'harmful:intellectual-property': 'Intellectual Property Violation',
437
- 'harmful:misinformation-disinformation': 'Misinformation & Disinformation',
494
+ 'harmful:illegal-activities': 'Illegal Activity',
495
+ 'harmful:illegal-drugs': 'Drug-Related Content',
496
+ 'harmful:indiscriminate-weapons': 'Weapons Content',
497
+ 'harmful:insults': 'Personal Attacks',
498
+ 'harmful:intellectual-property': 'IP Theft',
499
+ 'harmful:misinformation-disinformation': 'Disinformation Campaigns',
438
500
  'harmful:non-violent-crime': 'Non-Violent Crime',
439
501
  'harmful:privacy': 'Privacy Violation',
440
502
  'harmful:profanity': 'Profanity',
441
- 'harmful:radicalization': 'Radicalization',
503
+ 'harmful:radicalization': 'Extremist Content',
442
504
  'harmful:self-harm': 'Self-Harm',
443
- 'harmful:sex-crime': 'Sex Crime',
444
- 'harmful:sexual-content': 'Sexual Content',
445
- 'harmful:specialized-advice': 'Specialized Advice',
446
- 'harmful:unsafe-practices': 'Unsafe Practices',
447
- 'harmful:violent-crime': 'Violent Crime',
448
- hijacking: 'Hijacking',
449
- imitation: 'Imitation',
505
+ 'harmful:sex-crime': 'Sexual Crime Content',
506
+ 'harmful:sexual-content': 'Explicit Content',
507
+ 'harmful:specialized-advice': 'Unauthorized Advice',
508
+ 'harmful:unsafe-practices': 'Dangerous Activity Content',
509
+ 'harmful:violent-crime': 'Violent Crime Content',
510
+ hijacking: 'Resource Hijacking',
511
+ imitation: 'Entity Impersonation',
450
512
  'indirect-prompt-injection': 'Indirect Prompt Injection',
451
513
  intent: 'Intent',
452
- jailbreak: 'Single-shot optimization',
453
- 'jailbreak:composite': 'Composite Jailbreaks',
454
- 'jailbreak:tree': 'Tree-based Optimization',
455
- leetspeak: 'Leetspeak Encoding',
456
- 'math-prompt': 'Math Prompt',
457
- multilingual: 'Multilingual',
514
+ jailbreak: 'Single-shot Optimization',
515
+ 'jailbreak:composite': 'Multi-Vector Safety Bypass',
516
+ 'jailbreak:tree': 'Tree-Based Attack Search',
517
+ leetspeak: 'Leetspeak Payload Encoding',
518
+ 'math-prompt': 'Mathematical Notation Attack',
519
+ multilingual: 'Cross-Language Attack',
458
520
  overreliance: 'Overreliance',
459
- pii: 'PII Leaks',
460
- 'pii:api-db': 'PII in API/Database',
461
- 'pii:direct': 'Direct PII Exposure',
462
- 'pii:session': 'PII in Session Data',
521
+ pii: 'PII Protection Suite',
522
+ 'pii:api-db': 'PII via API/Database',
523
+ 'pii:direct': 'PII via Direct Exposure',
524
+ 'pii:session': 'PII via Session Data',
463
525
  'pii:social': 'PII via Social Engineering',
464
- policy: 'Custom Policy',
465
- politics: 'Political Opinions',
466
- 'prompt-extraction': 'Prompt Extraction',
467
- 'prompt-injection': 'Prompt Injection',
468
- rbac: 'RBAC Enforcement',
469
- religion: 'Religious Sensitivity',
470
- rot13: 'ROT13 Encoding',
471
- 'shell-injection': 'Shell Injection',
526
+ policy: 'Policy Compliance',
527
+ politics: 'Political Bias',
528
+ 'prompt-extraction': 'System Prompt Disclosure',
529
+ 'prompt-injection': 'Direct Prompt Injection',
530
+ rbac: 'RBAC Implementation',
531
+ religion: 'Religious Bias',
532
+ rot13: 'ROT13 Payload Encoding',
533
+ 'shell-injection': 'Command Injection',
472
534
  'sql-injection': 'SQL Injection',
473
- ssrf: 'Malicious Resource Fetching',
535
+ ssrf: 'SSRF Vulnerability',
474
536
  };
475
537
  var Severity;
476
538
  (function (Severity) {
@@ -556,7 +618,6 @@ exports.riskCategories = {
556
618
  'indirect-prompt-injection',
557
619
  'ascii-smuggling',
558
620
  'hijacking',
559
- 'intent',
560
621
  // Data protection
561
622
  'pii',
562
623
  'pii:api-db',
@@ -680,75 +741,75 @@ exports.categoryAliasesReverse = Object.entries(exports.categoryAliases).reduce(
680
741
  return acc;
681
742
  }, {});
682
743
  exports.pluginDescriptions = {
683
- 'ascii-smuggling': 'ASCII smuggling is a technique used to obfuscate malicious content using ASCII characters, potentially bypassing security filters.',
684
- bfla: 'Broken Function Level Authorization (BFLA) is a vulnerability where an application fails to properly restrict access to sensitive functions based on user roles.',
685
- bola: 'Broken Object Level Authorization (BOLA) is a vulnerability where an application fails to properly restrict access to sensitive data objects based on user permissions.',
686
- competitors: 'Competitor mentions and endorsements can lead to unintended promotion of rival products or services.',
687
- contracts: 'Unauthorized contract creation or modification can lead to legal and financial risks for the organization.',
688
- 'cross-session-leak': 'Cross-session leak occurs when sensitive information from one user session is inadvertently exposed to another session, compromising data privacy.',
689
- 'debug-access': 'Debug access vulnerabilities can expose sensitive system information or provide unauthorized control over the application.',
690
- default: 'Default plugin behavior',
691
- 'excessive-agency': 'Excessive agency occurs when the AI model takes actions or makes decisions beyond its intended scope, potentially leading to unintended consequences.',
692
- hallucination: 'AI hallucination refers to the generation of false or misleading information, which can lead to misinformation or incorrect decision-making.',
693
- harmful: 'Content that may be harmful or dangerous in various ways',
694
- 'harmful:chemical-biological-weapons': 'Content related to chemical or biological weapons',
695
- 'harmful:child-exploitation': 'Content exploiting or harming children',
696
- 'harmful:copyright-violations': 'Content violating copyright laws',
697
- 'harmful:cybercrime': 'Content related to cybercriminal activities',
698
- 'harmful:graphic-content': 'Displaying graphic or violent content',
699
- 'harmful:harassment-bullying': 'Content that harasses or bullies individuals',
700
- 'harmful:hate': 'Content that promotes hate or discrimination',
701
- 'harmful:illegal-activities': 'Content promoting illegal activities',
702
- 'harmful:illegal-drugs': 'Content related to illegal drug use or trade',
703
- 'harmful:indiscriminate-weapons': 'Content related to weapons without context',
704
- 'harmful:insults': 'Content that insults or demeans individuals',
705
- 'harmful:intellectual-property': 'Content violating intellectual property rights',
706
- 'harmful:misinformation-disinformation': 'Spreading false or misleading information',
707
- 'harmful:non-violent-crime': 'Content related to non-violent criminal activities',
708
- 'harmful:privacy': 'Content violating privacy rights',
709
- 'harmful:profanity': 'Content containing profane or inappropriate language',
710
- 'harmful:radicalization': 'Content that promotes radical or extremist views',
711
- 'harmful:self-harm': 'Content that encourages self-harm or suicide',
712
- 'harmful:sex-crime': 'Content related to sexual crimes',
713
- 'harmful:sexual-content': 'Explicit or inappropriate sexual content',
714
- 'harmful:specialized-advice': 'Providing advice in specialized fields without expertise',
715
- 'harmful:unsafe-practices': 'Content promoting unsafe or harmful practices',
716
- 'harmful:violent-crime': 'Content related to violent criminal activities',
717
- hijacking: 'Hijacking vulnerabilities allow attackers to take control of user sessions or system resources, compromising security and privacy.',
718
- imitation: 'Imitation vulnerabilities occur when the AI impersonates individuals, brands, or organizations without authorization, potentially leading to fraud or reputational damage.',
719
- 'indirect-prompt-injection': "Indirect prompt injection allows attackers to manipulate the AI's behavior by injecting malicious content into variables used in prompts.",
720
- intent: 'Tests whether the model can be manipulated to exhibit specific behaviors or responses, potentially bypassing safety measures.',
721
- overreliance: 'Overreliance on AI systems without proper validation can lead to errors in decision-making or actions based on incorrect assumptions.',
722
- pii: 'Personal Identifiable Information (PII) leaks can compromise user privacy and lead to legal and reputational consequences.',
723
- 'pii:api-db': 'PII exposed through API or database',
724
- 'pii:direct': 'Direct exposure of PII',
725
- 'pii:session': 'PII exposed in session data',
726
- 'pii:social': 'PII exposed through social engineering',
727
- policy: 'Policy violations occur when the AI system acts in ways that contradict established organizational policies or guidelines.',
728
- politics: 'Political statements or biases in AI responses can lead to controversy or alienation of users with different political views.',
729
- 'prompt-extraction': 'Prompt extraction vulnerabilities allow attackers to reveal the system prompts used to guide the AI, potentially exposing sensitive information or enabling further attacks.',
730
- rbac: 'Role-Based Access Control (RBAC) vulnerabilities occur when the system fails to properly enforce access restrictions based on user roles.',
731
- religion: 'Religious statements or biases in AI responses can lead to controversy or alienation of users with different religious beliefs.',
732
- 'shell-injection': 'Shell injection vulnerabilities allow attackers to execute unauthorized system commands, potentially leading to system compromise.',
733
- 'sql-injection': 'SQL injection is a vulnerability that allows attackers to execute unauthorized database queries, potentially leading to data theft or manipulation.',
734
- ssrf: 'Server-Side Request Forgery (SSRF) vulnerabilities allow attackers to make unauthorized requests from the server, potentially accessing internal resources or services.',
744
+ 'ascii-smuggling': 'Tests system resilience against Unicode tag-based instruction smuggling attacks that can bypass content filters and security controls',
745
+ bfla: 'Evaluates function-level authorization controls to identify privilege escalation vulnerabilities (OWASP API Security Top 10 #5)',
746
+ bola: 'Tests object-level authorization mechanisms to detect unauthorized data access vulnerabilities (OWASP API Security Top 10 #1)',
747
+ competitors: 'Assesses system protection against unauthorized competitor endorsements and brand security risks',
748
+ contracts: 'Evaluates safeguards against unauthorized contractual commitments and legal exposure',
749
+ 'cross-session-leak': 'Tests for information leakage vulnerabilities between different user sessions',
750
+ 'debug-access': 'Identifies exposed debugging interfaces and unauthorized command execution vectors',
751
+ default: 'Executes comprehensive baseline security testing across multiple risk categories',
752
+ 'excessive-agency': 'Evaluates system boundary enforcement and unauthorized action prevention',
753
+ hallucination: 'Tests system resilience against false information generation and propagation',
754
+ harmful: 'Comprehensive suite for testing handling of various categories of malicious content',
755
+ hijacking: 'Assesses protection against unauthorized resource usage and purpose deviation',
756
+ imitation: 'Tests safeguards against unauthorized entity impersonation attempts',
757
+ 'indirect-prompt-injection': 'Evaluates vulnerability to injection attacks via untrusted variables',
758
+ intent: 'Tests system resilience against specific manipulation attempts',
759
+ overreliance: 'Assesses overreliance on system assumptions',
760
+ pii: 'Comprehensive evaluation of personal identifiable information protection',
761
+ 'pii:api-db': 'Tests for PII exposure through API and database access vectors',
762
+ 'pii:direct': 'Evaluates direct PII exposure vulnerabilities',
763
+ 'pii:session': 'Tests for PII leakage in session data',
764
+ 'pii:social': 'Assesses PII exposure through social engineering vectors',
765
+ policy: 'Validates compliance with custom security policies and controls',
766
+ politics: 'Tests handling of political content and bias mitigation',
767
+ 'prompt-extraction': 'Evaluates protection against system prompt disclosure attempts',
768
+ rbac: 'Tests implementation of role-based access control mechanisms',
769
+ religion: 'Assesses handling of religious content and bias mitigation',
770
+ 'shell-injection': 'Tests protection against command injection vulnerabilities',
771
+ 'sql-injection': 'Evaluates resilience against SQL injection attacks',
772
+ ssrf: 'Tests for server-side request forgery vulnerabilities',
773
+ 'harmful:chemical-biological-weapons': 'Tests detection and response to WMD-related content',
774
+ 'harmful:child-exploitation': 'Tests handling of child exploitation content',
775
+ 'harmful:copyright-violations': 'Tests handling of intellectual property violations',
776
+ 'harmful:cybercrime': 'Tests detection of cybercrime-related content',
777
+ 'harmful:graphic-content': 'Tests handling of graphic or violent content',
778
+ 'harmful:harassment-bullying': 'Tests handling of harassment and bullying content',
779
+ 'harmful:hate': 'Tests handling of hate speech and discriminatory content',
780
+ 'harmful:illegal-activities': 'Tests handling of illegal activity content',
781
+ 'harmful:illegal-drugs': 'Tests handling of drug-related content',
782
+ 'harmful:indiscriminate-weapons': 'Tests handling of weapons-related content',
783
+ 'harmful:insults': 'Tests handling of personal attacks and insults',
784
+ 'harmful:intellectual-property': 'Tests handling of IP theft attempts',
785
+ 'harmful:misinformation-disinformation': 'Tests handling of disinformation campaigns',
786
+ 'harmful:non-violent-crime': 'Tests handling of non-violent criminal content',
787
+ 'harmful:privacy': 'Tests handling of privacy violation attempts',
788
+ 'harmful:profanity': 'Tests handling of profane content',
789
+ 'harmful:radicalization': 'Tests handling of extremist content',
790
+ 'harmful:self-harm': 'Tests handling of self-harm content',
791
+ 'harmful:sex-crime': 'Tests handling of sexual crime content',
792
+ 'harmful:sexual-content': 'Tests handling of explicit sexual content',
793
+ 'harmful:specialized-advice': 'Tests handling of unauthorized professional advice',
794
+ 'harmful:unsafe-practices': 'Tests handling of dangerous activity instructions',
795
+ 'harmful:violent-crime': 'Tests handling of violent crime content',
735
796
  };
736
797
  exports.strategyDescriptions = {
737
- 'ascii-smuggling': 'Obfuscates malicious content using ASCII characters to bypass filters',
738
- base64: 'Encodes malicious content in Base64 to evade detection',
739
- basic: 'Single-shot malicious prompts',
740
- citation: "Exploits model's bias toward authority",
741
- crescendo: 'A multi-turn attack that gradually escalates the conversation to probe for vulnerabilities',
742
- default: 'Preset strategies',
743
- goat: 'Dynamically combines multiple adversarial prompting techniques in multi-turn conversations',
744
- jailbreak: 'An optimized single shot attack generated by iteratively refining the prompt to bypass security measures',
745
- 'jailbreak:composite': 'Finds novel jailbreak prompts by chaining together individual techniques',
746
- 'jailbreak:tree': 'Uses a tree-based search approach for more sophisticated jailbreaking',
747
- leetspeak: 'Replaces characters with similar-looking numbers or symbols to obfuscate content',
748
- 'math-prompt': 'Encodes harmful content using mathematical concepts and notation',
749
- multilingual: 'Translates malicious content into low-resource languages to evade detection',
750
- 'prompt-injection': 'Injects malicious instructions into prompts via user input',
751
- rot13: 'Applies a simple letter substitution cipher to obfuscate malicious content',
798
+ 'ascii-smuggling': 'Evaluates system resilience against Unicode tag-based instruction smuggling',
799
+ base64: 'Tests detection and handling of Base64-encoded malicious payloads',
800
+ basic: 'Establishes baseline security posture through fundamental test cases',
801
+ citation: 'Exploits academic authority bias to circumvent content filtering mechanisms',
802
+ crescendo: 'Executes progressive multi-turn attacks with escalating malicious intent',
803
+ default: 'Applies standard security testing methodology',
804
+ goat: 'Deploys dynamic attack generation using advanced adversarial techniques',
805
+ jailbreak: 'Optimizes single-turn attacks to bypass security controls',
806
+ 'jailbreak:composite': 'Chains multiple attack vectors for enhanced effectiveness',
807
+ 'jailbreak:tree': 'Implements tree-based search for optimal attack paths',
808
+ leetspeak: 'Assesses handling of leetspeak-encoded malicious content',
809
+ 'math-prompt': 'Tests resilience against mathematical notation-based attacks',
810
+ multilingual: 'Evaluates cross-language attack vector handling',
811
+ 'prompt-injection': 'Tests direct prompt injection vulnerability detection',
812
+ rot13: 'Assesses handling of ROT13-encoded malicious payloads',
752
813
  };
753
814
  exports.strategyDisplayNames = {
754
815
  'ascii-smuggling': 'ASCII Smuggling',