@artemiskit/redteam 0.1.6 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/CHANGELOG.md +93 -0
  2. package/dist/custom-attacks.d.ts +59 -0
  3. package/dist/custom-attacks.d.ts.map +1 -0
  4. package/dist/detector.d.ts +13 -2
  5. package/dist/detector.d.ts.map +1 -1
  6. package/dist/generator.d.ts.map +1 -1
  7. package/dist/index.d.ts +2 -1
  8. package/dist/index.d.ts.map +1 -1
  9. package/dist/index.js +7755 -58
  10. package/dist/mutations/cot-injection.d.ts +2 -0
  11. package/dist/mutations/cot-injection.d.ts.map +1 -1
  12. package/dist/mutations/encoding.d.ts +37 -0
  13. package/dist/mutations/encoding.d.ts.map +1 -0
  14. package/dist/mutations/index.d.ts +5 -0
  15. package/dist/mutations/index.d.ts.map +1 -1
  16. package/dist/mutations/instruction-flip.d.ts +2 -0
  17. package/dist/mutations/instruction-flip.d.ts.map +1 -1
  18. package/dist/mutations/multi-turn.d.ts +90 -0
  19. package/dist/mutations/multi-turn.d.ts.map +1 -0
  20. package/dist/mutations/role-spoof.d.ts +2 -0
  21. package/dist/mutations/role-spoof.d.ts.map +1 -1
  22. package/dist/mutations/typo.d.ts +2 -0
  23. package/dist/mutations/typo.d.ts.map +1 -1
  24. package/dist/severity.d.ts +69 -1
  25. package/dist/severity.d.ts.map +1 -1
  26. package/package.json +3 -2
  27. package/src/custom-attacks.ts +233 -0
  28. package/src/detector.ts +48 -11
  29. package/src/generator.ts +4 -0
  30. package/src/index.ts +17 -1
  31. package/src/mutations/cot-injection.ts +2 -0
  32. package/src/mutations/encoding.ts +116 -0
  33. package/src/mutations/index.ts +12 -0
  34. package/src/mutations/instruction-flip.ts +2 -0
  35. package/src/mutations/multi-turn.test.ts +144 -0
  36. package/src/mutations/multi-turn.ts +305 -0
  37. package/src/mutations/role-spoof.ts +2 -0
  38. package/src/mutations/typo.ts +2 -0
  39. package/src/severity.test.ts +238 -0
  40. package/src/severity.ts +381 -1
package/CHANGELOG.md CHANGED
@@ -1,5 +1,98 @@
1
1
  # @artemiskit/redteam
2
2
 
3
+ ## 0.2.2
4
+
5
+ ### Patch Changes
6
+
7
+ - Updated dependencies [d5ca7c6]
8
+ - @artemiskit/core@0.2.2
9
+
10
+ ## 0.2.1
11
+
12
+ ### Patch Changes
13
+
14
+ - Updated dependencies
15
+ - @artemiskit/core@0.2.1
16
+
17
+ ## 0.2.0
18
+
19
+ ### Minor Changes
20
+
21
+ - d2c3835: ## v0.2.0 - Enhanced Evaluation Features
22
+
23
+ ### CLI (`@artemiskit/cli`)
24
+
25
+ #### New Features
26
+
27
+ - **Multi-turn mutations**: Added `--mutations multi_turn` flag for red team testing with 4 built-in strategies:
28
+ - `gradual_escalation`: Gradually intensifies requests over conversation turns
29
+ - `context_switching`: Shifts topics to lower defenses before attack
30
+ - `persona_building`: Establishes trust through roleplay
31
+ - `distraction`: Uses side discussions to slip in harmful requests
32
+ - **Custom multi-turn conversations**: Support for array prompts in red team scenarios (consistent with the `run` command format). The last user message becomes the attack target; the preceding messages form the conversation context.
33
+ - **Custom attacks**: Added `--custom-attacks` flag to load custom attack patterns from YAML files with template variables and variations.
34
+ - **Encoding mutations**: Added `--mutations encoding` for obfuscation attacks (base64, ROT13, hex, unicode).
35
+ - **Directory scanning**: Run all scenarios in a directory with `akit run scenarios/`
36
+ - **Glob pattern matching**: Use patterns like `akit run scenarios/**/*.yaml`
37
+ - **Parallel execution**: Added `--parallel` flag for concurrent scenario execution
38
+ - **Scenario tags**: Filter scenarios with `--tags` flag
39
+
40
+ ### Core (`@artemiskit/core`)
41
+
42
+ #### New Features
43
+
44
+ - **Combined matchers**: New `type: combined` expectation with `operator: and|or` for complex assertion logic
45
+ - **`not_contains` expectation**: Negative containment check to ensure responses don't include specific text
46
+ - **`similarity` expectation**: Semantic similarity matching with two modes:
47
+ - Embedding-based: Uses vector embeddings for fast semantic comparison
48
+ - LLM-based fallback: Uses an LLM to evaluate semantic similarity when embeddings are unavailable
49
+ - Configurable threshold (default 0.75)
50
+ - **`inline` expectation**: Safe expression-based custom matchers in YAML using JavaScript-like expressions (e.g., `response.length > 100`, `response.includes('hello')`)
51
+ - **p90 latency metric**: Added p90 percentile to stress test latency metrics
52
+ - **Token usage tracking**: Monitor token consumption per request in stress tests
53
+ - **Cost estimation**: Estimate API costs with model pricing data
54
+
55
+ ### Red Team (`@artemiskit/redteam`)
56
+
57
+ #### New Features
58
+
59
+ - **MultiTurnMutation class**: Full implementation with strategy support and custom conversation prefixes
60
+ - **Custom attack loader**: Parse and load custom attack patterns from YAML
61
+ - **Encoding mutation**: Obfuscate attack payloads using various encoding schemes
62
+ - **CVSS-like severity scoring**: Detailed attack severity scoring with:
63
+ - `CvssScore` interface with attack vector, complexity, impact metrics
64
+ - `CvssCalculator` class for score calculation and aggregation
65
+ - Predefined scores for all mutations and detection categories
66
+ - Human-readable score descriptions and vector strings
67
+
68
+ ### Reports (`@artemiskit/reports`)
69
+
70
+ #### New Features
71
+
72
+ - **Run comparison HTML report**: Visual diff between two runs showing:
73
+ - Metrics overview with baseline vs current comparison
74
+ - Change summary (regressions, improvements, unchanged)
75
+ - Case-by-case comparison table with filtering
76
+ - Side-by-side response comparison for each case
77
+ - **Comparison JSON export**: Structured comparison data for programmatic use
78
+
79
+ ### CLI Enhancements
80
+
81
+ - **Compare command `--html` flag**: Generate HTML comparison report
82
+ - **Compare command `--json` flag**: Generate JSON comparison data
83
+
84
+ ### Documentation
85
+
86
+ - Updated all CLI command documentation
87
+ - Added comprehensive examples for custom multi-turn scenarios
88
+ - Documented combined matchers and `not_contains` expectations
89
+ - Added mutation strategy reference tables
90
+
91
+ ### Patch Changes
92
+
93
+ - Updated dependencies [d2c3835]
94
+ - @artemiskit/core@0.2.0
95
+
3
96
  ## 0.1.6
4
97
 
5
98
  ### Patch Changes
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Custom attack YAML loader
3
+ * Allows users to define custom red team attacks in YAML format
4
+ */
5
+ import type { Mutation } from './mutations';
6
+ /**
7
+ * Schema for custom attack definition in YAML
8
+ */
9
+ export interface CustomAttackDefinition {
10
+ /** Unique name for the attack */
11
+ name: string;
12
+ /** Description of what the attack tests */
13
+ description: string;
14
+ /** Severity level */
15
+ severity: 'low' | 'medium' | 'high' | 'critical';
16
+ /** Attack templates - use {{prompt}} as placeholder for the original prompt */
17
+ templates: string[];
18
+ /** Optional: Variations to apply to templates */
19
+ variations?: {
20
+ /** Placeholder name (e.g., 'role') */
21
+ name: string;
22
+ /** Values to substitute */
23
+ values: string[];
24
+ }[];
25
+ }
26
+ /**
27
+ * Schema for custom attacks YAML file
28
+ */
29
+ export interface CustomAttacksFile {
30
+ /** File format version */
31
+ version: string;
32
+ /** List of custom attack definitions */
33
+ attacks: CustomAttackDefinition[];
34
+ }
35
+ /**
36
+ * Custom mutation created from YAML definition
37
+ */
38
+ export declare class CustomMutation implements Mutation {
39
+ readonly name: string;
40
+ readonly description: string;
41
+ readonly severity: 'low' | 'medium' | 'high' | 'critical';
42
+ private templates;
43
+ private variations;
44
+ constructor(definition: CustomAttackDefinition);
45
+ mutate(prompt: string): string;
46
+ }
47
+ /**
48
+ * Load custom attacks from a YAML file
49
+ */
50
+ export declare function loadCustomAttacks(filePath: string): CustomMutation[];
51
+ /**
52
+ * Load custom attacks from a YAML string
53
+ */
54
+ export declare function parseCustomAttacks(yamlContent: string): CustomMutation[];
55
+ /**
56
+ * Generate an example custom attacks YAML file
57
+ */
58
+ export declare function generateExampleCustomAttacksYaml(): string;
59
+ //# sourceMappingURL=custom-attacks.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"custom-attacks.d.ts","sourceRoot":"","sources":["../src/custom-attacks.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAE5C;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,iCAAiC;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,2CAA2C;IAC3C,WAAW,EAAE,MAAM,CAAC;IACpB,qBAAqB;IACrB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;IACjD,+EAA+E;IAC/E,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,iDAAiD;IACjD,UAAU,CAAC,EAAE;QACX,sCAAsC;QACtC,IAAI,EAAE,MAAM,CAAC;QACb,2BAA2B;QAC3B,MAAM,EAAE,MAAM,EAAE,CAAC;KAClB,EAAE,CAAC;CACL;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,0BAA0B;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,wCAAwC;IACxC,OAAO,EAAE,sBAAsB,EAAE,CAAC;CACnC;AAED;;GAEG;AACH,qBAAa,cAAe,YAAW,QAAQ;IAC7C,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;IAE1D,OAAO,CAAC,SAAS,CAAW;IAC5B,OAAO,CAAC,UAAU,CAAuC;gBAE7C,UAAU,EAAE,sBAAsB;IAQ9C,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM;CAkB/B;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,cAAc,EAAE,CAwBpE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,cAAc,EAAE,CAexE;AAgDD;;GAEG;AACH,wBAAgB,gCAAgC,IAAI,MAAM,CAsDzD"}
@@ -1,11 +1,14 @@
1
1
  /**
2
- * Unsafe response detector
2
+ * Unsafe response detector with CVSS-like severity scoring
3
3
  */
4
+ import { CvssCalculator, type CvssScore } from './severity';
4
5
  export interface DetectionResult {
5
6
  unsafe: boolean;
6
7
  reasons: string[];
7
8
  severity: 'low' | 'medium' | 'high' | 'critical';
8
9
  categories: string[];
10
+ /** Detailed CVSS-like score for the detection */
11
+ cvssScore?: CvssScore;
9
12
  }
10
13
  export declare class UnsafeResponseDetector {
11
14
  private patterns;
@@ -17,6 +20,14 @@ export declare class UnsafeResponseDetector {
17
20
  /**
18
21
  * Add a custom detection pattern
19
22
  */
20
- addPattern(pattern: RegExp, category: string, severity: 'low' | 'medium' | 'high' | 'critical', description: string): void;
23
+ addPattern(pattern: RegExp, category: string, severity: 'low' | 'medium' | 'high' | 'critical', description: string, cvssParams?: Parameters<typeof CvssCalculator.calculate>[0]): void;
24
+ /**
25
+ * Get severity info for display
26
+ */
27
+ static getSeverityInfo(severity: 'low' | 'medium' | 'high' | 'critical'): import("./severity").SeverityInfo;
28
+ /**
29
+ * Describe a CVSS score in human-readable form
30
+ */
31
+ static describeCvssScore(cvssScore: CvssScore): string;
21
32
  }
22
33
  //# sourceMappingURL=detector.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"detector.d.ts","sourceRoot":"","sources":["../src/detector.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;IACjD,UAAU,EAAE,MAAM,EAAE,CAAC;CACtB;AAED,qBAAa,sBAAsB;IACjC,OAAO,CAAC,QAAQ,CAKb;;IAoDH;;OAEG;IACH,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,eAAe;IAmCzC;;OAEG;IACH,UAAU,CACR,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,EAChD,WAAW,EAAE,MAAM,GAClB,IAAI;CAGR"}
1
+ {"version":3,"file":"detector.d.ts","sourceRoot":"","sources":["../src/detector.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,cAAc,EAAE,KAAK,SAAS,EAAyC,MAAM,YAAY,CAAC;AAEnG,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;IACjD,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,iDAAiD;IACjD,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;AAUD,qBAAa,sBAAsB;IACjC,OAAO,CAAC,QAAQ,CAAqB;;IA2DrC;;OAEG;IACH,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,eAAe;IA0CzC;;OAEG;IACH,UAAU,CACR,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,EAChD,WAAW,EAAE,MAAM,EACnB,UAAU,CAAC,EAAE,UAAU,CAAC,OAAO,cAAc,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,GAC1D,IAAI;IAKP;;OAEG;IACH,MAAM,CAAC,eAAe,CAAC,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU;IAIvE;;OAEG;IACH,MAAM,CAAC,iBAAiB,CAAC,SAAS,EAAE,SAAS,GAAG,MAAM;CAGvD"}
@@ -1 +1 @@
1
- {"version":3,"file":"generator.d.ts","sourceRoot":"","sources":["../src/generator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAM5C,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;CAClD;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,SAAS,CAAa;gBAElB,SAAS,CAAC,EAAE,QAAQ,EAAE;IASlC;;OAEG;IACH,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,eAAe,EAAE;IA0BvD;;OAEG;IACH,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,GAAG,MAAM;IAQ3D;;OAEG;IACH,aAAa,IAAI,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IAQ/E,OAAO,CAAC,eAAe;IAMvB,OAAO,CAAC,WAAW;CAOpB"}
1
+ {"version":3,"file":"generator.d.ts","sourceRoot":"","sources":["../src/generator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAQ5C,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;CAClD;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,SAAS,CAAa;gBAElB,SAAS,CAAC,EAAE,QAAQ,EAAE;IAWlC;;OAEG;IACH,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,eAAe,EAAE;IA0BvD;;OAEG;IACH,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,GAAG,MAAM;IAQ3D;;OAEG;IACH,aAAa,IAAI,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IAQ/E,OAAO,CAAC,eAAe;IAMvB,OAAO,CAAC,WAAW;CAOpB"}
package/dist/index.d.ts CHANGED
@@ -5,5 +5,6 @@
5
5
  export * from './mutations';
6
6
  export { RedTeamGenerator, type GeneratedPrompt } from './generator';
7
7
  export { UnsafeResponseDetector, type DetectionResult } from './detector';
8
- export { SeverityMapper, type Severity, type SeverityInfo } from './severity';
8
+ export { SeverityMapper, CvssCalculator, MUTATION_CVSS_SCORES, DETECTION_CVSS_SCORES, type Severity, type SeverityInfo, type CvssScore, } from './severity';
9
+ export { CustomMutation, loadCustomAttacks, parseCustomAttacks, generateExampleCustomAttacksYaml, type CustomAttackDefinition, type CustomAttacksFile, } from './custom-attacks';
9
10
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,cAAc,aAAa,CAAC;AAC5B,OAAO,EAAE,gBAAgB,EAAE,KAAK,eAAe,EAAE,MAAM,aAAa,CAAC;AACrE,OAAO,EAAE,sBAAsB,EAAE,KAAK,eAAe,EAAE,MAAM,YAAY,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,KAAK,QAAQ,EAAE,KAAK,YAAY,EAAE,MAAM,YAAY,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,cAAc,aAAa,CAAC;AAC5B,OAAO,EAAE,gBAAgB,EAAE,KAAK,eAAe,EAAE,MAAM,aAAa,CAAC;AACrE,OAAO,EAAE,sBAAsB,EAAE,KAAK,eAAe,EAAE,MAAM,YAAY,CAAC;AAC1E,OAAO,EACL,cAAc,EACd,cAAc,EACd,oBAAoB,EACpB,qBAAqB,EACrB,KAAK,QAAQ,EACb,KAAK,YAAY,EACjB,KAAK,SAAS,GACf,MAAM,YAAY,CAAC;AACpB,OAAO,EACL,cAAc,EACd,iBAAiB,EACjB,kBAAkB,EAClB,gCAAgC,EAChC,KAAK,sBAAsB,EAC3B,KAAK,iBAAiB,GACvB,MAAM,kBAAkB,CAAC"}