@artemiskit/redteam 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/CHANGELOG.md +79 -0
  2. package/dist/custom-attacks.d.ts +59 -0
  3. package/dist/custom-attacks.d.ts.map +1 -0
  4. package/dist/detector.d.ts +13 -2
  5. package/dist/detector.d.ts.map +1 -1
  6. package/dist/generator.d.ts.map +1 -1
  7. package/dist/index.d.ts +2 -1
  8. package/dist/index.d.ts.map +1 -1
  9. package/dist/index.js +7755 -58
  10. package/dist/mutations/cot-injection.d.ts +2 -0
  11. package/dist/mutations/cot-injection.d.ts.map +1 -1
  12. package/dist/mutations/encoding.d.ts +37 -0
  13. package/dist/mutations/encoding.d.ts.map +1 -0
  14. package/dist/mutations/index.d.ts +5 -0
  15. package/dist/mutations/index.d.ts.map +1 -1
  16. package/dist/mutations/instruction-flip.d.ts +2 -0
  17. package/dist/mutations/instruction-flip.d.ts.map +1 -1
  18. package/dist/mutations/multi-turn.d.ts +90 -0
  19. package/dist/mutations/multi-turn.d.ts.map +1 -0
  20. package/dist/mutations/role-spoof.d.ts +2 -0
  21. package/dist/mutations/role-spoof.d.ts.map +1 -1
  22. package/dist/mutations/typo.d.ts +2 -0
  23. package/dist/mutations/typo.d.ts.map +1 -1
  24. package/dist/severity.d.ts +69 -1
  25. package/dist/severity.d.ts.map +1 -1
  26. package/package.json +3 -2
  27. package/src/custom-attacks.ts +233 -0
  28. package/src/detector.ts +48 -11
  29. package/src/generator.ts +4 -0
  30. package/src/index.ts +17 -1
  31. package/src/mutations/cot-injection.ts +2 -0
  32. package/src/mutations/encoding.ts +116 -0
  33. package/src/mutations/index.ts +12 -0
  34. package/src/mutations/instruction-flip.ts +2 -0
  35. package/src/mutations/multi-turn.test.ts +144 -0
  36. package/src/mutations/multi-turn.ts +305 -0
  37. package/src/mutations/role-spoof.ts +2 -0
  38. package/src/mutations/typo.ts +2 -0
  39. package/src/severity.test.ts +238 -0
  40. package/src/severity.ts +381 -1
package/CHANGELOG.md CHANGED
@@ -1,5 +1,84 @@
1
1
  # @artemiskit/redteam
2
2
 
3
+ ## 0.2.0
4
+
5
+ ### Minor Changes
6
+
7
+ - d2c3835: ## v0.2.0 - Enhanced Evaluation Features
8
+
9
+ ### CLI (`@artemiskit/cli`)
10
+
11
+ #### New Features
12
+
13
+ - **Multi-turn mutations**: Added `--mutations multi_turn` flag for red team testing with 4 built-in strategies:
14
+ - `gradual_escalation`: Gradually intensifies requests over conversation turns
15
+ - `context_switching`: Shifts topics to lower defenses before attack
16
+ - `persona_building`: Establishes trust through roleplay
17
+ - `distraction`: Uses side discussions to slip in harmful requests
18
+ - **Custom multi-turn conversations**: Support for array prompts in red team scenarios (consistent with the `run` command format). The last user message becomes the attack target; the preceding messages form the conversation context.
19
+ - **Custom attacks**: Added `--custom-attacks` flag to load custom attack patterns from YAML files with template variables and variations.
20
+ - **Encoding mutations**: Added `--mutations encoding` for obfuscation attacks (base64, ROT13, hex, unicode).
21
+ - **Directory scanning**: Run all scenarios in a directory with `akit run scenarios/`
22
+ - **Glob pattern matching**: Use patterns like `akit run scenarios/**/*.yaml`
23
+ - **Parallel execution**: Added `--parallel` flag for concurrent scenario execution
24
+ - **Scenario tags**: Filter scenarios with `--tags` flag
25
+
26
+ ### Core (`@artemiskit/core`)
27
+
28
+ #### New Features
29
+
30
+ - **Combined matchers**: New `type: combined` expectation with `operator: and|or` for complex assertion logic
31
+ - **`not_contains` expectation**: Negative containment check to ensure responses don't include specific text
32
+ - **`similarity` expectation**: Semantic similarity matching with two modes:
33
+ - Embedding-based: Uses vector embeddings for fast semantic comparison
34
+ - LLM-based fallback: Uses an LLM to evaluate semantic similarity when embeddings are unavailable
35
+ - Configurable threshold (default 0.75)
36
+ - **`inline` expectation**: Safe expression-based custom matchers in YAML using JavaScript-like expressions (e.g., `response.length > 100`, `response.includes('hello')`)
37
+ - **p90 latency metric**: Added p90 percentile to stress test latency metrics
38
+ - **Token usage tracking**: Monitor token consumption per request in stress tests
39
+ - **Cost estimation**: Estimate API costs with model pricing data
40
+
41
+ ### Red Team (`@artemiskit/redteam`)
42
+
43
+ #### New Features
44
+
45
+ - **MultiTurnMutation class**: Full implementation with strategy support and custom conversation prefixes
46
+ - **Custom attack loader**: Parse and load custom attack patterns from YAML
47
+ - **Encoding mutation**: Obfuscate attack payloads using various encoding schemes
48
+ - **CVSS-like severity scoring**: Detailed attack severity scoring with:
49
+ - `CvssScore` interface with attack vector, complexity, impact metrics
50
+ - `CvssCalculator` class for score calculation and aggregation
51
+ - Predefined scores for all mutations and detection categories
52
+ - Human-readable score descriptions and vector strings
53
+
54
+ ### Reports (`@artemiskit/reports`)
55
+
56
+ #### New Features
57
+
58
+ - **Run comparison HTML report**: Visual diff between two runs showing:
59
+ - Metrics overview with baseline vs current comparison
60
+ - Change summary (regressions, improvements, unchanged)
61
+ - Case-by-case comparison table with filtering
62
+ - Side-by-side response comparison for each case
63
+ - **Comparison JSON export**: Structured comparison data for programmatic use
64
+
65
+ ### CLI Enhancements
66
+
67
+ - **Compare command `--html` flag**: Generate HTML comparison report
68
+ - **Compare command `--json` flag**: Generate JSON comparison data
69
+
70
+ ### Documentation
71
+
72
+ - Updated all CLI command documentation
73
+ - Added comprehensive examples for custom multi-turn scenarios
74
+ - Documented combined matchers and `not_contains` expectations
75
+ - Added mutation strategy reference tables
76
+
77
+ ### Patch Changes
78
+
79
+ - Updated dependencies [d2c3835]
80
+ - @artemiskit/core@0.2.0
81
+
3
82
  ## 0.1.6
4
83
 
5
84
  ### Patch Changes
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Custom attack YAML loader
3
+ * Allows users to define custom red team attacks in YAML format
4
+ */
5
+ import type { Mutation } from './mutations';
6
+ /**
7
+ * Schema for custom attack definition in YAML
8
+ */
9
+ export interface CustomAttackDefinition {
10
+ /** Unique name for the attack */
11
+ name: string;
12
+ /** Description of what the attack tests */
13
+ description: string;
14
+ /** Severity level */
15
+ severity: 'low' | 'medium' | 'high' | 'critical';
16
+ /** Attack templates - use {{prompt}} as placeholder for the original prompt */
17
+ templates: string[];
18
+ /** Optional: Variations to apply to templates */
19
+ variations?: {
20
+ /** Placeholder name (e.g., 'role') */
21
+ name: string;
22
+ /** Values to substitute */
23
+ values: string[];
24
+ }[];
25
+ }
26
+ /**
27
+ * Schema for custom attacks YAML file
28
+ */
29
+ export interface CustomAttacksFile {
30
+ /** File format version */
31
+ version: string;
32
+ /** List of custom attack definitions */
33
+ attacks: CustomAttackDefinition[];
34
+ }
35
+ /**
36
+ * Custom mutation created from YAML definition
37
+ */
38
+ export declare class CustomMutation implements Mutation {
39
+ readonly name: string;
40
+ readonly description: string;
41
+ readonly severity: 'low' | 'medium' | 'high' | 'critical';
42
+ private templates;
43
+ private variations;
44
+ constructor(definition: CustomAttackDefinition);
45
+ mutate(prompt: string): string;
46
+ }
47
+ /**
48
+ * Load custom attacks from a YAML file
49
+ */
50
+ export declare function loadCustomAttacks(filePath: string): CustomMutation[];
51
+ /**
52
+ * Load custom attacks from a YAML string
53
+ */
54
+ export declare function parseCustomAttacks(yamlContent: string): CustomMutation[];
55
+ /**
56
+ * Generate an example custom attacks YAML file
57
+ */
58
+ export declare function generateExampleCustomAttacksYaml(): string;
59
+ //# sourceMappingURL=custom-attacks.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"custom-attacks.d.ts","sourceRoot":"","sources":["../src/custom-attacks.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAE5C;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,iCAAiC;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,2CAA2C;IAC3C,WAAW,EAAE,MAAM,CAAC;IACpB,qBAAqB;IACrB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;IACjD,+EAA+E;IAC/E,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,iDAAiD;IACjD,UAAU,CAAC,EAAE;QACX,sCAAsC;QACtC,IAAI,EAAE,MAAM,CAAC;QACb,2BAA2B;QAC3B,MAAM,EAAE,MAAM,EAAE,CAAC;KAClB,EAAE,CAAC;CACL;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,0BAA0B;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,wCAAwC;IACxC,OAAO,EAAE,sBAAsB,EAAE,CAAC;CACnC;AAED;;GAEG;AACH,qBAAa,cAAe,YAAW,QAAQ;IAC7C,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;IAE1D,OAAO,CAAC,SAAS,CAAW;IAC5B,OAAO,CAAC,UAAU,CAAuC;gBAE7C,UAAU,EAAE,sBAAsB;IAQ9C,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM;CAkB/B;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,cAAc,EAAE,CAwBpE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,cAAc,EAAE,CAexE;AAgDD;;GAEG;AACH,wBAAgB,gCAAgC,IAAI,MAAM,CAsDzD"}
@@ -1,11 +1,14 @@
1
1
  /**
2
- * Unsafe response detector
2
+ * Unsafe response detector with CVSS-like severity scoring
3
3
  */
4
+ import { CvssCalculator, type CvssScore } from './severity';
4
5
  export interface DetectionResult {
5
6
  unsafe: boolean;
6
7
  reasons: string[];
7
8
  severity: 'low' | 'medium' | 'high' | 'critical';
8
9
  categories: string[];
10
+ /** Detailed CVSS-like score for the detection */
11
+ cvssScore?: CvssScore;
9
12
  }
10
13
  export declare class UnsafeResponseDetector {
11
14
  private patterns;
@@ -17,6 +20,14 @@ export declare class UnsafeResponseDetector {
17
20
  /**
18
21
  * Add a custom detection pattern
19
22
  */
20
- addPattern(pattern: RegExp, category: string, severity: 'low' | 'medium' | 'high' | 'critical', description: string): void;
23
+ addPattern(pattern: RegExp, category: string, severity: 'low' | 'medium' | 'high' | 'critical', description: string, cvssParams?: Parameters<typeof CvssCalculator.calculate>[0]): void;
24
+ /**
25
+ * Get severity info for display
26
+ */
27
+ static getSeverityInfo(severity: 'low' | 'medium' | 'high' | 'critical'): import("./severity").SeverityInfo;
28
+ /**
29
+ * Describe a CVSS score in human-readable form
30
+ */
31
+ static describeCvssScore(cvssScore: CvssScore): string;
21
32
  }
22
33
  //# sourceMappingURL=detector.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"detector.d.ts","sourceRoot":"","sources":["../src/detector.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;IACjD,UAAU,EAAE,MAAM,EAAE,CAAC;CACtB;AAED,qBAAa,sBAAsB;IACjC,OAAO,CAAC,QAAQ,CAKb;;IAoDH;;OAEG;IACH,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,eAAe;IAmCzC;;OAEG;IACH,UAAU,CACR,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,EAChD,WAAW,EAAE,MAAM,GAClB,IAAI;CAGR"}
1
+ {"version":3,"file":"detector.d.ts","sourceRoot":"","sources":["../src/detector.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,cAAc,EAAE,KAAK,SAAS,EAAyC,MAAM,YAAY,CAAC;AAEnG,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;IACjD,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,iDAAiD;IACjD,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;AAUD,qBAAa,sBAAsB;IACjC,OAAO,CAAC,QAAQ,CAAqB;;IA2DrC;;OAEG;IACH,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,eAAe;IA0CzC;;OAEG;IACH,UAAU,CACR,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,EAChD,WAAW,EAAE,MAAM,EACnB,UAAU,CAAC,EAAE,UAAU,CAAC,OAAO,cAAc,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,GAC1D,IAAI;IAKP;;OAEG;IACH,MAAM,CAAC,eAAe,CAAC,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU;IAIvE;;OAEG;IACH,MAAM,CAAC,iBAAiB,CAAC,SAAS,EAAE,SAAS,GAAG,MAAM;CAGvD"}
@@ -1 +1 @@
1
- {"version":3,"file":"generator.d.ts","sourceRoot":"","sources":["../src/generator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAM5C,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;CAClD;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,SAAS,CAAa;gBAElB,SAAS,CAAC,EAAE,QAAQ,EAAE;IASlC;;OAEG;IACH,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,eAAe,EAAE;IA0BvD;;OAEG;IACH,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,GAAG,MAAM;IAQ3D;;OAEG;IACH,aAAa,IAAI,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IAQ/E,OAAO,CAAC,eAAe;IAMvB,OAAO,CAAC,WAAW;CAOpB"}
1
+ {"version":3,"file":"generator.d.ts","sourceRoot":"","sources":["../src/generator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAQ5C,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;CAClD;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,SAAS,CAAa;gBAElB,SAAS,CAAC,EAAE,QAAQ,EAAE;IAWlC;;OAEG;IACH,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,eAAe,EAAE;IA0BvD;;OAEG;IACH,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,GAAG,MAAM;IAQ3D;;OAEG;IACH,aAAa,IAAI,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IAQ/E,OAAO,CAAC,eAAe;IAMvB,OAAO,CAAC,WAAW;CAOpB"}
package/dist/index.d.ts CHANGED
@@ -5,5 +5,6 @@
5
5
  export * from './mutations';
6
6
  export { RedTeamGenerator, type GeneratedPrompt } from './generator';
7
7
  export { UnsafeResponseDetector, type DetectionResult } from './detector';
8
- export { SeverityMapper, type Severity, type SeverityInfo } from './severity';
8
+ export { SeverityMapper, CvssCalculator, MUTATION_CVSS_SCORES, DETECTION_CVSS_SCORES, type Severity, type SeverityInfo, type CvssScore, } from './severity';
9
+ export { CustomMutation, loadCustomAttacks, parseCustomAttacks, generateExampleCustomAttacksYaml, type CustomAttackDefinition, type CustomAttacksFile, } from './custom-attacks';
9
10
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,cAAc,aAAa,CAAC;AAC5B,OAAO,EAAE,gBAAgB,EAAE,KAAK,eAAe,EAAE,MAAM,aAAa,CAAC;AACrE,OAAO,EAAE,sBAAsB,EAAE,KAAK,eAAe,EAAE,MAAM,YAAY,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,KAAK,QAAQ,EAAE,KAAK,YAAY,EAAE,MAAM,YAAY,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,cAAc,aAAa,CAAC;AAC5B,OAAO,EAAE,gBAAgB,EAAE,KAAK,eAAe,EAAE,MAAM,aAAa,CAAC;AACrE,OAAO,EAAE,sBAAsB,EAAE,KAAK,eAAe,EAAE,MAAM,YAAY,CAAC;AAC1E,OAAO,EACL,cAAc,EACd,cAAc,EACd,oBAAoB,EACpB,qBAAqB,EACrB,KAAK,QAAQ,EACb,KAAK,YAAY,EACjB,KAAK,SAAS,GACf,MAAM,YAAY,CAAC;AACpB,OAAO,EACL,cAAc,EACd,iBAAiB,EACjB,kBAAkB,EAClB,gCAAgC,EAChC,KAAK,sBAAsB,EAC3B,KAAK,iBAAiB,GACvB,MAAM,kBAAkB,CAAC"}