@artemiskit/redteam 0.1.6 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +93 -0
- package/dist/custom-attacks.d.ts +59 -0
- package/dist/custom-attacks.d.ts.map +1 -0
- package/dist/detector.d.ts +13 -2
- package/dist/detector.d.ts.map +1 -1
- package/dist/generator.d.ts.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7755 -58
- package/dist/mutations/cot-injection.d.ts +2 -0
- package/dist/mutations/cot-injection.d.ts.map +1 -1
- package/dist/mutations/encoding.d.ts +37 -0
- package/dist/mutations/encoding.d.ts.map +1 -0
- package/dist/mutations/index.d.ts +5 -0
- package/dist/mutations/index.d.ts.map +1 -1
- package/dist/mutations/instruction-flip.d.ts +2 -0
- package/dist/mutations/instruction-flip.d.ts.map +1 -1
- package/dist/mutations/multi-turn.d.ts +90 -0
- package/dist/mutations/multi-turn.d.ts.map +1 -0
- package/dist/mutations/role-spoof.d.ts +2 -0
- package/dist/mutations/role-spoof.d.ts.map +1 -1
- package/dist/mutations/typo.d.ts +2 -0
- package/dist/mutations/typo.d.ts.map +1 -1
- package/dist/severity.d.ts +69 -1
- package/dist/severity.d.ts.map +1 -1
- package/package.json +3 -2
- package/src/custom-attacks.ts +233 -0
- package/src/detector.ts +48 -11
- package/src/generator.ts +4 -0
- package/src/index.ts +17 -1
- package/src/mutations/cot-injection.ts +2 -0
- package/src/mutations/encoding.ts +116 -0
- package/src/mutations/index.ts +12 -0
- package/src/mutations/instruction-flip.ts +2 -0
- package/src/mutations/multi-turn.test.ts +144 -0
- package/src/mutations/multi-turn.ts +305 -0
- package/src/mutations/role-spoof.ts +2 -0
- package/src/mutations/typo.ts +2 -0
- package/src/severity.test.ts +238 -0
- package/src/severity.ts +381 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,98 @@
|
|
|
1
1
|
# @artemiskit/redteam
|
|
2
2
|
|
|
3
|
+
## 0.2.2
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- Updated dependencies [d5ca7c6]
|
|
8
|
+
- @artemiskit/core@0.2.2
|
|
9
|
+
|
|
10
|
+
## 0.2.1
|
|
11
|
+
|
|
12
|
+
### Patch Changes
|
|
13
|
+
|
|
14
|
+
- Updated dependencies
|
|
15
|
+
- @artemiskit/core@0.2.1
|
|
16
|
+
|
|
17
|
+
## 0.2.0
|
|
18
|
+
|
|
19
|
+
### Minor Changes
|
|
20
|
+
|
|
21
|
+
- d2c3835: ## v0.2.0 - Enhanced Evaluation Features
|
|
22
|
+
|
|
23
|
+
### CLI (`@artemiskit/cli`)
|
|
24
|
+
|
|
25
|
+
#### New Features
|
|
26
|
+
|
|
27
|
+
- **Multi-turn mutations**: Added `--mutations multi_turn` flag for red team testing with 4 built-in strategies:
|
|
28
|
+
- `gradual_escalation`: Gradually intensifies requests over conversation turns
|
|
29
|
+
- `context_switching`: Shifts topics to lower defenses before the attack
|
|
30
|
+
- `persona_building`: Establishes trust through roleplay
|
|
31
|
+
- `distraction`: Uses side discussions to slip in harmful requests
|
|
32
|
+
- **Custom multi-turn conversations**: Support for array prompts in red team scenarios (consistent with the `run` command format). The last user message becomes the attack target; the preceding messages form the conversation context.
|
|
33
|
+
- **Custom attacks**: Added `--custom-attacks` flag to load custom attack patterns from YAML files with template variables and variations.
|
|
34
|
+
- **Encoding mutations**: Added `--mutations encoding` for obfuscation attacks (Base64, ROT13, hex, Unicode).
|
|
35
|
+
- **Directory scanning**: Run all scenarios in a directory with `akit run scenarios/`
|
|
36
|
+
- **Glob pattern matching**: Use patterns like `akit run scenarios/**/*.yaml`
|
|
37
|
+
- **Parallel execution**: Added `--parallel` flag for concurrent scenario execution
|
|
38
|
+
- **Scenario tags**: Filter scenarios with `--tags` flag
|
|
39
|
+
|
|
40
|
+
### Core (`@artemiskit/core`)
|
|
41
|
+
|
|
42
|
+
#### New Features
|
|
43
|
+
|
|
44
|
+
- **Combined matchers**: New `type: combined` expectation with `operator: and|or` for complex assertion logic
|
|
45
|
+
- **`not_contains` expectation**: Negative containment check to ensure responses don't include specific text
|
|
46
|
+
- **`similarity` expectation**: Semantic similarity matching with two modes:
|
|
47
|
+
- Embedding-based: Uses vector embeddings for fast semantic comparison
|
|
48
|
+
- LLM-based fallback: Uses an LLM to evaluate semantic similarity when embeddings are unavailable
|
|
49
|
+
- Configurable threshold (default 0.75)
|
|
50
|
+
- **`inline` expectation**: Safe expression-based custom matchers in YAML using JavaScript-like expressions (e.g., `response.length > 100`, `response.includes('hello')`)
|
|
51
|
+
- **p90 latency metric**: Added p90 percentile to stress test latency metrics
|
|
52
|
+
- **Token usage tracking**: Monitor token consumption per request in stress tests
|
|
53
|
+
- **Cost estimation**: Estimate API costs with model pricing data
|
|
54
|
+
|
|
55
|
+
### Red Team (`@artemiskit/redteam`)
|
|
56
|
+
|
|
57
|
+
#### New Features
|
|
58
|
+
|
|
59
|
+
- **MultiTurnMutation class**: Full implementation with strategy support and custom conversation prefixes
|
|
60
|
+
- **Custom attack loader**: Parse and load custom attack patterns from YAML
|
|
61
|
+
- **Encoding mutation**: Obfuscate attack payloads using various encoding schemes
|
|
62
|
+
- **CVSS-like severity scoring**: Detailed attack severity scoring with:
|
|
63
|
+
- `CvssScore` interface with attack vector, complexity, impact metrics
|
|
64
|
+
- `CvssCalculator` class for score calculation and aggregation
|
|
65
|
+
- Predefined scores for all mutations and detection categories
|
|
66
|
+
- Human-readable score descriptions and vector strings
|
|
67
|
+
|
|
68
|
+
### Reports (`@artemiskit/reports`)
|
|
69
|
+
|
|
70
|
+
#### New Features
|
|
71
|
+
|
|
72
|
+
- **Run comparison HTML report**: Visual diff between two runs showing:
|
|
73
|
+
- Metrics overview with baseline vs current comparison
|
|
74
|
+
- Change summary (regressions, improvements, unchanged)
|
|
75
|
+
- Case-by-case comparison table with filtering
|
|
76
|
+
- Side-by-side response comparison for each case
|
|
77
|
+
- **Comparison JSON export**: Structured comparison data for programmatic use
|
|
78
|
+
|
|
79
|
+
### CLI Enhancements
|
|
80
|
+
|
|
81
|
+
- **Compare command `--html` flag**: Generate HTML comparison report
|
|
82
|
+
- **Compare command `--json` flag**: Generate JSON comparison data
|
|
83
|
+
|
|
84
|
+
### Documentation
|
|
85
|
+
|
|
86
|
+
- Updated all CLI command documentation
|
|
87
|
+
- Added comprehensive examples for custom multi-turn scenarios
|
|
88
|
+
- Documented combined matchers and `not_contains` expectations
|
|
89
|
+
- Added mutation strategy reference tables
|
|
90
|
+
|
|
91
|
+
### Patch Changes
|
|
92
|
+
|
|
93
|
+
- Updated dependencies [d2c3835]
|
|
94
|
+
- @artemiskit/core@0.2.0
|
|
95
|
+
|
|
3
96
|
## 0.1.6
|
|
4
97
|
|
|
5
98
|
### Patch Changes
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Custom attack YAML loader
|
|
3
|
+
* Allows users to define custom red team attacks in YAML format
|
|
4
|
+
*/
|
|
5
|
+
import type { Mutation } from './mutations';
|
|
6
|
+
/**
|
|
7
|
+
* Schema for custom attack definition in YAML
|
|
8
|
+
*/
|
|
9
|
+
export interface CustomAttackDefinition {
|
|
10
|
+
/** Unique name for the attack */
|
|
11
|
+
name: string;
|
|
12
|
+
/** Description of what the attack tests */
|
|
13
|
+
description: string;
|
|
14
|
+
/** Severity level */
|
|
15
|
+
severity: 'low' | 'medium' | 'high' | 'critical';
|
|
16
|
+
/** Attack templates - use {{prompt}} as placeholder for the original prompt */
|
|
17
|
+
templates: string[];
|
|
18
|
+
/** Optional: Variations to apply to templates */
|
|
19
|
+
variations?: {
|
|
20
|
+
/** Placeholder name (e.g., 'role') */
|
|
21
|
+
name: string;
|
|
22
|
+
/** Values to substitute */
|
|
23
|
+
values: string[];
|
|
24
|
+
}[];
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Schema for custom attacks YAML file
|
|
28
|
+
*/
|
|
29
|
+
export interface CustomAttacksFile {
|
|
30
|
+
/** File format version */
|
|
31
|
+
version: string;
|
|
32
|
+
/** List of custom attack definitions */
|
|
33
|
+
attacks: CustomAttackDefinition[];
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Custom mutation created from YAML definition
|
|
37
|
+
*/
|
|
38
|
+
export declare class CustomMutation implements Mutation {
|
|
39
|
+
readonly name: string;
|
|
40
|
+
readonly description: string;
|
|
41
|
+
readonly severity: 'low' | 'medium' | 'high' | 'critical';
|
|
42
|
+
private templates;
|
|
43
|
+
private variations;
|
|
44
|
+
constructor(definition: CustomAttackDefinition);
|
|
45
|
+
mutate(prompt: string): string;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Load custom attacks from a YAML file
|
|
49
|
+
*/
|
|
50
|
+
export declare function loadCustomAttacks(filePath: string): CustomMutation[];
|
|
51
|
+
/**
|
|
52
|
+
* Parse custom attacks from a YAML string
|
|
53
|
+
*/
|
|
54
|
+
export declare function parseCustomAttacks(yamlContent: string): CustomMutation[];
|
|
55
|
+
/**
|
|
56
|
+
* Generate the contents of an example custom attacks YAML file, returned as a string
|
|
57
|
+
*/
|
|
58
|
+
export declare function generateExampleCustomAttacksYaml(): string;
|
|
59
|
+
//# sourceMappingURL=custom-attacks.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"custom-attacks.d.ts","sourceRoot":"","sources":["../src/custom-attacks.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAE5C;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,iCAAiC;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,2CAA2C;IAC3C,WAAW,EAAE,MAAM,CAAC;IACpB,qBAAqB;IACrB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;IACjD,+EAA+E;IAC/E,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,iDAAiD;IACjD,UAAU,CAAC,EAAE;QACX,sCAAsC;QACtC,IAAI,EAAE,MAAM,CAAC;QACb,2BAA2B;QAC3B,MAAM,EAAE,MAAM,EAAE,CAAC;KAClB,EAAE,CAAC;CACL;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,0BAA0B;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,wCAAwC;IACxC,OAAO,EAAE,sBAAsB,EAAE,CAAC;CACnC;AAED;;GAEG;AACH,qBAAa,cAAe,YAAW,QAAQ;IAC7C,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;IAE1D,OAAO,CAAC,SAAS,CAAW;IAC5B,OAAO,CAAC,UAAU,CAAuC;gBAE7C,UAAU,EAAE,sBAAsB;IAQ9C,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM;CAkB/B;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,cAAc,EAAE,CAwBpE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,cAAc,EAAE,CAexE;AAgDD;;GAEG;AACH,wBAAgB,gCAAgC,IAAI,MAAM,CAsDzD"}
|
package/dist/detector.d.ts
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Unsafe response detector
|
|
2
|
+
* Unsafe response detector with CVSS-like severity scoring
|
|
3
3
|
*/
|
|
4
|
+
import { CvssCalculator, type CvssScore } from './severity';
|
|
4
5
|
export interface DetectionResult {
|
|
5
6
|
unsafe: boolean;
|
|
6
7
|
reasons: string[];
|
|
7
8
|
severity: 'low' | 'medium' | 'high' | 'critical';
|
|
8
9
|
categories: string[];
|
|
10
|
+
/** Detailed CVSS-like score for the detection */
|
|
11
|
+
cvssScore?: CvssScore;
|
|
9
12
|
}
|
|
10
13
|
export declare class UnsafeResponseDetector {
|
|
11
14
|
private patterns;
|
|
@@ -17,6 +20,14 @@ export declare class UnsafeResponseDetector {
|
|
|
17
20
|
/**
|
|
18
21
|
* Add a custom detection pattern
|
|
19
22
|
*/
|
|
20
|
-
addPattern(pattern: RegExp, category: string, severity: 'low' | 'medium' | 'high' | 'critical', description: string): void;
|
|
23
|
+
addPattern(pattern: RegExp, category: string, severity: 'low' | 'medium' | 'high' | 'critical', description: string, cvssParams?: Parameters<typeof CvssCalculator.calculate>[0]): void;
|
|
24
|
+
/**
|
|
25
|
+
* Get severity info for display
|
|
26
|
+
*/
|
|
27
|
+
static getSeverityInfo(severity: 'low' | 'medium' | 'high' | 'critical'): import("./severity").SeverityInfo;
|
|
28
|
+
/**
|
|
29
|
+
* Describe a CVSS score in human-readable form
|
|
30
|
+
*/
|
|
31
|
+
static describeCvssScore(cvssScore: CvssScore): string;
|
|
21
32
|
}
|
|
22
33
|
//# sourceMappingURL=detector.d.ts.map
|
package/dist/detector.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"detector.d.ts","sourceRoot":"","sources":["../src/detector.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;IACjD,UAAU,EAAE,MAAM,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"detector.d.ts","sourceRoot":"","sources":["../src/detector.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,cAAc,EAAE,KAAK,SAAS,EAAyC,MAAM,YAAY,CAAC;AAEnG,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;IACjD,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,iDAAiD;IACjD,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;AAUD,qBAAa,sBAAsB;IACjC,OAAO,CAAC,QAAQ,CAAqB;;IA2DrC;;OAEG;IACH,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,eAAe;IA0CzC;;OAEG;IACH,UAAU,CACR,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,EAChD,WAAW,EAAE,MAAM,EACnB,UAAU,CAAC,EAAE,UAAU,CAAC,OAAO,cAAc,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,GAC1D,IAAI;IAKP;;OAEG;IACH,MAAM,CAAC,eAAe,CAAC,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU;IAIvE;;OAEG;IACH,MAAM,CAAC,iBAAiB,CAAC,SAAS,EAAE,SAAS,GAAG,MAAM;CAGvD"}
|
package/dist/generator.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generator.d.ts","sourceRoot":"","sources":["../src/generator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"generator.d.ts","sourceRoot":"","sources":["../src/generator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAQ5C,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAC;CAClD;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,SAAS,CAAa;gBAElB,SAAS,CAAC,EAAE,QAAQ,EAAE;IAWlC;;OAEG;IACH,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,eAAe,EAAE;IA0BvD;;OAEG;IACH,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,GAAG,MAAM;IAQ3D;;OAEG;IACH,aAAa,IAAI,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IAQ/E,OAAO,CAAC,eAAe;IAMvB,OAAO,CAAC,WAAW;CAOpB"}
|
package/dist/index.d.ts
CHANGED
|
@@ -5,5 +5,6 @@
|
|
|
5
5
|
export * from './mutations';
|
|
6
6
|
export { RedTeamGenerator, type GeneratedPrompt } from './generator';
|
|
7
7
|
export { UnsafeResponseDetector, type DetectionResult } from './detector';
|
|
8
|
-
export { SeverityMapper, type Severity, type SeverityInfo } from './severity';
|
|
8
|
+
export { SeverityMapper, CvssCalculator, MUTATION_CVSS_SCORES, DETECTION_CVSS_SCORES, type Severity, type SeverityInfo, type CvssScore, } from './severity';
|
|
9
|
+
export { CustomMutation, loadCustomAttacks, parseCustomAttacks, generateExampleCustomAttacksYaml, type CustomAttackDefinition, type CustomAttacksFile, } from './custom-attacks';
|
|
9
10
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,cAAc,aAAa,CAAC;AAC5B,OAAO,EAAE,gBAAgB,EAAE,KAAK,eAAe,EAAE,MAAM,aAAa,CAAC;AACrE,OAAO,EAAE,sBAAsB,EAAE,KAAK,eAAe,EAAE,MAAM,YAAY,CAAC;AAC1E,OAAO,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,cAAc,aAAa,CAAC;AAC5B,OAAO,EAAE,gBAAgB,EAAE,KAAK,eAAe,EAAE,MAAM,aAAa,CAAC;AACrE,OAAO,EAAE,sBAAsB,EAAE,KAAK,eAAe,EAAE,MAAM,YAAY,CAAC;AAC1E,OAAO,EACL,cAAc,EACd,cAAc,EACd,oBAAoB,EACpB,qBAAqB,EACrB,KAAK,QAAQ,EACb,KAAK,YAAY,EACjB,KAAK,SAAS,GACf,MAAM,YAAY,CAAC;AACpB,OAAO,EACL,cAAc,EACd,iBAAiB,EACjB,kBAAkB,EAClB,gCAAgC,EAChC,KAAK,sBAAsB,EAC3B,KAAK,iBAAiB,GACvB,MAAM,kBAAkB,CAAC"}
|