@elsium-ai/testing 0.9.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,99 @@
1
+ import type { Message } from '@elsium-ai/core';
2
+ import type { ToolExecutionResult } from '@elsium-ai/tools';
3
+ import type { ToolCallEntry } from './tool-assertions';
4
+ export type TurnAssertion = {
5
+ type: 'response_contains';
6
+ value: string;
7
+ } | {
8
+ type: 'response_not_contains';
9
+ value: string;
10
+ } | {
11
+ type: 'response_matches';
12
+ pattern: string;
13
+ flags?: string;
14
+ } | {
15
+ type: 'tool_called';
16
+ name: string;
17
+ times?: number;
18
+ } | {
19
+ type: 'tool_not_called';
20
+ name: string;
21
+ } | {
22
+ type: 'tool_args_match';
23
+ name: string;
24
+ args: Record<string, unknown>;
25
+ } | {
26
+ type: 'max_iterations';
27
+ value: number;
28
+ } | {
29
+ type: 'max_latency_ms';
30
+ value: number;
31
+ } | {
32
+ type: 'custom';
33
+ name: string;
34
+ fn: (result: TurnResult) => boolean;
35
+ };
36
+ export interface TurnResult {
37
+ turnIndex: number;
38
+ name?: string;
39
+ input: string;
40
+ output: string;
41
+ toolCalls: ToolCallEntry[];
42
+ usage: {
43
+ totalInputTokens: number;
44
+ totalOutputTokens: number;
45
+ totalTokens: number;
46
+ totalCost: number;
47
+ iterations: number;
48
+ };
49
+ durationMs: number;
50
+ assertions: Array<{
51
+ type: string;
52
+ passed: boolean;
53
+ message: string;
54
+ }>;
55
+ passed: boolean;
56
+ }
57
+ export interface ConversationTurn {
58
+ role: 'user';
59
+ content: string | ((history: TurnResult[]) => string);
60
+ assertions?: TurnAssertion[];
61
+ name?: string;
62
+ }
63
+ interface AgentResultLike {
64
+ message: Message;
65
+ usage: {
66
+ totalInputTokens: number;
67
+ totalOutputTokens: number;
68
+ totalTokens: number;
69
+ totalCost: number;
70
+ iterations: number;
71
+ };
72
+ toolCalls: Array<{
73
+ name: string;
74
+ arguments: Record<string, unknown>;
75
+ result: ToolExecutionResult;
76
+ }>;
77
+ traceId: string;
78
+ }
79
+ export interface ConversationScenarioConfig {
80
+ name: string;
81
+ description?: string;
82
+ turns: ConversationTurn[];
83
+ runner: (messages: Message[]) => Promise<AgentResultLike>;
84
+ tags?: string[];
85
+ }
86
+ export interface ConversationResult {
87
+ name: string;
88
+ passed: boolean;
89
+ turns: TurnResult[];
90
+ totalDurationMs: number;
91
+ totalTokens: number;
92
+ totalCost: number;
93
+ totalToolCalls: number;
94
+ tags: string[];
95
+ }
96
+ export declare function runConversation(config: ConversationScenarioConfig): Promise<ConversationResult>;
97
+ export declare function formatConversationReport(result: ConversationResult): string;
98
+ export {};
99
+ //# sourceMappingURL=multi-turn.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"multi-turn.d.ts","sourceRoot":"","sources":["../src/multi-turn.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAA;AAE9C,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAA;AAC3D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAA;AAGtD,MAAM,MAAM,aAAa,GACtB;IAAE,IAAI,EAAE,mBAAmB,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GAC5C;IAAE,IAAI,EAAE,uBAAuB,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GAChD;IAAE,IAAI,EAAE,kBAAkB,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GAC7D;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GACrD;IAAE,IAAI,EAAE,iBAAiB,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GACzC;IAAE,IAAI,EAAE,iBAAiB,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CAAE,GACxE;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GACzC;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GACzC;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,EAAE,EAAE,CAAC,MAAM,EAAE,UAAU,KAAK,OAAO,CAAA;CAAE,CAAA;AAExE,MAAM,WAAW,UAAU;IAC1B,SAAS,EAAE,MAAM,CAAA;IACjB,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,SAAS,EAAE,aAAa,EAAE,CAAA;IAC1B,KAAK,EAAE;QACN,gBAAgB,EAAE,MAAM,CAAA;QACxB,iBAAiB,EAAE,MAAM,CAAA;QACzB,WAAW,EAAE,MAAM,CAAA;QACnB,SAAS,EAAE,MAAM,CAAA;QACjB,UAAU,EAAE,MAAM,CAAA;KAClB,CAAA;IACD,UAAU,EAAE,MAAM,CAAA;IAClB,UAAU,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,OAAO,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IACrE,MAAM,EAAE,OAAO,CAAA;CACf;AAED,MAAM,WAAW,gBAAgB;IAChC,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,MAAM,GAAG,CAAC,CAAC,OAAO,EAAE,UAAU,EAAE,KAAK,MAAM,CAAC,CAAA;IACrD,UAAU,CAAC,EAAE,aAAa,EAAE,CAAA;IAC5B,IAAI,CAAC,EAAE,MAAM,CAAA;CACb;AAED,UAAU,eAAe;IACxB,OAAO,EAAE,OAAO,CAAA;IAChB,KAAK,EAAE;QACN,gBAAgB,EAAE,MAAM,CAAA;QACxB,iBAAiB,EAAE,MAAM,CAAA;QACzB,WAAW,EAAE,MAAM,CAAA;QACnB,SAAS,EAAE,MAAM,CAAA;QACjB,UAAU,EAAE,MAAM,CAAA;KAClB,CAAA;IACD,SAAS,EAAE,KAAK,CAAC;QAChB,IAAI,EAAE,MAAM,CAAA;QACZ,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;QAClC,MAAM,EAAE,mBAAmB,CAAA;KAC3B,CAAC,CAAA;IACF,OAAO,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,0BAA0B;IAC1C,IAAI,EAAE,MAAM,CAAA;IACZ,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,KAAK,EAAE,gBAAgB,EAAE,CAAA;IACzB,MAAM,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,OAAO,CAAC,eAAe,CAAC,CAAA;IACzD,IAAI,CAAC,EAAE,MAAM,EAAE,CAAA;CACf;AAED,MAAM,WAAW,kBAAkB;IAClC,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,EAAE,OAAO,CAAA;IACf,KAAK,EAAE,UAAU,EAAE,CAAA;IACnB,eAAe,EAAE,MAAM,CAAA;IACvB,WAAW,EAAE,MAAM,CAAA;IACnB,SAAS,EAAE,MAAM,CAAA;IACjB,cAAc,EAAE,MAAM,CAAA;IACtB,IAAI,EAAE,MAAM,EAAE,CAAA;CACd;AAyFD,wBAAsB,eAAe,CACpC,MAAM,EAAE,0BAA0B,GAChC,OAAO,CAAC,kBAAkB,CAAC,CA+D7B;AAED,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,kBAAkB,GAAG,MAAM,CA2B3E"}
@@ -0,0 +1,69 @@
1
+ import type { Message } from '@elsium-ai/core';
2
+ import type { ToolExecutionResult } from '@elsium-ai/tools';
3
+ export type AttackCategory = 'prompt_injection' | 'jailbreak' | 'data_extraction' | 'persona_override' | 'instruction_bypass';
4
+ export interface AttackProbe {
5
+ name: string;
6
+ category: AttackCategory;
7
+ input: string;
8
+ detectCompromise: (output: string) => boolean;
9
+ severity: 'low' | 'medium' | 'high' | 'critical';
10
+ }
11
+ export interface MultiTurnAttackProbe {
12
+ name: string;
13
+ category: AttackCategory;
14
+ turns: Array<string | ((history: string[]) => string)>;
15
+ detectCompromise: (outputs: string[]) => boolean;
16
+ severity: 'low' | 'medium' | 'high' | 'critical';
17
+ }
18
+ interface AgentResultLike {
19
+ message: Message;
20
+ toolCalls: Array<{
21
+ name: string;
22
+ arguments: Record<string, unknown>;
23
+ result: ToolExecutionResult;
24
+ }>;
25
+ }
26
+ export interface RedTeamConfig {
27
+ name: string;
28
+ runner: (input: string) => Promise<string>;
29
+ probes?: AttackProbe[];
30
+ categories?: AttackCategory[];
31
+ concurrency?: number;
32
+ multiTurnProbes?: MultiTurnAttackProbe[];
33
+ multiTurnRunner?: (messages: Message[]) => Promise<AgentResultLike>;
34
+ }
35
+ export interface ProbeResult {
36
+ probe: AttackProbe;
37
+ output: string;
38
+ compromised: boolean;
39
+ durationMs: number;
40
+ error?: string;
41
+ }
42
+ export interface MultiTurnProbeResult {
43
+ probe: MultiTurnAttackProbe;
44
+ outputs: string[];
45
+ compromised: boolean;
46
+ durationMs: number;
47
+ error?: string;
48
+ }
49
+ export interface RedTeamResult {
50
+ name: string;
51
+ total: number;
52
+ passed: number;
53
+ compromised: number;
54
+ errored: number;
55
+ results: ProbeResult[];
56
+ multiTurnResults: MultiTurnProbeResult[];
57
+ byCategory: Record<AttackCategory, {
58
+ total: number;
59
+ compromised: number;
60
+ }>;
61
+ score: number;
62
+ durationMs: number;
63
+ }
64
+ export declare function getBuiltInMultiTurnProbes(categories?: AttackCategory[]): MultiTurnAttackProbe[];
65
+ export declare function getBuiltInProbes(categories?: AttackCategory[]): AttackProbe[];
66
+ export declare function runRedTeam(config: RedTeamConfig): Promise<RedTeamResult>;
67
+ export declare function formatRedTeamReport(result: RedTeamResult): string;
68
+ export {};
69
+ //# sourceMappingURL=red-team.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"red-team.d.ts","sourceRoot":"","sources":["../src/red-team.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAA;AAE9C,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAA;AAE3D,MAAM,MAAM,cAAc,GACvB,kBAAkB,GAClB,WAAW,GACX,iBAAiB,GACjB,kBAAkB,GAClB,oBAAoB,CAAA;AAEvB,MAAM,WAAW,WAAW;IAC3B,IAAI,EAAE,MAAM,CAAA;IACZ,QAAQ,EAAE,cAAc,CAAA;IACxB,KAAK,EAAE,MAAM,CAAA;IACb,gBAAgB,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAA;IAC7C,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAA;CAChD;AAED,MAAM,WAAW,oBAAoB;IACpC,IAAI,EAAE,MAAM,CAAA;IACZ,QAAQ,EAAE,cAAc,CAAA;IACxB,KAAK,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,MAAM,CAAC,CAAC,CAAA;IACtD,gBAAgB,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,OAAO,CAAA;IAChD,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,CAAA;CAChD;AAED,UAAU,eAAe;IACxB,OAAO,EAAE,OAAO,CAAA;IAChB,SAAS,EAAE,KAAK,CAAC;QAChB,IAAI,EAAE,MAAM,CAAA;QACZ,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;QAClC,MAAM,EAAE,mBAAmB,CAAA;KAC3B,CAAC,CAAA;CACF;AAED,MAAM,WAAW,aAAa;IAC7B,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAA;IAC1C,MAAM,CAAC,EAAE,WAAW,EAAE,CAAA;IACtB,UAAU,CAAC,EAAE,cAAc,EAAE,CAAA;IAC7B,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,eAAe,CAAC,EAAE,oBAAoB,EAAE,CAAA;IACxC,eAAe,CAAC,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,OAAO,CAAC,eAAe,CAAC,CAAA;CACnE;AAED,MAAM,WAAW,WAAW;IAC3B,KAAK,EAAE,WAAW,CAAA;IAClB,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,OAAO,CAAA;IACpB,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,CAAC,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,oBAAoB;IACpC,KAAK,EAAE,oBAAoB,CAAA;IAC3B,OAAO,EAAE,MAAM,EAAE,CAAA;IACjB,WAAW,EAAE,OAAO,CAAA;IACpB,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,CAAC,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,aAAa;IAC7B,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,OAAO,EAAE,MAAM,CAAA;IACf,OAAO,EAAE,WAAW,EAAE,CAAA;IACtB,gBAAgB,EAAE,oBAAoB,EAAE,CAAA;IACxC,UAAU,EAAE,MAAM,CAAC,cAAc,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IAC1E,KAAK,EAAE,MAAM,CAAA;IACb,UAAU,EAAE,MAAM,CAAA;CAClB;AA8gBD,wBAAgB,yBAAyB,CAAC,UAAU,CAAC,EAAE,cAAc,EAAE,GAAG,oBAAoB,EAAE,CAG/F;AAUD,wBAAgB,gBAAgB,CAAC,UAAU,CAAC,EAAE,cAAc,EAAE,GAAG,WAAW,EAAE,CAG7E;AAoID,wBAAsB,UAAU,CAAC,MAAM,EAAE,aAAa,GAAG,OAAO,CAAC,aAAa,CAAC,CAoB9E;AAED,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,aAAa,GAAG,MAAM,CAoCjE"}
@@ -0,0 +1,46 @@
1
+ import type { ToolExecutionResult } from '@elsium-ai/tools';
2
+ import type { EvalCriterion } from './eval';
3
+ export type ToolCallEntry = {
4
+ name: string;
5
+ arguments: Record<string, unknown>;
6
+ result: ToolExecutionResult;
7
+ };
8
+ export type ToolAssertion = {
9
+ type: 'called';
10
+ name: string;
11
+ times?: number;
12
+ } | {
13
+ type: 'not_called';
14
+ name: string;
15
+ } | {
16
+ type: 'called_with';
17
+ name: string;
18
+ args: Record<string, unknown>;
19
+ partial?: boolean;
20
+ } | {
21
+ type: 'called_in_order';
22
+ names: string[];
23
+ } | {
24
+ type: 'all_succeeded';
25
+ } | {
26
+ type: 'none_failed';
27
+ } | {
28
+ type: 'call_count';
29
+ min?: number;
30
+ max?: number;
31
+ } | {
32
+ type: 'no_repeated_calls';
33
+ name?: string;
34
+ } | {
35
+ type: 'custom';
36
+ name: string;
37
+ fn: (calls: ToolCallEntry[]) => boolean;
38
+ };
39
+ export interface ToolAssertionResult {
40
+ type: string;
41
+ passed: boolean;
42
+ message: string;
43
+ }
44
+ export declare function assertToolCalls(calls: ToolCallEntry[], assertions: ToolAssertion[]): ToolAssertionResult[];
45
+ export declare function toolCallsToEvalCriteria(assertions: ToolAssertion[], calls: ToolCallEntry[]): EvalCriterion[];
46
+ //# sourceMappingURL=tool-assertions.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tool-assertions.d.ts","sourceRoot":"","sources":["../src/tool-assertions.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAA;AAC3D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAA;AAE3C,MAAM,MAAM,aAAa,GAAG;IAC3B,IAAI,EAAE,MAAM,CAAA;IACZ,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IAClC,MAAM,EAAE,mBAAmB,CAAA;CAC3B,CAAA;AAED,MAAM,MAAM,aAAa,GACtB;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GAChD;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GACpC;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAAC,OAAO,CAAC,EAAE,OAAO,CAAA;CAAE,GACvF;IAAE,IAAI,EAAE,iBAAiB,CAAC;IAAC,KAAK,EAAE,MAAM,EAAE,CAAA;CAAE,GAC5C;IAAE,IAAI,EAAE,eAAe,CAAA;CAAE,GACzB;IAAE,IAAI,EAAE,aAAa,CAAA;CAAE,GACvB;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,GAAG,CAAC,EAAE,MAAM,CAAA;CAAE,GAClD;IAAE,IAAI,EAAE,mBAAmB,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5C;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,EAAE,EAAE,CAAC,KAAK,EAAE,aAAa,EAAE,KAAK,OAAO,CAAA;CAAE,CAAA;AAE5E,MAAM,WAAW,mBAAmB;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,EAAE,OAAO,CAAA;IACf,OAAO,EAAE,MAAM,CAAA;CACf;AA+KD,wBAAgB,eAAe,CAC9B,KAAK,EAAE,aAAa,EAAE,EACtB,UAAU,EAAE,aAAa,EAAE,GACzB,mBAAmB,EAAE,CAuBvB;AAED,wBAAgB,uBAAuB,CACtC,UAAU,EAAE,aAAa,EAAE,EAC3B,KAAK,EAAE,aAAa,EAAE,GACpB,aAAa,EAAE,CASjB"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@elsium-ai/testing",
3
- "version": "0.9.1",
3
+ "version": "0.10.0",
4
4
  "description": "Testing utilities, mock providers, fixtures, and eval framework for ElsiumAI",
5
5
  "license": "MIT",
6
6
  "author": "Eric Utrera <ebutrera9103@gmail.com>",
@@ -26,10 +26,10 @@
26
26
  "dev": "bun --watch src/index.ts"
27
27
  },
28
28
  "dependencies": {
29
- "@elsium-ai/core": "^0.9.1",
30
- "@elsium-ai/gateway": "^0.9.1",
31
- "@elsium-ai/agents": "^0.9.1",
32
- "@elsium-ai/tools": "^0.9.1"
29
+ "@elsium-ai/core": "^0.10.0",
30
+ "@elsium-ai/gateway": "^0.10.0",
31
+ "@elsium-ai/agents": "^0.10.0",
32
+ "@elsium-ai/tools": "^0.10.0"
33
33
  },
34
34
  "devDependencies": {
35
35
  "typescript": "^5.7.0"