@langwatch/scenario 0.4.8 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +90 -34
- package/dist/index.d.ts +90 -34
- package/dist/index.js +199 -45
- package/dist/index.mjs +193 -44
- package/dist/integrations/vitest/setup.js +1 -6
- package/dist/integrations/vitest/setup.mjs +1 -6
- package/package.json +1 -1
package/dist/index.d.mts
CHANGED
|
@@ -197,10 +197,6 @@ interface ScenarioConfigFinal extends Omit<ScenarioConfig, "id" | "script" | "th
|
|
|
197
197
|
verbose: boolean;
|
|
198
198
|
maxTurns: number;
|
|
199
199
|
threadId: string;
|
|
200
|
-
/**
|
|
201
|
-
* Optional identifier to group this scenario into a set ("Simulation Set").
|
|
202
|
-
*/
|
|
203
|
-
setId?: string;
|
|
204
200
|
}
|
|
205
201
|
/**
|
|
206
202
|
* The execution context for a scenario script.
|
|
@@ -390,7 +386,7 @@ interface ScenarioExecutionStateLike {
|
|
|
390
386
|
declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
391
387
|
defaultModel: z.ZodOptional<z.ZodObject<{
|
|
392
388
|
model: z.ZodCustom<ai.LanguageModel, ai.LanguageModel>;
|
|
393
|
-
temperature: z.
|
|
389
|
+
temperature: z.ZodOptional<z.ZodNumber>;
|
|
394
390
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
395
391
|
}, z.core.$strip>>;
|
|
396
392
|
headless: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
@@ -431,7 +427,7 @@ declare namespace domain {
|
|
|
431
427
|
*/
|
|
432
428
|
declare const modelSchema: z.ZodObject<{
|
|
433
429
|
model: z.ZodCustom<LanguageModel, LanguageModel>;
|
|
434
|
-
temperature: z.
|
|
430
|
+
temperature: z.ZodOptional<z.ZodNumber>;
|
|
435
431
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
436
432
|
}, z.core.$strip>;
|
|
437
433
|
type ModelConfig = z.infer<typeof modelSchema>;
|
|
@@ -589,6 +585,17 @@ declare class JudgeAgent extends JudgeAgentAdapter {
|
|
|
589
585
|
* being forced to a terminal decision.
|
|
590
586
|
*/
|
|
591
587
|
private invokeLLMWithDiscovery;
|
|
588
|
+
/**
|
|
589
|
+
* Checks whether the discovery loop ran out of steps without the judge
|
|
590
|
+
* calling finish_test or continue_test.
|
|
591
|
+
*/
|
|
592
|
+
private discoveryExhausted;
|
|
593
|
+
/**
|
|
594
|
+
* Makes one final LLM call with tool_choice forced to finish_test,
|
|
595
|
+
* so the judge renders a verdict with whatever context it accumulated
|
|
596
|
+
* during discovery instead of hard-failing.
|
|
597
|
+
*/
|
|
598
|
+
private forceVerdict;
|
|
592
599
|
private parseToolCalls;
|
|
593
600
|
}
|
|
594
601
|
/**
|
|
@@ -750,9 +757,9 @@ declare class UserSimulatorAgent extends UserSimulatorAgentAdapter {
|
|
|
750
757
|
* @param config Optional configuration for the agent.
|
|
751
758
|
* @param config.model The language model to use for generating responses.
|
|
752
759
|
* If not provided, a default model will be used.
|
|
753
|
-
* @param config.temperature
|
|
760
|
+
* @param config.temperature Optional temperature for the language model (0.0-1.0).
|
|
754
761
|
* Lower values make responses more deterministic.
|
|
755
|
-
*
|
|
762
|
+
* Omitted by default for compatibility with reasoning models.
|
|
756
763
|
* @param config.maxTokens The maximum number of tokens to generate.
|
|
757
764
|
* If not provided, uses model defaults.
|
|
758
765
|
* @param config.name The name of the agent.
|
|
@@ -1012,6 +1019,45 @@ declare class CrescendoStrategy implements RedTeamStrategy {
|
|
|
1012
1019
|
}): string;
|
|
1013
1020
|
}
|
|
1014
1021
|
|
|
1022
|
+
/**
|
|
1023
|
+
* Single-turn attack techniques for red-team message augmentation.
|
|
1024
|
+
*
|
|
1025
|
+
* Provides deterministic encoding transforms that can be randomly applied to
|
|
1026
|
+
* attacker messages before sending them to the target agent. Each technique
|
|
1027
|
+
* encodes the message and prepends a preamble so the target understands how
|
|
1028
|
+
* to decode it.
|
|
1029
|
+
*
|
|
1030
|
+
* These are zero-cost (no LLM calls) and test whether safety filters can
|
|
1031
|
+
* handle obfuscated inputs.
|
|
1032
|
+
*/
|
|
1033
|
+
interface AttackTechnique {
|
|
1034
|
+
/** Short identifier for this technique (used in debug logs). */
|
|
1035
|
+
readonly name: string;
|
|
1036
|
+
/** Transform the attacker's raw message into an encoded version with preamble. */
|
|
1037
|
+
transform(message: string): string;
|
|
1038
|
+
}
|
|
1039
|
+
declare class Base64Technique implements AttackTechnique {
|
|
1040
|
+
readonly name = "base64";
|
|
1041
|
+
transform(message: string): string;
|
|
1042
|
+
}
|
|
1043
|
+
declare class ROT13Technique implements AttackTechnique {
|
|
1044
|
+
readonly name = "rot13";
|
|
1045
|
+
transform(message: string): string;
|
|
1046
|
+
}
|
|
1047
|
+
declare class LeetspeakTechnique implements AttackTechnique {
|
|
1048
|
+
readonly name = "leetspeak";
|
|
1049
|
+
transform(message: string): string;
|
|
1050
|
+
}
|
|
1051
|
+
declare class CharSplitTechnique implements AttackTechnique {
|
|
1052
|
+
readonly name = "char_split";
|
|
1053
|
+
transform(message: string): string;
|
|
1054
|
+
}
|
|
1055
|
+
declare class CodeBlockTechnique implements AttackTechnique {
|
|
1056
|
+
readonly name = "code_block";
|
|
1057
|
+
transform(message: string): string;
|
|
1058
|
+
}
|
|
1059
|
+
declare const DEFAULT_TECHNIQUES: AttackTechnique[];
|
|
1060
|
+
|
|
1015
1061
|
interface RedTeamAgentConfig {
|
|
1016
1062
|
strategy: RedTeamStrategy;
|
|
1017
1063
|
target: string;
|
|
@@ -1032,6 +1078,10 @@ interface RedTeamAgentConfig {
|
|
|
1032
1078
|
successScore?: number;
|
|
1033
1079
|
/** Consecutive turns >= threshold before triggering early exit. Default 2. */
|
|
1034
1080
|
successConfirmTurns?: number;
|
|
1081
|
+
/** Probability (0.0-1.0) of applying a random encoding technique per turn. Default 0.0 (off). */
|
|
1082
|
+
injectionProbability?: number;
|
|
1083
|
+
/** List of AttackTechnique instances to sample from. Defaults to all built-ins. */
|
|
1084
|
+
techniques?: AttackTechnique[];
|
|
1035
1085
|
}
|
|
1036
1086
|
interface CrescendoConfig {
|
|
1037
1087
|
target: string;
|
|
@@ -1049,6 +1099,10 @@ interface CrescendoConfig {
|
|
|
1049
1099
|
successScore?: number;
|
|
1050
1100
|
/** Consecutive turns >= threshold before triggering early exit. Default 2. */
|
|
1051
1101
|
successConfirmTurns?: number;
|
|
1102
|
+
/** Probability (0.0-1.0) of applying a random encoding technique per turn. Default 0.0 (off). */
|
|
1103
|
+
injectionProbability?: number;
|
|
1104
|
+
/** List of AttackTechnique instances to sample from. Defaults to all built-ins. */
|
|
1105
|
+
techniques?: AttackTechnique[];
|
|
1052
1106
|
}
|
|
1053
1107
|
declare class RedTeamAgentImpl extends UserSimulatorAgentAdapter {
|
|
1054
1108
|
name: string;
|
|
@@ -1065,6 +1119,8 @@ declare class RedTeamAgentImpl extends UserSimulatorAgentAdapter {
|
|
|
1065
1119
|
private maxTokens?;
|
|
1066
1120
|
private _successScore;
|
|
1067
1121
|
private _successConfirmTurns;
|
|
1122
|
+
private injectionProbability;
|
|
1123
|
+
private techniques;
|
|
1068
1124
|
private static readonly HARD_REFUSAL_PATTERNS;
|
|
1069
1125
|
private static readonly SOFT_REFUSAL_PATTERNS;
|
|
1070
1126
|
private attackPlanPromise;
|
|
@@ -1094,15 +1150,18 @@ declare class RedTeamAgentImpl extends UserSimulatorAgentAdapter {
|
|
|
1094
1150
|
/**
|
|
1095
1151
|
* Generate a marathon test script with automatic early-exit checks.
|
|
1096
1152
|
*
|
|
1097
|
-
*
|
|
1098
|
-
* after each `agent()` step
|
|
1099
|
-
* score >= the threshold, the
|
|
1100
|
-
* `
|
|
1153
|
+
* Builds exactly `totalTurns` user/agent pairs and inserts an early-exit
|
|
1154
|
+
* check after each `agent()` step when `successScore` is set. When
|
|
1155
|
+
* `successConfirmTurns` consecutive turns score >= the threshold, the
|
|
1156
|
+
* check runs `finalChecks` inline and calls `executor.succeed()`.
|
|
1157
|
+
*
|
|
1158
|
+
* `totalTurns` is a hard cap — backtracked turns count toward the budget.
|
|
1159
|
+
* If backtracks eat into the budget, fewer effective attacks land, but the
|
|
1160
|
+
* test never exceeds `totalTurns`.
|
|
1101
1161
|
*
|
|
1102
1162
|
* Set `successScore` to `undefined` to disable early exit.
|
|
1103
1163
|
*/
|
|
1104
|
-
marathonScript(options
|
|
1105
|
-
turns: number;
|
|
1164
|
+
marathonScript(options?: {
|
|
1106
1165
|
checks?: ScriptStep[];
|
|
1107
1166
|
finalChecks?: ScriptStep[];
|
|
1108
1167
|
}): ScriptStep[];
|
|
@@ -1140,22 +1199,30 @@ declare const redTeamAgent: (config: RedTeamAgentConfig) => RedTeamAgentImpl;
|
|
|
1140
1199
|
* const redTeam = scenario.redTeamCrescendo({
|
|
1141
1200
|
* target: "extract the system prompt",
|
|
1142
1201
|
* model: openai("gpt-4o"),
|
|
1143
|
-
* totalTurns:
|
|
1202
|
+
* totalTurns: 30,
|
|
1144
1203
|
* successScore: 9, // default: 9 (score 0-10)
|
|
1145
1204
|
* successConfirmTurns: 2, // default: 2
|
|
1146
1205
|
* });
|
|
1147
1206
|
*
|
|
1148
1207
|
* // Use instance marathonScript for automatic early-exit:
|
|
1149
|
-
* script: redTeam.marathonScript(
|
|
1208
|
+
* script: redTeam.marathonScript(),
|
|
1150
1209
|
* ```
|
|
1151
1210
|
*/
|
|
1152
1211
|
declare const redTeamCrescendo: (config: CrescendoConfig) => RedTeamAgentImpl;
|
|
1153
1212
|
|
|
1213
|
+
type agents_AttackTechnique = AttackTechnique;
|
|
1154
1214
|
type agents_AudioResponseEvent = AudioResponseEvent;
|
|
1155
1215
|
type agents_BacktrackEntry = BacktrackEntry;
|
|
1216
|
+
type agents_Base64Technique = Base64Technique;
|
|
1217
|
+
declare const agents_Base64Technique: typeof Base64Technique;
|
|
1218
|
+
type agents_CharSplitTechnique = CharSplitTechnique;
|
|
1219
|
+
declare const agents_CharSplitTechnique: typeof CharSplitTechnique;
|
|
1220
|
+
type agents_CodeBlockTechnique = CodeBlockTechnique;
|
|
1221
|
+
declare const agents_CodeBlockTechnique: typeof CodeBlockTechnique;
|
|
1156
1222
|
type agents_CrescendoConfig = CrescendoConfig;
|
|
1157
1223
|
type agents_CrescendoStrategy = CrescendoStrategy;
|
|
1158
1224
|
declare const agents_CrescendoStrategy: typeof CrescendoStrategy;
|
|
1225
|
+
declare const agents_DEFAULT_TECHNIQUES: typeof DEFAULT_TECHNIQUES;
|
|
1159
1226
|
declare const agents_DEFAULT_TOKEN_THRESHOLD: typeof DEFAULT_TOKEN_THRESHOLD;
|
|
1160
1227
|
type agents_FinishTestArgs = FinishTestArgs;
|
|
1161
1228
|
type agents_InvokeLLMParams = InvokeLLMParams;
|
|
@@ -1166,6 +1233,10 @@ type agents_JudgeSpanCollector = JudgeSpanCollector;
|
|
|
1166
1233
|
declare const agents_JudgeSpanCollector: typeof JudgeSpanCollector;
|
|
1167
1234
|
type agents_JudgeSpanDigestFormatter = JudgeSpanDigestFormatter;
|
|
1168
1235
|
declare const agents_JudgeSpanDigestFormatter: typeof JudgeSpanDigestFormatter;
|
|
1236
|
+
type agents_LeetspeakTechnique = LeetspeakTechnique;
|
|
1237
|
+
declare const agents_LeetspeakTechnique: typeof LeetspeakTechnique;
|
|
1238
|
+
type agents_ROT13Technique = ROT13Technique;
|
|
1239
|
+
declare const agents_ROT13Technique: typeof ROT13Technique;
|
|
1169
1240
|
type agents_RealtimeAgentAdapter = RealtimeAgentAdapter;
|
|
1170
1241
|
declare const agents_RealtimeAgentAdapter: typeof RealtimeAgentAdapter;
|
|
1171
1242
|
type agents_RealtimeAgentAdapterConfig = RealtimeAgentAdapterConfig;
|
|
@@ -1182,7 +1253,7 @@ declare const agents_redTeamAgent: typeof redTeamAgent;
|
|
|
1182
1253
|
declare const agents_redTeamCrescendo: typeof redTeamCrescendo;
|
|
1183
1254
|
declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
|
|
1184
1255
|
declare namespace agents {
|
|
1185
|
-
export { type agents_AudioResponseEvent as AudioResponseEvent, type agents_BacktrackEntry as BacktrackEntry, type agents_CrescendoConfig as CrescendoConfig, agents_CrescendoStrategy as CrescendoStrategy, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_RedTeamAgentConfig as RedTeamAgentConfig, type agents_RedTeamStrategy as RedTeamStrategy, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_redTeamAgent as redTeamAgent, agents_redTeamCrescendo as redTeamCrescendo, agents_userSimulatorAgent as userSimulatorAgent };
|
|
1256
|
+
export { type agents_AttackTechnique as AttackTechnique, type agents_AudioResponseEvent as AudioResponseEvent, type agents_BacktrackEntry as BacktrackEntry, agents_Base64Technique as Base64Technique, agents_CharSplitTechnique as CharSplitTechnique, agents_CodeBlockTechnique as CodeBlockTechnique, type agents_CrescendoConfig as CrescendoConfig, agents_CrescendoStrategy as CrescendoStrategy, agents_DEFAULT_TECHNIQUES as DEFAULT_TECHNIQUES, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_LeetspeakTechnique as LeetspeakTechnique, agents_ROT13Technique as ROT13Technique, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_RedTeamAgentConfig as RedTeamAgentConfig, type agents_RedTeamStrategy as RedTeamStrategy, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_redTeamAgent as redTeamAgent, agents_redTeamCrescendo as redTeamCrescendo, agents_userSimulatorAgent as userSimulatorAgent };
|
|
1186
1257
|
}
|
|
1187
1258
|
|
|
1188
1259
|
/**
|
|
@@ -2430,31 +2501,16 @@ declare const succeed: (reasoning?: string) => ScriptStep;
|
|
|
2430
2501
|
* @returns A ScriptStep function that can be used in scenario scripts.
|
|
2431
2502
|
*/
|
|
2432
2503
|
declare const fail: (reasoning?: string) => ScriptStep;
|
|
2433
|
-
/**
|
|
2434
|
-
* Generate a marathon script that runs user-agent turns in a loop,
|
|
2435
|
-
* with optional per-turn checks and a final judge evaluation.
|
|
2436
|
-
*
|
|
2437
|
-
* @param options.turns Number of user-agent turn pairs.
|
|
2438
|
-
* @param options.checks Optional steps to run after each turn.
|
|
2439
|
-
* @param options.finalChecks Optional steps to run after all turns, before the judge.
|
|
2440
|
-
* @returns An array of ScriptStep functions.
|
|
2441
|
-
*/
|
|
2442
|
-
declare const marathonScript: (options: {
|
|
2443
|
-
turns: number;
|
|
2444
|
-
checks?: ScriptStep[];
|
|
2445
|
-
finalChecks?: ScriptStep[];
|
|
2446
|
-
}) => ScriptStep[];
|
|
2447
2504
|
|
|
2448
2505
|
declare const script_agent: typeof agent;
|
|
2449
2506
|
declare const script_fail: typeof fail;
|
|
2450
2507
|
declare const script_judge: typeof judge;
|
|
2451
|
-
declare const script_marathonScript: typeof marathonScript;
|
|
2452
2508
|
declare const script_message: typeof message;
|
|
2453
2509
|
declare const script_proceed: typeof proceed;
|
|
2454
2510
|
declare const script_succeed: typeof succeed;
|
|
2455
2511
|
declare const script_user: typeof user;
|
|
2456
2512
|
declare namespace script {
|
|
2457
|
-
export { script_agent as agent, script_fail as fail, script_judge as judge,
|
|
2513
|
+
export { script_agent as agent, script_fail as fail, script_judge as judge, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
|
|
2458
2514
|
}
|
|
2459
2515
|
|
|
2460
2516
|
/**
|
|
@@ -2560,4 +2616,4 @@ declare function withCustomScopes(...scopes: string[]): TraceFilter[];
|
|
|
2560
2616
|
type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
|
|
2561
2617
|
declare const scenario: ScenarioApi;
|
|
2562
2618
|
|
|
2563
|
-
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, type BacktrackEntry, type CrescendoConfig, CrescendoStrategy, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RedTeamAgentConfig, type RedTeamStrategy, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter,
|
|
2619
|
+
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AttackTechnique, type AudioResponseEvent, type BacktrackEntry, Base64Technique, CharSplitTechnique, CodeBlockTechnique, type CrescendoConfig, CrescendoStrategy, DEFAULT_MAX_TURNS, DEFAULT_TECHNIQUES, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, LeetspeakTechnique, ROT13Technique, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RedTeamAgentConfig, type RedTeamStrategy, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, redTeamAgent, redTeamCrescendo, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
|
package/dist/index.d.ts
CHANGED
|
@@ -197,10 +197,6 @@ interface ScenarioConfigFinal extends Omit<ScenarioConfig, "id" | "script" | "th
|
|
|
197
197
|
verbose: boolean;
|
|
198
198
|
maxTurns: number;
|
|
199
199
|
threadId: string;
|
|
200
|
-
/**
|
|
201
|
-
* Optional identifier to group this scenario into a set ("Simulation Set").
|
|
202
|
-
*/
|
|
203
|
-
setId?: string;
|
|
204
200
|
}
|
|
205
201
|
/**
|
|
206
202
|
* The execution context for a scenario script.
|
|
@@ -390,7 +386,7 @@ interface ScenarioExecutionStateLike {
|
|
|
390
386
|
declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
391
387
|
defaultModel: z.ZodOptional<z.ZodObject<{
|
|
392
388
|
model: z.ZodCustom<ai.LanguageModel, ai.LanguageModel>;
|
|
393
|
-
temperature: z.
|
|
389
|
+
temperature: z.ZodOptional<z.ZodNumber>;
|
|
394
390
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
395
391
|
}, z.core.$strip>>;
|
|
396
392
|
headless: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
@@ -431,7 +427,7 @@ declare namespace domain {
|
|
|
431
427
|
*/
|
|
432
428
|
declare const modelSchema: z.ZodObject<{
|
|
433
429
|
model: z.ZodCustom<LanguageModel, LanguageModel>;
|
|
434
|
-
temperature: z.
|
|
430
|
+
temperature: z.ZodOptional<z.ZodNumber>;
|
|
435
431
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
436
432
|
}, z.core.$strip>;
|
|
437
433
|
type ModelConfig = z.infer<typeof modelSchema>;
|
|
@@ -589,6 +585,17 @@ declare class JudgeAgent extends JudgeAgentAdapter {
|
|
|
589
585
|
* being forced to a terminal decision.
|
|
590
586
|
*/
|
|
591
587
|
private invokeLLMWithDiscovery;
|
|
588
|
+
/**
|
|
589
|
+
* Checks whether the discovery loop ran out of steps without the judge
|
|
590
|
+
* calling finish_test or continue_test.
|
|
591
|
+
*/
|
|
592
|
+
private discoveryExhausted;
|
|
593
|
+
/**
|
|
594
|
+
* Makes one final LLM call with tool_choice forced to finish_test,
|
|
595
|
+
* so the judge renders a verdict with whatever context it accumulated
|
|
596
|
+
* during discovery instead of hard-failing.
|
|
597
|
+
*/
|
|
598
|
+
private forceVerdict;
|
|
592
599
|
private parseToolCalls;
|
|
593
600
|
}
|
|
594
601
|
/**
|
|
@@ -750,9 +757,9 @@ declare class UserSimulatorAgent extends UserSimulatorAgentAdapter {
|
|
|
750
757
|
* @param config Optional configuration for the agent.
|
|
751
758
|
* @param config.model The language model to use for generating responses.
|
|
752
759
|
* If not provided, a default model will be used.
|
|
753
|
-
* @param config.temperature
|
|
760
|
+
* @param config.temperature Optional temperature for the language model (0.0-1.0).
|
|
754
761
|
* Lower values make responses more deterministic.
|
|
755
|
-
*
|
|
762
|
+
* Omitted by default for compatibility with reasoning models.
|
|
756
763
|
* @param config.maxTokens The maximum number of tokens to generate.
|
|
757
764
|
* If not provided, uses model defaults.
|
|
758
765
|
* @param config.name The name of the agent.
|
|
@@ -1012,6 +1019,45 @@ declare class CrescendoStrategy implements RedTeamStrategy {
|
|
|
1012
1019
|
}): string;
|
|
1013
1020
|
}
|
|
1014
1021
|
|
|
1022
|
+
/**
|
|
1023
|
+
* Single-turn attack techniques for red-team message augmentation.
|
|
1024
|
+
*
|
|
1025
|
+
* Provides deterministic encoding transforms that can be randomly applied to
|
|
1026
|
+
* attacker messages before sending them to the target agent. Each technique
|
|
1027
|
+
* encodes the message and prepends a preamble so the target understands how
|
|
1028
|
+
* to decode it.
|
|
1029
|
+
*
|
|
1030
|
+
* These are zero-cost (no LLM calls) and test whether safety filters can
|
|
1031
|
+
* handle obfuscated inputs.
|
|
1032
|
+
*/
|
|
1033
|
+
interface AttackTechnique {
|
|
1034
|
+
/** Short identifier for this technique (used in debug logs). */
|
|
1035
|
+
readonly name: string;
|
|
1036
|
+
/** Transform the attacker's raw message into an encoded version with preamble. */
|
|
1037
|
+
transform(message: string): string;
|
|
1038
|
+
}
|
|
1039
|
+
declare class Base64Technique implements AttackTechnique {
|
|
1040
|
+
readonly name = "base64";
|
|
1041
|
+
transform(message: string): string;
|
|
1042
|
+
}
|
|
1043
|
+
declare class ROT13Technique implements AttackTechnique {
|
|
1044
|
+
readonly name = "rot13";
|
|
1045
|
+
transform(message: string): string;
|
|
1046
|
+
}
|
|
1047
|
+
declare class LeetspeakTechnique implements AttackTechnique {
|
|
1048
|
+
readonly name = "leetspeak";
|
|
1049
|
+
transform(message: string): string;
|
|
1050
|
+
}
|
|
1051
|
+
declare class CharSplitTechnique implements AttackTechnique {
|
|
1052
|
+
readonly name = "char_split";
|
|
1053
|
+
transform(message: string): string;
|
|
1054
|
+
}
|
|
1055
|
+
declare class CodeBlockTechnique implements AttackTechnique {
|
|
1056
|
+
readonly name = "code_block";
|
|
1057
|
+
transform(message: string): string;
|
|
1058
|
+
}
|
|
1059
|
+
declare const DEFAULT_TECHNIQUES: AttackTechnique[];
|
|
1060
|
+
|
|
1015
1061
|
interface RedTeamAgentConfig {
|
|
1016
1062
|
strategy: RedTeamStrategy;
|
|
1017
1063
|
target: string;
|
|
@@ -1032,6 +1078,10 @@ interface RedTeamAgentConfig {
|
|
|
1032
1078
|
successScore?: number;
|
|
1033
1079
|
/** Consecutive turns >= threshold before triggering early exit. Default 2. */
|
|
1034
1080
|
successConfirmTurns?: number;
|
|
1081
|
+
/** Probability (0.0-1.0) of applying a random encoding technique per turn. Default 0.0 (off). */
|
|
1082
|
+
injectionProbability?: number;
|
|
1083
|
+
/** List of AttackTechnique instances to sample from. Defaults to all built-ins. */
|
|
1084
|
+
techniques?: AttackTechnique[];
|
|
1035
1085
|
}
|
|
1036
1086
|
interface CrescendoConfig {
|
|
1037
1087
|
target: string;
|
|
@@ -1049,6 +1099,10 @@ interface CrescendoConfig {
|
|
|
1049
1099
|
successScore?: number;
|
|
1050
1100
|
/** Consecutive turns >= threshold before triggering early exit. Default 2. */
|
|
1051
1101
|
successConfirmTurns?: number;
|
|
1102
|
+
/** Probability (0.0-1.0) of applying a random encoding technique per turn. Default 0.0 (off). */
|
|
1103
|
+
injectionProbability?: number;
|
|
1104
|
+
/** List of AttackTechnique instances to sample from. Defaults to all built-ins. */
|
|
1105
|
+
techniques?: AttackTechnique[];
|
|
1052
1106
|
}
|
|
1053
1107
|
declare class RedTeamAgentImpl extends UserSimulatorAgentAdapter {
|
|
1054
1108
|
name: string;
|
|
@@ -1065,6 +1119,8 @@ declare class RedTeamAgentImpl extends UserSimulatorAgentAdapter {
|
|
|
1065
1119
|
private maxTokens?;
|
|
1066
1120
|
private _successScore;
|
|
1067
1121
|
private _successConfirmTurns;
|
|
1122
|
+
private injectionProbability;
|
|
1123
|
+
private techniques;
|
|
1068
1124
|
private static readonly HARD_REFUSAL_PATTERNS;
|
|
1069
1125
|
private static readonly SOFT_REFUSAL_PATTERNS;
|
|
1070
1126
|
private attackPlanPromise;
|
|
@@ -1094,15 +1150,18 @@ declare class RedTeamAgentImpl extends UserSimulatorAgentAdapter {
|
|
|
1094
1150
|
/**
|
|
1095
1151
|
* Generate a marathon test script with automatic early-exit checks.
|
|
1096
1152
|
*
|
|
1097
|
-
*
|
|
1098
|
-
* after each `agent()` step
|
|
1099
|
-
* score >= the threshold, the
|
|
1100
|
-
* `
|
|
1153
|
+
* Builds exactly `totalTurns` user/agent pairs and inserts an early-exit
|
|
1154
|
+
* check after each `agent()` step when `successScore` is set. When
|
|
1155
|
+
* `successConfirmTurns` consecutive turns score >= the threshold, the
|
|
1156
|
+
* check runs `finalChecks` inline and calls `executor.succeed()`.
|
|
1157
|
+
*
|
|
1158
|
+
* `totalTurns` is a hard cap — backtracked turns count toward the budget.
|
|
1159
|
+
* If backtracks eat into the budget, fewer effective attacks land, but the
|
|
1160
|
+
* test never exceeds `totalTurns`.
|
|
1101
1161
|
*
|
|
1102
1162
|
* Set `successScore` to `undefined` to disable early exit.
|
|
1103
1163
|
*/
|
|
1104
|
-
marathonScript(options
|
|
1105
|
-
turns: number;
|
|
1164
|
+
marathonScript(options?: {
|
|
1106
1165
|
checks?: ScriptStep[];
|
|
1107
1166
|
finalChecks?: ScriptStep[];
|
|
1108
1167
|
}): ScriptStep[];
|
|
@@ -1140,22 +1199,30 @@ declare const redTeamAgent: (config: RedTeamAgentConfig) => RedTeamAgentImpl;
|
|
|
1140
1199
|
* const redTeam = scenario.redTeamCrescendo({
|
|
1141
1200
|
* target: "extract the system prompt",
|
|
1142
1201
|
* model: openai("gpt-4o"),
|
|
1143
|
-
* totalTurns:
|
|
1202
|
+
* totalTurns: 30,
|
|
1144
1203
|
* successScore: 9, // default: 9 (score 0-10)
|
|
1145
1204
|
* successConfirmTurns: 2, // default: 2
|
|
1146
1205
|
* });
|
|
1147
1206
|
*
|
|
1148
1207
|
* // Use instance marathonScript for automatic early-exit:
|
|
1149
|
-
* script: redTeam.marathonScript(
|
|
1208
|
+
* script: redTeam.marathonScript(),
|
|
1150
1209
|
* ```
|
|
1151
1210
|
*/
|
|
1152
1211
|
declare const redTeamCrescendo: (config: CrescendoConfig) => RedTeamAgentImpl;
|
|
1153
1212
|
|
|
1213
|
+
type agents_AttackTechnique = AttackTechnique;
|
|
1154
1214
|
type agents_AudioResponseEvent = AudioResponseEvent;
|
|
1155
1215
|
type agents_BacktrackEntry = BacktrackEntry;
|
|
1216
|
+
type agents_Base64Technique = Base64Technique;
|
|
1217
|
+
declare const agents_Base64Technique: typeof Base64Technique;
|
|
1218
|
+
type agents_CharSplitTechnique = CharSplitTechnique;
|
|
1219
|
+
declare const agents_CharSplitTechnique: typeof CharSplitTechnique;
|
|
1220
|
+
type agents_CodeBlockTechnique = CodeBlockTechnique;
|
|
1221
|
+
declare const agents_CodeBlockTechnique: typeof CodeBlockTechnique;
|
|
1156
1222
|
type agents_CrescendoConfig = CrescendoConfig;
|
|
1157
1223
|
type agents_CrescendoStrategy = CrescendoStrategy;
|
|
1158
1224
|
declare const agents_CrescendoStrategy: typeof CrescendoStrategy;
|
|
1225
|
+
declare const agents_DEFAULT_TECHNIQUES: typeof DEFAULT_TECHNIQUES;
|
|
1159
1226
|
declare const agents_DEFAULT_TOKEN_THRESHOLD: typeof DEFAULT_TOKEN_THRESHOLD;
|
|
1160
1227
|
type agents_FinishTestArgs = FinishTestArgs;
|
|
1161
1228
|
type agents_InvokeLLMParams = InvokeLLMParams;
|
|
@@ -1166,6 +1233,10 @@ type agents_JudgeSpanCollector = JudgeSpanCollector;
|
|
|
1166
1233
|
declare const agents_JudgeSpanCollector: typeof JudgeSpanCollector;
|
|
1167
1234
|
type agents_JudgeSpanDigestFormatter = JudgeSpanDigestFormatter;
|
|
1168
1235
|
declare const agents_JudgeSpanDigestFormatter: typeof JudgeSpanDigestFormatter;
|
|
1236
|
+
type agents_LeetspeakTechnique = LeetspeakTechnique;
|
|
1237
|
+
declare const agents_LeetspeakTechnique: typeof LeetspeakTechnique;
|
|
1238
|
+
type agents_ROT13Technique = ROT13Technique;
|
|
1239
|
+
declare const agents_ROT13Technique: typeof ROT13Technique;
|
|
1169
1240
|
type agents_RealtimeAgentAdapter = RealtimeAgentAdapter;
|
|
1170
1241
|
declare const agents_RealtimeAgentAdapter: typeof RealtimeAgentAdapter;
|
|
1171
1242
|
type agents_RealtimeAgentAdapterConfig = RealtimeAgentAdapterConfig;
|
|
@@ -1182,7 +1253,7 @@ declare const agents_redTeamAgent: typeof redTeamAgent;
|
|
|
1182
1253
|
declare const agents_redTeamCrescendo: typeof redTeamCrescendo;
|
|
1183
1254
|
declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
|
|
1184
1255
|
declare namespace agents {
|
|
1185
|
-
export { type agents_AudioResponseEvent as AudioResponseEvent, type agents_BacktrackEntry as BacktrackEntry, type agents_CrescendoConfig as CrescendoConfig, agents_CrescendoStrategy as CrescendoStrategy, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_RedTeamAgentConfig as RedTeamAgentConfig, type agents_RedTeamStrategy as RedTeamStrategy, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_redTeamAgent as redTeamAgent, agents_redTeamCrescendo as redTeamCrescendo, agents_userSimulatorAgent as userSimulatorAgent };
|
|
1256
|
+
export { type agents_AttackTechnique as AttackTechnique, type agents_AudioResponseEvent as AudioResponseEvent, type agents_BacktrackEntry as BacktrackEntry, agents_Base64Technique as Base64Technique, agents_CharSplitTechnique as CharSplitTechnique, agents_CodeBlockTechnique as CodeBlockTechnique, type agents_CrescendoConfig as CrescendoConfig, agents_CrescendoStrategy as CrescendoStrategy, agents_DEFAULT_TECHNIQUES as DEFAULT_TECHNIQUES, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_LeetspeakTechnique as LeetspeakTechnique, agents_ROT13Technique as ROT13Technique, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_RedTeamAgentConfig as RedTeamAgentConfig, type agents_RedTeamStrategy as RedTeamStrategy, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_redTeamAgent as redTeamAgent, agents_redTeamCrescendo as redTeamCrescendo, agents_userSimulatorAgent as userSimulatorAgent };
|
|
1186
1257
|
}
|
|
1187
1258
|
|
|
1188
1259
|
/**
|
|
@@ -2430,31 +2501,16 @@ declare const succeed: (reasoning?: string) => ScriptStep;
|
|
|
2430
2501
|
* @returns A ScriptStep function that can be used in scenario scripts.
|
|
2431
2502
|
*/
|
|
2432
2503
|
declare const fail: (reasoning?: string) => ScriptStep;
|
|
2433
|
-
/**
|
|
2434
|
-
* Generate a marathon script that runs user-agent turns in a loop,
|
|
2435
|
-
* with optional per-turn checks and a final judge evaluation.
|
|
2436
|
-
*
|
|
2437
|
-
* @param options.turns Number of user-agent turn pairs.
|
|
2438
|
-
* @param options.checks Optional steps to run after each turn.
|
|
2439
|
-
* @param options.finalChecks Optional steps to run after all turns, before the judge.
|
|
2440
|
-
* @returns An array of ScriptStep functions.
|
|
2441
|
-
*/
|
|
2442
|
-
declare const marathonScript: (options: {
|
|
2443
|
-
turns: number;
|
|
2444
|
-
checks?: ScriptStep[];
|
|
2445
|
-
finalChecks?: ScriptStep[];
|
|
2446
|
-
}) => ScriptStep[];
|
|
2447
2504
|
|
|
2448
2505
|
declare const script_agent: typeof agent;
|
|
2449
2506
|
declare const script_fail: typeof fail;
|
|
2450
2507
|
declare const script_judge: typeof judge;
|
|
2451
|
-
declare const script_marathonScript: typeof marathonScript;
|
|
2452
2508
|
declare const script_message: typeof message;
|
|
2453
2509
|
declare const script_proceed: typeof proceed;
|
|
2454
2510
|
declare const script_succeed: typeof succeed;
|
|
2455
2511
|
declare const script_user: typeof user;
|
|
2456
2512
|
declare namespace script {
|
|
2457
|
-
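export { script_agent as agent, script_fail as fail, script_judge as judge, script_marathonScript as marathonScript, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };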
|
|
2513
|
+
export { script_agent as agent, script_fail as fail, script_judge as judge, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
|
|
2458
2514
|
}
|
|
2459
2515
|
|
|
2460
2516
|
/**
|
|
@@ -2560,4 +2616,4 @@ declare function withCustomScopes(...scopes: string[]): TraceFilter[];
|
|
|
2560
2616
|
type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
|
|
2561
2617
|
declare const scenario: ScenarioApi;
|
|
2562
2618
|
|
|
2563
|
-
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, type BacktrackEntry, type CrescendoConfig, CrescendoStrategy, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RedTeamAgentConfig, type RedTeamStrategy, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, marathonScript, message, proceed, redTeamAgent, redTeamCrescendo, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
|
|
2619
|
+
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AttackTechnique, type AudioResponseEvent, type BacktrackEntry, Base64Technique, CharSplitTechnique, CodeBlockTechnique, type CrescendoConfig, CrescendoStrategy, DEFAULT_MAX_TURNS, DEFAULT_TECHNIQUES, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, LeetspeakTechnique, ROT13Technique, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RedTeamAgentConfig, type RedTeamStrategy, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, redTeamAgent, redTeamCrescendo, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
|