@langwatch/scenario 0.4.6 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +189 -3
- package/dist/index.d.ts +189 -3
- package/dist/index.js +520 -41
- package/dist/index.mjs +516 -41
- package/package.json +1 -1
package/dist/index.d.mts
CHANGED
|
@@ -961,7 +961,174 @@ declare class RealtimeAgentAdapter extends AgentAdapter {
|
|
|
961
961
|
offAudioResponse(callback: (event: AudioResponseEvent) => void): void;
|
|
962
962
|
}
|
|
963
963
|
|
|
964
|
+
interface BacktrackEntry {
|
|
965
|
+
turn: number;
|
|
966
|
+
attack: string;
|
|
967
|
+
refusal: string;
|
|
968
|
+
}
|
|
969
|
+
interface RedTeamStrategy {
|
|
970
|
+
buildSystemPrompt(params: {
|
|
971
|
+
target: string;
|
|
972
|
+
currentTurn: number;
|
|
973
|
+
totalTurns: number;
|
|
974
|
+
scenarioDescription: string;
|
|
975
|
+
metapromptPlan: string;
|
|
976
|
+
lastResponseScore?: number;
|
|
977
|
+
adaptationHint?: string;
|
|
978
|
+
backtrackHistory?: BacktrackEntry[];
|
|
979
|
+
}): string;
|
|
980
|
+
getPhaseName(currentTurn: number, totalTurns: number): string;
|
|
981
|
+
}
|
|
982
|
+
|
|
983
|
+
declare class CrescendoStrategy implements RedTeamStrategy {
|
|
984
|
+
getPhase(currentTurn: number, totalTurns: number): {
|
|
985
|
+
name: string;
|
|
986
|
+
instructions: string;
|
|
987
|
+
};
|
|
988
|
+
getPhaseName(currentTurn: number, totalTurns: number): string;
|
|
989
|
+
private phaseTurnRange;
|
|
990
|
+
buildSystemPrompt(params: {
|
|
991
|
+
target: string;
|
|
992
|
+
currentTurn: number;
|
|
993
|
+
totalTurns: number;
|
|
994
|
+
scenarioDescription: string;
|
|
995
|
+
metapromptPlan: string;
|
|
996
|
+
lastResponseScore?: number;
|
|
997
|
+
adaptationHint?: string;
|
|
998
|
+
backtrackHistory?: BacktrackEntry[];
|
|
999
|
+
}): string;
|
|
1000
|
+
}
|
|
1001
|
+
|
|
1002
|
+
interface RedTeamAgentConfig {
|
|
1003
|
+
strategy: RedTeamStrategy;
|
|
1004
|
+
target: string;
|
|
1005
|
+
totalTurns?: number;
|
|
1006
|
+
model?: LanguageModel;
|
|
1007
|
+
metapromptModel?: LanguageModel;
|
|
1008
|
+
metapromptTemplate?: string;
|
|
1009
|
+
attackPlan?: string;
|
|
1010
|
+
/** Score target responses each turn to feed back into the attacker. Default true. */
|
|
1011
|
+
scoreResponses?: boolean;
|
|
1012
|
+
/** Use pattern-based refusal detection to skip LLM scorer on obvious refusals. Default true. */
|
|
1013
|
+
detectRefusals?: boolean;
|
|
1014
|
+
temperature?: number;
|
|
1015
|
+
/** Separate temperature for metaprompt/scoring calls. Defaults to temperature. */
|
|
1016
|
+
metapromptTemperature?: number;
|
|
1017
|
+
maxTokens?: number;
|
|
1018
|
+
/** Score threshold (0-10) for early exit. Default 9. Set to undefined to disable. */
|
|
1019
|
+
successScore?: number;
|
|
1020
|
+
/** Consecutive turns >= threshold before triggering early exit. Default 2. */
|
|
1021
|
+
successConfirmTurns?: number;
|
|
1022
|
+
}
|
|
1023
|
+
interface CrescendoConfig {
|
|
1024
|
+
target: string;
|
|
1025
|
+
totalTurns?: number;
|
|
1026
|
+
model?: LanguageModel;
|
|
1027
|
+
metapromptModel?: LanguageModel;
|
|
1028
|
+
attackPlan?: string;
|
|
1029
|
+
scoreResponses?: boolean;
|
|
1030
|
+
/** Use pattern-based refusal detection to skip LLM scorer on obvious refusals. Default true. */
|
|
1031
|
+
detectRefusals?: boolean;
|
|
1032
|
+
temperature?: number;
|
|
1033
|
+
metapromptTemperature?: number;
|
|
1034
|
+
maxTokens?: number;
|
|
1035
|
+
/** Score threshold (0-10) for early exit. Default 9. Set to undefined to disable. */
|
|
1036
|
+
successScore?: number;
|
|
1037
|
+
/** Consecutive turns >= threshold before triggering early exit. Default 2. */
|
|
1038
|
+
successConfirmTurns?: number;
|
|
1039
|
+
}
|
|
1040
|
+
declare class RedTeamAgentImpl extends UserSimulatorAgentAdapter {
|
|
1041
|
+
name: string;
|
|
1042
|
+
private strategy;
|
|
1043
|
+
private target;
|
|
1044
|
+
private totalTurns;
|
|
1045
|
+
private model?;
|
|
1046
|
+
private metapromptModel?;
|
|
1047
|
+
private metapromptTemplate;
|
|
1048
|
+
private scoreResponses;
|
|
1049
|
+
private detectRefusals;
|
|
1050
|
+
private temperature;
|
|
1051
|
+
private metapromptTemperature;
|
|
1052
|
+
private maxTokens?;
|
|
1053
|
+
private _successScore;
|
|
1054
|
+
private _successConfirmTurns;
|
|
1055
|
+
private static readonly HARD_REFUSAL_PATTERNS;
|
|
1056
|
+
private static readonly SOFT_REFUSAL_PATTERNS;
|
|
1057
|
+
private attackPlanPromise;
|
|
1058
|
+
private attackPlanValue;
|
|
1059
|
+
private turnScores;
|
|
1060
|
+
private static readonly MAX_BACKTRACKS;
|
|
1061
|
+
private backtracksRemaining;
|
|
1062
|
+
private backtrackHistory;
|
|
1063
|
+
constructor(config: RedTeamAgentConfig);
|
|
1064
|
+
private getAttackPlan;
|
|
1065
|
+
private generateAttackPlan;
|
|
1066
|
+
private getLastAssistantContent;
|
|
1067
|
+
private getLastUserContent;
|
|
1068
|
+
private detectRefusal;
|
|
1069
|
+
/**
|
|
1070
|
+
* Score the target agent's last response toward the attack goal (0-10).
|
|
1071
|
+
* Results are cached per turn so the scorer LLM is called at most once.
|
|
1072
|
+
*/
|
|
1073
|
+
private scoreLastResponse;
|
|
1074
|
+
/**
|
|
1075
|
+
* Check if the attack objective has been achieved.
|
|
1076
|
+
* Returns true if the last `successConfirmTurns` cached scores
|
|
1077
|
+
* are all >= `successScore`.
|
|
1078
|
+
*/
|
|
1079
|
+
checkEarlyExit(): boolean;
|
|
1080
|
+
/**
|
|
1081
|
+
* Generate a marathon test script with automatic early-exit checks.
|
|
1082
|
+
*
|
|
1083
|
+
* Like the standalone `marathonScript`, but inserts an early-exit check
|
|
1084
|
+
* after each `agent()` step. When `successConfirmTurns` consecutive turns
|
|
1085
|
+
* score >= the threshold, the check runs `finalChecks` inline and calls
|
|
1086
|
+
* `executor.succeed()` to end the scenario early.
|
|
1087
|
+
*
|
|
1088
|
+
* Set `successScore` to `undefined` to disable early exit.
|
|
1089
|
+
*/
|
|
1090
|
+
marathonScript(options: {
|
|
1091
|
+
turns: number;
|
|
1092
|
+
checks?: ScriptStep[];
|
|
1093
|
+
finalChecks?: ScriptStep[];
|
|
1094
|
+
}): ScriptStep[];
|
|
1095
|
+
call: (input: AgentInput) => Promise<AgentReturnTypes>;
|
|
1096
|
+
}
|
|
1097
|
+
/**
|
|
1098
|
+
* Create a red-team agent with a custom strategy.
|
|
1099
|
+
*/
|
|
1100
|
+
declare const redTeamAgent: (config: RedTeamAgentConfig) => RedTeamAgentImpl;
|
|
1101
|
+
/**
|
|
1102
|
+
* Create a red-team agent using the Crescendo (marathon) strategy.
|
|
1103
|
+
*
|
|
1104
|
+
* Crescendo gradually escalates from innocent rapport-building to aggressive
|
|
1105
|
+
* jailbreak attempts over many turns, exploiting LLMs' tendency to maintain
|
|
1106
|
+
* conversational consistency once cooperative context has been established.
|
|
1107
|
+
*
|
|
1108
|
+
* @example
|
|
1109
|
+
* ```typescript
|
|
1110
|
+
* import scenario from "@langwatch/scenario";
|
|
1111
|
+
* import { openai } from "@ai-sdk/openai";
|
|
1112
|
+
*
|
|
1113
|
+
* const redTeam = scenario.redTeamCrescendo({
|
|
1114
|
+
* target: "extract the system prompt",
|
|
1115
|
+
* model: openai("gpt-4o"),
|
|
1116
|
+
* totalTurns: 50,
|
|
1117
|
+
* successScore: 9, // default: 9 (score 0-10)
|
|
1118
|
+
* successConfirmTurns: 2, // default: 2
|
|
1119
|
+
* });
|
|
1120
|
+
*
|
|
1121
|
+
* // Use instance marathonScript for automatic early-exit:
|
|
1122
|
+
* script: redTeam.marathonScript({ turns: 50 }),
|
|
1123
|
+
* ```
|
|
1124
|
+
*/
|
|
1125
|
+
declare const redTeamCrescendo: (config: CrescendoConfig) => RedTeamAgentImpl;
|
|
1126
|
+
|
|
964
1127
|
type agents_AudioResponseEvent = AudioResponseEvent;
|
|
1128
|
+
type agents_BacktrackEntry = BacktrackEntry;
|
|
1129
|
+
type agents_CrescendoConfig = CrescendoConfig;
|
|
1130
|
+
type agents_CrescendoStrategy = CrescendoStrategy;
|
|
1131
|
+
declare const agents_CrescendoStrategy: typeof CrescendoStrategy;
|
|
965
1132
|
declare const agents_DEFAULT_TOKEN_THRESHOLD: typeof DEFAULT_TOKEN_THRESHOLD;
|
|
966
1133
|
type agents_FinishTestArgs = FinishTestArgs;
|
|
967
1134
|
type agents_InvokeLLMParams = InvokeLLMParams;
|
|
@@ -975,6 +1142,8 @@ declare const agents_JudgeSpanDigestFormatter: typeof JudgeSpanDigestFormatter;
|
|
|
975
1142
|
type agents_RealtimeAgentAdapter = RealtimeAgentAdapter;
|
|
976
1143
|
declare const agents_RealtimeAgentAdapter: typeof RealtimeAgentAdapter;
|
|
977
1144
|
type agents_RealtimeAgentAdapterConfig = RealtimeAgentAdapterConfig;
|
|
1145
|
+
type agents_RedTeamAgentConfig = RedTeamAgentConfig;
|
|
1146
|
+
type agents_RedTeamStrategy = RedTeamStrategy;
|
|
978
1147
|
type agents_TestingAgentConfig = TestingAgentConfig;
|
|
979
1148
|
declare const agents_estimateTokens: typeof estimateTokens;
|
|
980
1149
|
declare const agents_expandTrace: typeof expandTrace;
|
|
@@ -982,9 +1151,11 @@ declare const agents_grepTrace: typeof grepTrace;
|
|
|
982
1151
|
declare const agents_judgeAgent: typeof judgeAgent;
|
|
983
1152
|
declare const agents_judgeSpanCollector: typeof judgeSpanCollector;
|
|
984
1153
|
declare const agents_judgeSpanDigestFormatter: typeof judgeSpanDigestFormatter;
|
|
1154
|
+
declare const agents_redTeamAgent: typeof redTeamAgent;
|
|
1155
|
+
declare const agents_redTeamCrescendo: typeof redTeamCrescendo;
|
|
985
1156
|
declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
|
|
986
1157
|
declare namespace agents {
|
|
987
|
-
export { type agents_AudioResponseEvent as AudioResponseEvent, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_userSimulatorAgent as userSimulatorAgent };
|
|
1158
|
+
export { type agents_AudioResponseEvent as AudioResponseEvent, type agents_BacktrackEntry as BacktrackEntry, type agents_CrescendoConfig as CrescendoConfig, agents_CrescendoStrategy as CrescendoStrategy, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_RedTeamAgentConfig as RedTeamAgentConfig, type agents_RedTeamStrategy as RedTeamStrategy, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_redTeamAgent as redTeamAgent, agents_redTeamCrescendo as redTeamCrescendo, agents_userSimulatorAgent as userSimulatorAgent };
|
|
988
1159
|
}
|
|
989
1160
|
|
|
990
1161
|
/**
|
|
@@ -2197,16 +2368,31 @@ declare const succeed: (reasoning?: string) => ScriptStep;
|
|
|
2197
2368
|
* @returns A ScriptStep function that can be used in scenario scripts.
|
|
2198
2369
|
*/
|
|
2199
2370
|
declare const fail: (reasoning?: string) => ScriptStep;
|
|
2371
|
+
/**
|
|
2372
|
+
* Generate a marathon script that runs user-agent turns in a loop,
|
|
2373
|
+
* with optional per-turn checks and a final judge evaluation.
|
|
2374
|
+
*
|
|
2375
|
+
* @param options.turns Number of user-agent turn pairs.
|
|
2376
|
+
* @param options.checks Optional steps to run after each turn.
|
|
2377
|
+
* @param options.finalChecks Optional steps to run after all turns, before the judge.
|
|
2378
|
+
* @returns An array of ScriptStep functions.
|
|
2379
|
+
*/
|
|
2380
|
+
declare const marathonScript: (options: {
|
|
2381
|
+
turns: number;
|
|
2382
|
+
checks?: ScriptStep[];
|
|
2383
|
+
finalChecks?: ScriptStep[];
|
|
2384
|
+
}) => ScriptStep[];
|
|
2200
2385
|
|
|
2201
2386
|
declare const script_agent: typeof agent;
|
|
2202
2387
|
declare const script_fail: typeof fail;
|
|
2203
2388
|
declare const script_judge: typeof judge;
|
|
2389
|
+
declare const script_marathonScript: typeof marathonScript;
|
|
2204
2390
|
declare const script_message: typeof message;
|
|
2205
2391
|
declare const script_proceed: typeof proceed;
|
|
2206
2392
|
declare const script_succeed: typeof succeed;
|
|
2207
2393
|
declare const script_user: typeof user;
|
|
2208
2394
|
declare namespace script {
|
|
2209
|
-
export { script_agent as agent, script_fail as fail, script_judge as judge, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
|
|
2395
|
+
export { script_agent as agent, script_fail as fail, script_judge as judge, script_marathonScript as marathonScript, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
|
|
2210
2396
|
}
|
|
2211
2397
|
|
|
2212
2398
|
/**
|
|
@@ -2312,4 +2498,4 @@ declare function withCustomScopes(...scopes: string[]): TraceFilter[];
|
|
|
2312
2498
|
type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
|
|
2313
2499
|
declare const scenario: ScenarioApi;
|
|
2314
2500
|
|
|
2315
|
-
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
|
|
2501
|
+
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, type BacktrackEntry, type CrescendoConfig, CrescendoStrategy, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RedTeamAgentConfig, type RedTeamStrategy, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, marathonScript, message, proceed, redTeamAgent, redTeamCrescendo, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
|
package/dist/index.d.ts
CHANGED
|
@@ -961,7 +961,174 @@ declare class RealtimeAgentAdapter extends AgentAdapter {
|
|
|
961
961
|
offAudioResponse(callback: (event: AudioResponseEvent) => void): void;
|
|
962
962
|
}
|
|
963
963
|
|
|
964
|
+
interface BacktrackEntry {
|
|
965
|
+
turn: number;
|
|
966
|
+
attack: string;
|
|
967
|
+
refusal: string;
|
|
968
|
+
}
|
|
969
|
+
interface RedTeamStrategy {
|
|
970
|
+
buildSystemPrompt(params: {
|
|
971
|
+
target: string;
|
|
972
|
+
currentTurn: number;
|
|
973
|
+
totalTurns: number;
|
|
974
|
+
scenarioDescription: string;
|
|
975
|
+
metapromptPlan: string;
|
|
976
|
+
lastResponseScore?: number;
|
|
977
|
+
adaptationHint?: string;
|
|
978
|
+
backtrackHistory?: BacktrackEntry[];
|
|
979
|
+
}): string;
|
|
980
|
+
getPhaseName(currentTurn: number, totalTurns: number): string;
|
|
981
|
+
}
|
|
982
|
+
|
|
983
|
+
declare class CrescendoStrategy implements RedTeamStrategy {
|
|
984
|
+
getPhase(currentTurn: number, totalTurns: number): {
|
|
985
|
+
name: string;
|
|
986
|
+
instructions: string;
|
|
987
|
+
};
|
|
988
|
+
getPhaseName(currentTurn: number, totalTurns: number): string;
|
|
989
|
+
private phaseTurnRange;
|
|
990
|
+
buildSystemPrompt(params: {
|
|
991
|
+
target: string;
|
|
992
|
+
currentTurn: number;
|
|
993
|
+
totalTurns: number;
|
|
994
|
+
scenarioDescription: string;
|
|
995
|
+
metapromptPlan: string;
|
|
996
|
+
lastResponseScore?: number;
|
|
997
|
+
adaptationHint?: string;
|
|
998
|
+
backtrackHistory?: BacktrackEntry[];
|
|
999
|
+
}): string;
|
|
1000
|
+
}
|
|
1001
|
+
|
|
1002
|
+
interface RedTeamAgentConfig {
|
|
1003
|
+
strategy: RedTeamStrategy;
|
|
1004
|
+
target: string;
|
|
1005
|
+
totalTurns?: number;
|
|
1006
|
+
model?: LanguageModel;
|
|
1007
|
+
metapromptModel?: LanguageModel;
|
|
1008
|
+
metapromptTemplate?: string;
|
|
1009
|
+
attackPlan?: string;
|
|
1010
|
+
/** Score target responses each turn to feed back into the attacker. Default true. */
|
|
1011
|
+
scoreResponses?: boolean;
|
|
1012
|
+
/** Use pattern-based refusal detection to skip LLM scorer on obvious refusals. Default true. */
|
|
1013
|
+
detectRefusals?: boolean;
|
|
1014
|
+
temperature?: number;
|
|
1015
|
+
/** Separate temperature for metaprompt/scoring calls. Defaults to temperature. */
|
|
1016
|
+
metapromptTemperature?: number;
|
|
1017
|
+
maxTokens?: number;
|
|
1018
|
+
/** Score threshold (0-10) for early exit. Default 9. Set to undefined to disable. */
|
|
1019
|
+
successScore?: number;
|
|
1020
|
+
/** Consecutive turns >= threshold before triggering early exit. Default 2. */
|
|
1021
|
+
successConfirmTurns?: number;
|
|
1022
|
+
}
|
|
1023
|
+
interface CrescendoConfig {
|
|
1024
|
+
target: string;
|
|
1025
|
+
totalTurns?: number;
|
|
1026
|
+
model?: LanguageModel;
|
|
1027
|
+
metapromptModel?: LanguageModel;
|
|
1028
|
+
attackPlan?: string;
|
|
1029
|
+
scoreResponses?: boolean;
|
|
1030
|
+
/** Use pattern-based refusal detection to skip LLM scorer on obvious refusals. Default true. */
|
|
1031
|
+
detectRefusals?: boolean;
|
|
1032
|
+
temperature?: number;
|
|
1033
|
+
metapromptTemperature?: number;
|
|
1034
|
+
maxTokens?: number;
|
|
1035
|
+
/** Score threshold (0-10) for early exit. Default 9. Set to undefined to disable. */
|
|
1036
|
+
successScore?: number;
|
|
1037
|
+
/** Consecutive turns >= threshold before triggering early exit. Default 2. */
|
|
1038
|
+
successConfirmTurns?: number;
|
|
1039
|
+
}
|
|
1040
|
+
declare class RedTeamAgentImpl extends UserSimulatorAgentAdapter {
|
|
1041
|
+
name: string;
|
|
1042
|
+
private strategy;
|
|
1043
|
+
private target;
|
|
1044
|
+
private totalTurns;
|
|
1045
|
+
private model?;
|
|
1046
|
+
private metapromptModel?;
|
|
1047
|
+
private metapromptTemplate;
|
|
1048
|
+
private scoreResponses;
|
|
1049
|
+
private detectRefusals;
|
|
1050
|
+
private temperature;
|
|
1051
|
+
private metapromptTemperature;
|
|
1052
|
+
private maxTokens?;
|
|
1053
|
+
private _successScore;
|
|
1054
|
+
private _successConfirmTurns;
|
|
1055
|
+
private static readonly HARD_REFUSAL_PATTERNS;
|
|
1056
|
+
private static readonly SOFT_REFUSAL_PATTERNS;
|
|
1057
|
+
private attackPlanPromise;
|
|
1058
|
+
private attackPlanValue;
|
|
1059
|
+
private turnScores;
|
|
1060
|
+
private static readonly MAX_BACKTRACKS;
|
|
1061
|
+
private backtracksRemaining;
|
|
1062
|
+
private backtrackHistory;
|
|
1063
|
+
constructor(config: RedTeamAgentConfig);
|
|
1064
|
+
private getAttackPlan;
|
|
1065
|
+
private generateAttackPlan;
|
|
1066
|
+
private getLastAssistantContent;
|
|
1067
|
+
private getLastUserContent;
|
|
1068
|
+
private detectRefusal;
|
|
1069
|
+
/**
|
|
1070
|
+
* Score the target agent's last response toward the attack goal (0-10).
|
|
1071
|
+
* Results are cached per turn so the scorer LLM is called at most once.
|
|
1072
|
+
*/
|
|
1073
|
+
private scoreLastResponse;
|
|
1074
|
+
/**
|
|
1075
|
+
* Check if the attack objective has been achieved.
|
|
1076
|
+
* Returns true if the last `successConfirmTurns` cached scores
|
|
1077
|
+
* are all >= `successScore`.
|
|
1078
|
+
*/
|
|
1079
|
+
checkEarlyExit(): boolean;
|
|
1080
|
+
/**
|
|
1081
|
+
* Generate a marathon test script with automatic early-exit checks.
|
|
1082
|
+
*
|
|
1083
|
+
* Like the standalone `marathonScript`, but inserts an early-exit check
|
|
1084
|
+
* after each `agent()` step. When `successConfirmTurns` consecutive turns
|
|
1085
|
+
* score >= the threshold, the check runs `finalChecks` inline and calls
|
|
1086
|
+
* `executor.succeed()` to end the scenario early.
|
|
1087
|
+
*
|
|
1088
|
+
* Set `successScore` to `undefined` to disable early exit.
|
|
1089
|
+
*/
|
|
1090
|
+
marathonScript(options: {
|
|
1091
|
+
turns: number;
|
|
1092
|
+
checks?: ScriptStep[];
|
|
1093
|
+
finalChecks?: ScriptStep[];
|
|
1094
|
+
}): ScriptStep[];
|
|
1095
|
+
call: (input: AgentInput) => Promise<AgentReturnTypes>;
|
|
1096
|
+
}
|
|
1097
|
+
/**
|
|
1098
|
+
* Create a red-team agent with a custom strategy.
|
|
1099
|
+
*/
|
|
1100
|
+
declare const redTeamAgent: (config: RedTeamAgentConfig) => RedTeamAgentImpl;
|
|
1101
|
+
/**
|
|
1102
|
+
* Create a red-team agent using the Crescendo (marathon) strategy.
|
|
1103
|
+
*
|
|
1104
|
+
* Crescendo gradually escalates from innocent rapport-building to aggressive
|
|
1105
|
+
* jailbreak attempts over many turns, exploiting LLMs' tendency to maintain
|
|
1106
|
+
* conversational consistency once cooperative context has been established.
|
|
1107
|
+
*
|
|
1108
|
+
* @example
|
|
1109
|
+
* ```typescript
|
|
1110
|
+
* import scenario from "@langwatch/scenario";
|
|
1111
|
+
* import { openai } from "@ai-sdk/openai";
|
|
1112
|
+
*
|
|
1113
|
+
* const redTeam = scenario.redTeamCrescendo({
|
|
1114
|
+
* target: "extract the system prompt",
|
|
1115
|
+
* model: openai("gpt-4o"),
|
|
1116
|
+
* totalTurns: 50,
|
|
1117
|
+
* successScore: 9, // default: 9 (score 0-10)
|
|
1118
|
+
* successConfirmTurns: 2, // default: 2
|
|
1119
|
+
* });
|
|
1120
|
+
*
|
|
1121
|
+
* // Use instance marathonScript for automatic early-exit:
|
|
1122
|
+
* script: redTeam.marathonScript({ turns: 50 }),
|
|
1123
|
+
* ```
|
|
1124
|
+
*/
|
|
1125
|
+
declare const redTeamCrescendo: (config: CrescendoConfig) => RedTeamAgentImpl;
|
|
1126
|
+
|
|
964
1127
|
type agents_AudioResponseEvent = AudioResponseEvent;
|
|
1128
|
+
type agents_BacktrackEntry = BacktrackEntry;
|
|
1129
|
+
type agents_CrescendoConfig = CrescendoConfig;
|
|
1130
|
+
type agents_CrescendoStrategy = CrescendoStrategy;
|
|
1131
|
+
declare const agents_CrescendoStrategy: typeof CrescendoStrategy;
|
|
965
1132
|
declare const agents_DEFAULT_TOKEN_THRESHOLD: typeof DEFAULT_TOKEN_THRESHOLD;
|
|
966
1133
|
type agents_FinishTestArgs = FinishTestArgs;
|
|
967
1134
|
type agents_InvokeLLMParams = InvokeLLMParams;
|
|
@@ -975,6 +1142,8 @@ declare const agents_JudgeSpanDigestFormatter: typeof JudgeSpanDigestFormatter;
|
|
|
975
1142
|
type agents_RealtimeAgentAdapter = RealtimeAgentAdapter;
|
|
976
1143
|
declare const agents_RealtimeAgentAdapter: typeof RealtimeAgentAdapter;
|
|
977
1144
|
type agents_RealtimeAgentAdapterConfig = RealtimeAgentAdapterConfig;
|
|
1145
|
+
type agents_RedTeamAgentConfig = RedTeamAgentConfig;
|
|
1146
|
+
type agents_RedTeamStrategy = RedTeamStrategy;
|
|
978
1147
|
type agents_TestingAgentConfig = TestingAgentConfig;
|
|
979
1148
|
declare const agents_estimateTokens: typeof estimateTokens;
|
|
980
1149
|
declare const agents_expandTrace: typeof expandTrace;
|
|
@@ -982,9 +1151,11 @@ declare const agents_grepTrace: typeof grepTrace;
|
|
|
982
1151
|
declare const agents_judgeAgent: typeof judgeAgent;
|
|
983
1152
|
declare const agents_judgeSpanCollector: typeof judgeSpanCollector;
|
|
984
1153
|
declare const agents_judgeSpanDigestFormatter: typeof judgeSpanDigestFormatter;
|
|
1154
|
+
declare const agents_redTeamAgent: typeof redTeamAgent;
|
|
1155
|
+
declare const agents_redTeamCrescendo: typeof redTeamCrescendo;
|
|
985
1156
|
declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
|
|
986
1157
|
declare namespace agents {
|
|
987
|
-
export { type agents_AudioResponseEvent as AudioResponseEvent, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_userSimulatorAgent as userSimulatorAgent };
|
|
1158
|
+
export { type agents_AudioResponseEvent as AudioResponseEvent, type agents_BacktrackEntry as BacktrackEntry, type agents_CrescendoConfig as CrescendoConfig, agents_CrescendoStrategy as CrescendoStrategy, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_RedTeamAgentConfig as RedTeamAgentConfig, type agents_RedTeamStrategy as RedTeamStrategy, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_redTeamAgent as redTeamAgent, agents_redTeamCrescendo as redTeamCrescendo, agents_userSimulatorAgent as userSimulatorAgent };
|
|
988
1159
|
}
|
|
989
1160
|
|
|
990
1161
|
/**
|
|
@@ -2197,16 +2368,31 @@ declare const succeed: (reasoning?: string) => ScriptStep;
|
|
|
2197
2368
|
* @returns A ScriptStep function that can be used in scenario scripts.
|
|
2198
2369
|
*/
|
|
2199
2370
|
declare const fail: (reasoning?: string) => ScriptStep;
|
|
2371
|
+
/**
|
|
2372
|
+
* Generate a marathon script that runs user-agent turns in a loop,
|
|
2373
|
+
* with optional per-turn checks and a final judge evaluation.
|
|
2374
|
+
*
|
|
2375
|
+
* @param options.turns Number of user-agent turn pairs.
|
|
2376
|
+
* @param options.checks Optional steps to run after each turn.
|
|
2377
|
+
* @param options.finalChecks Optional steps to run after all turns, before the judge.
|
|
2378
|
+
* @returns An array of ScriptStep functions.
|
|
2379
|
+
*/
|
|
2380
|
+
declare const marathonScript: (options: {
|
|
2381
|
+
turns: number;
|
|
2382
|
+
checks?: ScriptStep[];
|
|
2383
|
+
finalChecks?: ScriptStep[];
|
|
2384
|
+
}) => ScriptStep[];
|
|
2200
2385
|
|
|
2201
2386
|
declare const script_agent: typeof agent;
|
|
2202
2387
|
declare const script_fail: typeof fail;
|
|
2203
2388
|
declare const script_judge: typeof judge;
|
|
2389
|
+
declare const script_marathonScript: typeof marathonScript;
|
|
2204
2390
|
declare const script_message: typeof message;
|
|
2205
2391
|
declare const script_proceed: typeof proceed;
|
|
2206
2392
|
declare const script_succeed: typeof succeed;
|
|
2207
2393
|
declare const script_user: typeof user;
|
|
2208
2394
|
declare namespace script {
|
|
2209
|
-
export { script_agent as agent, script_fail as fail, script_judge as judge, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
|
|
2395
|
+
export { script_agent as agent, script_fail as fail, script_judge as judge, script_marathonScript as marathonScript, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
|
|
2210
2396
|
}
|
|
2211
2397
|
|
|
2212
2398
|
/**
|
|
@@ -2312,4 +2498,4 @@ declare function withCustomScopes(...scopes: string[]): TraceFilter[];
|
|
|
2312
2498
|
type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
|
|
2313
2499
|
declare const scenario: ScenarioApi;
|
|
2314
2500
|
|
|
2315
|
-
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
|
|
2501
|
+
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, type BacktrackEntry, type CrescendoConfig, CrescendoStrategy, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RedTeamAgentConfig, type RedTeamStrategy, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, marathonScript, message, proceed, redTeamAgent, redTeamCrescendo, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
|