@langwatch/scenario 0.4.5 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -961,7 +961,174 @@ declare class RealtimeAgentAdapter extends AgentAdapter {
961
961
  offAudioResponse(callback: (event: AudioResponseEvent) => void): void;
962
962
  }
963
963
 
964
+ interface BacktrackEntry {
965
+ turn: number;
966
+ attack: string;
967
+ refusal: string;
968
+ }
969
+ interface RedTeamStrategy {
970
+ buildSystemPrompt(params: {
971
+ target: string;
972
+ currentTurn: number;
973
+ totalTurns: number;
974
+ scenarioDescription: string;
975
+ metapromptPlan: string;
976
+ lastResponseScore?: number;
977
+ adaptationHint?: string;
978
+ backtrackHistory?: BacktrackEntry[];
979
+ }): string;
980
+ getPhaseName(currentTurn: number, totalTurns: number): string;
981
+ }
982
+
983
+ declare class CrescendoStrategy implements RedTeamStrategy {
984
+ getPhase(currentTurn: number, totalTurns: number): {
985
+ name: string;
986
+ instructions: string;
987
+ };
988
+ getPhaseName(currentTurn: number, totalTurns: number): string;
989
+ private phaseTurnRange;
990
+ buildSystemPrompt(params: {
991
+ target: string;
992
+ currentTurn: number;
993
+ totalTurns: number;
994
+ scenarioDescription: string;
995
+ metapromptPlan: string;
996
+ lastResponseScore?: number;
997
+ adaptationHint?: string;
998
+ backtrackHistory?: BacktrackEntry[];
999
+ }): string;
1000
+ }
1001
+
1002
+ interface RedTeamAgentConfig {
1003
+ strategy: RedTeamStrategy;
1004
+ target: string;
1005
+ totalTurns?: number;
1006
+ model?: LanguageModel;
1007
+ metapromptModel?: LanguageModel;
1008
+ metapromptTemplate?: string;
1009
+ attackPlan?: string;
1010
+ /** Score target responses each turn to feed back into the attacker. Default true. */
1011
+ scoreResponses?: boolean;
1012
+ /** Use pattern-based refusal detection to skip LLM scorer on obvious refusals. Default true. */
1013
+ detectRefusals?: boolean;
1014
+ temperature?: number;
1015
+ /** Separate temperature for metaprompt/scoring calls. Defaults to temperature. */
1016
+ metapromptTemperature?: number;
1017
+ maxTokens?: number;
1018
+ /** Score threshold (0-10) for early exit. Default 9. Set to undefined to disable. */
1019
+ successScore?: number;
1020
+ /** Consecutive turns >= threshold before triggering early exit. Default 2. */
1021
+ successConfirmTurns?: number;
1022
+ }
1023
+ interface CrescendoConfig {
1024
+ target: string;
1025
+ totalTurns?: number;
1026
+ model?: LanguageModel;
1027
+ metapromptModel?: LanguageModel;
1028
+ attackPlan?: string;
1029
+ scoreResponses?: boolean;
1030
+ /** Use pattern-based refusal detection to skip LLM scorer on obvious refusals. Default true. */
1031
+ detectRefusals?: boolean;
1032
+ temperature?: number;
1033
+ metapromptTemperature?: number;
1034
+ maxTokens?: number;
1035
+ /** Score threshold (0-10) for early exit. Default 9. Set to undefined to disable. */
1036
+ successScore?: number;
1037
+ /** Consecutive turns >= threshold before triggering early exit. Default 2. */
1038
+ successConfirmTurns?: number;
1039
+ }
1040
+ declare class RedTeamAgentImpl extends UserSimulatorAgentAdapter {
1041
+ name: string;
1042
+ private strategy;
1043
+ private target;
1044
+ private totalTurns;
1045
+ private model?;
1046
+ private metapromptModel?;
1047
+ private metapromptTemplate;
1048
+ private scoreResponses;
1049
+ private detectRefusals;
1050
+ private temperature;
1051
+ private metapromptTemperature;
1052
+ private maxTokens?;
1053
+ private _successScore;
1054
+ private _successConfirmTurns;
1055
+ private static readonly HARD_REFUSAL_PATTERNS;
1056
+ private static readonly SOFT_REFUSAL_PATTERNS;
1057
+ private attackPlanPromise;
1058
+ private attackPlanValue;
1059
+ private turnScores;
1060
+ private static readonly MAX_BACKTRACKS;
1061
+ private backtracksRemaining;
1062
+ private backtrackHistory;
1063
+ constructor(config: RedTeamAgentConfig);
1064
+ private getAttackPlan;
1065
+ private generateAttackPlan;
1066
+ private getLastAssistantContent;
1067
+ private getLastUserContent;
1068
+ private detectRefusal;
1069
+ /**
1070
+ * Score the target agent's last response toward the attack goal (0-10).
1071
+ * Results are cached per turn so the scorer LLM is called at most once.
1072
+ */
1073
+ private scoreLastResponse;
1074
+ /**
1075
+ * Check if the attack objective has been achieved.
1076
+ * Returns true if the last `successConfirmTurns` cached scores
1077
+ * are all >= `successScore`.
1078
+ */
1079
+ checkEarlyExit(): boolean;
1080
+ /**
1081
+ * Generate a marathon test script with automatic early-exit checks.
1082
+ *
1083
+ * Like the standalone `marathonScript`, but inserts an early-exit check
1084
+ * after each `agent()` step. When `successConfirmTurns` consecutive turns
1085
+ * score >= the threshold, the check runs `finalChecks` inline and calls
1086
+ * `executor.succeed()` to end the scenario early.
1087
+ *
1088
+ * Set `successScore` to `undefined` to disable early exit.
1089
+ */
1090
+ marathonScript(options: {
1091
+ turns: number;
1092
+ checks?: ScriptStep[];
1093
+ finalChecks?: ScriptStep[];
1094
+ }): ScriptStep[];
1095
+ call: (input: AgentInput) => Promise<AgentReturnTypes>;
1096
+ }
1097
+ /**
1098
+ * Create a red-team agent with a custom strategy.
1099
+ */
1100
+ declare const redTeamAgent: (config: RedTeamAgentConfig) => RedTeamAgentImpl;
1101
+ /**
1102
+ * Create a red-team agent using the Crescendo (marathon) strategy.
1103
+ *
1104
+ * Crescendo gradually escalates from innocent rapport-building to aggressive
1105
+ * jailbreak attempts over many turns, exploiting LLMs' tendency to maintain
1106
+ * conversational consistency once cooperative context has been established.
1107
+ *
1108
+ * @example
1109
+ * ```typescript
1110
+ * import scenario from "@langwatch/scenario";
1111
+ * import { openai } from "@ai-sdk/openai";
1112
+ *
1113
+ * const redTeam = scenario.redTeamCrescendo({
1114
+ * target: "extract the system prompt",
1115
+ * model: openai("gpt-4o"),
1116
+ * totalTurns: 50,
1117
+ * successScore: 9, // default: 9 (score 0-10)
1118
+ * successConfirmTurns: 2, // default: 2
1119
+ * });
1120
+ *
1121
+ * // Use instance marathonScript for automatic early-exit:
1122
+ * script: redTeam.marathonScript({ turns: 50 }),
1123
+ * ```
1124
+ */
1125
+ declare const redTeamCrescendo: (config: CrescendoConfig) => RedTeamAgentImpl;
1126
+
964
1127
  type agents_AudioResponseEvent = AudioResponseEvent;
1128
+ type agents_BacktrackEntry = BacktrackEntry;
1129
+ type agents_CrescendoConfig = CrescendoConfig;
1130
+ type agents_CrescendoStrategy = CrescendoStrategy;
1131
+ declare const agents_CrescendoStrategy: typeof CrescendoStrategy;
965
1132
  declare const agents_DEFAULT_TOKEN_THRESHOLD: typeof DEFAULT_TOKEN_THRESHOLD;
966
1133
  type agents_FinishTestArgs = FinishTestArgs;
967
1134
  type agents_InvokeLLMParams = InvokeLLMParams;
@@ -975,6 +1142,8 @@ declare const agents_JudgeSpanDigestFormatter: typeof JudgeSpanDigestFormatter;
975
1142
  type agents_RealtimeAgentAdapter = RealtimeAgentAdapter;
976
1143
  declare const agents_RealtimeAgentAdapter: typeof RealtimeAgentAdapter;
977
1144
  type agents_RealtimeAgentAdapterConfig = RealtimeAgentAdapterConfig;
1145
+ type agents_RedTeamAgentConfig = RedTeamAgentConfig;
1146
+ type agents_RedTeamStrategy = RedTeamStrategy;
978
1147
  type agents_TestingAgentConfig = TestingAgentConfig;
979
1148
  declare const agents_estimateTokens: typeof estimateTokens;
980
1149
  declare const agents_expandTrace: typeof expandTrace;
@@ -982,9 +1151,11 @@ declare const agents_grepTrace: typeof grepTrace;
982
1151
  declare const agents_judgeAgent: typeof judgeAgent;
983
1152
  declare const agents_judgeSpanCollector: typeof judgeSpanCollector;
984
1153
  declare const agents_judgeSpanDigestFormatter: typeof judgeSpanDigestFormatter;
1154
+ declare const agents_redTeamAgent: typeof redTeamAgent;
1155
+ declare const agents_redTeamCrescendo: typeof redTeamCrescendo;
985
1156
  declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
986
1157
  declare namespace agents {
987
- export { type agents_AudioResponseEvent as AudioResponseEvent, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_userSimulatorAgent as userSimulatorAgent };
1158
+ export { type agents_AudioResponseEvent as AudioResponseEvent, type agents_BacktrackEntry as BacktrackEntry, type agents_CrescendoConfig as CrescendoConfig, agents_CrescendoStrategy as CrescendoStrategy, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_RedTeamAgentConfig as RedTeamAgentConfig, type agents_RedTeamStrategy as RedTeamStrategy, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_redTeamAgent as redTeamAgent, agents_redTeamCrescendo as redTeamCrescendo, agents_userSimulatorAgent as userSimulatorAgent };
988
1159
  }
989
1160
 
990
1161
  /**
@@ -2197,16 +2368,31 @@ declare const succeed: (reasoning?: string) => ScriptStep;
2197
2368
  * @returns A ScriptStep function that can be used in scenario scripts.
2198
2369
  */
2199
2370
  declare const fail: (reasoning?: string) => ScriptStep;
2371
+ /**
2372
+ * Generate a marathon script that runs user-agent turns in a loop,
2373
+ * with optional per-turn checks and a final judge evaluation.
2374
+ *
2375
+ * @param options.turns Number of user-agent turn pairs.
2376
+ * @param options.checks Optional steps to run after each turn.
2377
+ * @param options.finalChecks Optional steps to run after all turns, before the judge.
2378
+ * @returns An array of ScriptStep functions.
2379
+ */
2380
+ declare const marathonScript: (options: {
2381
+ turns: number;
2382
+ checks?: ScriptStep[];
2383
+ finalChecks?: ScriptStep[];
2384
+ }) => ScriptStep[];
2200
2385
 
2201
2386
  declare const script_agent: typeof agent;
2202
2387
  declare const script_fail: typeof fail;
2203
2388
  declare const script_judge: typeof judge;
2389
+ declare const script_marathonScript: typeof marathonScript;
2204
2390
  declare const script_message: typeof message;
2205
2391
  declare const script_proceed: typeof proceed;
2206
2392
  declare const script_succeed: typeof succeed;
2207
2393
  declare const script_user: typeof user;
2208
2394
  declare namespace script {
2209
- export { script_agent as agent, script_fail as fail, script_judge as judge, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
2395
+ export { script_agent as agent, script_fail as fail, script_judge as judge, script_marathonScript as marathonScript, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
2210
2396
  }
2211
2397
 
2212
2398
  /**
@@ -2312,4 +2498,4 @@ declare function withCustomScopes(...scopes: string[]): TraceFilter[];
2312
2498
  type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
2313
2499
  declare const scenario: ScenarioApi;
2314
2500
 
2315
- export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
2501
+ export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, type BacktrackEntry, type CrescendoConfig, CrescendoStrategy, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RedTeamAgentConfig, type RedTeamStrategy, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, marathonScript, message, proceed, redTeamAgent, redTeamCrescendo, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
package/dist/index.d.ts CHANGED
@@ -961,7 +961,174 @@ declare class RealtimeAgentAdapter extends AgentAdapter {
961
961
  offAudioResponse(callback: (event: AudioResponseEvent) => void): void;
962
962
  }
963
963
 
964
+ interface BacktrackEntry {
965
+ turn: number;
966
+ attack: string;
967
+ refusal: string;
968
+ }
969
+ interface RedTeamStrategy {
970
+ buildSystemPrompt(params: {
971
+ target: string;
972
+ currentTurn: number;
973
+ totalTurns: number;
974
+ scenarioDescription: string;
975
+ metapromptPlan: string;
976
+ lastResponseScore?: number;
977
+ adaptationHint?: string;
978
+ backtrackHistory?: BacktrackEntry[];
979
+ }): string;
980
+ getPhaseName(currentTurn: number, totalTurns: number): string;
981
+ }
982
+
983
+ declare class CrescendoStrategy implements RedTeamStrategy {
984
+ getPhase(currentTurn: number, totalTurns: number): {
985
+ name: string;
986
+ instructions: string;
987
+ };
988
+ getPhaseName(currentTurn: number, totalTurns: number): string;
989
+ private phaseTurnRange;
990
+ buildSystemPrompt(params: {
991
+ target: string;
992
+ currentTurn: number;
993
+ totalTurns: number;
994
+ scenarioDescription: string;
995
+ metapromptPlan: string;
996
+ lastResponseScore?: number;
997
+ adaptationHint?: string;
998
+ backtrackHistory?: BacktrackEntry[];
999
+ }): string;
1000
+ }
1001
+
1002
+ interface RedTeamAgentConfig {
1003
+ strategy: RedTeamStrategy;
1004
+ target: string;
1005
+ totalTurns?: number;
1006
+ model?: LanguageModel;
1007
+ metapromptModel?: LanguageModel;
1008
+ metapromptTemplate?: string;
1009
+ attackPlan?: string;
1010
+ /** Score target responses each turn to feed back into the attacker. Default true. */
1011
+ scoreResponses?: boolean;
1012
+ /** Use pattern-based refusal detection to skip LLM scorer on obvious refusals. Default true. */
1013
+ detectRefusals?: boolean;
1014
+ temperature?: number;
1015
+ /** Separate temperature for metaprompt/scoring calls. Defaults to temperature. */
1016
+ metapromptTemperature?: number;
1017
+ maxTokens?: number;
1018
+ /** Score threshold (0-10) for early exit. Default 9. Set to undefined to disable. */
1019
+ successScore?: number;
1020
+ /** Consecutive turns >= threshold before triggering early exit. Default 2. */
1021
+ successConfirmTurns?: number;
1022
+ }
1023
+ interface CrescendoConfig {
1024
+ target: string;
1025
+ totalTurns?: number;
1026
+ model?: LanguageModel;
1027
+ metapromptModel?: LanguageModel;
1028
+ attackPlan?: string;
1029
+ scoreResponses?: boolean;
1030
+ /** Use pattern-based refusal detection to skip LLM scorer on obvious refusals. Default true. */
1031
+ detectRefusals?: boolean;
1032
+ temperature?: number;
1033
+ metapromptTemperature?: number;
1034
+ maxTokens?: number;
1035
+ /** Score threshold (0-10) for early exit. Default 9. Set to undefined to disable. */
1036
+ successScore?: number;
1037
+ /** Consecutive turns >= threshold before triggering early exit. Default 2. */
1038
+ successConfirmTurns?: number;
1039
+ }
1040
+ declare class RedTeamAgentImpl extends UserSimulatorAgentAdapter {
1041
+ name: string;
1042
+ private strategy;
1043
+ private target;
1044
+ private totalTurns;
1045
+ private model?;
1046
+ private metapromptModel?;
1047
+ private metapromptTemplate;
1048
+ private scoreResponses;
1049
+ private detectRefusals;
1050
+ private temperature;
1051
+ private metapromptTemperature;
1052
+ private maxTokens?;
1053
+ private _successScore;
1054
+ private _successConfirmTurns;
1055
+ private static readonly HARD_REFUSAL_PATTERNS;
1056
+ private static readonly SOFT_REFUSAL_PATTERNS;
1057
+ private attackPlanPromise;
1058
+ private attackPlanValue;
1059
+ private turnScores;
1060
+ private static readonly MAX_BACKTRACKS;
1061
+ private backtracksRemaining;
1062
+ private backtrackHistory;
1063
+ constructor(config: RedTeamAgentConfig);
1064
+ private getAttackPlan;
1065
+ private generateAttackPlan;
1066
+ private getLastAssistantContent;
1067
+ private getLastUserContent;
1068
+ private detectRefusal;
1069
+ /**
1070
+ * Score the target agent's last response toward the attack goal (0-10).
1071
+ * Results are cached per turn so the scorer LLM is called at most once.
1072
+ */
1073
+ private scoreLastResponse;
1074
+ /**
1075
+ * Check if the attack objective has been achieved.
1076
+ * Returns true if the last `successConfirmTurns` cached scores
1077
+ * are all >= `successScore`.
1078
+ */
1079
+ checkEarlyExit(): boolean;
1080
+ /**
1081
+ * Generate a marathon test script with automatic early-exit checks.
1082
+ *
1083
+ * Like the standalone `marathonScript`, but inserts an early-exit check
1084
+ * after each `agent()` step. When `successConfirmTurns` consecutive turns
1085
+ * score >= the threshold, the check runs `finalChecks` inline and calls
1086
+ * `executor.succeed()` to end the scenario early.
1087
+ *
1088
+ * Set `successScore` to `undefined` to disable early exit.
1089
+ */
1090
+ marathonScript(options: {
1091
+ turns: number;
1092
+ checks?: ScriptStep[];
1093
+ finalChecks?: ScriptStep[];
1094
+ }): ScriptStep[];
1095
+ call: (input: AgentInput) => Promise<AgentReturnTypes>;
1096
+ }
1097
+ /**
1098
+ * Create a red-team agent with a custom strategy.
1099
+ */
1100
+ declare const redTeamAgent: (config: RedTeamAgentConfig) => RedTeamAgentImpl;
1101
+ /**
1102
+ * Create a red-team agent using the Crescendo (marathon) strategy.
1103
+ *
1104
+ * Crescendo gradually escalates from innocent rapport-building to aggressive
1105
+ * jailbreak attempts over many turns, exploiting LLMs' tendency to maintain
1106
+ * conversational consistency once cooperative context has been established.
1107
+ *
1108
+ * @example
1109
+ * ```typescript
1110
+ * import scenario from "@langwatch/scenario";
1111
+ * import { openai } from "@ai-sdk/openai";
1112
+ *
1113
+ * const redTeam = scenario.redTeamCrescendo({
1114
+ * target: "extract the system prompt",
1115
+ * model: openai("gpt-4o"),
1116
+ * totalTurns: 50,
1117
+ * successScore: 9, // default: 9 (score 0-10)
1118
+ * successConfirmTurns: 2, // default: 2
1119
+ * });
1120
+ *
1121
+ * // Use instance marathonScript for automatic early-exit:
1122
+ * script: redTeam.marathonScript({ turns: 50 }),
1123
+ * ```
1124
+ */
1125
+ declare const redTeamCrescendo: (config: CrescendoConfig) => RedTeamAgentImpl;
1126
+
964
1127
  type agents_AudioResponseEvent = AudioResponseEvent;
1128
+ type agents_BacktrackEntry = BacktrackEntry;
1129
+ type agents_CrescendoConfig = CrescendoConfig;
1130
+ type agents_CrescendoStrategy = CrescendoStrategy;
1131
+ declare const agents_CrescendoStrategy: typeof CrescendoStrategy;
965
1132
  declare const agents_DEFAULT_TOKEN_THRESHOLD: typeof DEFAULT_TOKEN_THRESHOLD;
966
1133
  type agents_FinishTestArgs = FinishTestArgs;
967
1134
  type agents_InvokeLLMParams = InvokeLLMParams;
@@ -975,6 +1142,8 @@ declare const agents_JudgeSpanDigestFormatter: typeof JudgeSpanDigestFormatter;
975
1142
  type agents_RealtimeAgentAdapter = RealtimeAgentAdapter;
976
1143
  declare const agents_RealtimeAgentAdapter: typeof RealtimeAgentAdapter;
977
1144
  type agents_RealtimeAgentAdapterConfig = RealtimeAgentAdapterConfig;
1145
+ type agents_RedTeamAgentConfig = RedTeamAgentConfig;
1146
+ type agents_RedTeamStrategy = RedTeamStrategy;
978
1147
  type agents_TestingAgentConfig = TestingAgentConfig;
979
1148
  declare const agents_estimateTokens: typeof estimateTokens;
980
1149
  declare const agents_expandTrace: typeof expandTrace;
@@ -982,9 +1151,11 @@ declare const agents_grepTrace: typeof grepTrace;
982
1151
  declare const agents_judgeAgent: typeof judgeAgent;
983
1152
  declare const agents_judgeSpanCollector: typeof judgeSpanCollector;
984
1153
  declare const agents_judgeSpanDigestFormatter: typeof judgeSpanDigestFormatter;
1154
+ declare const agents_redTeamAgent: typeof redTeamAgent;
1155
+ declare const agents_redTeamCrescendo: typeof redTeamCrescendo;
985
1156
  declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
986
1157
  declare namespace agents {
987
- export { type agents_AudioResponseEvent as AudioResponseEvent, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_userSimulatorAgent as userSimulatorAgent };
1158
+ export { type agents_AudioResponseEvent as AudioResponseEvent, type agents_BacktrackEntry as BacktrackEntry, type agents_CrescendoConfig as CrescendoConfig, agents_CrescendoStrategy as CrescendoStrategy, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_RedTeamAgentConfig as RedTeamAgentConfig, type agents_RedTeamStrategy as RedTeamStrategy, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_redTeamAgent as redTeamAgent, agents_redTeamCrescendo as redTeamCrescendo, agents_userSimulatorAgent as userSimulatorAgent };
988
1159
  }
989
1160
 
990
1161
  /**
@@ -2197,16 +2368,31 @@ declare const succeed: (reasoning?: string) => ScriptStep;
2197
2368
  * @returns A ScriptStep function that can be used in scenario scripts.
2198
2369
  */
2199
2370
  declare const fail: (reasoning?: string) => ScriptStep;
2371
+ /**
2372
+ * Generate a marathon script that runs user-agent turns in a loop,
2373
+ * with optional per-turn checks and a final judge evaluation.
2374
+ *
2375
+ * @param options.turns Number of user-agent turn pairs.
2376
+ * @param options.checks Optional steps to run after each turn.
2377
+ * @param options.finalChecks Optional steps to run after all turns, before the judge.
2378
+ * @returns An array of ScriptStep functions.
2379
+ */
2380
+ declare const marathonScript: (options: {
2381
+ turns: number;
2382
+ checks?: ScriptStep[];
2383
+ finalChecks?: ScriptStep[];
2384
+ }) => ScriptStep[];
2200
2385
 
2201
2386
  declare const script_agent: typeof agent;
2202
2387
  declare const script_fail: typeof fail;
2203
2388
  declare const script_judge: typeof judge;
2389
+ declare const script_marathonScript: typeof marathonScript;
2204
2390
  declare const script_message: typeof message;
2205
2391
  declare const script_proceed: typeof proceed;
2206
2392
  declare const script_succeed: typeof succeed;
2207
2393
  declare const script_user: typeof user;
2208
2394
  declare namespace script {
2209
- export { script_agent as agent, script_fail as fail, script_judge as judge, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
2395
+ export { script_agent as agent, script_fail as fail, script_judge as judge, script_marathonScript as marathonScript, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
2210
2396
  }
2211
2397
 
2212
2398
  /**
@@ -2312,4 +2498,4 @@ declare function withCustomScopes(...scopes: string[]): TraceFilter[];
2312
2498
  type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
2313
2499
  declare const scenario: ScenarioApi;
2314
2500
 
2315
- export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
2501
+ export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, type BacktrackEntry, type CrescendoConfig, CrescendoStrategy, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RedTeamAgentConfig, type RedTeamStrategy, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, marathonScript, message, proceed, redTeamAgent, redTeamCrescendo, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };