npm - @langwatch/scenario - Versions diffs - 0.4.6 → 0.4.8 - Mend

@langwatch/scenario 0.4.6 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/index.d.mts CHANGED Viewed

@@ -366,6 +366,18 @@ interface ScenarioExecutionStateLike {
      * @returns True if the tool call exists, false otherwise.
      */
     hasToolCall(toolName: string): boolean;
+    /**
+     * Remove all messages from position `index` onward.
+     *
+     * Truncates the message list and cleans up any pending message queues
+     * so no agent sees stale messages.
+     *
+     * @param index - Truncate point. Must be non-negative; values greater than
+     *   `messages.length` are clamped. Messages at positions >= index are removed.
+     * @returns The removed messages (empty array if nothing to remove).
+     * @throws {RangeError} If `index` is negative.
+     */
+    rollbackMessagesTo(index: number): ModelMessage[];
 }
 /**
@@ -961,7 +973,189 @@ declare class RealtimeAgentAdapter extends AgentAdapter {
     offAudioResponse(callback: (event: AudioResponseEvent) => void): void;
 }
+interface BacktrackEntry {
+    turn: number;
+    attack: string;
+    refusal: string;
+}
+interface RedTeamStrategy {
+    /**
+     * Build a turn-aware system prompt for the attacker.
+     *
+     * Score feedback, adaptation hints, and backtrack markers are
+     * communicated via the attacker's private conversation history
+     * (H_attacker) as system messages — not embedded in this prompt.
+     */
+    buildSystemPrompt(params: {
+        target: string;
+        currentTurn: number;
+        totalTurns: number;
+        scenarioDescription: string;
+        metapromptPlan: string;
+    }): string;
+    getPhaseName(currentTurn: number, totalTurns: number): string;
+}
+declare class CrescendoStrategy implements RedTeamStrategy {
+    getPhase(currentTurn: number, totalTurns: number): {
+        name: string;
+        instructions: string;
+    };
+    getPhaseName(currentTurn: number, totalTurns: number): string;
+    private phaseTurnRange;
+    buildSystemPrompt(params: {
+        target: string;
+        currentTurn: number;
+        totalTurns: number;
+        scenarioDescription: string;
+        metapromptPlan: string;
+    }): string;
+}
+interface RedTeamAgentConfig {
+    strategy: RedTeamStrategy;
+    target: string;
+    totalTurns?: number;
+    model?: LanguageModel;
+    metapromptModel?: LanguageModel;
+    metapromptTemplate?: string;
+    attackPlan?: string;
+    /** Score target responses each turn to feed back into the attacker. Default true. */
+    scoreResponses?: boolean;
+    /** Use pattern-based refusal detection to skip LLM scorer on obvious refusals. Default true. */
+    detectRefusals?: boolean;
+    temperature?: number;
+    /** Separate temperature for metaprompt/scoring calls. Defaults to temperature. */
+    metapromptTemperature?: number;
+    maxTokens?: number;
+    /** Score threshold (0-10) for early exit. Default 9. Set to undefined to disable. */
+    successScore?: number;
+    /** Consecutive turns >= threshold before triggering early exit. Default 2. */
+    successConfirmTurns?: number;
+}
+interface CrescendoConfig {
+    target: string;
+    totalTurns?: number;
+    model?: LanguageModel;
+    metapromptModel?: LanguageModel;
+    attackPlan?: string;
+    scoreResponses?: boolean;
+    /** Use pattern-based refusal detection to skip LLM scorer on obvious refusals. Default true. */
+    detectRefusals?: boolean;
+    temperature?: number;
+    metapromptTemperature?: number;
+    maxTokens?: number;
+    /** Score threshold (0-10) for early exit. Default 9. Set to undefined to disable. */
+    successScore?: number;
+    /** Consecutive turns >= threshold before triggering early exit. Default 2. */
+    successConfirmTurns?: number;
+}
+declare class RedTeamAgentImpl extends UserSimulatorAgentAdapter {
+    name: string;
+    private strategy;
+    private target;
+    private totalTurns;
+    private model?;
+    private metapromptModel?;
+    private metapromptTemplate;
+    private scoreResponses;
+    private detectRefusals;
+    private temperature;
+    private metapromptTemperature;
+    private maxTokens?;
+    private _successScore;
+    private _successConfirmTurns;
+    private static readonly HARD_REFUSAL_PATTERNS;
+    private static readonly SOFT_REFUSAL_PATTERNS;
+    private attackPlanPromise;
+    private attackPlanValue;
+    private turnScores;
+    private static readonly MAX_BACKTRACKS;
+    private backtracksRemaining;
+    private backtrackHistory;
+    private attackerHistory;
+    constructor(config: RedTeamAgentConfig);
+    private getAttackPlan;
+    private generateAttackPlan;
+    private getLastAssistantContent;
+    private getLastUserContent;
+    private detectRefusal;
+    /**
+     * Score the target agent's last response toward the attack goal (0-10).
+     * Results are cached per turn so the scorer LLM is called at most once.
+     */
+    private scoreLastResponse;
+    /**
+     * Check if the attack objective has been achieved.
+     * Returns true if the last `successConfirmTurns` cached scores
+     * are all >= `successScore`.
+     */
+    checkEarlyExit(): boolean;
+    /**
+     * Generate a marathon test script with automatic early-exit checks.
+     *
+     * Like the standalone `marathonScript`, but inserts an early-exit check
+     * after each `agent()` step. When `successConfirmTurns` consecutive turns
+     * score >= the threshold, the check runs `finalChecks` inline and calls
+     * `executor.succeed()` to end the scenario early.
+     *
+     * Set `successScore` to `undefined` to disable early exit.
+     */
+    marathonScript(options: {
+        turns: number;
+        checks?: ScriptStep[];
+        finalChecks?: ScriptStep[];
+    }): ScriptStep[];
+    /**
+     * Call the attacker LLM directly with the attacker's private history.
+     * Uses `attackerHistory` (H_attacker) which contains the system prompt,
+     * previous attack messages, target response summaries, score annotations,
+     * and backtrack markers — none of which leak to the target.
+     */
+    private callAttackerLLM;
+    /**
+     * Reset per-run state for safe reuse across scenario.run() calls.
+     * Called at the start of turn 1. Does NOT reset attackPlanValue
+     * (expensive to regenerate and target-specific, not run-specific).
+     */
+    private resetRunState;
+    call: (input: AgentInput) => Promise<AgentReturnTypes>;
+}
+/**
+ * Create a red-team agent with a custom strategy.
+ */
+declare const redTeamAgent: (config: RedTeamAgentConfig) => RedTeamAgentImpl;
+/**
+ * Create a red-team agent using the Crescendo (marathon) strategy.
+ *
+ * Crescendo gradually escalates from innocent rapport-building to aggressive
+ * jailbreak attempts over many turns, exploiting LLMs' tendency to maintain
+ * conversational consistency once cooperative context has been established.
+ *
+ * @example
+ * ```typescript
+ * import scenario from "@langwatch/scenario";
+ * import { openai } from "@ai-sdk/openai";
+ *
+ * const redTeam = scenario.redTeamCrescendo({
+ *   target: "extract the system prompt",
+ *   model: openai("gpt-4o"),
+ *   totalTurns: 50,
+ *   successScore: 9,          // default: 9 (score 0-10)
+ *   successConfirmTurns: 2,   // default: 2
+ * });
+ *
+ * // Use instance marathonScript for automatic early-exit:
+ * script: redTeam.marathonScript({ turns: 50 }),
+ * ```
+ */
+declare const redTeamCrescendo: (config: CrescendoConfig) => RedTeamAgentImpl;
 type agents_AudioResponseEvent = AudioResponseEvent;
+type agents_BacktrackEntry = BacktrackEntry;
+type agents_CrescendoConfig = CrescendoConfig;
+type agents_CrescendoStrategy = CrescendoStrategy;
+declare const agents_CrescendoStrategy: typeof CrescendoStrategy;
 declare const agents_DEFAULT_TOKEN_THRESHOLD: typeof DEFAULT_TOKEN_THRESHOLD;
 type agents_FinishTestArgs = FinishTestArgs;
 type agents_InvokeLLMParams = InvokeLLMParams;
@@ -975,6 +1169,8 @@ declare const agents_JudgeSpanDigestFormatter: typeof JudgeSpanDigestFormatter;
 type agents_RealtimeAgentAdapter = RealtimeAgentAdapter;
 declare const agents_RealtimeAgentAdapter: typeof RealtimeAgentAdapter;
 type agents_RealtimeAgentAdapterConfig = RealtimeAgentAdapterConfig;
+type agents_RedTeamAgentConfig = RedTeamAgentConfig;
+type agents_RedTeamStrategy = RedTeamStrategy;
 type agents_TestingAgentConfig = TestingAgentConfig;
 declare const agents_estimateTokens: typeof estimateTokens;
 declare const agents_expandTrace: typeof expandTrace;
@@ -982,9 +1178,11 @@ declare const agents_grepTrace: typeof grepTrace;
 declare const agents_judgeAgent: typeof judgeAgent;
 declare const agents_judgeSpanCollector: typeof judgeSpanCollector;
 declare const agents_judgeSpanDigestFormatter: typeof judgeSpanDigestFormatter;
+declare const agents_redTeamAgent: typeof redTeamAgent;
+declare const agents_redTeamCrescendo: typeof redTeamCrescendo;
 declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
 declare namespace agents {
-  export { type agents_AudioResponseEvent as AudioResponseEvent, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_userSimulatorAgent as userSimulatorAgent };
+  export { type agents_AudioResponseEvent as AudioResponseEvent, type agents_BacktrackEntry as BacktrackEntry, type agents_CrescendoConfig as CrescendoConfig, agents_CrescendoStrategy as CrescendoStrategy, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_RedTeamAgentConfig as RedTeamAgentConfig, type agents_RedTeamStrategy as RedTeamStrategy, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_redTeamAgent as redTeamAgent, agents_redTeamCrescendo as redTeamCrescendo, agents_userSimulatorAgent as userSimulatorAgent };
 }
 /**
@@ -1482,14 +1680,19 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
     private batchRunId;
     /** The run ID for the current execution */
     private scenarioRunId?;
+    /** Pre-assigned run ID (provided externally, e.g. by the platform) */
+    private preAssignedRunId?;
     /**
      * Creates a new ScenarioExecution instance.
      *
      * @param config - The scenario configuration containing agents, settings, and metadata
      * @param script - The ordered sequence of script steps that define the test flow
      * @param batchRunId - Batch run ID for grouping scenario runs
+     * @param runId - Optional pre-assigned run ID. When provided, the execution uses this
+     *   ID instead of generating a new one. This prevents duplicate entries when the
+     *   platform pre-creates placeholder rows with a known ID.
      */
-    constructor(config: ScenarioConfig, script: ScriptStep[], batchRunId: string);
+    constructor(config: ScenarioConfig, script: ScriptStep[], batchRunId: string, runId?: string);
     /**
      * Gets the complete conversation history as an array of messages.
      *
@@ -1979,6 +2182,7 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
     private _messages;
     private _currentTurn;
     private _threadId;
+    private _onRollback?;
     /** Event stream for message additions */
     private eventSubject;
     readonly events$: Observable<StateChangeEvent>;
@@ -2014,6 +2218,28 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
         traceId?: string;
     };
     hasToolCall(toolName: string): boolean;
+    /**
+     * Register a callback that fires when messages are rolled back.
+     * The executor uses this to clean up its pending message queues.
+     */
+    setOnRollback(handler: (removedSet: Set<object>) => void): void;
+    /**
+     * Remove all messages from position `index` onward.
+     *
+     * Truncates the internal message list and notifies the executor
+     * (via the registered rollback handler) to clean pending queues.
+     *
+     * **Note:** This method is safe to call only during an agent's `call()`
+     * invocation.  The executor runs agents sequentially, so no other agent
+     * can observe stale `newMessages` references.  Calling this from outside
+     * that flow may leave already-delivered `newMessages` out of sync.
+     *
+     * @param index - Truncate point. Must be non-negative; values greater than
+     *   `messages.length` are clamped. Messages at positions >= index are removed.
+     * @returns The removed messages (empty array if nothing to remove).
+     * @throws {RangeError} If `index` is negative.
+     */
+    rollbackMessagesTo(index: number): ModelMessage[];
 }
 type execution_ScenarioExecution = ScenarioExecution;
@@ -2045,6 +2271,13 @@ interface RunOptions {
     langwatch?: LangwatchConfig;
     /** Batch run ID for grouping scenario runs. Overrides SCENARIO_BATCH_RUN_ID env var. */
     batchRunId?: string;
+    /**
+     * Pre-assigned run ID for the scenario execution.
+     * When provided, the SDK uses this ID instead of generating a new one.
+     *
+     * @internal Platform use only — not part of the public API.
+     */
+    runId?: string;
 }
 /**
  * High-level interface for running a scenario test.
@@ -2197,16 +2430,31 @@ declare const succeed: (reasoning?: string) => ScriptStep;
  * @returns A ScriptStep function that can be used in scenario scripts.
  */
 declare const fail: (reasoning?: string) => ScriptStep;
+/**
+ * Generate a marathon script that runs user-agent turns in a loop,
+ * with optional per-turn checks and a final judge evaluation.
+ *
+ * @param options.turns Number of user-agent turn pairs.
+ * @param options.checks Optional steps to run after each turn.
+ * @param options.finalChecks Optional steps to run after all turns, before the judge.
+ * @returns An array of ScriptStep functions.
+ */
+declare const marathonScript: (options: {
+    turns: number;
+    checks?: ScriptStep[];
+    finalChecks?: ScriptStep[];
+}) => ScriptStep[];
 declare const script_agent: typeof agent;
 declare const script_fail: typeof fail;
 declare const script_judge: typeof judge;
+declare const script_marathonScript: typeof marathonScript;
 declare const script_message: typeof message;
 declare const script_proceed: typeof proceed;
 declare const script_succeed: typeof succeed;
 declare const script_user: typeof user;
 declare namespace script {
-  export { script_agent as agent, script_fail as fail, script_judge as judge, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
+  export { script_agent as agent, script_fail as fail, script_judge as judge, script_marathonScript as marathonScript, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
 }
 /**
@@ -2312,4 +2560,4 @@ declare function withCustomScopes(...scopes: string[]): TraceFilter[];
 type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
 declare const scenario: ScenarioApi;
-export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
+export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, type BacktrackEntry, type CrescendoConfig, CrescendoStrategy, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RedTeamAgentConfig, type RedTeamStrategy, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, marathonScript, message, proceed, redTeamAgent, redTeamCrescendo, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };

package/dist/index.d.ts CHANGED Viewed

@@ -366,6 +366,18 @@ interface ScenarioExecutionStateLike {
      * @returns True if the tool call exists, false otherwise.
      */
     hasToolCall(toolName: string): boolean;
+    /**
+     * Remove all messages from position `index` onward.
+     *
+     * Truncates the message list and cleans up any pending message queues
+     * so no agent sees stale messages.
+     *
+     * @param index - Truncate point. Must be non-negative; values greater than
+     *   `messages.length` are clamped. Messages at positions >= index are removed.
+     * @returns The removed messages (empty array if nothing to remove).
+     * @throws {RangeError} If `index` is negative.
+     */
+    rollbackMessagesTo(index: number): ModelMessage[];
 }
 /**
@@ -961,7 +973,189 @@ declare class RealtimeAgentAdapter extends AgentAdapter {
     offAudioResponse(callback: (event: AudioResponseEvent) => void): void;
 }
+interface BacktrackEntry {
+    turn: number;
+    attack: string;
+    refusal: string;
+}
+interface RedTeamStrategy {
+    /**
+     * Build a turn-aware system prompt for the attacker.
+     *
+     * Score feedback, adaptation hints, and backtrack markers are
+     * communicated via the attacker's private conversation history
+     * (H_attacker) as system messages — not embedded in this prompt.
+     */
+    buildSystemPrompt(params: {
+        target: string;
+        currentTurn: number;
+        totalTurns: number;
+        scenarioDescription: string;
+        metapromptPlan: string;
+    }): string;
+    getPhaseName(currentTurn: number, totalTurns: number): string;
+}
+declare class CrescendoStrategy implements RedTeamStrategy {
+    getPhase(currentTurn: number, totalTurns: number): {
+        name: string;
+        instructions: string;
+    };
+    getPhaseName(currentTurn: number, totalTurns: number): string;
+    private phaseTurnRange;
+    buildSystemPrompt(params: {
+        target: string;
+        currentTurn: number;
+        totalTurns: number;
+        scenarioDescription: string;
+        metapromptPlan: string;
+    }): string;
+}
+interface RedTeamAgentConfig {
+    strategy: RedTeamStrategy;
+    target: string;
+    totalTurns?: number;
+    model?: LanguageModel;
+    metapromptModel?: LanguageModel;
+    metapromptTemplate?: string;
+    attackPlan?: string;
+    /** Score target responses each turn to feed back into the attacker. Default true. */
+    scoreResponses?: boolean;
+    /** Use pattern-based refusal detection to skip LLM scorer on obvious refusals. Default true. */
+    detectRefusals?: boolean;
+    temperature?: number;
+    /** Separate temperature for metaprompt/scoring calls. Defaults to temperature. */
+    metapromptTemperature?: number;
+    maxTokens?: number;
+    /** Score threshold (0-10) for early exit. Default 9. Set to undefined to disable. */
+    successScore?: number;
+    /** Consecutive turns >= threshold before triggering early exit. Default 2. */
+    successConfirmTurns?: number;
+}
+interface CrescendoConfig {
+    target: string;
+    totalTurns?: number;
+    model?: LanguageModel;
+    metapromptModel?: LanguageModel;
+    attackPlan?: string;
+    scoreResponses?: boolean;
+    /** Use pattern-based refusal detection to skip LLM scorer on obvious refusals. Default true. */
+    detectRefusals?: boolean;
+    temperature?: number;
+    metapromptTemperature?: number;
+    maxTokens?: number;
+    /** Score threshold (0-10) for early exit. Default 9. Set to undefined to disable. */
+    successScore?: number;
+    /** Consecutive turns >= threshold before triggering early exit. Default 2. */
+    successConfirmTurns?: number;
+}
+declare class RedTeamAgentImpl extends UserSimulatorAgentAdapter {
+    name: string;
+    private strategy;
+    private target;
+    private totalTurns;
+    private model?;
+    private metapromptModel?;
+    private metapromptTemplate;
+    private scoreResponses;
+    private detectRefusals;
+    private temperature;
+    private metapromptTemperature;
+    private maxTokens?;
+    private _successScore;
+    private _successConfirmTurns;
+    private static readonly HARD_REFUSAL_PATTERNS;
+    private static readonly SOFT_REFUSAL_PATTERNS;
+    private attackPlanPromise;
+    private attackPlanValue;
+    private turnScores;
+    private static readonly MAX_BACKTRACKS;
+    private backtracksRemaining;
+    private backtrackHistory;
+    private attackerHistory;
+    constructor(config: RedTeamAgentConfig);
+    private getAttackPlan;
+    private generateAttackPlan;
+    private getLastAssistantContent;
+    private getLastUserContent;
+    private detectRefusal;
+    /**
+     * Score the target agent's last response toward the attack goal (0-10).
+     * Results are cached per turn so the scorer LLM is called at most once.
+     */
+    private scoreLastResponse;
+    /**
+     * Check if the attack objective has been achieved.
+     * Returns true if the last `successConfirmTurns` cached scores
+     * are all >= `successScore`.
+     */
+    checkEarlyExit(): boolean;
+    /**
+     * Generate a marathon test script with automatic early-exit checks.
+     *
+     * Like the standalone `marathonScript`, but inserts an early-exit check
+     * after each `agent()` step. When `successConfirmTurns` consecutive turns
+     * score >= the threshold, the check runs `finalChecks` inline and calls
+     * `executor.succeed()` to end the scenario early.
+     *
+     * Set `successScore` to `undefined` to disable early exit.
+     */
+    marathonScript(options: {
+        turns: number;
+        checks?: ScriptStep[];
+        finalChecks?: ScriptStep[];
+    }): ScriptStep[];
+    /**
+     * Call the attacker LLM directly with the attacker's private history.
+     * Uses `attackerHistory` (H_attacker) which contains the system prompt,
+     * previous attack messages, target response summaries, score annotations,
+     * and backtrack markers — none of which leak to the target.
+     */
+    private callAttackerLLM;
+    /**
+     * Reset per-run state for safe reuse across scenario.run() calls.
+     * Called at the start of turn 1. Does NOT reset attackPlanValue
+     * (expensive to regenerate and target-specific, not run-specific).
+     */
+    private resetRunState;
+    call: (input: AgentInput) => Promise<AgentReturnTypes>;
+}
+/**
+ * Create a red-team agent with a custom strategy.
+ */
+declare const redTeamAgent: (config: RedTeamAgentConfig) => RedTeamAgentImpl;
+/**
+ * Create a red-team agent using the Crescendo (marathon) strategy.
+ *
+ * Crescendo gradually escalates from innocent rapport-building to aggressive
+ * jailbreak attempts over many turns, exploiting LLMs' tendency to maintain
+ * conversational consistency once cooperative context has been established.
+ *
+ * @example
+ * ```typescript
+ * import scenario from "@langwatch/scenario";
+ * import { openai } from "@ai-sdk/openai";
+ *
+ * const redTeam = scenario.redTeamCrescendo({
+ *   target: "extract the system prompt",
+ *   model: openai("gpt-4o"),
+ *   totalTurns: 50,
+ *   successScore: 9,          // default: 9 (score 0-10)
+ *   successConfirmTurns: 2,   // default: 2
+ * });
+ *
+ * // Use instance marathonScript for automatic early-exit:
+ * script: redTeam.marathonScript({ turns: 50 }),
+ * ```
+ */
+declare const redTeamCrescendo: (config: CrescendoConfig) => RedTeamAgentImpl;
 type agents_AudioResponseEvent = AudioResponseEvent;
+type agents_BacktrackEntry = BacktrackEntry;
+type agents_CrescendoConfig = CrescendoConfig;
+type agents_CrescendoStrategy = CrescendoStrategy;
+declare const agents_CrescendoStrategy: typeof CrescendoStrategy;
 declare const agents_DEFAULT_TOKEN_THRESHOLD: typeof DEFAULT_TOKEN_THRESHOLD;
 type agents_FinishTestArgs = FinishTestArgs;
 type agents_InvokeLLMParams = InvokeLLMParams;
@@ -975,6 +1169,8 @@ declare const agents_JudgeSpanDigestFormatter: typeof JudgeSpanDigestFormatter;
 type agents_RealtimeAgentAdapter = RealtimeAgentAdapter;
 declare const agents_RealtimeAgentAdapter: typeof RealtimeAgentAdapter;
 type agents_RealtimeAgentAdapterConfig = RealtimeAgentAdapterConfig;
+type agents_RedTeamAgentConfig = RedTeamAgentConfig;
+type agents_RedTeamStrategy = RedTeamStrategy;
 type agents_TestingAgentConfig = TestingAgentConfig;
 declare const agents_estimateTokens: typeof estimateTokens;
 declare const agents_expandTrace: typeof expandTrace;
@@ -982,9 +1178,11 @@ declare const agents_grepTrace: typeof grepTrace;
 declare const agents_judgeAgent: typeof judgeAgent;
 declare const agents_judgeSpanCollector: typeof judgeSpanCollector;
 declare const agents_judgeSpanDigestFormatter: typeof judgeSpanDigestFormatter;
+declare const agents_redTeamAgent: typeof redTeamAgent;
+declare const agents_redTeamCrescendo: typeof redTeamCrescendo;
 declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
 declare namespace agents {
-  export { type agents_AudioResponseEvent as AudioResponseEvent, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_userSimulatorAgent as userSimulatorAgent };
+  export { type agents_AudioResponseEvent as AudioResponseEvent, type agents_BacktrackEntry as BacktrackEntry, type agents_CrescendoConfig as CrescendoConfig, agents_CrescendoStrategy as CrescendoStrategy, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_RedTeamAgentConfig as RedTeamAgentConfig, type agents_RedTeamStrategy as RedTeamStrategy, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_redTeamAgent as redTeamAgent, agents_redTeamCrescendo as redTeamCrescendo, agents_userSimulatorAgent as userSimulatorAgent };
 }
 /**
@@ -1482,14 +1680,19 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
     private batchRunId;
     /** The run ID for the current execution */
     private scenarioRunId?;
+    /** Pre-assigned run ID (provided externally, e.g. by the platform) */
+    private preAssignedRunId?;
     /**
      * Creates a new ScenarioExecution instance.
      *
      * @param config - The scenario configuration containing agents, settings, and metadata
      * @param script - The ordered sequence of script steps that define the test flow
      * @param batchRunId - Batch run ID for grouping scenario runs
+     * @param runId - Optional pre-assigned run ID. When provided, the execution uses this
+     *   ID instead of generating a new one. This prevents duplicate entries when the
+     *   platform pre-creates placeholder rows with a known ID.
      */
-    constructor(config: ScenarioConfig, script: ScriptStep[], batchRunId: string);
+    constructor(config: ScenarioConfig, script: ScriptStep[], batchRunId: string, runId?: string);
     /**
      * Gets the complete conversation history as an array of messages.
      *
@@ -1979,6 +2182,7 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
     private _messages;
     private _currentTurn;
     private _threadId;
+    private _onRollback?;
     /** Event stream for message additions */
     private eventSubject;
     readonly events$: Observable<StateChangeEvent>;
@@ -2014,6 +2218,28 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
         traceId?: string;
     };
     hasToolCall(toolName: string): boolean;
+    /**
+     * Register a callback that fires when messages are rolled back.
+     * The executor uses this to clean up its pending message queues.
+     */
+    setOnRollback(handler: (removedSet: Set<object>) => void): void;
+    /**
+     * Remove all messages from position `index` onward.
+     *
+     * Truncates the internal message list and notifies the executor
+     * (via the registered rollback handler) to clean pending queues.
+     *
+     * **Note:** This method is safe to call only during an agent's `call()`
+     * invocation.  The executor runs agents sequentially, so no other agent
+     * can observe stale `newMessages` references.  Calling this from outside
+     * that flow may leave already-delivered `newMessages` out of sync.
+     *
+     * @param index - Truncate point. Must be non-negative; values greater than
+     *   `messages.length` are clamped to `messages.length`.
+     *   Messages at positions >= index are removed.
+     * @returns The removed messages (empty array if nothing to remove).
+     * @throws {RangeError} If `index` is negative.
+     */
+    rollbackMessagesTo(index: number): ModelMessage[];
 }
 type execution_ScenarioExecution = ScenarioExecution;
@@ -2045,6 +2271,13 @@ interface RunOptions {
     langwatch?: LangwatchConfig;
     /** Batch run ID for grouping scenario runs. Overrides SCENARIO_BATCH_RUN_ID env var. */
     batchRunId?: string;
+    /**
+     * Pre-assigned run ID for the scenario execution.
+     * When provided, the SDK uses this ID instead of generating a new one.
+     *
+     * @internal Platform use only — not part of the public API.
+     */
+    runId?: string;
 }
 /**
  * High-level interface for running a scenario test.
@@ -2197,16 +2430,31 @@ declare const succeed: (reasoning?: string) => ScriptStep;
  * @returns A ScriptStep function that can be used in scenario scripts.
  */
 declare const fail: (reasoning?: string) => ScriptStep;
+/**
+ * Generate a marathon script that runs user-agent turns in a loop,
+ * with optional per-turn checks and a final judge evaluation.
+ *
+ * @param options.turns - Number of user-agent turn pairs.
+ * @param options.checks - Optional steps to run after each turn.
+ * @param options.finalChecks - Optional steps to run after all turns, before the judge.
+ * @returns An array of ScriptStep functions.
+ */
+declare const marathonScript: (options: {
+    turns: number;
+    checks?: ScriptStep[];
+    finalChecks?: ScriptStep[];
+}) => ScriptStep[];
 declare const script_agent: typeof agent;
 declare const script_fail: typeof fail;
 declare const script_judge: typeof judge;
+declare const script_marathonScript: typeof marathonScript;
 declare const script_message: typeof message;
 declare const script_proceed: typeof proceed;
 declare const script_succeed: typeof succeed;
 declare const script_user: typeof user;
 declare namespace script {
-  export { script_agent as agent, script_fail as fail, script_judge as judge, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
+  export { script_agent as agent, script_fail as fail, script_judge as judge, script_marathonScript as marathonScript, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
 }
 /**
@@ -2312,4 +2560,4 @@ declare function withCustomScopes(...scopes: string[]): TraceFilter[];
 type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
 declare const scenario: ScenarioApi;
-export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
+export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, type BacktrackEntry, type CrescendoConfig, CrescendoStrategy, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RedTeamAgentConfig, type RedTeamStrategy, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, marathonScript, message, proceed, redTeamAgent, redTeamCrescendo, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };