npm - @langwatch/scenario - Versions diffs - 0.4.3 → 0.4.4 - Mend

@langwatch/scenario 0.4.3 → 0.4.4

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.d.mts CHANGED Viewed

@@ -521,6 +521,22 @@ interface JudgeAgentConfig extends TestingAgentConfig {
      * Optional span collector for telemetry. Defaults to global singleton.
      */
     spanCollector?: JudgeSpanCollector;
+    /**
+     * Token threshold for switching to structure-only trace rendering.
+     * When the full trace digest exceeds this estimated token count,
+     * the judge receives a structure-only view with expand_trace and
+     * grep_trace tools for progressive discovery.
+     *
+     * @default 8192
+     */
+    tokenThreshold?: number;
+    /**
+     * Maximum number of tool-calling steps for progressive trace discovery.
+     * Only applies when the trace exceeds the token threshold.
+     *
+     * @default 10
+     */
+    maxDiscoverySteps?: number;
 }
 /**
  * Agent that evaluates conversations against success criteria.
@@ -535,6 +551,8 @@ declare class JudgeAgent extends JudgeAgentAdapter {
     private readonly cfg;
     private logger;
     private readonly spanCollector;
+    private readonly tokenThreshold;
+    private readonly maxDiscoverySteps;
     role: AgentRole;
     criteria: string[];
     /**
@@ -543,7 +561,19 @@ declare class JudgeAgent extends JudgeAgentAdapter {
     invokeLLM: (params: InvokeLLMParams) => Promise<InvokeLLMResult>;
     constructor(cfg: JudgeAgentConfig);
     call(input: AgentInput): Promise<JudgeResult | null>;
-    private getOpenTelemetryTracesDigest;
+    /**
+     * Builds the trace digest, choosing between full inline rendering
+     * and structure-only mode based on estimated token count.
+     */
+    private buildTraceDigest;
+    /**
+     * Invokes the LLM, enabling multi-step tool execution for large traces.
+     * In multi-step mode, the AI SDK loops automatically: the judge can call
+     * expand_trace/grep_trace tools multiple times before reaching a terminal
+     * tool (finish_test/continue_test) or hitting the step limit.
+     */
+    private invokeLLMWithDiscovery;
+    private parseToolCalls;
 }
 /**
  * Factory function for creating JudgeAgent instances.
@@ -604,6 +634,15 @@ declare const judgeAgent: (cfg?: JudgeAgentConfig) => JudgeAgent;
 declare class JudgeSpanDigestFormatter {
     private readonly logger;
     private readonly deduplicator;
+    /**
+     * Formats spans into a structure-only digest showing span tree hierarchy
+     * without attributes, events, or content. Used for large traces that
+     * exceed the token threshold, paired with expand_trace/grep_trace tools.
+     *
+     * @param spans - All spans for a thread
+     * @returns Plain text digest with only structural information
+     */
+    formatStructureOnly(spans: ReadableSpan[]): string;
     /**
      * Formats spans into a complete digest with full content and nesting.
      * @param spans - All spans for a thread
@@ -612,20 +651,18 @@ declare class JudgeSpanDigestFormatter {
     format(spans: ReadableSpan[]): string;
     private sortByStartTime;
     private buildHierarchy;
+    private renderStructureNode;
     private renderNode;
     private getTreePrefix;
     private getAttrIndent;
-    private cleanAttributes;
-    private formatValue;
-    private transformValue;
-    private transformString;
-    private looksLikeJson;
-    private hrTimeToMs;
-    private calculateSpanDuration;
+    /**
+     * Formats a value with deduplication applied. Used by the `format()` method
+     * to reduce token usage by replacing repeated strings with markers.
+     */
+    private formatValueWithDedup;
+    private transformValueWithDedup;
+    private transformStringWithDedup;
     private calculateTotalDuration;
-    private formatDuration;
-    private formatTimestamp;
-    private getStatusIndicator;
     private collectErrors;
 }
 /**
@@ -633,6 +670,45 @@ declare class JudgeSpanDigestFormatter {
  */
 declare const judgeSpanDigestFormatter: JudgeSpanDigestFormatter;
+/**
+ * Default token threshold for switching to structure-only trace rendering.
+ * Traces exceeding this estimated token count will be rendered in
+ * structure-only mode with expand/grep tools available to the judge.
+ *
+ */
+declare const DEFAULT_TOKEN_THRESHOLD = 8192;
+/**
+ * Estimates the number of tokens in a text string using a byte-based heuristic.
+ * Uses UTF-8 byte length divided by 4, which accounts for multi-byte characters
+ * (emojis, CJK, etc.) that typically consume more tokens than ASCII text.
+ *
+ * @param text - The text to estimate token count for
+ * @returns Estimated token count
+ */
+declare function estimateTokens(text: string): number;
+/**
+ * Expands one or more spans from a trace, returning their full details
+ * (attributes, events, status) with tree position context.
+ *
+ * @param spans - The full array of ReadableSpan objects for the trace
+ * @param options - Either a single `index` or a `range` string like "10-15"
+ * @returns Formatted string with full span details, truncated to ~4000 tokens
+ */
+declare function expandTrace(spans: ReadableSpan[], { index, range }: {
+    index?: number;
+    range?: string;
+}): string;
+/**
+ * Searches across all span attributes, events, and content for a pattern.
+ * Returns matching spans with their tree position and matching content.
+ *
+ * @param spans - The full array of ReadableSpan objects for the trace
+ * @param pattern - Case-insensitive search pattern
+ * @returns Formatted string with matches, limited to 20 results and ~4000 tokens
+ */
+declare function grepTrace(spans: ReadableSpan[], pattern: string): string;
 declare class UserSimulatorAgent extends UserSimulatorAgentAdapter {
     private readonly cfg?;
     private logger;
@@ -881,6 +957,7 @@ declare class RealtimeAgentAdapter extends AgentAdapter {
 }
 type agents_AudioResponseEvent = AudioResponseEvent;
+declare const agents_DEFAULT_TOKEN_THRESHOLD: typeof DEFAULT_TOKEN_THRESHOLD;
 type agents_FinishTestArgs = FinishTestArgs;
 type agents_InvokeLLMParams = InvokeLLMParams;
 type agents_InvokeLLMResult = InvokeLLMResult;
@@ -894,12 +971,15 @@ type agents_RealtimeAgentAdapter = RealtimeAgentAdapter;
 declare const agents_RealtimeAgentAdapter: typeof RealtimeAgentAdapter;
 type agents_RealtimeAgentAdapterConfig = RealtimeAgentAdapterConfig;
 type agents_TestingAgentConfig = TestingAgentConfig;
+declare const agents_estimateTokens: typeof estimateTokens;
+declare const agents_expandTrace: typeof expandTrace;
+declare const agents_grepTrace: typeof grepTrace;
 declare const agents_judgeAgent: typeof judgeAgent;
 declare const agents_judgeSpanCollector: typeof judgeSpanCollector;
 declare const agents_judgeSpanDigestFormatter: typeof judgeSpanDigestFormatter;
 declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
 declare namespace agents {
-  export { type agents_AudioResponseEvent as AudioResponseEvent, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_TestingAgentConfig as TestingAgentConfig, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_userSimulatorAgent as userSimulatorAgent };
+  export { type agents_AudioResponseEvent as AudioResponseEvent, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_userSimulatorAgent as userSimulatorAgent };
 }
 /**
@@ -2227,4 +2307,4 @@ declare function withCustomScopes(...scopes: string[]): TraceFilter[];
 type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
 declare const scenario: ScenarioApi;
-export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
+export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };

package/dist/index.d.ts CHANGED Viewed

@@ -521,6 +521,22 @@ interface JudgeAgentConfig extends TestingAgentConfig {
      * Optional span collector for telemetry. Defaults to global singleton.
      */
     spanCollector?: JudgeSpanCollector;
+    /**
+     * Token threshold for switching to structure-only trace rendering.
+     * When the full trace digest exceeds this estimated token count,
+     * the judge receives a structure-only view with expand_trace and
+     * grep_trace tools for progressive discovery.
+     *
+     * @default 8192
+     */
+    tokenThreshold?: number;
+    /**
+     * Maximum number of tool-calling steps for progressive trace discovery.
+     * Only applies when the trace exceeds the token threshold.
+     *
+     * @default 10
+     */
+    maxDiscoverySteps?: number;
 }
 /**
  * Agent that evaluates conversations against success criteria.
@@ -535,6 +551,8 @@ declare class JudgeAgent extends JudgeAgentAdapter {
     private readonly cfg;
     private logger;
     private readonly spanCollector;
+    private readonly tokenThreshold;
+    private readonly maxDiscoverySteps;
     role: AgentRole;
     criteria: string[];
     /**
@@ -543,7 +561,19 @@ declare class JudgeAgent extends JudgeAgentAdapter {
     invokeLLM: (params: InvokeLLMParams) => Promise<InvokeLLMResult>;
     constructor(cfg: JudgeAgentConfig);
     call(input: AgentInput): Promise<JudgeResult | null>;
-    private getOpenTelemetryTracesDigest;
+    /**
+     * Builds the trace digest, choosing between full inline rendering
+     * and structure-only mode based on estimated token count.
+     */
+    private buildTraceDigest;
+    /**
+     * Invokes the LLM, enabling multi-step tool execution for large traces.
+     * In multi-step mode, the AI SDK loops automatically: the judge can call
+     * expand_trace/grep_trace tools multiple times before reaching a terminal
+     * tool (finish_test/continue_test) or hitting the step limit.
+     */
+    private invokeLLMWithDiscovery;
+    private parseToolCalls;
 }
 /**
  * Factory function for creating JudgeAgent instances.
@@ -604,6 +634,15 @@ declare const judgeAgent: (cfg?: JudgeAgentConfig) => JudgeAgent;
 declare class JudgeSpanDigestFormatter {
     private readonly logger;
     private readonly deduplicator;
+    /**
+     * Formats spans into a structure-only digest showing span tree hierarchy
+     * without attributes, events, or content. Used for large traces that
+     * exceed the token threshold, paired with expand_trace/grep_trace tools.
+     *
+     * @param spans - All spans for a thread
+     * @returns Plain text digest with only structural information
+     */
+    formatStructureOnly(spans: ReadableSpan[]): string;
     /**
      * Formats spans into a complete digest with full content and nesting.
      * @param spans - All spans for a thread
@@ -612,20 +651,18 @@ declare class JudgeSpanDigestFormatter {
     format(spans: ReadableSpan[]): string;
     private sortByStartTime;
     private buildHierarchy;
+    private renderStructureNode;
     private renderNode;
     private getTreePrefix;
     private getAttrIndent;
-    private cleanAttributes;
-    private formatValue;
-    private transformValue;
-    private transformString;
-    private looksLikeJson;
-    private hrTimeToMs;
-    private calculateSpanDuration;
+    /**
+     * Formats a value with deduplication applied. Used by the `format()` method
+     * to reduce token usage by replacing repeated strings with markers.
+     */
+    private formatValueWithDedup;
+    private transformValueWithDedup;
+    private transformStringWithDedup;
     private calculateTotalDuration;
-    private formatDuration;
-    private formatTimestamp;
-    private getStatusIndicator;
     private collectErrors;
 }
 /**
@@ -633,6 +670,45 @@ declare class JudgeSpanDigestFormatter {
  */
 declare const judgeSpanDigestFormatter: JudgeSpanDigestFormatter;
+/**
+ * Default token threshold for switching to structure-only trace rendering.
+ * Traces exceeding this estimated token count will be rendered in
+ * structure-only mode with expand/grep tools available to the judge.
+ *
+ */
+declare const DEFAULT_TOKEN_THRESHOLD = 8192;
+/**
+ * Estimates the number of tokens in a text string using a byte-based heuristic.
+ * Uses UTF-8 byte length divided by 4, which accounts for multi-byte characters
+ * (emojis, CJK, etc.) that typically consume more tokens than ASCII text.
+ *
+ * @param text - The text to estimate token count for
+ * @returns Estimated token count
+ */
+declare function estimateTokens(text: string): number;
+/**
+ * Expands one or more spans from a trace, returning their full details
+ * (attributes, events, status) with tree position context.
+ *
+ * @param spans - The full array of ReadableSpan objects for the trace
+ * @param options - Either a single `index` or a `range` string like "10-15"
+ * @returns Formatted string with full span details, truncated to ~4000 tokens
+ */
+declare function expandTrace(spans: ReadableSpan[], { index, range }: {
+    index?: number;
+    range?: string;
+}): string;
+/**
+ * Searches across all span attributes, events, and content for a pattern.
+ * Returns matching spans with their tree position and matching content.
+ *
+ * @param spans - The full array of ReadableSpan objects for the trace
+ * @param pattern - Case-insensitive search pattern
+ * @returns Formatted string with matches, limited to 20 results and ~4000 tokens
+ */
+declare function grepTrace(spans: ReadableSpan[], pattern: string): string;
 declare class UserSimulatorAgent extends UserSimulatorAgentAdapter {
     private readonly cfg?;
     private logger;
@@ -881,6 +957,7 @@ declare class RealtimeAgentAdapter extends AgentAdapter {
 }
 type agents_AudioResponseEvent = AudioResponseEvent;
+declare const agents_DEFAULT_TOKEN_THRESHOLD: typeof DEFAULT_TOKEN_THRESHOLD;
 type agents_FinishTestArgs = FinishTestArgs;
 type agents_InvokeLLMParams = InvokeLLMParams;
 type agents_InvokeLLMResult = InvokeLLMResult;
@@ -894,12 +971,15 @@ type agents_RealtimeAgentAdapter = RealtimeAgentAdapter;
 declare const agents_RealtimeAgentAdapter: typeof RealtimeAgentAdapter;
 type agents_RealtimeAgentAdapterConfig = RealtimeAgentAdapterConfig;
 type agents_TestingAgentConfig = TestingAgentConfig;
+declare const agents_estimateTokens: typeof estimateTokens;
+declare const agents_expandTrace: typeof expandTrace;
+declare const agents_grepTrace: typeof grepTrace;
 declare const agents_judgeAgent: typeof judgeAgent;
 declare const agents_judgeSpanCollector: typeof judgeSpanCollector;
 declare const agents_judgeSpanDigestFormatter: typeof judgeSpanDigestFormatter;
 declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
 declare namespace agents {
-  export { type agents_AudioResponseEvent as AudioResponseEvent, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_TestingAgentConfig as TestingAgentConfig, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_userSimulatorAgent as userSimulatorAgent };
+  export { type agents_AudioResponseEvent as AudioResponseEvent, agents_DEFAULT_TOKEN_THRESHOLD as DEFAULT_TOKEN_THRESHOLD, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_TestingAgentConfig as TestingAgentConfig, agents_estimateTokens as estimateTokens, agents_expandTrace as expandTrace, agents_grepTrace as grepTrace, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_userSimulatorAgent as userSimulatorAgent };
 }
 /**
@@ -2227,4 +2307,4 @@ declare function withCustomScopes(...scopes: string[]): TraceFilter[];
 type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
 declare const scenario: ScenarioApi;
-export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
+export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_TOKEN_THRESHOLD, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, estimateTokens, expandTrace, fail, grepTrace, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };