@langwatch/scenario 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/index.d.mts +176 -20
- package/dist/index.d.ts +176 -20
- package/dist/index.js +2677 -1990
- package/dist/index.mjs +2659 -1974
- package/dist/integrations/vitest/setup.js +5 -2
- package/dist/integrations/vitest/setup.mjs +5 -2
- package/package.json +3 -2
package/README.md
CHANGED
|
@@ -97,7 +97,7 @@ describe("Weather Agent", () => {
|
|
|
97
97
|
role: AgentRole.AGENT,
|
|
98
98
|
call: async (input) => {
|
|
99
99
|
const response = await generateText({
|
|
100
|
-
model: openai("gpt-4.1"),
|
|
100
|
+
model: openai("gpt-4.1-mini"),
|
|
101
101
|
system: `You are a helpful assistant that may help the user with weather information.`,
|
|
102
102
|
messages: input.messages,
|
|
103
103
|
tools: { get_current_weather: getCurrentWeather },
|
|
@@ -150,7 +150,7 @@ describe("Weather Agent", () => {
|
|
|
150
150
|
"The user asks for the weather in a specific city, and the agent should use the weather tool to find it.",
|
|
151
151
|
agents: [
|
|
152
152
|
weatherAgent,
|
|
153
|
-
scenario.userSimulatorAgent({ model: openai("gpt-4.1") }),
|
|
153
|
+
scenario.userSimulatorAgent({ model: openai("gpt-4.1-mini") }),
|
|
154
154
|
],
|
|
155
155
|
script: [
|
|
156
156
|
scenario.user("What's the weather like in Barcelona?"),
|
package/dist/index.d.mts
CHANGED
|
@@ -3,6 +3,7 @@ import { ModelMessage, UserModelMessage, AssistantModelMessage, ToolModelMessage
|
|
|
3
3
|
import { z } from 'zod/v4';
|
|
4
4
|
import { SpanProcessor, ReadableSpan } from '@opentelemetry/sdk-trace-base';
|
|
5
5
|
import { RealtimeSession } from '@openai/agents/realtime';
|
|
6
|
+
import { SetupObservabilityOptions } from 'langwatch/observability/node';
|
|
6
7
|
import { Observable } from 'rxjs';
|
|
7
8
|
import { z as z$1 } from 'zod';
|
|
8
9
|
|
|
@@ -178,6 +179,12 @@ interface ScenarioConfig {
|
|
|
178
179
|
* If not provided, the scenario will not be grouped into a set.
|
|
179
180
|
*/
|
|
180
181
|
setId?: string;
|
|
182
|
+
/**
|
|
183
|
+
* Optional metadata to attach to the scenario run.
|
|
184
|
+
* Accepts arbitrary key-value pairs (e.g. prompt IDs, environments, versions).
|
|
185
|
+
* The `langwatch` key is reserved for platform-internal use.
|
|
186
|
+
*/
|
|
187
|
+
metadata?: Record<string, unknown>;
|
|
181
188
|
}
|
|
182
189
|
/**
|
|
183
190
|
* Final, normalized scenario configuration.
|
|
@@ -265,6 +272,10 @@ type ScriptStep = (state: ScenarioExecutionStateLike, executor: ScenarioExecutio
|
|
|
265
272
|
*
|
|
266
273
|
*/
|
|
267
274
|
interface ScenarioResult {
|
|
275
|
+
/**
|
|
276
|
+
* Unique identifier for this scenario run.
|
|
277
|
+
*/
|
|
278
|
+
runId: string;
|
|
268
279
|
/**
|
|
269
280
|
* Indicates whether the scenario was successful.
|
|
270
281
|
*/
|
|
@@ -357,6 +368,13 @@ interface ScenarioExecutionStateLike {
|
|
|
357
368
|
hasToolCall(toolName: string): boolean;
|
|
358
369
|
}
|
|
359
370
|
|
|
371
|
+
/**
|
|
372
|
+
* Schema for the scenario project configuration file (scenario.config.js).
|
|
373
|
+
*
|
|
374
|
+
* The `observability` field accepts a subset of `SetupObservabilityOptions`
|
|
375
|
+
* from the langwatch SDK. It uses `z.custom()` to avoid strict validation
|
|
376
|
+
* on the passthrough object while keeping the outer config strict.
|
|
377
|
+
*/
|
|
360
378
|
declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
361
379
|
defaultModel: z.ZodOptional<z.ZodObject<{
|
|
362
380
|
model: z.ZodCustom<ai.LanguageModel, ai.LanguageModel>;
|
|
@@ -364,6 +382,7 @@ declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
|
364
382
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
365
383
|
}, z.core.$strip>>;
|
|
366
384
|
headless: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
385
|
+
observability: z.ZodOptional<z.ZodCustom<Partial<SetupObservabilityOptions>, Partial<SetupObservabilityOptions>>>;
|
|
367
386
|
}, z.core.$strict>;
|
|
368
387
|
type ScenarioProjectConfig = z.infer<typeof scenarioProjectConfigSchema>;
|
|
369
388
|
declare function defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
|
|
@@ -467,6 +486,13 @@ declare class JudgeSpanCollector implements SpanProcessor {
|
|
|
467
486
|
onEnd(span: ReadableSpan): void;
|
|
468
487
|
forceFlush(): Promise<void>;
|
|
469
488
|
shutdown(): Promise<void>;
|
|
489
|
+
/**
|
|
490
|
+
* Removes all spans associated with a specific thread.
|
|
491
|
+
* Call this after a scenario run completes to prevent memory growth
|
|
492
|
+
* in long-lived processes.
|
|
493
|
+
* @param threadId - The thread identifier whose spans should be cleared
|
|
494
|
+
*/
|
|
495
|
+
clearSpansForThread(threadId: string): void;
|
|
470
496
|
/**
|
|
471
497
|
* Retrieves all spans associated with a specific thread.
|
|
472
498
|
* @param threadId - The thread identifier to filter spans by
|
|
@@ -914,35 +940,39 @@ declare const scenarioEventSchema: z$1.ZodDiscriminatedUnion<"type", [z$1.ZodObj
|
|
|
914
940
|
metadata: z$1.ZodObject<{
|
|
915
941
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
916
942
|
description: z$1.ZodOptional<z$1.ZodString>;
|
|
917
|
-
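}, "strip", z$1.ZodTypeAny, {
|
|
918
|
-
description?: string | undefined;
|
|
919
|
-
name?: string | undefined;
|
|
920
|
-
}, {
|
|
921
|
-
description?: string | undefined;
|
|
922
|
-
name?: string | undefined;
|
|
923
|
-
}>;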
|
|
943
|
+
}, "strip", z$1.ZodUnknown, z$1.objectOutputType<{
|
|
944
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
945
|
+
description: z$1.ZodOptional<z$1.ZodString>;
|
|
946
|
+
}, z$1.ZodUnknown, "strip">, z$1.objectInputType<{
|
|
947
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
948
|
+
description: z$1.ZodOptional<z$1.ZodString>;
|
|
949
|
+
}, z$1.ZodUnknown, "strip">>;
|
|
924
950
|
}, "strip", z$1.ZodTypeAny, {
|
|
925
951
|
type: ScenarioEventType.RUN_STARTED;
|
|
952
|
+
metadata: {
|
|
953
|
+
name?: string | undefined;
|
|
954
|
+
description?: string | undefined;
|
|
955
|
+
} & {
|
|
956
|
+
[k: string]: unknown;
|
|
957
|
+
};
|
|
926
958
|
timestamp: number;
|
|
927
959
|
batchRunId: string;
|
|
928
960
|
scenarioId: string;
|
|
929
961
|
scenarioRunId: string;
|
|
930
962
|
scenarioSetId: string;
|
|
931
|
-
metadata: {
|
|
932
|
-
description?: string | undefined;
|
|
933
|
-
name?: string | undefined;
|
|
934
|
-
};
|
|
935
963
|
rawEvent?: any;
|
|
936
964
|
}, {
|
|
937
965
|
type: ScenarioEventType.RUN_STARTED;
|
|
966
|
+
metadata: {
|
|
967
|
+
name?: string | undefined;
|
|
968
|
+
description?: string | undefined;
|
|
969
|
+
} & {
|
|
970
|
+
[k: string]: unknown;
|
|
971
|
+
};
|
|
938
972
|
timestamp: number;
|
|
939
973
|
batchRunId: string;
|
|
940
974
|
scenarioId: string;
|
|
941
975
|
scenarioRunId: string;
|
|
942
|
-
metadata: {
|
|
943
|
-
description?: string | undefined;
|
|
944
|
-
name?: string | undefined;
|
|
945
|
-
};
|
|
946
976
|
rawEvent?: any;
|
|
947
977
|
scenarioSetId?: string | undefined;
|
|
948
978
|
}>, z$1.ZodObject<{
|
|
@@ -1363,13 +1393,18 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1363
1393
|
* - RUN_FINISHED: When scenario execution completes (success/failure/error)
|
|
1364
1394
|
*/
|
|
1365
1395
|
readonly events$: Observable<ScenarioEvent>;
|
|
1396
|
+
/** Batch run ID for grouping scenario runs */
|
|
1397
|
+
private batchRunId;
|
|
1398
|
+
/** The run ID for the current execution */
|
|
1399
|
+
private scenarioRunId?;
|
|
1366
1400
|
/**
|
|
1367
1401
|
* Creates a new ScenarioExecution instance.
|
|
1368
1402
|
*
|
|
1369
1403
|
* @param config - The scenario configuration containing agents, settings, and metadata
|
|
1370
1404
|
* @param script - The ordered sequence of script steps that define the test flow
|
|
1405
|
+
* @param batchRunId - Batch run ID for grouping scenario runs
|
|
1371
1406
|
*/
|
|
1372
|
-
constructor(config: ScenarioConfig, script: ScriptStep[]);
|
|
1407
|
+
constructor(config: ScenarioConfig, script: ScriptStep[], batchRunId: string);
|
|
1373
1408
|
/**
|
|
1374
1409
|
* Gets the complete conversation history as an array of messages.
|
|
1375
1410
|
*
|
|
@@ -1728,7 +1763,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1728
1763
|
* - Creates a new ScenarioExecutionState with the current config
|
|
1729
1764
|
* - Sets up the thread ID (generates new one if not provided)
|
|
1730
1765
|
* - Initializes all agents
|
|
1731
|
-
* -
|
|
1766
|
+
* - Initializes turn state (pending agents/roles) without creating a trace span
|
|
1732
1767
|
* - Records the start time for performance tracking
|
|
1733
1768
|
* - Clears any pending messages
|
|
1734
1769
|
* - Clears the result from any previous execution
|
|
@@ -1907,6 +1942,25 @@ declare namespace execution {
|
|
|
1907
1942
|
export { execution_ScenarioExecution as ScenarioExecution, execution_ScenarioExecutionState as ScenarioExecutionState, type execution_StateChangeEvent as StateChangeEvent, execution_StateChangeEventType as StateChangeEventType };
|
|
1908
1943
|
}
|
|
1909
1944
|
|
|
1945
|
+
/**
|
|
1946
|
+
* Configuration for LangWatch event reporting.
|
|
1947
|
+
* All fields are optional — any omitted fields fall back to environment variables.
|
|
1948
|
+
*/
|
|
1949
|
+
interface LangwatchConfig {
|
|
1950
|
+
/** The endpoint URL to send events to. Falls back to LANGWATCH_ENDPOINT env var. */
|
|
1951
|
+
endpoint?: string;
|
|
1952
|
+
/** The API key for authentication. Falls back to LANGWATCH_API_KEY env var. */
|
|
1953
|
+
apiKey?: string;
|
|
1954
|
+
}
|
|
1955
|
+
/**
|
|
1956
|
+
* Options for running a scenario.
|
|
1957
|
+
*/
|
|
1958
|
+
interface RunOptions {
|
|
1959
|
+
/** LangWatch configuration for event reporting. Overrides environment variables. */
|
|
1960
|
+
langwatch?: LangwatchConfig;
|
|
1961
|
+
/** Batch run ID for grouping scenario runs. Overrides SCENARIO_BATCH_RUN_ID env var. */
|
|
1962
|
+
batchRunId?: string;
|
|
1963
|
+
}
|
|
1910
1964
|
/**
|
|
1911
1965
|
* High-level interface for running a scenario test.
|
|
1912
1966
|
*
|
|
@@ -1956,11 +2010,13 @@ declare namespace execution {
|
|
|
1956
2010
|
* main();
|
|
1957
2011
|
* ```
|
|
1958
2012
|
*/
|
|
1959
|
-
declare function run(cfg: ScenarioConfig): Promise<ScenarioResult>;
|
|
2013
|
+
declare function run(cfg: ScenarioConfig, options?: RunOptions): Promise<ScenarioResult>;
|
|
1960
2014
|
|
|
2015
|
+
type runner_LangwatchConfig = LangwatchConfig;
|
|
2016
|
+
type runner_RunOptions = RunOptions;
|
|
1961
2017
|
declare const runner_run: typeof run;
|
|
1962
2018
|
declare namespace runner {
|
|
1963
|
-
export { runner_run as run };
|
|
2019
|
+
export { type runner_LangwatchConfig as LangwatchConfig, type runner_RunOptions as RunOptions, runner_run as run };
|
|
1964
2020
|
}
|
|
1965
2021
|
|
|
1966
2022
|
/**
|
|
@@ -2068,7 +2124,107 @@ declare namespace script {
|
|
|
2068
2124
|
export { script_agent as agent, script_fail as fail, script_judge as judge, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
|
|
2069
2125
|
}
|
|
2070
2126
|
|
|
2127
|
+
/**
|
|
2128
|
+
* Explicitly set up tracing for @langwatch/scenario.
|
|
2129
|
+
*
|
|
2130
|
+
* Call this before any `run()` invocations when you want full control
|
|
2131
|
+
* over the observability configuration. If called, `run()` will skip
|
|
2132
|
+
* its own lazy initialization.
|
|
2133
|
+
*
|
|
2134
|
+
* The `judgeSpanCollector` is always added as a span processor regardless
|
|
2135
|
+
* of the user-provided options.
|
|
2136
|
+
*
|
|
2137
|
+
* @param options - Optional `SetupObservabilityOptions` forwarded to the
|
|
2138
|
+
* langwatch SDK `setupObservability()` function.
|
|
2139
|
+
*
|
|
2140
|
+
* @example
|
|
2141
|
+
* ```typescript
|
|
2142
|
+
* import { setupScenarioTracing } from "@langwatch/scenario";
|
|
2143
|
+
*
|
|
2144
|
+
* setupScenarioTracing({
|
|
2145
|
+
* instrumentations: [], // disable auto-instrumentation
|
|
2146
|
+
* spanProcessors: [myProcessor], // add custom processors
|
|
2147
|
+
* });
|
|
2148
|
+
* ```
|
|
2149
|
+
*/
|
|
2150
|
+
declare function setupScenarioTracing(options?: Partial<SetupObservabilityOptions>): void;
|
|
2151
|
+
|
|
2152
|
+
/**
|
|
2153
|
+
* Criteria for matching spans by instrumentation scope name or span name.
|
|
2154
|
+
* Within each field, matchers use OR semantics (any match succeeds).
|
|
2155
|
+
* Across fields, AND semantics apply (all specified fields must match).
|
|
2156
|
+
*/
|
|
2157
|
+
interface TraceFilterCriteria {
|
|
2158
|
+
instrumentationScopeName?: TraceFilterMatch[];
|
|
2159
|
+
name?: TraceFilterMatch[];
|
|
2160
|
+
}
|
|
2161
|
+
/**
|
|
2162
|
+
* A single match rule for string comparison.
|
|
2163
|
+
*/
|
|
2164
|
+
interface TraceFilterMatch {
|
|
2165
|
+
equals?: string;
|
|
2166
|
+
startsWith?: string;
|
|
2167
|
+
matches?: RegExp;
|
|
2168
|
+
ignoreCase?: boolean;
|
|
2169
|
+
}
|
|
2170
|
+
/**
|
|
2171
|
+
* A filter rule for controlling which spans are exported.
|
|
2172
|
+
*
|
|
2173
|
+
* Compatible with the langwatch SDK's `TraceFilter` type used by
|
|
2174
|
+
* `LangWatchTraceExporter`.
|
|
2175
|
+
*/
|
|
2176
|
+
type TraceFilter = {
|
|
2177
|
+
preset: "vercelAIOnly" | "excludeHttpRequests";
|
|
2178
|
+
} | {
|
|
2179
|
+
include: TraceFilterCriteria;
|
|
2180
|
+
} | {
|
|
2181
|
+
exclude: TraceFilterCriteria;
|
|
2182
|
+
};
|
|
2183
|
+
/**
|
|
2184
|
+
* Preset filter that only keeps spans from the @langwatch/scenario instrumentation scope.
|
|
2185
|
+
* Use this to prevent unrelated server spans (HTTP, middleware, etc.) from being exported.
|
|
2186
|
+
*
|
|
2187
|
+
* @example
|
|
2188
|
+
* ```typescript
|
|
2189
|
+
* import { defineConfig, scenarioOnly } from "@langwatch/scenario";
|
|
2190
|
+
* import { LangWatchTraceExporter } from "langwatch/observability";
|
|
2191
|
+
*
|
|
2192
|
+
* export default defineConfig({
|
|
2193
|
+
* observability: {
|
|
2194
|
+
* traceExporter: new LangWatchTraceExporter({
|
|
2195
|
+
* filters: scenarioOnly,
|
|
2196
|
+
* }),
|
|
2197
|
+
* instrumentations: [], // disable auto-instrumentation
|
|
2198
|
+
* },
|
|
2199
|
+
* });
|
|
2200
|
+
* ```
|
|
2201
|
+
*/
|
|
2202
|
+
declare const scenarioOnly: TraceFilter[];
|
|
2203
|
+
/**
|
|
2204
|
+
* Creates a filter that keeps spans from the @langwatch/scenario scope
|
|
2205
|
+
* plus any additional custom instrumentation scopes.
|
|
2206
|
+
*
|
|
2207
|
+
* @param scopes - Additional instrumentation scope names to include
|
|
2208
|
+
* @returns Array of TraceFilter rules
|
|
2209
|
+
*
|
|
2210
|
+
* @example
|
|
2211
|
+
* ```typescript
|
|
2212
|
+
* import { defineConfig, withCustomScopes } from "@langwatch/scenario";
|
|
2213
|
+
* import { LangWatchTraceExporter } from "langwatch/observability";
|
|
2214
|
+
*
|
|
2215
|
+
* export default defineConfig({
|
|
2216
|
+
* observability: {
|
|
2217
|
+
* traceExporter: new LangWatchTraceExporter({
|
|
2218
|
+
* filters: withCustomScopes("my-app/database", "my-app/agent"),
|
|
2219
|
+
* }),
|
|
2220
|
+
* instrumentations: [], // disable auto-instrumentation
|
|
2221
|
+
* },
|
|
2222
|
+
* });
|
|
2223
|
+
* ```
|
|
2224
|
+
*/
|
|
2225
|
+
declare function withCustomScopes(...scopes: string[]): TraceFilter[];
|
|
2226
|
+
|
|
2071
2227
|
type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
|
|
2072
2228
|
declare const scenario: ScenarioApi;
|
|
2073
2229
|
|
|
2074
|
-
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
|
|
2230
|
+
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
|
package/dist/index.d.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { ModelMessage, UserModelMessage, AssistantModelMessage, ToolModelMessage
|
|
|
3
3
|
import { z } from 'zod/v4';
|
|
4
4
|
import { SpanProcessor, ReadableSpan } from '@opentelemetry/sdk-trace-base';
|
|
5
5
|
import { RealtimeSession } from '@openai/agents/realtime';
|
|
6
|
+
import { SetupObservabilityOptions } from 'langwatch/observability/node';
|
|
6
7
|
import { Observable } from 'rxjs';
|
|
7
8
|
import { z as z$1 } from 'zod';
|
|
8
9
|
|
|
@@ -178,6 +179,12 @@ interface ScenarioConfig {
|
|
|
178
179
|
* If not provided, the scenario will not be grouped into a set.
|
|
179
180
|
*/
|
|
180
181
|
setId?: string;
|
|
182
|
+
/**
|
|
183
|
+
* Optional metadata to attach to the scenario run.
|
|
184
|
+
* Accepts arbitrary key-value pairs (e.g. prompt IDs, environments, versions).
|
|
185
|
+
* The `langwatch` key is reserved for platform-internal use.
|
|
186
|
+
*/
|
|
187
|
+
metadata?: Record<string, unknown>;
|
|
181
188
|
}
|
|
182
189
|
/**
|
|
183
190
|
* Final, normalized scenario configuration.
|
|
@@ -265,6 +272,10 @@ type ScriptStep = (state: ScenarioExecutionStateLike, executor: ScenarioExecutio
|
|
|
265
272
|
*
|
|
266
273
|
*/
|
|
267
274
|
interface ScenarioResult {
|
|
275
|
+
/**
|
|
276
|
+
* Unique identifier for this scenario run.
|
|
277
|
+
*/
|
|
278
|
+
runId: string;
|
|
268
279
|
/**
|
|
269
280
|
* Indicates whether the scenario was successful.
|
|
270
281
|
*/
|
|
@@ -357,6 +368,13 @@ interface ScenarioExecutionStateLike {
|
|
|
357
368
|
hasToolCall(toolName: string): boolean;
|
|
358
369
|
}
|
|
359
370
|
|
|
371
|
+
/**
|
|
372
|
+
* Schema for the scenario project configuration file (scenario.config.js).
|
|
373
|
+
*
|
|
374
|
+
* The `observability` field accepts a subset of `SetupObservabilityOptions`
|
|
375
|
+
* from the langwatch SDK. It uses `z.custom()` to avoid strict validation
|
|
376
|
+
* on the passthrough object while keeping the outer config strict.
|
|
377
|
+
*/
|
|
360
378
|
declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
361
379
|
defaultModel: z.ZodOptional<z.ZodObject<{
|
|
362
380
|
model: z.ZodCustom<ai.LanguageModel, ai.LanguageModel>;
|
|
@@ -364,6 +382,7 @@ declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
|
364
382
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
365
383
|
}, z.core.$strip>>;
|
|
366
384
|
headless: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
385
|
+
observability: z.ZodOptional<z.ZodCustom<Partial<SetupObservabilityOptions>, Partial<SetupObservabilityOptions>>>;
|
|
367
386
|
}, z.core.$strict>;
|
|
368
387
|
type ScenarioProjectConfig = z.infer<typeof scenarioProjectConfigSchema>;
|
|
369
388
|
declare function defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
|
|
@@ -467,6 +486,13 @@ declare class JudgeSpanCollector implements SpanProcessor {
|
|
|
467
486
|
onEnd(span: ReadableSpan): void;
|
|
468
487
|
forceFlush(): Promise<void>;
|
|
469
488
|
shutdown(): Promise<void>;
|
|
489
|
+
/**
|
|
490
|
+
* Removes all spans associated with a specific thread.
|
|
491
|
+
* Call this after a scenario run completes to prevent memory growth
|
|
492
|
+
* in long-lived processes.
|
|
493
|
+
* @param threadId - The thread identifier whose spans should be cleared
|
|
494
|
+
*/
|
|
495
|
+
clearSpansForThread(threadId: string): void;
|
|
470
496
|
/**
|
|
471
497
|
* Retrieves all spans associated with a specific thread.
|
|
472
498
|
* @param threadId - The thread identifier to filter spans by
|
|
@@ -914,35 +940,39 @@ declare const scenarioEventSchema: z$1.ZodDiscriminatedUnion<"type", [z$1.ZodObj
|
|
|
914
940
|
metadata: z$1.ZodObject<{
|
|
915
941
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
916
942
|
description: z$1.ZodOptional<z$1.ZodString>;
|
|
917
|
-
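}, "strip", z$1.ZodTypeAny, {
|
|
918
|
-
description?: string | undefined;
|
|
919
|
-
name?: string | undefined;
|
|
920
|
-
}, {
|
|
921
|
-
description?: string | undefined;
|
|
922
|
-
name?: string | undefined;
|
|
923
|
-
}>;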
|
|
943
|
+
}, "strip", z$1.ZodUnknown, z$1.objectOutputType<{
|
|
944
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
945
|
+
description: z$1.ZodOptional<z$1.ZodString>;
|
|
946
|
+
}, z$1.ZodUnknown, "strip">, z$1.objectInputType<{
|
|
947
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
948
|
+
description: z$1.ZodOptional<z$1.ZodString>;
|
|
949
|
+
}, z$1.ZodUnknown, "strip">>;
|
|
924
950
|
}, "strip", z$1.ZodTypeAny, {
|
|
925
951
|
type: ScenarioEventType.RUN_STARTED;
|
|
952
|
+
metadata: {
|
|
953
|
+
name?: string | undefined;
|
|
954
|
+
description?: string | undefined;
|
|
955
|
+
} & {
|
|
956
|
+
[k: string]: unknown;
|
|
957
|
+
};
|
|
926
958
|
timestamp: number;
|
|
927
959
|
batchRunId: string;
|
|
928
960
|
scenarioId: string;
|
|
929
961
|
scenarioRunId: string;
|
|
930
962
|
scenarioSetId: string;
|
|
931
|
-
metadata: {
|
|
932
|
-
description?: string | undefined;
|
|
933
|
-
name?: string | undefined;
|
|
934
|
-
};
|
|
935
963
|
rawEvent?: any;
|
|
936
964
|
}, {
|
|
937
965
|
type: ScenarioEventType.RUN_STARTED;
|
|
966
|
+
metadata: {
|
|
967
|
+
name?: string | undefined;
|
|
968
|
+
description?: string | undefined;
|
|
969
|
+
} & {
|
|
970
|
+
[k: string]: unknown;
|
|
971
|
+
};
|
|
938
972
|
timestamp: number;
|
|
939
973
|
batchRunId: string;
|
|
940
974
|
scenarioId: string;
|
|
941
975
|
scenarioRunId: string;
|
|
942
|
-
metadata: {
|
|
943
|
-
description?: string | undefined;
|
|
944
|
-
name?: string | undefined;
|
|
945
|
-
};
|
|
946
976
|
rawEvent?: any;
|
|
947
977
|
scenarioSetId?: string | undefined;
|
|
948
978
|
}>, z$1.ZodObject<{
|
|
@@ -1363,13 +1393,18 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1363
1393
|
* - RUN_FINISHED: When scenario execution completes (success/failure/error)
|
|
1364
1394
|
*/
|
|
1365
1395
|
readonly events$: Observable<ScenarioEvent>;
|
|
1396
|
+
/** Batch run ID for grouping scenario runs */
|
|
1397
|
+
private batchRunId;
|
|
1398
|
+
/** The run ID for the current execution */
|
|
1399
|
+
private scenarioRunId?;
|
|
1366
1400
|
/**
|
|
1367
1401
|
* Creates a new ScenarioExecution instance.
|
|
1368
1402
|
*
|
|
1369
1403
|
* @param config - The scenario configuration containing agents, settings, and metadata
|
|
1370
1404
|
* @param script - The ordered sequence of script steps that define the test flow
|
|
1405
|
+
* @param batchRunId - Batch run ID for grouping scenario runs
|
|
1371
1406
|
*/
|
|
1372
|
-
constructor(config: ScenarioConfig, script: ScriptStep[]);
|
|
1407
|
+
constructor(config: ScenarioConfig, script: ScriptStep[], batchRunId: string);
|
|
1373
1408
|
/**
|
|
1374
1409
|
* Gets the complete conversation history as an array of messages.
|
|
1375
1410
|
*
|
|
@@ -1728,7 +1763,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1728
1763
|
* - Creates a new ScenarioExecutionState with the current config
|
|
1729
1764
|
* - Sets up the thread ID (generates new one if not provided)
|
|
1730
1765
|
* - Initializes all agents
|
|
1731
|
-
* -
|
|
1766
|
+
* - Initializes turn state (pending agents/roles) without creating a trace span
|
|
1732
1767
|
* - Records the start time for performance tracking
|
|
1733
1768
|
* - Clears any pending messages
|
|
1734
1769
|
* - Clears the result from any previous execution
|
|
@@ -1907,6 +1942,25 @@ declare namespace execution {
|
|
|
1907
1942
|
export { execution_ScenarioExecution as ScenarioExecution, execution_ScenarioExecutionState as ScenarioExecutionState, type execution_StateChangeEvent as StateChangeEvent, execution_StateChangeEventType as StateChangeEventType };
|
|
1908
1943
|
}
|
|
1909
1944
|
|
|
1945
|
+
/**
|
|
1946
|
+
* Configuration for LangWatch event reporting.
|
|
1947
|
+
* All fields are optional — any omitted fields fall back to environment variables.
|
|
1948
|
+
*/
|
|
1949
|
+
interface LangwatchConfig {
|
|
1950
|
+
/** The endpoint URL to send events to. Falls back to LANGWATCH_ENDPOINT env var. */
|
|
1951
|
+
endpoint?: string;
|
|
1952
|
+
/** The API key for authentication. Falls back to LANGWATCH_API_KEY env var. */
|
|
1953
|
+
apiKey?: string;
|
|
1954
|
+
}
|
|
1955
|
+
/**
|
|
1956
|
+
* Options for running a scenario.
|
|
1957
|
+
*/
|
|
1958
|
+
interface RunOptions {
|
|
1959
|
+
/** LangWatch configuration for event reporting. Overrides environment variables. */
|
|
1960
|
+
langwatch?: LangwatchConfig;
|
|
1961
|
+
/** Batch run ID for grouping scenario runs. Overrides SCENARIO_BATCH_RUN_ID env var. */
|
|
1962
|
+
batchRunId?: string;
|
|
1963
|
+
}
|
|
1910
1964
|
/**
|
|
1911
1965
|
* High-level interface for running a scenario test.
|
|
1912
1966
|
*
|
|
@@ -1956,11 +2010,13 @@ declare namespace execution {
|
|
|
1956
2010
|
* main();
|
|
1957
2011
|
* ```
|
|
1958
2012
|
*/
|
|
1959
|
-
declare function run(cfg: ScenarioConfig): Promise<ScenarioResult>;
|
|
2013
|
+
declare function run(cfg: ScenarioConfig, options?: RunOptions): Promise<ScenarioResult>;
|
|
1960
2014
|
|
|
2015
|
+
type runner_LangwatchConfig = LangwatchConfig;
|
|
2016
|
+
type runner_RunOptions = RunOptions;
|
|
1961
2017
|
declare const runner_run: typeof run;
|
|
1962
2018
|
declare namespace runner {
|
|
1963
|
-
export { runner_run as run };
|
|
2019
|
+
export { type runner_LangwatchConfig as LangwatchConfig, type runner_RunOptions as RunOptions, runner_run as run };
|
|
1964
2020
|
}
|
|
1965
2021
|
|
|
1966
2022
|
/**
|
|
@@ -2068,7 +2124,107 @@ declare namespace script {
|
|
|
2068
2124
|
export { script_agent as agent, script_fail as fail, script_judge as judge, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
|
|
2069
2125
|
}
|
|
2070
2126
|
|
|
2127
|
+
/**
|
|
2128
|
+
* Explicitly set up tracing for @langwatch/scenario.
|
|
2129
|
+
*
|
|
2130
|
+
* Call this before any `run()` invocations when you want full control
|
|
2131
|
+
* over the observability configuration. If called, `run()` will skip
|
|
2132
|
+
* its own lazy initialization.
|
|
2133
|
+
*
|
|
2134
|
+
* The `judgeSpanCollector` is always added as a span processor regardless
|
|
2135
|
+
* of the user-provided options.
|
|
2136
|
+
*
|
|
2137
|
+
* @param options - Optional `SetupObservabilityOptions` forwarded to the
|
|
2138
|
+
* langwatch SDK `setupObservability()` function.
|
|
2139
|
+
*
|
|
2140
|
+
* @example
|
|
2141
|
+
* ```typescript
|
|
2142
|
+
* import { setupScenarioTracing } from "@langwatch/scenario";
|
|
2143
|
+
*
|
|
2144
|
+
* setupScenarioTracing({
|
|
2145
|
+
* instrumentations: [], // disable auto-instrumentation
|
|
2146
|
+
* spanProcessors: [myProcessor], // add custom processors
|
|
2147
|
+
* });
|
|
2148
|
+
* ```
|
|
2149
|
+
*/
|
|
2150
|
+
declare function setupScenarioTracing(options?: Partial<SetupObservabilityOptions>): void;
|
|
2151
|
+
|
|
2152
|
+
/**
|
|
2153
|
+
* Criteria for matching spans by instrumentation scope name or span name.
|
|
2154
|
+
* Within each field, matchers use OR semantics (any match succeeds).
|
|
2155
|
+
* Across fields, AND semantics apply (all specified fields must match).
|
|
2156
|
+
*/
|
|
2157
|
+
interface TraceFilterCriteria {
|
|
2158
|
+
instrumentationScopeName?: TraceFilterMatch[];
|
|
2159
|
+
name?: TraceFilterMatch[];
|
|
2160
|
+
}
|
|
2161
|
+
/**
|
|
2162
|
+
* A single match rule for string comparison.
|
|
2163
|
+
*/
|
|
2164
|
+
interface TraceFilterMatch {
|
|
2165
|
+
equals?: string;
|
|
2166
|
+
startsWith?: string;
|
|
2167
|
+
matches?: RegExp;
|
|
2168
|
+
ignoreCase?: boolean;
|
|
2169
|
+
}
|
|
2170
|
+
/**
|
|
2171
|
+
* A filter rule for controlling which spans are exported.
|
|
2172
|
+
*
|
|
2173
|
+
* Compatible with the langwatch SDK's `TraceFilter` type used by
|
|
2174
|
+
* `LangWatchTraceExporter`.
|
|
2175
|
+
*/
|
|
2176
|
+
type TraceFilter = {
|
|
2177
|
+
preset: "vercelAIOnly" | "excludeHttpRequests";
|
|
2178
|
+
} | {
|
|
2179
|
+
include: TraceFilterCriteria;
|
|
2180
|
+
} | {
|
|
2181
|
+
exclude: TraceFilterCriteria;
|
|
2182
|
+
};
|
|
2183
|
+
/**
|
|
2184
|
+
* Preset filter that only keeps spans from the @langwatch/scenario instrumentation scope.
|
|
2185
|
+
* Use this to prevent unrelated server spans (HTTP, middleware, etc.) from being exported.
|
|
2186
|
+
*
|
|
2187
|
+
* @example
|
|
2188
|
+
* ```typescript
|
|
2189
|
+
* import { defineConfig, scenarioOnly } from "@langwatch/scenario";
|
|
2190
|
+
* import { LangWatchTraceExporter } from "langwatch/observability";
|
|
2191
|
+
*
|
|
2192
|
+
* export default defineConfig({
|
|
2193
|
+
* observability: {
|
|
2194
|
+
* traceExporter: new LangWatchTraceExporter({
|
|
2195
|
+
* filters: scenarioOnly,
|
|
2196
|
+
* }),
|
|
2197
|
+
* instrumentations: [], // disable auto-instrumentation
|
|
2198
|
+
* },
|
|
2199
|
+
* });
|
|
2200
|
+
* ```
|
|
2201
|
+
*/
|
|
2202
|
+
declare const scenarioOnly: TraceFilter[];
|
|
2203
|
+
/**
|
|
2204
|
+
* Creates a filter that keeps spans from the @langwatch/scenario scope
|
|
2205
|
+
* plus any additional custom instrumentation scopes.
|
|
2206
|
+
*
|
|
2207
|
+
* @param scopes - Additional instrumentation scope names to include
|
|
2208
|
+
* @returns Array of TraceFilter rules
|
|
2209
|
+
*
|
|
2210
|
+
* @example
|
|
2211
|
+
* ```typescript
|
|
2212
|
+
* import { defineConfig, withCustomScopes } from "@langwatch/scenario";
|
|
2213
|
+
* import { LangWatchTraceExporter } from "langwatch/observability";
|
|
2214
|
+
*
|
|
2215
|
+
* export default defineConfig({
|
|
2216
|
+
* observability: {
|
|
2217
|
+
* traceExporter: new LangWatchTraceExporter({
|
|
2218
|
+
* filters: withCustomScopes("my-app/database", "my-app/agent"),
|
|
2219
|
+
* }),
|
|
2220
|
+
* instrumentations: [], // disable auto-instrumentation
|
|
2221
|
+
* },
|
|
2222
|
+
* });
|
|
2223
|
+
* ```
|
|
2224
|
+
*/
|
|
2225
|
+
declare function withCustomScopes(...scopes: string[]): TraceFilter[];
|
|
2226
|
+
|
|
2071
2227
|
type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
|
|
2072
2228
|
declare const scenario: ScenarioApi;
|
|
2073
2229
|
|
|
2074
|
-
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
|
|
2230
|
+
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
|