@langwatch/scenario 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/index.d.mts +220 -41
- package/dist/index.d.ts +220 -41
- package/dist/index.js +2485 -1733
- package/dist/index.mjs +2738 -1988
- package/dist/integrations/vitest/setup.js +5 -2
- package/dist/integrations/vitest/setup.mjs +5 -2
- package/package.json +4 -3
package/dist/index.d.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { ModelMessage, UserModelMessage, AssistantModelMessage, ToolModelMessage
|
|
|
3
3
|
import { z } from 'zod/v4';
|
|
4
4
|
import { SpanProcessor, ReadableSpan } from '@opentelemetry/sdk-trace-base';
|
|
5
5
|
import { RealtimeSession } from '@openai/agents/realtime';
|
|
6
|
+
import { SetupObservabilityOptions } from 'langwatch/observability/node';
|
|
6
7
|
import { Observable } from 'rxjs';
|
|
7
8
|
import { z as z$1 } from 'zod';
|
|
8
9
|
|
|
@@ -20,6 +21,18 @@ declare enum AgentRole {
|
|
|
20
21
|
JUDGE = "Judge"
|
|
21
22
|
}
|
|
22
23
|
declare const allAgentRoles: readonly [AgentRole.USER, AgentRole.AGENT, AgentRole.JUDGE];
|
|
24
|
+
/**
|
|
25
|
+
* Encapsulates a request for the judge agent to evaluate the conversation.
|
|
26
|
+
*
|
|
27
|
+
* When present on AgentInput, signals the judge to produce a verdict.
|
|
28
|
+
* Optionally carries inline criteria that override the judge's own criteria.
|
|
29
|
+
*/
|
|
30
|
+
interface JudgmentRequest {
|
|
31
|
+
/**
|
|
32
|
+
* Optional criteria to evaluate, overriding the judge agent's configured criteria.
|
|
33
|
+
*/
|
|
34
|
+
criteria?: string[];
|
|
35
|
+
}
|
|
23
36
|
/**
|
|
24
37
|
* Input provided to an agent's `call` method.
|
|
25
38
|
*/
|
|
@@ -41,9 +54,9 @@ interface AgentInput {
|
|
|
41
54
|
*/
|
|
42
55
|
requestedRole: AgentRole;
|
|
43
56
|
/**
|
|
44
|
-
*
|
|
57
|
+
* When set, requests the judge to produce a verdict, optionally with inline criteria.
|
|
45
58
|
*/
|
|
46
|
-
judgmentRequest
|
|
59
|
+
judgmentRequest?: JudgmentRequest;
|
|
47
60
|
/**
|
|
48
61
|
* The current state of the scenario execution.
|
|
49
62
|
*/
|
|
@@ -166,6 +179,12 @@ interface ScenarioConfig {
|
|
|
166
179
|
* If not provided, the scenario will not be grouped into a set.
|
|
167
180
|
*/
|
|
168
181
|
setId?: string;
|
|
182
|
+
/**
|
|
183
|
+
* Optional metadata to attach to the scenario run.
|
|
184
|
+
* Accepts arbitrary key-value pairs (e.g. prompt IDs, environments, versions).
|
|
185
|
+
* The `langwatch` key is reserved for platform-internal use.
|
|
186
|
+
*/
|
|
187
|
+
metadata?: Record<string, unknown>;
|
|
169
188
|
}
|
|
170
189
|
/**
|
|
171
190
|
* Final, normalized scenario configuration.
|
|
@@ -215,10 +234,12 @@ interface ScenarioExecutionLike {
|
|
|
215
234
|
agent(content?: string | ModelMessage): Promise<void>;
|
|
216
235
|
/**
|
|
217
236
|
* Invokes the judge agent to evaluate the current state.
|
|
218
|
-
* @param
|
|
237
|
+
* @param options Optional options with inline criteria to evaluate as a checkpoint.
|
|
219
238
|
* @returns The result of the scenario if the judge makes a final decision.
|
|
220
239
|
*/
|
|
221
|
-
judge(
|
|
240
|
+
judge(options?: {
|
|
241
|
+
criteria?: string[];
|
|
242
|
+
}): Promise<ScenarioResult | null>;
|
|
222
243
|
/**
|
|
223
244
|
* Proceeds with the scenario automatically for a number of turns.
|
|
224
245
|
* @param turns The number of turns to proceed. Defaults to running until the scenario ends.
|
|
@@ -251,6 +272,10 @@ type ScriptStep = (state: ScenarioExecutionStateLike, executor: ScenarioExecutio
|
|
|
251
272
|
*
|
|
252
273
|
*/
|
|
253
274
|
interface ScenarioResult {
|
|
275
|
+
/**
|
|
276
|
+
* Unique identifier for this scenario run.
|
|
277
|
+
*/
|
|
278
|
+
runId: string;
|
|
254
279
|
/**
|
|
255
280
|
* Indicates whether the scenario was successful.
|
|
256
281
|
*/
|
|
@@ -343,6 +368,13 @@ interface ScenarioExecutionStateLike {
|
|
|
343
368
|
hasToolCall(toolName: string): boolean;
|
|
344
369
|
}
|
|
345
370
|
|
|
371
|
+
/**
|
|
372
|
+
* Schema for the scenario project configuration file (scenario.config.js).
|
|
373
|
+
*
|
|
374
|
+
* The `observability` field accepts a subset of `SetupObservabilityOptions`
|
|
375
|
+
* from the langwatch SDK. It uses `z.custom()` to avoid strict validation
|
|
376
|
+
* on the passthrough object while keeping the outer config strict.
|
|
377
|
+
*/
|
|
346
378
|
declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
347
379
|
defaultModel: z.ZodOptional<z.ZodObject<{
|
|
348
380
|
model: z.ZodCustom<ai.LanguageModel, ai.LanguageModel>;
|
|
@@ -350,6 +382,7 @@ declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
|
350
382
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
351
383
|
}, z.core.$strip>>;
|
|
352
384
|
headless: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
385
|
+
observability: z.ZodOptional<z.ZodCustom<Partial<SetupObservabilityOptions>, Partial<SetupObservabilityOptions>>>;
|
|
353
386
|
}, z.core.$strict>;
|
|
354
387
|
type ScenarioProjectConfig = z.infer<typeof scenarioProjectConfigSchema>;
|
|
355
388
|
declare function defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
|
|
@@ -364,6 +397,7 @@ declare const domain_DEFAULT_MAX_TURNS: typeof DEFAULT_MAX_TURNS;
|
|
|
364
397
|
declare const domain_DEFAULT_VERBOSE: typeof DEFAULT_VERBOSE;
|
|
365
398
|
type domain_JudgeAgentAdapter = JudgeAgentAdapter;
|
|
366
399
|
declare const domain_JudgeAgentAdapter: typeof JudgeAgentAdapter;
|
|
400
|
+
type domain_JudgmentRequest = JudgmentRequest;
|
|
367
401
|
type domain_ScenarioConfig = ScenarioConfig;
|
|
368
402
|
type domain_ScenarioConfigFinal = ScenarioConfigFinal;
|
|
369
403
|
type domain_ScenarioExecutionLike = ScenarioExecutionLike;
|
|
@@ -377,7 +411,7 @@ declare const domain_allAgentRoles: typeof allAgentRoles;
|
|
|
377
411
|
declare const domain_defineConfig: typeof defineConfig;
|
|
378
412
|
declare const domain_scenarioProjectConfigSchema: typeof scenarioProjectConfigSchema;
|
|
379
413
|
declare namespace domain {
|
|
380
|
-
export { domain_AgentAdapter as AgentAdapter, type domain_AgentInput as AgentInput, type domain_AgentReturnTypes as AgentReturnTypes, domain_AgentRole as AgentRole, domain_DEFAULT_MAX_TURNS as DEFAULT_MAX_TURNS, domain_DEFAULT_VERBOSE as DEFAULT_VERBOSE, domain_JudgeAgentAdapter as JudgeAgentAdapter, type domain_ScenarioConfig as ScenarioConfig, type domain_ScenarioConfigFinal as ScenarioConfigFinal, type domain_ScenarioExecutionLike as ScenarioExecutionLike, type domain_ScenarioExecutionStateLike as ScenarioExecutionStateLike, type domain_ScenarioProjectConfig as ScenarioProjectConfig, type domain_ScenarioResult as ScenarioResult, type domain_ScriptStep as ScriptStep, domain_UserSimulatorAgentAdapter as UserSimulatorAgentAdapter, domain_allAgentRoles as allAgentRoles, domain_defineConfig as defineConfig, domain_scenarioProjectConfigSchema as scenarioProjectConfigSchema };
|
|
414
|
+
export { domain_AgentAdapter as AgentAdapter, type domain_AgentInput as AgentInput, type domain_AgentReturnTypes as AgentReturnTypes, domain_AgentRole as AgentRole, domain_DEFAULT_MAX_TURNS as DEFAULT_MAX_TURNS, domain_DEFAULT_VERBOSE as DEFAULT_VERBOSE, domain_JudgeAgentAdapter as JudgeAgentAdapter, type domain_JudgmentRequest as JudgmentRequest, type domain_ScenarioConfig as ScenarioConfig, type domain_ScenarioConfigFinal as ScenarioConfigFinal, type domain_ScenarioExecutionLike as ScenarioExecutionLike, type domain_ScenarioExecutionStateLike as ScenarioExecutionStateLike, type domain_ScenarioProjectConfig as ScenarioProjectConfig, type domain_ScenarioResult as ScenarioResult, type domain_ScriptStep as ScriptStep, domain_UserSimulatorAgentAdapter as UserSimulatorAgentAdapter, domain_allAgentRoles as allAgentRoles, domain_defineConfig as defineConfig, domain_scenarioProjectConfigSchema as scenarioProjectConfigSchema };
|
|
381
415
|
}
|
|
382
416
|
|
|
383
417
|
/**
|
|
@@ -452,6 +486,13 @@ declare class JudgeSpanCollector implements SpanProcessor {
|
|
|
452
486
|
onEnd(span: ReadableSpan): void;
|
|
453
487
|
forceFlush(): Promise<void>;
|
|
454
488
|
shutdown(): Promise<void>;
|
|
489
|
+
/**
|
|
490
|
+
* Removes all spans associated with a specific thread.
|
|
491
|
+
* Call this after a scenario run completes to prevent memory growth
|
|
492
|
+
* in long-lived processes.
|
|
493
|
+
* @param threadId - The thread identifier whose spans should be cleared
|
|
494
|
+
*/
|
|
495
|
+
clearSpansForThread(threadId: string): void;
|
|
455
496
|
/**
|
|
456
497
|
* Retrieves all spans associated with a specific thread.
|
|
457
498
|
* @param threadId - The thread identifier to filter spans by
|
|
@@ -475,7 +516,7 @@ interface JudgeAgentConfig extends TestingAgentConfig {
|
|
|
475
516
|
/**
|
|
476
517
|
* The criteria that the judge will use to evaluate the conversation.
|
|
477
518
|
*/
|
|
478
|
-
criteria
|
|
519
|
+
criteria?: string[];
|
|
479
520
|
/**
|
|
480
521
|
* Optional span collector for telemetry. Defaults to global singleton.
|
|
481
522
|
*/
|
|
@@ -554,7 +595,7 @@ declare class JudgeAgent extends JudgeAgentAdapter {
|
|
|
554
595
|
* main();
|
|
555
596
|
* ```
|
|
556
597
|
*/
|
|
557
|
-
declare const judgeAgent: (cfg
|
|
598
|
+
declare const judgeAgent: (cfg?: JudgeAgentConfig) => JudgeAgent;
|
|
558
599
|
|
|
559
600
|
/**
|
|
560
601
|
* Transforms OpenTelemetry spans into a complete plain-text digest for judge evaluation.
|
|
@@ -899,35 +940,39 @@ declare const scenarioEventSchema: z$1.ZodDiscriminatedUnion<"type", [z$1.ZodObj
|
|
|
899
940
|
metadata: z$1.ZodObject<{
|
|
900
941
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
901
942
|
description: z$1.ZodOptional<z$1.ZodString>;
|
|
902
|
-
}, "strip", z$1.
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
}, {
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
}
|
|
943
|
+
}, "strip", z$1.ZodUnknown, z$1.objectOutputType<{
|
|
944
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
945
|
+
description: z$1.ZodOptional<z$1.ZodString>;
|
|
946
|
+
}, z$1.ZodUnknown, "strip">, z$1.objectInputType<{
|
|
947
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
948
|
+
description: z$1.ZodOptional<z$1.ZodString>;
|
|
949
|
+
}, z$1.ZodUnknown, "strip">>;
|
|
909
950
|
}, "strip", z$1.ZodTypeAny, {
|
|
910
951
|
type: ScenarioEventType.RUN_STARTED;
|
|
952
|
+
metadata: {
|
|
953
|
+
name?: string | undefined;
|
|
954
|
+
description?: string | undefined;
|
|
955
|
+
} & {
|
|
956
|
+
[k: string]: unknown;
|
|
957
|
+
};
|
|
911
958
|
timestamp: number;
|
|
912
959
|
batchRunId: string;
|
|
913
960
|
scenarioId: string;
|
|
914
961
|
scenarioRunId: string;
|
|
915
962
|
scenarioSetId: string;
|
|
916
|
-
metadata: {
|
|
917
|
-
description?: string | undefined;
|
|
918
|
-
name?: string | undefined;
|
|
919
|
-
};
|
|
920
963
|
rawEvent?: any;
|
|
921
964
|
}, {
|
|
922
965
|
type: ScenarioEventType.RUN_STARTED;
|
|
966
|
+
metadata: {
|
|
967
|
+
name?: string | undefined;
|
|
968
|
+
description?: string | undefined;
|
|
969
|
+
} & {
|
|
970
|
+
[k: string]: unknown;
|
|
971
|
+
};
|
|
923
972
|
timestamp: number;
|
|
924
973
|
batchRunId: string;
|
|
925
974
|
scenarioId: string;
|
|
926
975
|
scenarioRunId: string;
|
|
927
|
-
metadata: {
|
|
928
|
-
description?: string | undefined;
|
|
929
|
-
name?: string | undefined;
|
|
930
|
-
};
|
|
931
976
|
rawEvent?: any;
|
|
932
977
|
scenarioSetId?: string | undefined;
|
|
933
978
|
}>, z$1.ZodObject<{
|
|
@@ -1334,6 +1379,8 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1334
1379
|
private currentTurnSpan?;
|
|
1335
1380
|
/** Timestamp when execution started (for total time calculation) */
|
|
1336
1381
|
private totalStartTime;
|
|
1382
|
+
/** Accumulated results from inline judge checkpoints */
|
|
1383
|
+
private checkpointResults;
|
|
1337
1384
|
/** Event stream for monitoring scenario progress */
|
|
1338
1385
|
private eventSubject;
|
|
1339
1386
|
/**
|
|
@@ -1346,13 +1393,18 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1346
1393
|
* - RUN_FINISHED: When scenario execution completes (success/failure/error)
|
|
1347
1394
|
*/
|
|
1348
1395
|
readonly events$: Observable<ScenarioEvent>;
|
|
1396
|
+
/** Batch run ID for grouping scenario runs */
|
|
1397
|
+
private batchRunId;
|
|
1398
|
+
/** The run ID for the current execution */
|
|
1399
|
+
private scenarioRunId?;
|
|
1349
1400
|
/**
|
|
1350
1401
|
* Creates a new ScenarioExecution instance.
|
|
1351
1402
|
*
|
|
1352
1403
|
* @param config - The scenario configuration containing agents, settings, and metadata
|
|
1353
1404
|
* @param script - The ordered sequence of script steps that define the test flow
|
|
1405
|
+
* @param batchRunId - Batch run ID for grouping scenario runs
|
|
1354
1406
|
*/
|
|
1355
|
-
constructor(config: ScenarioConfig, script: ScriptStep[]);
|
|
1407
|
+
constructor(config: ScenarioConfig, script: ScriptStep[], batchRunId: string);
|
|
1356
1408
|
/**
|
|
1357
1409
|
* Gets the complete conversation history as an array of messages.
|
|
1358
1410
|
*
|
|
@@ -1554,24 +1606,23 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1554
1606
|
*
|
|
1555
1607
|
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
1556
1608
|
*
|
|
1557
|
-
* @param
|
|
1609
|
+
* @param options - Optional options with inline criteria to evaluate as a checkpoint.
|
|
1558
1610
|
* @returns A promise that resolves with:
|
|
1559
1611
|
* - ScenarioResult if the judge makes a final decision, or
|
|
1560
1612
|
* - Null if the conversation should continue
|
|
1561
1613
|
*
|
|
1562
1614
|
* @example
|
|
1563
1615
|
* ```typescript
|
|
1564
|
-
* // Let judge evaluate
|
|
1616
|
+
* // Let judge evaluate with its configured criteria
|
|
1565
1617
|
* const result = await execution.judge();
|
|
1566
|
-
* if (result) {
|
|
1567
|
-
* console.log(`Judge decided: ${result.success ? 'pass' : 'fail'}`);
|
|
1568
|
-
* }
|
|
1569
1618
|
*
|
|
1570
|
-
* //
|
|
1571
|
-
* const result = await execution.judge(
|
|
1619
|
+
* // Evaluate inline criteria as a checkpoint
|
|
1620
|
+
* const result = await execution.judge({ criteria: ["Agent responded helpfully"] });
|
|
1572
1621
|
* ```
|
|
1573
1622
|
*/
|
|
1574
|
-
judge(
|
|
1623
|
+
judge(options?: {
|
|
1624
|
+
criteria?: string[];
|
|
1625
|
+
}): Promise<ScenarioResult | null>;
|
|
1575
1626
|
/**
|
|
1576
1627
|
* Lets the scenario proceed automatically for a specified number of turns.
|
|
1577
1628
|
*
|
|
@@ -1712,12 +1763,14 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1712
1763
|
* - Creates a new ScenarioExecutionState with the current config
|
|
1713
1764
|
* - Sets up the thread ID (generates new one if not provided)
|
|
1714
1765
|
* - Initializes all agents
|
|
1715
|
-
* -
|
|
1766
|
+
* - Initializes turn state (pending agents/roles) without creating a trace span
|
|
1716
1767
|
* - Records the start time for performance tracking
|
|
1717
1768
|
* - Clears any pending messages
|
|
1718
1769
|
* - Clears the result from any previous execution
|
|
1719
1770
|
*/
|
|
1720
1771
|
private reset;
|
|
1772
|
+
/** Compiles all accumulated checkpoint results into aggregated met/unmet criteria. */
|
|
1773
|
+
private get compiledCheckpoints();
|
|
1721
1774
|
private nextAgentForRole;
|
|
1722
1775
|
/**
|
|
1723
1776
|
* Starts a new turn in the scenario execution.
|
|
@@ -1889,6 +1942,25 @@ declare namespace execution {
|
|
|
1889
1942
|
export { execution_ScenarioExecution as ScenarioExecution, execution_ScenarioExecutionState as ScenarioExecutionState, type execution_StateChangeEvent as StateChangeEvent, execution_StateChangeEventType as StateChangeEventType };
|
|
1890
1943
|
}
|
|
1891
1944
|
|
|
1945
|
+
/**
|
|
1946
|
+
* Configuration for LangWatch event reporting.
|
|
1947
|
+
* All fields are optional — any omitted fields fall back to environment variables.
|
|
1948
|
+
*/
|
|
1949
|
+
interface LangwatchConfig {
|
|
1950
|
+
/** The endpoint URL to send events to. Falls back to LANGWATCH_ENDPOINT env var. */
|
|
1951
|
+
endpoint?: string;
|
|
1952
|
+
/** The API key for authentication. Falls back to LANGWATCH_API_KEY env var. */
|
|
1953
|
+
apiKey?: string;
|
|
1954
|
+
}
|
|
1955
|
+
/**
|
|
1956
|
+
* Options for running a scenario.
|
|
1957
|
+
*/
|
|
1958
|
+
interface RunOptions {
|
|
1959
|
+
/** LangWatch configuration for event reporting. Overrides environment variables. */
|
|
1960
|
+
langwatch?: LangwatchConfig;
|
|
1961
|
+
/** Batch run ID for grouping scenario runs. Overrides SCENARIO_BATCH_RUN_ID env var. */
|
|
1962
|
+
batchRunId?: string;
|
|
1963
|
+
}
|
|
1892
1964
|
/**
|
|
1893
1965
|
* High-level interface for running a scenario test.
|
|
1894
1966
|
*
|
|
@@ -1938,11 +2010,13 @@ declare namespace execution {
|
|
|
1938
2010
|
* main();
|
|
1939
2011
|
* ```
|
|
1940
2012
|
*/
|
|
1941
|
-
declare function run(cfg: ScenarioConfig): Promise<ScenarioResult>;
|
|
2013
|
+
declare function run(cfg: ScenarioConfig, options?: RunOptions): Promise<ScenarioResult>;
|
|
1942
2014
|
|
|
2015
|
+
type runner_LangwatchConfig = LangwatchConfig;
|
|
2016
|
+
type runner_RunOptions = RunOptions;
|
|
1943
2017
|
declare const runner_run: typeof run;
|
|
1944
2018
|
declare namespace runner {
|
|
1945
|
-
export { runner_run as run };
|
|
2019
|
+
export { type runner_LangwatchConfig as LangwatchConfig, type runner_RunOptions as RunOptions, runner_run as run };
|
|
1946
2020
|
}
|
|
1947
2021
|
|
|
1948
2022
|
/**
|
|
@@ -1980,15 +2054,20 @@ declare const agent: (content?: string | ModelMessage) => ScriptStep;
|
|
|
1980
2054
|
/**
|
|
1981
2055
|
* Invoke the judge agent to evaluate the current conversation state.
|
|
1982
2056
|
*
|
|
1983
|
-
*
|
|
1984
|
-
*
|
|
1985
|
-
*
|
|
2057
|
+
* When criteria are provided inline, the judge evaluates only those criteria
|
|
2058
|
+
* as a checkpoint: if all pass, the scenario continues; if any fail, the
|
|
2059
|
+
* scenario fails immediately. This is the preferred way to pass criteria
|
|
2060
|
+
* when using scripts.
|
|
1986
2061
|
*
|
|
1987
|
-
*
|
|
1988
|
-
*
|
|
2062
|
+
* When no criteria are provided, the judge uses its own configured criteria
|
|
2063
|
+
* and returns a final verdict (success or failure), ending the scenario.
|
|
2064
|
+
*
|
|
2065
|
+
* @param options Optional options object with inline criteria to evaluate.
|
|
1989
2066
|
* @returns A ScriptStep function that can be used in scenario scripts.
|
|
1990
2067
|
*/
|
|
1991
|
-
declare const judge: (
|
|
2068
|
+
declare const judge: (options?: {
|
|
2069
|
+
criteria: string[];
|
|
2070
|
+
}) => ScriptStep;
|
|
1992
2071
|
/**
|
|
1993
2072
|
* Generate or specify a user message in the conversation.
|
|
1994
2073
|
*
|
|
@@ -2045,7 +2124,107 @@ declare namespace script {
|
|
|
2045
2124
|
export { script_agent as agent, script_fail as fail, script_judge as judge, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
|
|
2046
2125
|
}
|
|
2047
2126
|
|
|
2127
|
+
/**
|
|
2128
|
+
* Explicitly set up tracing for @langwatch/scenario.
|
|
2129
|
+
*
|
|
2130
|
+
* Call this before any `run()` invocations when you want full control
|
|
2131
|
+
* over the observability configuration. If called, `run()` will skip
|
|
2132
|
+
* its own lazy initialization.
|
|
2133
|
+
*
|
|
2134
|
+
* The `judgeSpanCollector` is always added as a span processor regardless
|
|
2135
|
+
* of the user-provided options.
|
|
2136
|
+
*
|
|
2137
|
+
* @param options - Optional `SetupObservabilityOptions` forwarded to the
|
|
2138
|
+
* langwatch SDK `setupObservability()` function.
|
|
2139
|
+
*
|
|
2140
|
+
* @example
|
|
2141
|
+
* ```typescript
|
|
2142
|
+
* import { setupScenarioTracing } from "@langwatch/scenario";
|
|
2143
|
+
*
|
|
2144
|
+
* setupScenarioTracing({
|
|
2145
|
+
* instrumentations: [], // disable auto-instrumentation
|
|
2146
|
+
* spanProcessors: [myProcessor], // add custom processors
|
|
2147
|
+
* });
|
|
2148
|
+
* ```
|
|
2149
|
+
*/
|
|
2150
|
+
declare function setupScenarioTracing(options?: Partial<SetupObservabilityOptions>): void;
|
|
2151
|
+
|
|
2152
|
+
/**
|
|
2153
|
+
* Criteria for matching spans by instrumentation scope name or span name.
|
|
2154
|
+
* Within each field, matchers use OR semantics (any match succeeds).
|
|
2155
|
+
* Across fields, AND semantics apply (all specified fields must match).
|
|
2156
|
+
*/
|
|
2157
|
+
interface TraceFilterCriteria {
|
|
2158
|
+
instrumentationScopeName?: TraceFilterMatch[];
|
|
2159
|
+
name?: TraceFilterMatch[];
|
|
2160
|
+
}
|
|
2161
|
+
/**
|
|
2162
|
+
* A single match rule for string comparison.
|
|
2163
|
+
*/
|
|
2164
|
+
interface TraceFilterMatch {
|
|
2165
|
+
equals?: string;
|
|
2166
|
+
startsWith?: string;
|
|
2167
|
+
matches?: RegExp;
|
|
2168
|
+
ignoreCase?: boolean;
|
|
2169
|
+
}
|
|
2170
|
+
/**
|
|
2171
|
+
* A filter rule for controlling which spans are exported.
|
|
2172
|
+
*
|
|
2173
|
+
* Compatible with the langwatch SDK's `TraceFilter` type used by
|
|
2174
|
+
* `LangWatchTraceExporter`.
|
|
2175
|
+
*/
|
|
2176
|
+
type TraceFilter = {
|
|
2177
|
+
preset: "vercelAIOnly" | "excludeHttpRequests";
|
|
2178
|
+
} | {
|
|
2179
|
+
include: TraceFilterCriteria;
|
|
2180
|
+
} | {
|
|
2181
|
+
exclude: TraceFilterCriteria;
|
|
2182
|
+
};
|
|
2183
|
+
/**
|
|
2184
|
+
* Preset filter that only keeps spans from the @langwatch/scenario instrumentation scope.
|
|
2185
|
+
* Use this to prevent unrelated server spans (HTTP, middleware, etc.) from being exported.
|
|
2186
|
+
*
|
|
2187
|
+
* @example
|
|
2188
|
+
* ```typescript
|
|
2189
|
+
* import { defineConfig, scenarioOnly } from "@langwatch/scenario";
|
|
2190
|
+
* import { LangWatchTraceExporter } from "langwatch/observability";
|
|
2191
|
+
*
|
|
2192
|
+
* export default defineConfig({
|
|
2193
|
+
* observability: {
|
|
2194
|
+
* traceExporter: new LangWatchTraceExporter({
|
|
2195
|
+
* filters: scenarioOnly,
|
|
2196
|
+
* }),
|
|
2197
|
+
* instrumentations: [], // disable auto-instrumentation
|
|
2198
|
+
* },
|
|
2199
|
+
* });
|
|
2200
|
+
* ```
|
|
2201
|
+
*/
|
|
2202
|
+
declare const scenarioOnly: TraceFilter[];
|
|
2203
|
+
/**
|
|
2204
|
+
* Creates a filter that keeps spans from the @langwatch/scenario scope
|
|
2205
|
+
* plus any additional custom instrumentation scopes.
|
|
2206
|
+
*
|
|
2207
|
+
* @param scopes - Additional instrumentation scope names to include
|
|
2208
|
+
* @returns Array of TraceFilter rules
|
|
2209
|
+
*
|
|
2210
|
+
* @example
|
|
2211
|
+
* ```typescript
|
|
2212
|
+
* import { defineConfig, withCustomScopes } from "@langwatch/scenario";
|
|
2213
|
+
* import { LangWatchTraceExporter } from "langwatch/observability";
|
|
2214
|
+
*
|
|
2215
|
+
* export default defineConfig({
|
|
2216
|
+
* observability: {
|
|
2217
|
+
* traceExporter: new LangWatchTraceExporter({
|
|
2218
|
+
* filters: withCustomScopes("my-app/database", "my-app/agent"),
|
|
2219
|
+
* }),
|
|
2220
|
+
* instrumentations: [], // disable auto-instrumentation
|
|
2221
|
+
* },
|
|
2222
|
+
* });
|
|
2223
|
+
* ```
|
|
2224
|
+
*/
|
|
2225
|
+
declare function withCustomScopes(...scopes: string[]): TraceFilter[];
|
|
2226
|
+
|
|
2048
2227
|
type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
|
|
2049
2228
|
declare const scenario: ScenarioApi;
|
|
2050
2229
|
|
|
2051
|
-
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
|
|
2230
|
+
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, type JudgmentRequest, type LangwatchConfig, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type RunOptions, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioOnly, scenarioProjectConfigSchema, setupScenarioTracing, succeed, user, userSimulatorAgent, withCustomScopes };
|