@langwatch/scenario 0.2.2 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -222,10 +222,6 @@ export default defineConfig({
222
222
  model: openai("gpt-4o-mini"),
223
223
  temperature: 0.1,
224
224
  },
225
-
226
- // Configure the LangWatch reporting endpoint and API key
227
- langwatchEndpoint: "https://app.langwatch.ai",
228
- langwatchApiKey: process.env.LANGWATCH_API_KEY,
229
225
  });
230
226
  ```
231
227
 
@@ -239,8 +235,6 @@ The following configuration options are all optional. You can specify any combin
239
235
  - `model`: **(Required if `defaultModel` is set)** An instance of a language model from a provider like `@ai-sdk/openai`.
240
236
  - `temperature` _(Optional)_: The default temperature for the model (e.g., `0.1`).
241
237
  - `maxTokens` _(Optional)_: The default maximum number of tokens for the model to generate.
242
- - `langwatchEndpoint` _(Optional)_: The endpoint for the LangWatch reporting service. If not specified, it defaults to the `LANGWATCH_ENDPOINT` environment variable, or `https://app.langwatch.ai`.
243
- - `langwatchApiKey` _(Optional)_: Your LangWatch API key for authenticating with the reporting service. If not specified, it defaults to the `LANGWATCH_API_KEY` environment variable.
244
238
 
245
239
  ### Environment Variables
246
240
 
@@ -43,12 +43,18 @@ var JudgeAgentAdapter = class {
43
43
  }
44
44
  };
45
45
 
46
+ // src/domain/scenarios/index.ts
47
+ var DEFAULT_MAX_TURNS = 10;
48
+ var DEFAULT_VERBOSE = false;
49
+
46
50
  // src/domain/index.ts
47
51
  var domain_exports = {};
48
52
  __export(domain_exports, {
49
53
  AgentAdapter: () => AgentAdapter,
50
54
  AgentRole: () => AgentRole,
55
+ DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
51
56
  DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
57
+ DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
52
58
  JudgeAgentAdapter: () => JudgeAgentAdapter,
53
59
  UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
54
60
  allAgentRoles: () => allAgentRoles,
@@ -231,7 +237,7 @@ async function loadProjectConfig() {
231
237
  configLoadPromise = (async () => {
232
238
  try {
233
239
  config = await loadScenarioProjectConfig();
234
- logger.info("loaded scenario project config", { config });
240
+ logger.debug("loaded scenario project config", { config });
235
241
  } catch (error) {
236
242
  logger.error("error loading scenario project config", { error });
237
243
  } finally {
@@ -609,6 +615,8 @@ export {
609
615
  AgentAdapter,
610
616
  UserSimulatorAgentAdapter,
611
617
  JudgeAgentAdapter,
618
+ DEFAULT_MAX_TURNS,
619
+ DEFAULT_VERBOSE,
612
620
  domain_exports,
613
621
  Logger,
614
622
  env,
package/dist/index.d.mts CHANGED
@@ -1,7 +1,6 @@
1
- import * as zod from 'zod';
2
- import { z } from 'zod';
3
1
  import * as ai from 'ai';
4
2
  import { CoreMessage, CoreToolMessage, LanguageModel } from 'ai';
3
+ import { z } from 'zod';
5
4
  import { Observable } from 'rxjs';
6
5
 
7
6
  declare enum AgentRole {
@@ -121,6 +120,8 @@ declare abstract class JudgeAgentAdapter implements AgentAdapter {
121
120
  abstract call(input: AgentInput): Promise<AgentReturnTypes>;
122
121
  }
123
122
 
123
+ declare const DEFAULT_MAX_TURNS = 10;
124
+ declare const DEFAULT_VERBOSE = false;
124
125
  /**
125
126
  * Configuration for a scenario.
126
127
  */
@@ -147,11 +148,19 @@ interface ScenarioConfig {
147
148
  */
148
149
  script?: ScriptStep[];
149
150
  /**
150
- * Whether to output verbose logging. Defaults to false.
151
+ * Whether to output verbose logging.
152
+ *
153
+ * If no value is provided, this defaults to {@link DEFAULT_VERBOSE}.
154
+ *
155
+ * @default {@link DEFAULT_VERBOSE}
151
156
  */
152
157
  verbose?: boolean;
153
158
  /**
154
- * The maximum number of turns to execute. Defaults to 20.
159
+ * The maximum number of turns to execute.
160
+ *
161
+ * If no value is provided, this defaults to {@link DEFAULT_MAX_TURNS}.
162
+ *
163
+ * @default {@link DEFAULT_MAX_TURNS}
155
164
  */
156
165
  maxTurns?: number;
157
166
  /**
@@ -369,6 +378,33 @@ declare const scenarioProjectConfigSchema: z.ZodObject<{
369
378
  type ScenarioProjectConfig = z.infer<typeof scenarioProjectConfigSchema>;
370
379
  declare function defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
371
380
 
381
+ type domain_AgentAdapter = AgentAdapter;
382
+ declare const domain_AgentAdapter: typeof AgentAdapter;
383
+ type domain_AgentInput = AgentInput;
384
+ type domain_AgentReturnTypes = AgentReturnTypes;
385
+ type domain_AgentRole = AgentRole;
386
+ declare const domain_AgentRole: typeof AgentRole;
387
+ declare const domain_DEFAULT_MAX_TURNS: typeof DEFAULT_MAX_TURNS;
388
+ declare const domain_DEFAULT_TEMPERATURE: typeof DEFAULT_TEMPERATURE;
389
+ declare const domain_DEFAULT_VERBOSE: typeof DEFAULT_VERBOSE;
390
+ type domain_JudgeAgentAdapter = JudgeAgentAdapter;
391
+ declare const domain_JudgeAgentAdapter: typeof JudgeAgentAdapter;
392
+ type domain_ScenarioConfig = ScenarioConfig;
393
+ type domain_ScenarioConfigFinal = ScenarioConfigFinal;
394
+ type domain_ScenarioExecutionLike = ScenarioExecutionLike;
395
+ type domain_ScenarioExecutionStateLike = ScenarioExecutionStateLike;
396
+ type domain_ScenarioProjectConfig = ScenarioProjectConfig;
397
+ type domain_ScenarioResult = ScenarioResult;
398
+ type domain_ScriptStep = ScriptStep;
399
+ type domain_UserSimulatorAgentAdapter = UserSimulatorAgentAdapter;
400
+ declare const domain_UserSimulatorAgentAdapter: typeof UserSimulatorAgentAdapter;
401
+ declare const domain_allAgentRoles: typeof allAgentRoles;
402
+ declare const domain_defineConfig: typeof defineConfig;
403
+ declare const domain_scenarioProjectConfigSchema: typeof scenarioProjectConfigSchema;
404
+ declare namespace domain {
405
+ export { domain_AgentAdapter as AgentAdapter, type domain_AgentInput as AgentInput, type domain_AgentReturnTypes as AgentReturnTypes, domain_AgentRole as AgentRole, domain_DEFAULT_MAX_TURNS as DEFAULT_MAX_TURNS, domain_DEFAULT_TEMPERATURE as DEFAULT_TEMPERATURE, domain_DEFAULT_VERBOSE as DEFAULT_VERBOSE, domain_JudgeAgentAdapter as JudgeAgentAdapter, type domain_ScenarioConfig as ScenarioConfig, type domain_ScenarioConfigFinal as ScenarioConfigFinal, type domain_ScenarioExecutionLike as ScenarioExecutionLike, type domain_ScenarioExecutionStateLike as ScenarioExecutionStateLike, type domain_ScenarioProjectConfig as ScenarioProjectConfig, type domain_ScenarioResult as ScenarioResult, type domain_ScriptStep as ScriptStep, domain_UserSimulatorAgentAdapter as UserSimulatorAgentAdapter, domain_allAgentRoles as allAgentRoles, domain_defineConfig as defineConfig, domain_scenarioProjectConfigSchema as scenarioProjectConfigSchema };
406
+ }
407
+
372
408
  /**
373
409
  * Configuration for the inference parameters of a testing agent.
374
410
  */
@@ -593,6 +629,16 @@ declare const userSimulatorAgent: (config?: TestingAgentConfig) => {
593
629
  }>;
594
630
  };
595
631
 
632
+ type agents_FinishTestArgs = FinishTestArgs;
633
+ type agents_JudgeAgentConfig = JudgeAgentConfig;
634
+ type agents_TestingAgentConfig = TestingAgentConfig;
635
+ type agents_TestingAgentInferenceConfig = TestingAgentInferenceConfig;
636
+ declare const agents_judgeAgent: typeof judgeAgent;
637
+ declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
638
+ declare namespace agents {
639
+ export { type agents_FinishTestArgs as FinishTestArgs, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_TestingAgentConfig as TestingAgentConfig, type agents_TestingAgentInferenceConfig as TestingAgentInferenceConfig, agents_judgeAgent as judgeAgent, agents_userSimulatorAgent as userSimulatorAgent };
640
+ }
641
+
596
642
  /**
597
643
  * Verdict enum represents the possible outcomes of a test scenario
598
644
  */
@@ -1162,6 +1208,14 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
1162
1208
  hasToolCall(toolName: string): boolean;
1163
1209
  }
1164
1210
 
1211
+ type execution_ScenarioExecution = ScenarioExecution;
1212
+ declare const execution_ScenarioExecution: typeof ScenarioExecution;
1213
+ type execution_ScenarioExecutionState = ScenarioExecutionState;
1214
+ declare const execution_ScenarioExecutionState: typeof ScenarioExecutionState;
1215
+ declare namespace execution {
1216
+ export { execution_ScenarioExecution as ScenarioExecution, execution_ScenarioExecutionState as ScenarioExecutionState };
1217
+ }
1218
+
1165
1219
  /**
1166
1220
  * High-level interface for running a scenario test.
1167
1221
  *
@@ -1213,6 +1267,11 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
1213
1267
  */
1214
1268
  declare function run(cfg: ScenarioConfig): Promise<ScenarioResult>;
1215
1269
 
1270
+ declare const runner_run: typeof run;
1271
+ declare namespace runner {
1272
+ export { runner_run as run };
1273
+ }
1274
+
1216
1275
  /**
1217
1276
  * Scenario script DSL (Domain Specific Language) module.
1218
1277
  *
@@ -1302,69 +1361,18 @@ declare const succeed: (reasoning?: string) => ScriptStep;
1302
1361
  */
1303
1362
  declare const fail: (reasoning?: string) => ScriptStep;
1304
1363
 
1305
- declare const scenario: {
1306
- message: (message: ai.CoreMessage) => ScriptStep;
1307
- agent: (content?: string | ai.CoreMessage) => ScriptStep;
1308
- judge: (content?: string | ai.CoreMessage) => ScriptStep;
1309
- user: (content?: string | ai.CoreMessage) => ScriptStep;
1310
- proceed: (turns?: number, onTurn?: (state: ScenarioExecutionStateLike) => void | Promise<void>, onStep?: (state: ScenarioExecutionStateLike) => void | Promise<void>) => ScriptStep;
1311
- succeed: (reasoning?: string) => ScriptStep;
1312
- fail: (reasoning?: string) => ScriptStep;
1313
- run(cfg: ScenarioConfig): Promise<ScenarioResult>;
1314
- ScenarioExecution: typeof ScenarioExecution;
1315
- ScenarioExecutionState: typeof ScenarioExecutionState;
1316
- defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
1317
- DEFAULT_TEMPERATURE: 0;
1318
- scenarioProjectConfigSchema: zod.ZodObject<{
1319
- defaultModel: zod.ZodOptional<zod.ZodObject<{
1320
- model: zod.ZodType<ai.LanguageModelV1, zod.ZodTypeDef, ai.LanguageModelV1>;
1321
- temperature: zod.ZodDefault<zod.ZodOptional<zod.ZodNumber>>;
1322
- maxTokens: zod.ZodOptional<zod.ZodNumber>;
1323
- }, "strip", zod.ZodTypeAny, {
1324
- model: ai.LanguageModelV1;
1325
- temperature: number;
1326
- maxTokens?: number | undefined;
1327
- }, {
1328
- model: ai.LanguageModelV1;
1329
- temperature?: number | undefined;
1330
- maxTokens?: number | undefined;
1331
- }>>;
1332
- }, "strict", zod.ZodTypeAny, {
1333
- defaultModel?: {
1334
- model: ai.LanguageModelV1;
1335
- temperature: number;
1336
- maxTokens?: number | undefined;
1337
- } | undefined;
1338
- }, {
1339
- defaultModel?: {
1340
- model: ai.LanguageModelV1;
1341
- temperature?: number | undefined;
1342
- maxTokens?: number | undefined;
1343
- } | undefined;
1344
- }>;
1345
- AgentRole: typeof AgentRole;
1346
- allAgentRoles: readonly [AgentRole.USER, AgentRole.AGENT, AgentRole.JUDGE];
1347
- AgentAdapter: typeof AgentAdapter;
1348
- UserSimulatorAgentAdapter: typeof UserSimulatorAgentAdapter;
1349
- JudgeAgentAdapter: typeof JudgeAgentAdapter;
1350
- judgeAgent: (cfg: JudgeAgentConfig) => {
1351
- role: AgentRole.JUDGE;
1352
- criteria: string[];
1353
- call: (input: AgentInput) => Promise<never[] | {
1354
- success: boolean;
1355
- messages: ai.CoreMessage[];
1356
- reasoning: string;
1357
- metCriteria: string[];
1358
- unmetCriteria: string[];
1359
- }>;
1360
- };
1361
- userSimulatorAgent: (config?: TestingAgentConfig) => {
1362
- role: AgentRole.USER;
1363
- call: (input: AgentInput) => Promise<{
1364
- role: "user";
1365
- content: string;
1366
- }>;
1367
- };
1368
- };
1364
+ declare const script_agent: typeof agent;
1365
+ declare const script_fail: typeof fail;
1366
+ declare const script_judge: typeof judge;
1367
+ declare const script_message: typeof message;
1368
+ declare const script_proceed: typeof proceed;
1369
+ declare const script_succeed: typeof succeed;
1370
+ declare const script_user: typeof user;
1371
+ declare namespace script {
1372
+ export { script_agent as agent, script_fail as fail, script_judge as judge, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
1373
+ }
1374
+
1375
+ type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
1376
+ declare const scenario: ScenarioApi;
1369
1377
 
1370
- export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, DEFAULT_TEMPERATURE, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
1378
+ export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, DEFAULT_MAX_TURNS, DEFAULT_TEMPERATURE, DEFAULT_VERBOSE, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
package/dist/index.d.ts CHANGED
@@ -1,7 +1,6 @@
1
- import * as zod from 'zod';
2
- import { z } from 'zod';
3
1
  import * as ai from 'ai';
4
2
  import { CoreMessage, CoreToolMessage, LanguageModel } from 'ai';
3
+ import { z } from 'zod';
5
4
  import { Observable } from 'rxjs';
6
5
 
7
6
  declare enum AgentRole {
@@ -121,6 +120,8 @@ declare abstract class JudgeAgentAdapter implements AgentAdapter {
121
120
  abstract call(input: AgentInput): Promise<AgentReturnTypes>;
122
121
  }
123
122
 
123
+ declare const DEFAULT_MAX_TURNS = 10;
124
+ declare const DEFAULT_VERBOSE = false;
124
125
  /**
125
126
  * Configuration for a scenario.
126
127
  */
@@ -147,11 +148,19 @@ interface ScenarioConfig {
147
148
  */
148
149
  script?: ScriptStep[];
149
150
  /**
150
- * Whether to output verbose logging. Defaults to false.
151
+ * Whether to output verbose logging.
152
+ *
153
+ * If no value is provided, this defaults to {@link DEFAULT_VERBOSE}.
154
+ *
155
+ * @default {@link DEFAULT_VERBOSE}
151
156
  */
152
157
  verbose?: boolean;
153
158
  /**
154
- * The maximum number of turns to execute. Defaults to 20.
159
+ * The maximum number of turns to execute.
160
+ *
161
+ * If no value is provided, this defaults to {@link DEFAULT_MAX_TURNS}.
162
+ *
163
+ * @default {@link DEFAULT_MAX_TURNS}
155
164
  */
156
165
  maxTurns?: number;
157
166
  /**
@@ -369,6 +378,33 @@ declare const scenarioProjectConfigSchema: z.ZodObject<{
369
378
  type ScenarioProjectConfig = z.infer<typeof scenarioProjectConfigSchema>;
370
379
  declare function defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
371
380
 
381
+ type domain_AgentAdapter = AgentAdapter;
382
+ declare const domain_AgentAdapter: typeof AgentAdapter;
383
+ type domain_AgentInput = AgentInput;
384
+ type domain_AgentReturnTypes = AgentReturnTypes;
385
+ type domain_AgentRole = AgentRole;
386
+ declare const domain_AgentRole: typeof AgentRole;
387
+ declare const domain_DEFAULT_MAX_TURNS: typeof DEFAULT_MAX_TURNS;
388
+ declare const domain_DEFAULT_TEMPERATURE: typeof DEFAULT_TEMPERATURE;
389
+ declare const domain_DEFAULT_VERBOSE: typeof DEFAULT_VERBOSE;
390
+ type domain_JudgeAgentAdapter = JudgeAgentAdapter;
391
+ declare const domain_JudgeAgentAdapter: typeof JudgeAgentAdapter;
392
+ type domain_ScenarioConfig = ScenarioConfig;
393
+ type domain_ScenarioConfigFinal = ScenarioConfigFinal;
394
+ type domain_ScenarioExecutionLike = ScenarioExecutionLike;
395
+ type domain_ScenarioExecutionStateLike = ScenarioExecutionStateLike;
396
+ type domain_ScenarioProjectConfig = ScenarioProjectConfig;
397
+ type domain_ScenarioResult = ScenarioResult;
398
+ type domain_ScriptStep = ScriptStep;
399
+ type domain_UserSimulatorAgentAdapter = UserSimulatorAgentAdapter;
400
+ declare const domain_UserSimulatorAgentAdapter: typeof UserSimulatorAgentAdapter;
401
+ declare const domain_allAgentRoles: typeof allAgentRoles;
402
+ declare const domain_defineConfig: typeof defineConfig;
403
+ declare const domain_scenarioProjectConfigSchema: typeof scenarioProjectConfigSchema;
404
+ declare namespace domain {
405
+ export { domain_AgentAdapter as AgentAdapter, type domain_AgentInput as AgentInput, type domain_AgentReturnTypes as AgentReturnTypes, domain_AgentRole as AgentRole, domain_DEFAULT_MAX_TURNS as DEFAULT_MAX_TURNS, domain_DEFAULT_TEMPERATURE as DEFAULT_TEMPERATURE, domain_DEFAULT_VERBOSE as DEFAULT_VERBOSE, domain_JudgeAgentAdapter as JudgeAgentAdapter, type domain_ScenarioConfig as ScenarioConfig, type domain_ScenarioConfigFinal as ScenarioConfigFinal, type domain_ScenarioExecutionLike as ScenarioExecutionLike, type domain_ScenarioExecutionStateLike as ScenarioExecutionStateLike, type domain_ScenarioProjectConfig as ScenarioProjectConfig, type domain_ScenarioResult as ScenarioResult, type domain_ScriptStep as ScriptStep, domain_UserSimulatorAgentAdapter as UserSimulatorAgentAdapter, domain_allAgentRoles as allAgentRoles, domain_defineConfig as defineConfig, domain_scenarioProjectConfigSchema as scenarioProjectConfigSchema };
406
+ }
407
+
372
408
  /**
373
409
  * Configuration for the inference parameters of a testing agent.
374
410
  */
@@ -593,6 +629,16 @@ declare const userSimulatorAgent: (config?: TestingAgentConfig) => {
593
629
  }>;
594
630
  };
595
631
 
632
+ type agents_FinishTestArgs = FinishTestArgs;
633
+ type agents_JudgeAgentConfig = JudgeAgentConfig;
634
+ type agents_TestingAgentConfig = TestingAgentConfig;
635
+ type agents_TestingAgentInferenceConfig = TestingAgentInferenceConfig;
636
+ declare const agents_judgeAgent: typeof judgeAgent;
637
+ declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
638
+ declare namespace agents {
639
+ export { type agents_FinishTestArgs as FinishTestArgs, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_TestingAgentConfig as TestingAgentConfig, type agents_TestingAgentInferenceConfig as TestingAgentInferenceConfig, agents_judgeAgent as judgeAgent, agents_userSimulatorAgent as userSimulatorAgent };
640
+ }
641
+
596
642
  /**
597
643
  * Verdict enum represents the possible outcomes of a test scenario
598
644
  */
@@ -1162,6 +1208,14 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
1162
1208
  hasToolCall(toolName: string): boolean;
1163
1209
  }
1164
1210
 
1211
+ type execution_ScenarioExecution = ScenarioExecution;
1212
+ declare const execution_ScenarioExecution: typeof ScenarioExecution;
1213
+ type execution_ScenarioExecutionState = ScenarioExecutionState;
1214
+ declare const execution_ScenarioExecutionState: typeof ScenarioExecutionState;
1215
+ declare namespace execution {
1216
+ export { execution_ScenarioExecution as ScenarioExecution, execution_ScenarioExecutionState as ScenarioExecutionState };
1217
+ }
1218
+
1165
1219
  /**
1166
1220
  * High-level interface for running a scenario test.
1167
1221
  *
@@ -1213,6 +1267,11 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
1213
1267
  */
1214
1268
  declare function run(cfg: ScenarioConfig): Promise<ScenarioResult>;
1215
1269
 
1270
+ declare const runner_run: typeof run;
1271
+ declare namespace runner {
1272
+ export { runner_run as run };
1273
+ }
1274
+
1216
1275
  /**
1217
1276
  * Scenario script DSL (Domain Specific Language) module.
1218
1277
  *
@@ -1302,69 +1361,18 @@ declare const succeed: (reasoning?: string) => ScriptStep;
1302
1361
  */
1303
1362
  declare const fail: (reasoning?: string) => ScriptStep;
1304
1363
 
1305
- declare const scenario: {
1306
- message: (message: ai.CoreMessage) => ScriptStep;
1307
- agent: (content?: string | ai.CoreMessage) => ScriptStep;
1308
- judge: (content?: string | ai.CoreMessage) => ScriptStep;
1309
- user: (content?: string | ai.CoreMessage) => ScriptStep;
1310
- proceed: (turns?: number, onTurn?: (state: ScenarioExecutionStateLike) => void | Promise<void>, onStep?: (state: ScenarioExecutionStateLike) => void | Promise<void>) => ScriptStep;
1311
- succeed: (reasoning?: string) => ScriptStep;
1312
- fail: (reasoning?: string) => ScriptStep;
1313
- run(cfg: ScenarioConfig): Promise<ScenarioResult>;
1314
- ScenarioExecution: typeof ScenarioExecution;
1315
- ScenarioExecutionState: typeof ScenarioExecutionState;
1316
- defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
1317
- DEFAULT_TEMPERATURE: 0;
1318
- scenarioProjectConfigSchema: zod.ZodObject<{
1319
- defaultModel: zod.ZodOptional<zod.ZodObject<{
1320
- model: zod.ZodType<ai.LanguageModelV1, zod.ZodTypeDef, ai.LanguageModelV1>;
1321
- temperature: zod.ZodDefault<zod.ZodOptional<zod.ZodNumber>>;
1322
- maxTokens: zod.ZodOptional<zod.ZodNumber>;
1323
- }, "strip", zod.ZodTypeAny, {
1324
- model: ai.LanguageModelV1;
1325
- temperature: number;
1326
- maxTokens?: number | undefined;
1327
- }, {
1328
- model: ai.LanguageModelV1;
1329
- temperature?: number | undefined;
1330
- maxTokens?: number | undefined;
1331
- }>>;
1332
- }, "strict", zod.ZodTypeAny, {
1333
- defaultModel?: {
1334
- model: ai.LanguageModelV1;
1335
- temperature: number;
1336
- maxTokens?: number | undefined;
1337
- } | undefined;
1338
- }, {
1339
- defaultModel?: {
1340
- model: ai.LanguageModelV1;
1341
- temperature?: number | undefined;
1342
- maxTokens?: number | undefined;
1343
- } | undefined;
1344
- }>;
1345
- AgentRole: typeof AgentRole;
1346
- allAgentRoles: readonly [AgentRole.USER, AgentRole.AGENT, AgentRole.JUDGE];
1347
- AgentAdapter: typeof AgentAdapter;
1348
- UserSimulatorAgentAdapter: typeof UserSimulatorAgentAdapter;
1349
- JudgeAgentAdapter: typeof JudgeAgentAdapter;
1350
- judgeAgent: (cfg: JudgeAgentConfig) => {
1351
- role: AgentRole.JUDGE;
1352
- criteria: string[];
1353
- call: (input: AgentInput) => Promise<never[] | {
1354
- success: boolean;
1355
- messages: ai.CoreMessage[];
1356
- reasoning: string;
1357
- metCriteria: string[];
1358
- unmetCriteria: string[];
1359
- }>;
1360
- };
1361
- userSimulatorAgent: (config?: TestingAgentConfig) => {
1362
- role: AgentRole.USER;
1363
- call: (input: AgentInput) => Promise<{
1364
- role: "user";
1365
- content: string;
1366
- }>;
1367
- };
1368
- };
1364
+ declare const script_agent: typeof agent;
1365
+ declare const script_fail: typeof fail;
1366
+ declare const script_judge: typeof judge;
1367
+ declare const script_message: typeof message;
1368
+ declare const script_proceed: typeof proceed;
1369
+ declare const script_succeed: typeof succeed;
1370
+ declare const script_user: typeof user;
1371
+ declare namespace script {
1372
+ export { script_agent as agent, script_fail as fail, script_judge as judge, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
1373
+ }
1374
+
1375
+ type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
1376
+ declare const scenario: ScenarioApi;
1369
1377
 
1370
- export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, DEFAULT_TEMPERATURE, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
1378
+ export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, DEFAULT_MAX_TURNS, DEFAULT_TEMPERATURE, DEFAULT_VERBOSE, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
package/dist/index.js CHANGED
@@ -32,7 +32,9 @@ var index_exports = {};
32
32
  __export(index_exports, {
33
33
  AgentAdapter: () => AgentAdapter,
34
34
  AgentRole: () => AgentRole,
35
+ DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
35
36
  DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
37
+ DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
36
38
  JudgeAgentAdapter: () => JudgeAgentAdapter,
37
39
  ScenarioExecution: () => ScenarioExecution,
38
40
  ScenarioExecutionState: () => ScenarioExecutionState,
@@ -71,7 +73,9 @@ var domain_exports = {};
71
73
  __export(domain_exports, {
72
74
  AgentAdapter: () => AgentAdapter,
73
75
  AgentRole: () => AgentRole,
76
+ DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
74
77
  DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
78
+ DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
75
79
  JudgeAgentAdapter: () => JudgeAgentAdapter,
76
80
  UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
77
81
  allAgentRoles: () => allAgentRoles,
@@ -120,6 +124,10 @@ var JudgeAgentAdapter = class {
120
124
  }
121
125
  };
122
126
 
127
+ // src/domain/scenarios/index.ts
128
+ var DEFAULT_MAX_TURNS = 10;
129
+ var DEFAULT_VERBOSE = false;
130
+
123
131
  // src/agents/utils.ts
124
132
  var toolMessageRole = "tool";
125
133
  var assistantMessageRole = "assistant";
@@ -340,7 +348,7 @@ async function loadProjectConfig() {
340
348
  configLoadPromise = (async () => {
341
349
  try {
342
350
  config = await loadScenarioProjectConfig();
343
- logger.info("loaded scenario project config", { config });
351
+ logger.debug("loaded scenario project config", { config });
344
352
  } catch (error) {
345
353
  logger.error("error loading scenario project config", { error });
346
354
  } finally {
@@ -838,8 +846,8 @@ var ScenarioExecution = class {
838
846
  description: config2.description,
839
847
  agents: config2.agents,
840
848
  script,
841
- verbose: config2.verbose ?? false,
842
- maxTurns: config2.maxTurns ?? 10,
849
+ verbose: config2.verbose ?? DEFAULT_VERBOSE,
850
+ maxTurns: config2.maxTurns ?? DEFAULT_MAX_TURNS,
843
851
  threadId: config2.threadId ?? generateThreadId(),
844
852
  setId: config2.setId
845
853
  };
@@ -891,12 +899,14 @@ var ScenarioExecution = class {
891
899
  }
892
900
  }
893
901
  this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
894
- return this.reachedMaxTurns([
895
- "Reached end of script without conclusion, add one of the following to the end of the script:",
896
- "- `Scenario.proceed()` to let the simulation continue to play out",
897
- "- `Scenario.judge()` to force criteria judgement",
898
- "- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
899
- ].join("\n"));
902
+ return this.reachedMaxTurns(
903
+ [
904
+ "Reached end of script without conclusion, add one of the following to the end of the script:",
905
+ "- `Scenario.proceed()` to let the simulation continue to play out",
906
+ "- `Scenario.judge()` to force criteria judgement",
907
+ "- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
908
+ ].join("\n")
909
+ );
900
910
  } catch (error) {
901
911
  const errorResult = {
902
912
  success: false,
@@ -1031,8 +1041,7 @@ var ScenarioExecution = class {
1031
1041
  while (true) {
1032
1042
  const goToNextTurn = turns === void 0 || initialTurn === null || this.state.currentTurn != null && this.state.currentTurn + 1 < initialTurn + turns;
1033
1043
  const nextMessage = await this._step(goToNextTurn, onTurn);
1034
- if (initialTurn === null)
1035
- initialTurn = this.state.currentTurn;
1044
+ if (initialTurn === null) initialTurn = this.state.currentTurn;
1036
1045
  if (nextMessage === null) {
1037
1046
  return null;
1038
1047
  }
@@ -1118,7 +1127,10 @@ var ScenarioExecution = class {
1118
1127
  agent2 = nextAgent.agent;
1119
1128
  this.removePendingAgent(agent2);
1120
1129
  if (content) {
1121
- const message2 = typeof content === "string" ? { role: role === "User" /* USER */ ? "user" : "assistant", content } : content;
1130
+ const message2 = typeof content === "string" ? {
1131
+ role: role === "User" /* USER */ ? "user" : "assistant",
1132
+ content
1133
+ } : content;
1122
1134
  this.state.addMessage(message2);
1123
1135
  this.broadcastMessage(message2, index);
1124
1136
  return null;
@@ -1191,7 +1203,9 @@ var ScenarioExecution = class {
1191
1203
  reachedMaxTurns(errorMessage) {
1192
1204
  var _a;
1193
1205
  const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
1194
- const agentTimes = agentRoleAgentsIdx.map((i) => this.agentTimes.get(i) || 0);
1206
+ const agentTimes = agentRoleAgentsIdx.map(
1207
+ (i) => this.agentTimes.get(i) || 0
1208
+ );
1195
1209
  const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
1196
1210
  return {
1197
1211
  success: false,
@@ -1293,10 +1307,8 @@ var ScenarioExecution = class {
1293
1307
  function convertAgentReturnTypesToMessages(response, role) {
1294
1308
  if (typeof response === "string")
1295
1309
  return [{ role, content: response }];
1296
- if (Array.isArray(response))
1297
- return response;
1298
- if (typeof response === "object" && "role" in response)
1299
- return [response];
1310
+ if (Array.isArray(response)) return response;
1311
+ if (typeof response === "object" && "role" in response) return [response];
1300
1312
  return [];
1301
1313
  }
1302
1314
 
@@ -1712,7 +1724,9 @@ var index_default = scenario;
1712
1724
  0 && (module.exports = {
1713
1725
  AgentAdapter,
1714
1726
  AgentRole,
1727
+ DEFAULT_MAX_TURNS,
1715
1728
  DEFAULT_TEMPERATURE,
1729
+ DEFAULT_VERBOSE,
1716
1730
  JudgeAgentAdapter,
1717
1731
  ScenarioExecution,
1718
1732
  ScenarioExecutionState,
package/dist/index.mjs CHANGED
@@ -1,7 +1,9 @@
1
1
  import {
2
2
  AgentAdapter,
3
3
  AgentRole,
4
+ DEFAULT_MAX_TURNS,
4
5
  DEFAULT_TEMPERATURE,
6
+ DEFAULT_VERBOSE,
5
7
  EventBus,
6
8
  JudgeAgentAdapter,
7
9
  Logger,
@@ -17,7 +19,7 @@ import {
17
19
  getBatchRunId,
18
20
  getProjectConfig,
19
21
  scenarioProjectConfigSchema
20
- } from "./chunk-NUZZAQV2.mjs";
22
+ } from "./chunk-MOOKAYIE.mjs";
21
23
  import {
22
24
  __export
23
25
  } from "./chunk-7P6ASYW6.mjs";
@@ -479,8 +481,8 @@ var ScenarioExecution = class {
479
481
  description: config.description,
480
482
  agents: config.agents,
481
483
  script,
482
- verbose: config.verbose ?? false,
483
- maxTurns: config.maxTurns ?? 10,
484
+ verbose: config.verbose ?? DEFAULT_VERBOSE,
485
+ maxTurns: config.maxTurns ?? DEFAULT_MAX_TURNS,
484
486
  threadId: config.threadId ?? generateThreadId(),
485
487
  setId: config.setId
486
488
  };
@@ -532,12 +534,14 @@ var ScenarioExecution = class {
532
534
  }
533
535
  }
534
536
  this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
535
- return this.reachedMaxTurns([
536
- "Reached end of script without conclusion, add one of the following to the end of the script:",
537
- "- `Scenario.proceed()` to let the simulation continue to play out",
538
- "- `Scenario.judge()` to force criteria judgement",
539
- "- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
540
- ].join("\n"));
537
+ return this.reachedMaxTurns(
538
+ [
539
+ "Reached end of script without conclusion, add one of the following to the end of the script:",
540
+ "- `Scenario.proceed()` to let the simulation continue to play out",
541
+ "- `Scenario.judge()` to force criteria judgement",
542
+ "- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
543
+ ].join("\n")
544
+ );
541
545
  } catch (error) {
542
546
  const errorResult = {
543
547
  success: false,
@@ -672,8 +676,7 @@ var ScenarioExecution = class {
672
676
  while (true) {
673
677
  const goToNextTurn = turns === void 0 || initialTurn === null || this.state.currentTurn != null && this.state.currentTurn + 1 < initialTurn + turns;
674
678
  const nextMessage = await this._step(goToNextTurn, onTurn);
675
- if (initialTurn === null)
676
- initialTurn = this.state.currentTurn;
679
+ if (initialTurn === null) initialTurn = this.state.currentTurn;
677
680
  if (nextMessage === null) {
678
681
  return null;
679
682
  }
@@ -759,7 +762,10 @@ var ScenarioExecution = class {
759
762
  agent2 = nextAgent.agent;
760
763
  this.removePendingAgent(agent2);
761
764
  if (content) {
762
- const message2 = typeof content === "string" ? { role: role === "User" /* USER */ ? "user" : "assistant", content } : content;
765
+ const message2 = typeof content === "string" ? {
766
+ role: role === "User" /* USER */ ? "user" : "assistant",
767
+ content
768
+ } : content;
763
769
  this.state.addMessage(message2);
764
770
  this.broadcastMessage(message2, index);
765
771
  return null;
@@ -832,7 +838,9 @@ var ScenarioExecution = class {
832
838
  reachedMaxTurns(errorMessage) {
833
839
  var _a;
834
840
  const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
835
- const agentTimes = agentRoleAgentsIdx.map((i) => this.agentTimes.get(i) || 0);
841
+ const agentTimes = agentRoleAgentsIdx.map(
842
+ (i) => this.agentTimes.get(i) || 0
843
+ );
836
844
  const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
837
845
  return {
838
846
  success: false,
@@ -934,10 +942,8 @@ var ScenarioExecution = class {
934
942
  function convertAgentReturnTypesToMessages(response, role) {
935
943
  if (typeof response === "string")
936
944
  return [{ role, content: response }];
937
- if (Array.isArray(response))
938
- return response;
939
- if (typeof response === "object" && "role" in response)
940
- return [response];
945
+ if (Array.isArray(response)) return response;
946
+ if (typeof response === "object" && "role" in response) return [response];
941
947
  return [];
942
948
  }
943
949
 
@@ -1087,7 +1093,9 @@ var index_default = scenario;
1087
1093
  export {
1088
1094
  AgentAdapter,
1089
1095
  AgentRole,
1096
+ DEFAULT_MAX_TURNS,
1090
1097
  DEFAULT_TEMPERATURE,
1098
+ DEFAULT_VERBOSE,
1091
1099
  JudgeAgentAdapter,
1092
1100
  ScenarioExecution,
1093
1101
  ScenarioExecutionState,
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  EventBus
3
- } from "../../chunk-NUZZAQV2.mjs";
3
+ } from "../../chunk-MOOKAYIE.mjs";
4
4
  import "../../chunk-7P6ASYW6.mjs";
5
5
 
6
6
  // src/integrations/vitest/setup.ts
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@langwatch/scenario",
3
- "version": "0.2.2",
3
+ "version": "0.2.6",
4
4
  "description": "A TypeScript library for testing AI agents using scenarios",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
@@ -84,8 +84,7 @@
84
84
  "test": "vitest",
85
85
  "test:ci": "vitest run",
86
86
  "lint": "eslint .",
87
- "examples:vitest:run": "export SCENARIO_BATCH_ID=scenariobatch_$(uuidgen) && pnpm run buildpack && (cd examples/vitest && pnpm install) && pnpm -F vitest-example run test",
88
- "hash-source": "find src -name \"*.ts\" -type f | sort | xargs cat | sha256sum | cut -d' ' -f1",
89
- "generate:api-reference": "npx typedoc src --out api-reference-docs && rm -rf ../docs/docs/public/reference/javascript/scenario && mv api-reference-docs ../docs/docs/public/reference/javascript/scenario && pnpm run hash-source > ../docs/docs/public/reference/javascript/.docs-source-hash"
87
+ "examples:vitest:run": "export SCENARIO_BATCH_ID=scenariobatch_$(uuidgen) && (cd examples/vitest && pnpm install) && pnpm -F vitest-example run test",
88
+ "generate:api-reference": "npx typedoc src --out api-reference-docs && rm -rf ../docs/docs/public/reference/javascript/scenario && mv api-reference-docs ../docs/docs/public/reference/javascript/scenario"
90
89
  }
91
90
  }