@langwatch/scenario 0.2.0 → 0.2.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -337,6 +337,8 @@ interface ScenarioExecutionStateLike {
337
337
  hasToolCall(toolName: string): boolean;
338
338
  }
339
339
 
340
+ /** Default temperature for language model inference */
341
+ declare const DEFAULT_TEMPERATURE = 0;
340
342
  declare const scenarioProjectConfigSchema: z.ZodObject<{
341
343
  defaultModel: z.ZodOptional<z.ZodObject<{
342
344
  model: z.ZodType<ai.LanguageModelV1, z.ZodTypeDef, ai.LanguageModelV1>;
@@ -351,24 +353,18 @@ declare const scenarioProjectConfigSchema: z.ZodObject<{
351
353
  temperature?: number | undefined;
352
354
  maxTokens?: number | undefined;
353
355
  }>>;
354
- langwatchEndpoint: z.ZodOptional<z.ZodString>;
355
- langwatchApiKey: z.ZodOptional<z.ZodString>;
356
356
  }, "strict", z.ZodTypeAny, {
357
357
  defaultModel?: {
358
358
  model: ai.LanguageModelV1;
359
359
  temperature: number;
360
360
  maxTokens?: number | undefined;
361
361
  } | undefined;
362
- langwatchEndpoint?: string | undefined;
363
- langwatchApiKey?: string | undefined;
364
362
  }, {
365
363
  defaultModel?: {
366
364
  model: ai.LanguageModelV1;
367
365
  temperature?: number | undefined;
368
366
  maxTokens?: number | undefined;
369
367
  } | undefined;
370
- langwatchEndpoint?: string | undefined;
371
- langwatchApiKey?: string | undefined;
372
368
  }>;
373
369
  type ScenarioProjectConfig = z.infer<typeof scenarioProjectConfigSchema>;
374
370
  declare function defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
@@ -400,6 +396,14 @@ interface TestingAgentConfig extends TestingAgentInferenceConfig {
400
396
  * The name of the agent.
401
397
  */
402
398
  name?: string;
399
+ /**
400
+ * System prompt to use for the agent.
401
+ *
402
+ * Useful in more complex scenarios where you want to set the system prompt
403
+ * for the agent directly. If left blank, this will be automatically generated
404
+ * from the scenario description.
405
+ */
406
+ systemPrompt?: string;
403
407
  }
404
408
  /**
405
409
  * The arguments for finishing a test, used by the judge agent's tool.
@@ -502,8 +506,17 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
502
506
  *
503
507
  * @param config Optional configuration for the agent.
504
508
  * @param config.model The language model to use for generating responses.
505
- * @param config.temperature The temperature to use for the model.
509
+ * If not provided, a default model will be used.
510
+ * @param config.temperature The temperature for the language model (0.0-1.0).
511
+ * Lower values make responses more deterministic.
512
+ * Defaults to {@link DEFAULT_TEMPERATURE}.
506
513
  * @param config.maxTokens The maximum number of tokens to generate.
514
+ * If not provided, uses model defaults.
515
+ * @param config.name The name of the agent.
516
+ * @param config.systemPrompt Custom system prompt to override default user simulation behavior.
517
+ * Use this to create specialized user personas or behaviors.
518
+ *
519
+ * @throws {Error} If no model is configured either in parameters or global config.
507
520
  *
508
521
  * @example
509
522
  * ```typescript
@@ -517,7 +530,8 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
517
530
  * };
518
531
  *
519
532
  * async function main() {
520
- * const result = await run({
533
+ * // Basic user simulator with default behavior
534
+ * const basicResult = await run({
521
535
  * name: "User Simulator Test",
522
536
  * description: "A simple test to see if the user simulator works.",
523
537
  * agents: [myAgent, userSimulatorAgent()],
@@ -526,9 +540,50 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
526
540
  * agent(),
527
541
  * ],
528
542
  * });
543
+ *
544
+ * // Customized user simulator
545
+ * const customResult = await run({
546
+ * name: "Expert User Test",
547
+ * description: "User seeks help with TypeScript programming",
548
+ * agents: [
549
+ * myAgent,
550
+ * userSimulatorAgent({
551
+ * model: openai("gpt-4"),
552
+ * temperature: 0.3,
553
+ * systemPrompt: "You are a technical user who asks detailed questions"
554
+ * })
555
+ * ],
556
+ * script: [
557
+ * user(),
558
+ * agent(),
559
+ * ],
560
+ * });
561
+ *
562
+ * // User simulator with custom persona
563
+ * const expertResult = await run({
564
+ * name: "Expert Developer Test",
565
+ * description: "Testing with a technical expert user persona.",
566
+ * agents: [
567
+ * myAgent,
568
+ * userSimulatorAgent({
569
+ * systemPrompt: `
570
+ * You are an expert software developer testing an AI coding assistant.
571
+ * Ask challenging, technical questions and be demanding about code quality.
572
+ * Use technical jargon and expect detailed, accurate responses.
573
+ * `
574
+ * })
575
+ * ],
576
+ * script: [
577
+ * user(),
578
+ * agent(),
579
+ * ],
580
+ * });
529
581
  * }
530
582
  * main();
531
583
  * ```
584
+ *
585
+ * @note
586
+ * - Uses role reversal internally to work around LLM biases toward assistant roles
532
587
  */
533
588
  declare const userSimulatorAgent: (config?: TestingAgentConfig) => {
534
589
  role: AgentRole.USER;
@@ -1259,6 +1314,7 @@ declare const scenario: {
1259
1314
  ScenarioExecution: typeof ScenarioExecution;
1260
1315
  ScenarioExecutionState: typeof ScenarioExecutionState;
1261
1316
  defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
1317
+ DEFAULT_TEMPERATURE: 0;
1262
1318
  scenarioProjectConfigSchema: zod.ZodObject<{
1263
1319
  defaultModel: zod.ZodOptional<zod.ZodObject<{
1264
1320
  model: zod.ZodType<ai.LanguageModelV1, zod.ZodTypeDef, ai.LanguageModelV1>;
@@ -1273,24 +1329,18 @@ declare const scenario: {
1273
1329
  temperature?: number | undefined;
1274
1330
  maxTokens?: number | undefined;
1275
1331
  }>>;
1276
- langwatchEndpoint: zod.ZodOptional<zod.ZodString>;
1277
- langwatchApiKey: zod.ZodOptional<zod.ZodString>;
1278
1332
  }, "strict", zod.ZodTypeAny, {
1279
1333
  defaultModel?: {
1280
1334
  model: ai.LanguageModelV1;
1281
1335
  temperature: number;
1282
1336
  maxTokens?: number | undefined;
1283
1337
  } | undefined;
1284
- langwatchEndpoint?: string | undefined;
1285
- langwatchApiKey?: string | undefined;
1286
1338
  }, {
1287
1339
  defaultModel?: {
1288
1340
  model: ai.LanguageModelV1;
1289
1341
  temperature?: number | undefined;
1290
1342
  maxTokens?: number | undefined;
1291
1343
  } | undefined;
1292
- langwatchEndpoint?: string | undefined;
1293
- langwatchApiKey?: string | undefined;
1294
1344
  }>;
1295
1345
  AgentRole: typeof AgentRole;
1296
1346
  allAgentRoles: readonly [AgentRole.USER, AgentRole.AGENT, AgentRole.JUDGE];
@@ -1317,4 +1367,4 @@ declare const scenario: {
1317
1367
  };
1318
1368
  };
1319
1369
 
1320
- export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
1370
+ export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, DEFAULT_TEMPERATURE, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
package/dist/index.d.ts CHANGED
@@ -337,6 +337,8 @@ interface ScenarioExecutionStateLike {
337
337
  hasToolCall(toolName: string): boolean;
338
338
  }
339
339
 
340
+ /** Default temperature for language model inference */
341
+ declare const DEFAULT_TEMPERATURE = 0;
340
342
  declare const scenarioProjectConfigSchema: z.ZodObject<{
341
343
  defaultModel: z.ZodOptional<z.ZodObject<{
342
344
  model: z.ZodType<ai.LanguageModelV1, z.ZodTypeDef, ai.LanguageModelV1>;
@@ -351,24 +353,18 @@ declare const scenarioProjectConfigSchema: z.ZodObject<{
351
353
  temperature?: number | undefined;
352
354
  maxTokens?: number | undefined;
353
355
  }>>;
354
- langwatchEndpoint: z.ZodOptional<z.ZodString>;
355
- langwatchApiKey: z.ZodOptional<z.ZodString>;
356
356
  }, "strict", z.ZodTypeAny, {
357
357
  defaultModel?: {
358
358
  model: ai.LanguageModelV1;
359
359
  temperature: number;
360
360
  maxTokens?: number | undefined;
361
361
  } | undefined;
362
- langwatchEndpoint?: string | undefined;
363
- langwatchApiKey?: string | undefined;
364
362
  }, {
365
363
  defaultModel?: {
366
364
  model: ai.LanguageModelV1;
367
365
  temperature?: number | undefined;
368
366
  maxTokens?: number | undefined;
369
367
  } | undefined;
370
- langwatchEndpoint?: string | undefined;
371
- langwatchApiKey?: string | undefined;
372
368
  }>;
373
369
  type ScenarioProjectConfig = z.infer<typeof scenarioProjectConfigSchema>;
374
370
  declare function defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
@@ -400,6 +396,14 @@ interface TestingAgentConfig extends TestingAgentInferenceConfig {
400
396
  * The name of the agent.
401
397
  */
402
398
  name?: string;
399
+ /**
400
+ * System prompt to use for the agent.
401
+ *
402
+ * Useful in more complex scenarios where you want to set the system prompt
403
+ * for the agent directly. If left blank, this will be automatically generated
404
+ * from the scenario description.
405
+ */
406
+ systemPrompt?: string;
403
407
  }
404
408
  /**
405
409
  * The arguments for finishing a test, used by the judge agent's tool.
@@ -502,8 +506,17 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
502
506
  *
503
507
  * @param config Optional configuration for the agent.
504
508
  * @param config.model The language model to use for generating responses.
505
- * @param config.temperature The temperature to use for the model.
509
+ * If not provided, a default model will be used.
510
+ * @param config.temperature The temperature for the language model (0.0-1.0).
511
+ * Lower values make responses more deterministic.
512
+ * Defaults to {@link DEFAULT_TEMPERATURE}.
506
513
  * @param config.maxTokens The maximum number of tokens to generate.
514
+ * If not provided, uses model defaults.
515
+ * @param config.name The name of the agent.
516
+ * @param config.systemPrompt Custom system prompt to override default user simulation behavior.
517
+ * Use this to create specialized user personas or behaviors.
518
+ *
519
+ * @throws {Error} If no model is configured either in parameters or global config.
507
520
  *
508
521
  * @example
509
522
  * ```typescript
@@ -517,7 +530,8 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
517
530
  * };
518
531
  *
519
532
  * async function main() {
520
- * const result = await run({
533
+ * // Basic user simulator with default behavior
534
+ * const basicResult = await run({
521
535
  * name: "User Simulator Test",
522
536
  * description: "A simple test to see if the user simulator works.",
523
537
  * agents: [myAgent, userSimulatorAgent()],
@@ -526,9 +540,50 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
526
540
  * agent(),
527
541
  * ],
528
542
  * });
543
+ *
544
+ * // Customized user simulator
545
+ * const customResult = await run({
546
+ * name: "Expert User Test",
547
+ * description: "User seeks help with TypeScript programming",
548
+ * agents: [
549
+ * myAgent,
550
+ * userSimulatorAgent({
551
+ * model: openai("gpt-4"),
552
+ * temperature: 0.3,
553
+ * systemPrompt: "You are a technical user who asks detailed questions"
554
+ * })
555
+ * ],
556
+ * script: [
557
+ * user(),
558
+ * agent(),
559
+ * ],
560
+ * });
561
+ *
562
+ * // User simulator with custom persona
563
+ * const expertResult = await run({
564
+ * name: "Expert Developer Test",
565
+ * description: "Testing with a technical expert user persona.",
566
+ * agents: [
567
+ * myAgent,
568
+ * userSimulatorAgent({
569
+ * systemPrompt: `
570
+ * You are an expert software developer testing an AI coding assistant.
571
+ * Ask challenging, technical questions and be demanding about code quality.
572
+ * Use technical jargon and expect detailed, accurate responses.
573
+ * `
574
+ * })
575
+ * ],
576
+ * script: [
577
+ * user(),
578
+ * agent(),
579
+ * ],
580
+ * });
529
581
  * }
530
582
  * main();
531
583
  * ```
584
+ *
585
+ * @note
586
+ * - Uses role reversal internally to work around LLM biases toward assistant roles
532
587
  */
533
588
  declare const userSimulatorAgent: (config?: TestingAgentConfig) => {
534
589
  role: AgentRole.USER;
@@ -1259,6 +1314,7 @@ declare const scenario: {
1259
1314
  ScenarioExecution: typeof ScenarioExecution;
1260
1315
  ScenarioExecutionState: typeof ScenarioExecutionState;
1261
1316
  defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
1317
+ DEFAULT_TEMPERATURE: 0;
1262
1318
  scenarioProjectConfigSchema: zod.ZodObject<{
1263
1319
  defaultModel: zod.ZodOptional<zod.ZodObject<{
1264
1320
  model: zod.ZodType<ai.LanguageModelV1, zod.ZodTypeDef, ai.LanguageModelV1>;
@@ -1273,24 +1329,18 @@ declare const scenario: {
1273
1329
  temperature?: number | undefined;
1274
1330
  maxTokens?: number | undefined;
1275
1331
  }>>;
1276
- langwatchEndpoint: zod.ZodOptional<zod.ZodString>;
1277
- langwatchApiKey: zod.ZodOptional<zod.ZodString>;
1278
1332
  }, "strict", zod.ZodTypeAny, {
1279
1333
  defaultModel?: {
1280
1334
  model: ai.LanguageModelV1;
1281
1335
  temperature: number;
1282
1336
  maxTokens?: number | undefined;
1283
1337
  } | undefined;
1284
- langwatchEndpoint?: string | undefined;
1285
- langwatchApiKey?: string | undefined;
1286
1338
  }, {
1287
1339
  defaultModel?: {
1288
1340
  model: ai.LanguageModelV1;
1289
1341
  temperature?: number | undefined;
1290
1342
  maxTokens?: number | undefined;
1291
1343
  } | undefined;
1292
- langwatchEndpoint?: string | undefined;
1293
- langwatchApiKey?: string | undefined;
1294
1344
  }>;
1295
1345
  AgentRole: typeof AgentRole;
1296
1346
  allAgentRoles: readonly [AgentRole.USER, AgentRole.AGENT, AgentRole.JUDGE];
@@ -1317,4 +1367,4 @@ declare const scenario: {
1317
1367
  };
1318
1368
  };
1319
1369
 
1320
- export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
1370
+ export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, DEFAULT_TEMPERATURE, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };