@langwatch/scenario 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-NUZZAQV2.mjs +622 -0
- package/dist/index.d.mts +65 -15
- package/dist/index.d.ts +65 -15
- package/dist/index.js +317 -97
- package/dist/index.mjs +122 -173
- package/dist/integrations/vitest/setup.js +282 -105
- package/dist/integrations/vitest/setup.mjs +1 -1
- package/package.json +4 -3
- package/dist/chunk-ORWSJC5F.mjs +0 -309
package/dist/index.d.mts
CHANGED
|
@@ -337,6 +337,8 @@ interface ScenarioExecutionStateLike {
|
|
|
337
337
|
hasToolCall(toolName: string): boolean;
|
|
338
338
|
}
|
|
339
339
|
|
|
340
|
+
/** Default temperature for language model inference */
|
|
341
|
+
declare const DEFAULT_TEMPERATURE = 0;
|
|
340
342
|
declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
341
343
|
defaultModel: z.ZodOptional<z.ZodObject<{
|
|
342
344
|
model: z.ZodType<ai.LanguageModelV1, z.ZodTypeDef, ai.LanguageModelV1>;
|
|
@@ -351,24 +353,18 @@ declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
|
351
353
|
temperature?: number | undefined;
|
|
352
354
|
maxTokens?: number | undefined;
|
|
353
355
|
}>>;
|
|
354
|
-
langwatchEndpoint: z.ZodOptional<z.ZodString>;
|
|
355
|
-
langwatchApiKey: z.ZodOptional<z.ZodString>;
|
|
356
356
|
}, "strict", z.ZodTypeAny, {
|
|
357
357
|
defaultModel?: {
|
|
358
358
|
model: ai.LanguageModelV1;
|
|
359
359
|
temperature: number;
|
|
360
360
|
maxTokens?: number | undefined;
|
|
361
361
|
} | undefined;
|
|
362
|
-
langwatchEndpoint?: string | undefined;
|
|
363
|
-
langwatchApiKey?: string | undefined;
|
|
364
362
|
}, {
|
|
365
363
|
defaultModel?: {
|
|
366
364
|
model: ai.LanguageModelV1;
|
|
367
365
|
temperature?: number | undefined;
|
|
368
366
|
maxTokens?: number | undefined;
|
|
369
367
|
} | undefined;
|
|
370
|
-
langwatchEndpoint?: string | undefined;
|
|
371
|
-
langwatchApiKey?: string | undefined;
|
|
372
368
|
}>;
|
|
373
369
|
type ScenarioProjectConfig = z.infer<typeof scenarioProjectConfigSchema>;
|
|
374
370
|
declare function defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
|
|
@@ -400,6 +396,14 @@ interface TestingAgentConfig extends TestingAgentInferenceConfig {
|
|
|
400
396
|
* The name of the agent.
|
|
401
397
|
*/
|
|
402
398
|
name?: string;
|
|
399
|
+
/**
|
|
400
|
+
* System prompt to use for the agent.
|
|
401
|
+
*
|
|
402
|
+
* Useful in more complex scenarios where you want to set the system prompt
|
|
403
|
+
* for the agent directly. If left blank, this will be automatically generated
|
|
404
|
+
* from the scenario description.
|
|
405
|
+
*/
|
|
406
|
+
systemPrompt?: string;
|
|
403
407
|
}
|
|
404
408
|
/**
|
|
405
409
|
* The arguments for finishing a test, used by the judge agent's tool.
|
|
@@ -502,8 +506,17 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
|
502
506
|
*
|
|
503
507
|
* @param config Optional configuration for the agent.
|
|
504
508
|
* @param config.model The language model to use for generating responses.
|
|
505
|
-
*
|
|
509
|
+
* If not provided, a default model will be used.
|
|
510
|
+
* @param config.temperature The temperature for the language model (0.0-1.0).
|
|
511
|
+
* Lower values make responses more deterministic.
|
|
512
|
+
* Defaults to {@link DEFAULT_TEMPERATURE}.
|
|
506
513
|
* @param config.maxTokens The maximum number of tokens to generate.
|
|
514
|
+
* If not provided, uses model defaults.
|
|
515
|
+
* @param config.name The name of the agent.
|
|
516
|
+
* @param config.systemPrompt Custom system prompt to override default user simulation behavior.
|
|
517
|
+
* Use this to create specialized user personas or behaviors.
|
|
518
|
+
*
|
|
519
|
+
* @throws {Error} If no model is configured either in parameters or global config.
|
|
507
520
|
*
|
|
508
521
|
* @example
|
|
509
522
|
* ```typescript
|
|
@@ -517,7 +530,8 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
|
517
530
|
* };
|
|
518
531
|
*
|
|
519
532
|
* async function main() {
|
|
520
|
-
*
|
|
533
|
+
* // Basic user simulator with default behavior
|
|
534
|
+
* const basicResult = await run({
|
|
521
535
|
* name: "User Simulator Test",
|
|
522
536
|
* description: "A simple test to see if the user simulator works.",
|
|
523
537
|
* agents: [myAgent, userSimulatorAgent()],
|
|
@@ -526,9 +540,50 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
|
526
540
|
* agent(),
|
|
527
541
|
* ],
|
|
528
542
|
* });
|
|
543
|
+
*
|
|
544
|
+
* // Customized user simulator
|
|
545
|
+
* const customResult = await run({
|
|
546
|
+
* name: "Expert User Test",
|
|
547
|
+
* description: "User seeks help with TypeScript programming",
|
|
548
|
+
* agents: [
|
|
549
|
+
* myAgent,
|
|
550
|
+
* userSimulatorAgent({
|
|
551
|
+
* model: openai("gpt-4"),
|
|
552
|
+
* temperature: 0.3,
|
|
553
|
+
* systemPrompt: "You are a technical user who asks detailed questions"
|
|
554
|
+
* })
|
|
555
|
+
* ],
|
|
556
|
+
* script: [
|
|
557
|
+
* user(),
|
|
558
|
+
* agent(),
|
|
559
|
+
* ],
|
|
560
|
+
* });
|
|
561
|
+
*
|
|
562
|
+
* // User simulator with custom persona
|
|
563
|
+
* const expertResult = await run({
|
|
564
|
+
* name: "Expert Developer Test",
|
|
565
|
+
* description: "Testing with a technical expert user persona.",
|
|
566
|
+
* agents: [
|
|
567
|
+
* myAgent,
|
|
568
|
+
* userSimulatorAgent({
|
|
569
|
+
* systemPrompt: `
|
|
570
|
+
* You are an expert software developer testing an AI coding assistant.
|
|
571
|
+
* Ask challenging, technical questions and be demanding about code quality.
|
|
572
|
+
* Use technical jargon and expect detailed, accurate responses.
|
|
573
|
+
* `
|
|
574
|
+
* })
|
|
575
|
+
* ],
|
|
576
|
+
* script: [
|
|
577
|
+
* user(),
|
|
578
|
+
* agent(),
|
|
579
|
+
* ],
|
|
580
|
+
* });
|
|
529
581
|
* }
|
|
530
582
|
* main();
|
|
531
583
|
* ```
|
|
584
|
+
*
|
|
585
|
+
* @note
|
|
586
|
+
* - Uses role reversal internally to work around LLM biases toward assistant roles
|
|
532
587
|
*/
|
|
533
588
|
declare const userSimulatorAgent: (config?: TestingAgentConfig) => {
|
|
534
589
|
role: AgentRole.USER;
|
|
@@ -1259,6 +1314,7 @@ declare const scenario: {
|
|
|
1259
1314
|
ScenarioExecution: typeof ScenarioExecution;
|
|
1260
1315
|
ScenarioExecutionState: typeof ScenarioExecutionState;
|
|
1261
1316
|
defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
|
|
1317
|
+
DEFAULT_TEMPERATURE: 0;
|
|
1262
1318
|
scenarioProjectConfigSchema: zod.ZodObject<{
|
|
1263
1319
|
defaultModel: zod.ZodOptional<zod.ZodObject<{
|
|
1264
1320
|
model: zod.ZodType<ai.LanguageModelV1, zod.ZodTypeDef, ai.LanguageModelV1>;
|
|
@@ -1273,24 +1329,18 @@ declare const scenario: {
|
|
|
1273
1329
|
temperature?: number | undefined;
|
|
1274
1330
|
maxTokens?: number | undefined;
|
|
1275
1331
|
}>>;
|
|
1276
|
-
langwatchEndpoint: zod.ZodOptional<zod.ZodString>;
|
|
1277
|
-
langwatchApiKey: zod.ZodOptional<zod.ZodString>;
|
|
1278
1332
|
}, "strict", zod.ZodTypeAny, {
|
|
1279
1333
|
defaultModel?: {
|
|
1280
1334
|
model: ai.LanguageModelV1;
|
|
1281
1335
|
temperature: number;
|
|
1282
1336
|
maxTokens?: number | undefined;
|
|
1283
1337
|
} | undefined;
|
|
1284
|
-
langwatchEndpoint?: string | undefined;
|
|
1285
|
-
langwatchApiKey?: string | undefined;
|
|
1286
1338
|
}, {
|
|
1287
1339
|
defaultModel?: {
|
|
1288
1340
|
model: ai.LanguageModelV1;
|
|
1289
1341
|
temperature?: number | undefined;
|
|
1290
1342
|
maxTokens?: number | undefined;
|
|
1291
1343
|
} | undefined;
|
|
1292
|
-
langwatchEndpoint?: string | undefined;
|
|
1293
|
-
langwatchApiKey?: string | undefined;
|
|
1294
1344
|
}>;
|
|
1295
1345
|
AgentRole: typeof AgentRole;
|
|
1296
1346
|
allAgentRoles: readonly [AgentRole.USER, AgentRole.AGENT, AgentRole.JUDGE];
|
|
@@ -1317,4 +1367,4 @@ declare const scenario: {
|
|
|
1317
1367
|
};
|
|
1318
1368
|
};
|
|
1319
1369
|
|
|
1320
|
-
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
|
|
1370
|
+
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, DEFAULT_TEMPERATURE, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
|
package/dist/index.d.ts
CHANGED
|
@@ -337,6 +337,8 @@ interface ScenarioExecutionStateLike {
|
|
|
337
337
|
hasToolCall(toolName: string): boolean;
|
|
338
338
|
}
|
|
339
339
|
|
|
340
|
+
/** Default temperature for language model inference */
|
|
341
|
+
declare const DEFAULT_TEMPERATURE = 0;
|
|
340
342
|
declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
341
343
|
defaultModel: z.ZodOptional<z.ZodObject<{
|
|
342
344
|
model: z.ZodType<ai.LanguageModelV1, z.ZodTypeDef, ai.LanguageModelV1>;
|
|
@@ -351,24 +353,18 @@ declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
|
351
353
|
temperature?: number | undefined;
|
|
352
354
|
maxTokens?: number | undefined;
|
|
353
355
|
}>>;
|
|
354
|
-
langwatchEndpoint: z.ZodOptional<z.ZodString>;
|
|
355
|
-
langwatchApiKey: z.ZodOptional<z.ZodString>;
|
|
356
356
|
}, "strict", z.ZodTypeAny, {
|
|
357
357
|
defaultModel?: {
|
|
358
358
|
model: ai.LanguageModelV1;
|
|
359
359
|
temperature: number;
|
|
360
360
|
maxTokens?: number | undefined;
|
|
361
361
|
} | undefined;
|
|
362
|
-
langwatchEndpoint?: string | undefined;
|
|
363
|
-
langwatchApiKey?: string | undefined;
|
|
364
362
|
}, {
|
|
365
363
|
defaultModel?: {
|
|
366
364
|
model: ai.LanguageModelV1;
|
|
367
365
|
temperature?: number | undefined;
|
|
368
366
|
maxTokens?: number | undefined;
|
|
369
367
|
} | undefined;
|
|
370
|
-
langwatchEndpoint?: string | undefined;
|
|
371
|
-
langwatchApiKey?: string | undefined;
|
|
372
368
|
}>;
|
|
373
369
|
type ScenarioProjectConfig = z.infer<typeof scenarioProjectConfigSchema>;
|
|
374
370
|
declare function defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
|
|
@@ -400,6 +396,14 @@ interface TestingAgentConfig extends TestingAgentInferenceConfig {
|
|
|
400
396
|
* The name of the agent.
|
|
401
397
|
*/
|
|
402
398
|
name?: string;
|
|
399
|
+
/**
|
|
400
|
+
* System prompt to use for the agent.
|
|
401
|
+
*
|
|
402
|
+
* Useful in more complex scenarios where you want to set the system prompt
|
|
403
|
+
* for the agent directly. If left blank, this will be automatically generated
|
|
404
|
+
* from the scenario description.
|
|
405
|
+
*/
|
|
406
|
+
systemPrompt?: string;
|
|
403
407
|
}
|
|
404
408
|
/**
|
|
405
409
|
* The arguments for finishing a test, used by the judge agent's tool.
|
|
@@ -502,8 +506,17 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
|
502
506
|
*
|
|
503
507
|
* @param config Optional configuration for the agent.
|
|
504
508
|
* @param config.model The language model to use for generating responses.
|
|
505
|
-
*
|
|
509
|
+
* If not provided, a default model will be used.
|
|
510
|
+
* @param config.temperature The temperature for the language model (0.0-1.0).
|
|
511
|
+
* Lower values make responses more deterministic.
|
|
512
|
+
* Defaults to {@link DEFAULT_TEMPERATURE}.
|
|
506
513
|
* @param config.maxTokens The maximum number of tokens to generate.
|
|
514
|
+
* If not provided, uses model defaults.
|
|
515
|
+
* @param config.name The name of the agent.
|
|
516
|
+
* @param config.systemPrompt Custom system prompt to override default user simulation behavior.
|
|
517
|
+
* Use this to create specialized user personas or behaviors.
|
|
518
|
+
*
|
|
519
|
+
* @throws {Error} If no model is configured either in parameters or global config.
|
|
507
520
|
*
|
|
508
521
|
* @example
|
|
509
522
|
* ```typescript
|
|
@@ -517,7 +530,8 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
|
517
530
|
* };
|
|
518
531
|
*
|
|
519
532
|
* async function main() {
|
|
520
|
-
*
|
|
533
|
+
* // Basic user simulator with default behavior
|
|
534
|
+
* const basicResult = await run({
|
|
521
535
|
* name: "User Simulator Test",
|
|
522
536
|
* description: "A simple test to see if the user simulator works.",
|
|
523
537
|
* agents: [myAgent, userSimulatorAgent()],
|
|
@@ -526,9 +540,50 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
|
526
540
|
* agent(),
|
|
527
541
|
* ],
|
|
528
542
|
* });
|
|
543
|
+
*
|
|
544
|
+
* // Customized user simulator
|
|
545
|
+
* const customResult = await run({
|
|
546
|
+
* name: "Expert User Test",
|
|
547
|
+
* description: "User seeks help with TypeScript programming",
|
|
548
|
+
* agents: [
|
|
549
|
+
* myAgent,
|
|
550
|
+
* userSimulatorAgent({
|
|
551
|
+
* model: openai("gpt-4"),
|
|
552
|
+
* temperature: 0.3,
|
|
553
|
+
* systemPrompt: "You are a technical user who asks detailed questions"
|
|
554
|
+
* })
|
|
555
|
+
* ],
|
|
556
|
+
* script: [
|
|
557
|
+
* user(),
|
|
558
|
+
* agent(),
|
|
559
|
+
* ],
|
|
560
|
+
* });
|
|
561
|
+
*
|
|
562
|
+
* // User simulator with custom persona
|
|
563
|
+
* const expertResult = await run({
|
|
564
|
+
* name: "Expert Developer Test",
|
|
565
|
+
* description: "Testing with a technical expert user persona.",
|
|
566
|
+
* agents: [
|
|
567
|
+
* myAgent,
|
|
568
|
+
* userSimulatorAgent({
|
|
569
|
+
* systemPrompt: `
|
|
570
|
+
* You are an expert software developer testing an AI coding assistant.
|
|
571
|
+
* Ask challenging, technical questions and be demanding about code quality.
|
|
572
|
+
* Use technical jargon and expect detailed, accurate responses.
|
|
573
|
+
* `
|
|
574
|
+
* })
|
|
575
|
+
* ],
|
|
576
|
+
* script: [
|
|
577
|
+
* user(),
|
|
578
|
+
* agent(),
|
|
579
|
+
* ],
|
|
580
|
+
* });
|
|
529
581
|
* }
|
|
530
582
|
* main();
|
|
531
583
|
* ```
|
|
584
|
+
*
|
|
585
|
+
* @note
|
|
586
|
+
* - Uses role reversal internally to work around LLM biases toward assistant roles
|
|
532
587
|
*/
|
|
533
588
|
declare const userSimulatorAgent: (config?: TestingAgentConfig) => {
|
|
534
589
|
role: AgentRole.USER;
|
|
@@ -1259,6 +1314,7 @@ declare const scenario: {
|
|
|
1259
1314
|
ScenarioExecution: typeof ScenarioExecution;
|
|
1260
1315
|
ScenarioExecutionState: typeof ScenarioExecutionState;
|
|
1261
1316
|
defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
|
|
1317
|
+
DEFAULT_TEMPERATURE: 0;
|
|
1262
1318
|
scenarioProjectConfigSchema: zod.ZodObject<{
|
|
1263
1319
|
defaultModel: zod.ZodOptional<zod.ZodObject<{
|
|
1264
1320
|
model: zod.ZodType<ai.LanguageModelV1, zod.ZodTypeDef, ai.LanguageModelV1>;
|
|
@@ -1273,24 +1329,18 @@ declare const scenario: {
|
|
|
1273
1329
|
temperature?: number | undefined;
|
|
1274
1330
|
maxTokens?: number | undefined;
|
|
1275
1331
|
}>>;
|
|
1276
|
-
langwatchEndpoint: zod.ZodOptional<zod.ZodString>;
|
|
1277
|
-
langwatchApiKey: zod.ZodOptional<zod.ZodString>;
|
|
1278
1332
|
}, "strict", zod.ZodTypeAny, {
|
|
1279
1333
|
defaultModel?: {
|
|
1280
1334
|
model: ai.LanguageModelV1;
|
|
1281
1335
|
temperature: number;
|
|
1282
1336
|
maxTokens?: number | undefined;
|
|
1283
1337
|
} | undefined;
|
|
1284
|
-
langwatchEndpoint?: string | undefined;
|
|
1285
|
-
langwatchApiKey?: string | undefined;
|
|
1286
1338
|
}, {
|
|
1287
1339
|
defaultModel?: {
|
|
1288
1340
|
model: ai.LanguageModelV1;
|
|
1289
1341
|
temperature?: number | undefined;
|
|
1290
1342
|
maxTokens?: number | undefined;
|
|
1291
1343
|
} | undefined;
|
|
1292
|
-
langwatchEndpoint?: string | undefined;
|
|
1293
|
-
langwatchApiKey?: string | undefined;
|
|
1294
1344
|
}>;
|
|
1295
1345
|
AgentRole: typeof AgentRole;
|
|
1296
1346
|
allAgentRoles: readonly [AgentRole.USER, AgentRole.AGENT, AgentRole.JUDGE];
|
|
@@ -1317,4 +1367,4 @@ declare const scenario: {
|
|
|
1317
1367
|
};
|
|
1318
1368
|
};
|
|
1319
1369
|
|
|
1320
|
-
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
|
|
1370
|
+
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, DEFAULT_TEMPERATURE, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
|