@langwatch/scenario 0.2.1 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -6
- package/dist/{chunk-ZMHTHRDR.mjs → chunk-MOOKAYIE.mjs} +18 -6
- package/dist/index.d.mts +140 -82
- package/dist/index.d.ts +140 -82
- package/dist/index.js +116 -27
- package/dist/index.mjs +104 -24
- package/dist/integrations/vitest/setup.js +6 -4
- package/dist/integrations/vitest/setup.mjs +1 -1
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -222,10 +222,6 @@ export default defineConfig({
|
|
|
222
222
|
model: openai("gpt-4o-mini"),
|
|
223
223
|
temperature: 0.1,
|
|
224
224
|
},
|
|
225
|
-
|
|
226
|
-
// Configure the LangWatch reporting endpoint and API key
|
|
227
|
-
langwatchEndpoint: "https://app.langwatch.ai",
|
|
228
|
-
langwatchApiKey: process.env.LANGWATCH_API_KEY,
|
|
229
225
|
});
|
|
230
226
|
```
|
|
231
227
|
|
|
@@ -239,8 +235,6 @@ The following configuration options are all optional. You can specify any combin
|
|
|
239
235
|
- `model`: **(Required if `defaultModel` is set)** An instance of a language model from a provider like `@ai-sdk/openai`.
|
|
240
236
|
- `temperature` _(Optional)_: The default temperature for the model (e.g., `0.1`).
|
|
241
237
|
- `maxTokens` _(Optional)_: The default maximum number of tokens for the model to generate.
|
|
242
|
-
- `langwatchEndpoint` _(Optional)_: The endpoint for the LangWatch reporting service. If not specified, it defaults to the `LANGWATCH_ENDPOINT` environment variable, or `https://app.langwatch.ai`.
|
|
243
|
-
- `langwatchApiKey` _(Optional)_: Your LangWatch API key for authenticating with the reporting service. If not specified, it defaults to the `LANGWATCH_API_KEY` environment variable.
|
|
244
238
|
|
|
245
239
|
### Environment Variables
|
|
246
240
|
|
|
@@ -4,14 +4,13 @@ import {
|
|
|
4
4
|
|
|
5
5
|
// src/domain/core/config.ts
|
|
6
6
|
import { z } from "zod";
|
|
7
|
+
var DEFAULT_TEMPERATURE = 0;
|
|
7
8
|
var scenarioProjectConfigSchema = z.object({
|
|
8
9
|
defaultModel: z.object({
|
|
9
10
|
model: z.custom(),
|
|
10
|
-
temperature: z.number().min(0).max(1).optional().default(
|
|
11
|
+
temperature: z.number().min(0).max(1).optional().default(DEFAULT_TEMPERATURE),
|
|
11
12
|
maxTokens: z.number().optional()
|
|
12
|
-
}).optional()
|
|
13
|
-
langwatchEndpoint: z.string().optional(),
|
|
14
|
-
langwatchApiKey: z.string().optional()
|
|
13
|
+
}).optional()
|
|
15
14
|
}).strict();
|
|
16
15
|
function defineConfig(config2) {
|
|
17
16
|
return config2;
|
|
@@ -44,11 +43,18 @@ var JudgeAgentAdapter = class {
|
|
|
44
43
|
}
|
|
45
44
|
};
|
|
46
45
|
|
|
46
|
+
// src/domain/scenarios/index.ts
|
|
47
|
+
var DEFAULT_MAX_TURNS = 10;
|
|
48
|
+
var DEFAULT_VERBOSE = false;
|
|
49
|
+
|
|
47
50
|
// src/domain/index.ts
|
|
48
51
|
var domain_exports = {};
|
|
49
52
|
__export(domain_exports, {
|
|
50
53
|
AgentAdapter: () => AgentAdapter,
|
|
51
54
|
AgentRole: () => AgentRole,
|
|
55
|
+
DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
|
|
56
|
+
DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
|
|
57
|
+
DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
|
|
52
58
|
JudgeAgentAdapter: () => JudgeAgentAdapter,
|
|
53
59
|
UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
|
|
54
60
|
allAgentRoles: () => allAgentRoles,
|
|
@@ -231,7 +237,7 @@ async function loadProjectConfig() {
|
|
|
231
237
|
configLoadPromise = (async () => {
|
|
232
238
|
try {
|
|
233
239
|
config = await loadScenarioProjectConfig();
|
|
234
|
-
logger.
|
|
240
|
+
logger.debug("loaded scenario project config", { config });
|
|
235
241
|
} catch (error) {
|
|
236
242
|
logger.error("error loading scenario project config", { error });
|
|
237
243
|
} finally {
|
|
@@ -425,17 +431,20 @@ var EventReporter = class {
|
|
|
425
431
|
eventsEndpoint;
|
|
426
432
|
eventAlertMessageLogger;
|
|
427
433
|
logger = new Logger("scenario.events.EventReporter");
|
|
434
|
+
isEnabled;
|
|
428
435
|
constructor(config2) {
|
|
429
436
|
this.apiKey = config2.apiKey ?? "";
|
|
430
437
|
this.eventsEndpoint = new URL("/api/scenario-events", config2.endpoint);
|
|
431
438
|
this.eventAlertMessageLogger = new EventAlertMessageLogger();
|
|
432
439
|
this.eventAlertMessageLogger.handleGreeting();
|
|
440
|
+
this.isEnabled = this.apiKey.length > 0 && this.eventsEndpoint.href.length > 0;
|
|
433
441
|
}
|
|
434
442
|
/**
|
|
435
443
|
* Posts an event to the configured endpoint.
|
|
436
444
|
* Logs success/failure but doesn't throw - event posting shouldn't break scenario execution.
|
|
437
445
|
*/
|
|
438
446
|
async postEvent(event) {
|
|
447
|
+
if (!this.isEnabled) return {};
|
|
439
448
|
const result = {};
|
|
440
449
|
this.logger.debug(`[${event.type}] Posting event`, { event });
|
|
441
450
|
const processedEvent = this.processEventForApi(event);
|
|
@@ -598,6 +607,7 @@ var EventBus = class _EventBus {
|
|
|
598
607
|
};
|
|
599
608
|
|
|
600
609
|
export {
|
|
610
|
+
DEFAULT_TEMPERATURE,
|
|
601
611
|
scenarioProjectConfigSchema,
|
|
602
612
|
defineConfig,
|
|
603
613
|
AgentRole,
|
|
@@ -605,9 +615,11 @@ export {
|
|
|
605
615
|
AgentAdapter,
|
|
606
616
|
UserSimulatorAgentAdapter,
|
|
607
617
|
JudgeAgentAdapter,
|
|
618
|
+
DEFAULT_MAX_TURNS,
|
|
619
|
+
DEFAULT_VERBOSE,
|
|
608
620
|
domain_exports,
|
|
609
|
-
loadScenarioProjectConfig,
|
|
610
621
|
Logger,
|
|
622
|
+
env,
|
|
611
623
|
getProjectConfig,
|
|
612
624
|
generateThreadId,
|
|
613
625
|
generateScenarioRunId,
|
package/dist/index.d.mts
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
import * as zod from 'zod';
|
|
2
|
-
import { z } from 'zod';
|
|
3
1
|
import * as ai from 'ai';
|
|
4
2
|
import { CoreMessage, CoreToolMessage, LanguageModel } from 'ai';
|
|
3
|
+
import { z } from 'zod';
|
|
5
4
|
import { Observable } from 'rxjs';
|
|
6
5
|
|
|
7
6
|
declare enum AgentRole {
|
|
@@ -121,6 +120,8 @@ declare abstract class JudgeAgentAdapter implements AgentAdapter {
|
|
|
121
120
|
abstract call(input: AgentInput): Promise<AgentReturnTypes>;
|
|
122
121
|
}
|
|
123
122
|
|
|
123
|
+
declare const DEFAULT_MAX_TURNS = 10;
|
|
124
|
+
declare const DEFAULT_VERBOSE = false;
|
|
124
125
|
/**
|
|
125
126
|
* Configuration for a scenario.
|
|
126
127
|
*/
|
|
@@ -147,11 +148,19 @@ interface ScenarioConfig {
|
|
|
147
148
|
*/
|
|
148
149
|
script?: ScriptStep[];
|
|
149
150
|
/**
|
|
150
|
-
* Whether to output verbose logging.
|
|
151
|
+
* Whether to output verbose logging.
|
|
152
|
+
*
|
|
153
|
+
* If no value is provided, this defaults to {@link DEFAULT_VERBOSE}.
|
|
154
|
+
*
|
|
155
|
+
* @default {@link DEFAULT_VERBOSE}
|
|
151
156
|
*/
|
|
152
157
|
verbose?: boolean;
|
|
153
158
|
/**
|
|
154
|
-
* The maximum number of turns to execute.
|
|
159
|
+
* The maximum number of turns to execute.
|
|
160
|
+
*
|
|
161
|
+
* If no value is provided, this defaults to {@link DEFAULT_MAX_TURNS}.
|
|
162
|
+
*
|
|
163
|
+
* @default {@link DEFAULT_MAX_TURNS}
|
|
155
164
|
*/
|
|
156
165
|
maxTurns?: number;
|
|
157
166
|
/**
|
|
@@ -337,6 +346,8 @@ interface ScenarioExecutionStateLike {
|
|
|
337
346
|
hasToolCall(toolName: string): boolean;
|
|
338
347
|
}
|
|
339
348
|
|
|
349
|
+
/** Default temperature for language model inference */
|
|
350
|
+
declare const DEFAULT_TEMPERATURE = 0;
|
|
340
351
|
declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
341
352
|
defaultModel: z.ZodOptional<z.ZodObject<{
|
|
342
353
|
model: z.ZodType<ai.LanguageModelV1, z.ZodTypeDef, ai.LanguageModelV1>;
|
|
@@ -351,28 +362,49 @@ declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
|
351
362
|
temperature?: number | undefined;
|
|
352
363
|
maxTokens?: number | undefined;
|
|
353
364
|
}>>;
|
|
354
|
-
langwatchEndpoint: z.ZodOptional<z.ZodString>;
|
|
355
|
-
langwatchApiKey: z.ZodOptional<z.ZodString>;
|
|
356
365
|
}, "strict", z.ZodTypeAny, {
|
|
357
366
|
defaultModel?: {
|
|
358
367
|
model: ai.LanguageModelV1;
|
|
359
368
|
temperature: number;
|
|
360
369
|
maxTokens?: number | undefined;
|
|
361
370
|
} | undefined;
|
|
362
|
-
langwatchEndpoint?: string | undefined;
|
|
363
|
-
langwatchApiKey?: string | undefined;
|
|
364
371
|
}, {
|
|
365
372
|
defaultModel?: {
|
|
366
373
|
model: ai.LanguageModelV1;
|
|
367
374
|
temperature?: number | undefined;
|
|
368
375
|
maxTokens?: number | undefined;
|
|
369
376
|
} | undefined;
|
|
370
|
-
langwatchEndpoint?: string | undefined;
|
|
371
|
-
langwatchApiKey?: string | undefined;
|
|
372
377
|
}>;
|
|
373
378
|
type ScenarioProjectConfig = z.infer<typeof scenarioProjectConfigSchema>;
|
|
374
379
|
declare function defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
|
|
375
380
|
|
|
381
|
+
type domain_AgentAdapter = AgentAdapter;
|
|
382
|
+
declare const domain_AgentAdapter: typeof AgentAdapter;
|
|
383
|
+
type domain_AgentInput = AgentInput;
|
|
384
|
+
type domain_AgentReturnTypes = AgentReturnTypes;
|
|
385
|
+
type domain_AgentRole = AgentRole;
|
|
386
|
+
declare const domain_AgentRole: typeof AgentRole;
|
|
387
|
+
declare const domain_DEFAULT_MAX_TURNS: typeof DEFAULT_MAX_TURNS;
|
|
388
|
+
declare const domain_DEFAULT_TEMPERATURE: typeof DEFAULT_TEMPERATURE;
|
|
389
|
+
declare const domain_DEFAULT_VERBOSE: typeof DEFAULT_VERBOSE;
|
|
390
|
+
type domain_JudgeAgentAdapter = JudgeAgentAdapter;
|
|
391
|
+
declare const domain_JudgeAgentAdapter: typeof JudgeAgentAdapter;
|
|
392
|
+
type domain_ScenarioConfig = ScenarioConfig;
|
|
393
|
+
type domain_ScenarioConfigFinal = ScenarioConfigFinal;
|
|
394
|
+
type domain_ScenarioExecutionLike = ScenarioExecutionLike;
|
|
395
|
+
type domain_ScenarioExecutionStateLike = ScenarioExecutionStateLike;
|
|
396
|
+
type domain_ScenarioProjectConfig = ScenarioProjectConfig;
|
|
397
|
+
type domain_ScenarioResult = ScenarioResult;
|
|
398
|
+
type domain_ScriptStep = ScriptStep;
|
|
399
|
+
type domain_UserSimulatorAgentAdapter = UserSimulatorAgentAdapter;
|
|
400
|
+
declare const domain_UserSimulatorAgentAdapter: typeof UserSimulatorAgentAdapter;
|
|
401
|
+
declare const domain_allAgentRoles: typeof allAgentRoles;
|
|
402
|
+
declare const domain_defineConfig: typeof defineConfig;
|
|
403
|
+
declare const domain_scenarioProjectConfigSchema: typeof scenarioProjectConfigSchema;
|
|
404
|
+
declare namespace domain {
|
|
405
|
+
export { domain_AgentAdapter as AgentAdapter, type domain_AgentInput as AgentInput, type domain_AgentReturnTypes as AgentReturnTypes, domain_AgentRole as AgentRole, domain_DEFAULT_MAX_TURNS as DEFAULT_MAX_TURNS, domain_DEFAULT_TEMPERATURE as DEFAULT_TEMPERATURE, domain_DEFAULT_VERBOSE as DEFAULT_VERBOSE, domain_JudgeAgentAdapter as JudgeAgentAdapter, type domain_ScenarioConfig as ScenarioConfig, type domain_ScenarioConfigFinal as ScenarioConfigFinal, type domain_ScenarioExecutionLike as ScenarioExecutionLike, type domain_ScenarioExecutionStateLike as ScenarioExecutionStateLike, type domain_ScenarioProjectConfig as ScenarioProjectConfig, type domain_ScenarioResult as ScenarioResult, type domain_ScriptStep as ScriptStep, domain_UserSimulatorAgentAdapter as UserSimulatorAgentAdapter, domain_allAgentRoles as allAgentRoles, domain_defineConfig as defineConfig, domain_scenarioProjectConfigSchema as scenarioProjectConfigSchema };
|
|
406
|
+
}
|
|
407
|
+
|
|
376
408
|
/**
|
|
377
409
|
* Configuration for the inference parameters of a testing agent.
|
|
378
410
|
*/
|
|
@@ -400,6 +432,14 @@ interface TestingAgentConfig extends TestingAgentInferenceConfig {
|
|
|
400
432
|
* The name of the agent.
|
|
401
433
|
*/
|
|
402
434
|
name?: string;
|
|
435
|
+
/**
|
|
436
|
+
* System prompt to use for the agent.
|
|
437
|
+
*
|
|
438
|
+
* Useful in more complex scenarios where you want to set the system prompt
|
|
439
|
+
* for the agent directly. If left blank, this will be automatically generated
|
|
440
|
+
* from the scenario description.
|
|
441
|
+
*/
|
|
442
|
+
systemPrompt?: string;
|
|
403
443
|
}
|
|
404
444
|
/**
|
|
405
445
|
* The arguments for finishing a test, used by the judge agent's tool.
|
|
@@ -502,8 +542,17 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
|
502
542
|
*
|
|
503
543
|
* @param config Optional configuration for the agent.
|
|
504
544
|
* @param config.model The language model to use for generating responses.
|
|
505
|
-
*
|
|
545
|
+
* If not provided, a default model will be used.
|
|
546
|
+
* @param config.temperature The temperature for the language model (0.0-1.0).
|
|
547
|
+
* Lower values make responses more deterministic.
|
|
548
|
+
* Defaults to {@link DEFAULT_TEMPERATURE}.
|
|
506
549
|
* @param config.maxTokens The maximum number of tokens to generate.
|
|
550
|
+
* If not provided, uses model defaults.
|
|
551
|
+
* @param config.name The name of the agent.
|
|
552
|
+
* @param config.systemPrompt Custom system prompt to override default user simulation behavior.
|
|
553
|
+
* Use this to create specialized user personas or behaviors.
|
|
554
|
+
*
|
|
555
|
+
* @throws {Error} If no model is configured either in parameters or global config.
|
|
507
556
|
*
|
|
508
557
|
* @example
|
|
509
558
|
* ```typescript
|
|
@@ -517,7 +566,8 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
|
517
566
|
* };
|
|
518
567
|
*
|
|
519
568
|
* async function main() {
|
|
520
|
-
*
|
|
569
|
+
* // Basic user simulator with default behavior
|
|
570
|
+
* const basicResult = await run({
|
|
521
571
|
* name: "User Simulator Test",
|
|
522
572
|
* description: "A simple test to see if the user simulator works.",
|
|
523
573
|
* agents: [myAgent, userSimulatorAgent()],
|
|
@@ -526,9 +576,50 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
|
526
576
|
* agent(),
|
|
527
577
|
* ],
|
|
528
578
|
* });
|
|
579
|
+
*
|
|
580
|
+
* // Customized user simulator
|
|
581
|
+
* const customResult = await run({
|
|
582
|
+
* name: "Expert User Test",
|
|
583
|
+
* description: "User seeks help with TypeScript programming",
|
|
584
|
+
* agents: [
|
|
585
|
+
* myAgent,
|
|
586
|
+
* userSimulatorAgent({
|
|
587
|
+
* model: openai("gpt-4"),
|
|
588
|
+
* temperature: 0.3,
|
|
589
|
+
* systemPrompt: "You are a technical user who asks detailed questions"
|
|
590
|
+
* })
|
|
591
|
+
* ],
|
|
592
|
+
* script: [
|
|
593
|
+
* user(),
|
|
594
|
+
* agent(),
|
|
595
|
+
* ],
|
|
596
|
+
* });
|
|
597
|
+
*
|
|
598
|
+
* // User simulator with custom persona
|
|
599
|
+
* const expertResult = await run({
|
|
600
|
+
* name: "Expert Developer Test",
|
|
601
|
+
* description: "Testing with a technical expert user persona.",
|
|
602
|
+
* agents: [
|
|
603
|
+
* myAgent,
|
|
604
|
+
* userSimulatorAgent({
|
|
605
|
+
* systemPrompt: `
|
|
606
|
+
* You are an expert software developer testing an AI coding assistant.
|
|
607
|
+
* Ask challenging, technical questions and be demanding about code quality.
|
|
608
|
+
* Use technical jargon and expect detailed, accurate responses.
|
|
609
|
+
* `
|
|
610
|
+
* })
|
|
611
|
+
* ],
|
|
612
|
+
* script: [
|
|
613
|
+
* user(),
|
|
614
|
+
* agent(),
|
|
615
|
+
* ],
|
|
616
|
+
* });
|
|
529
617
|
* }
|
|
530
618
|
* main();
|
|
531
619
|
* ```
|
|
620
|
+
*
|
|
621
|
+
* @note
|
|
622
|
+
* - Uses role reversal internally to work around LLM biases toward assistant roles
|
|
532
623
|
*/
|
|
533
624
|
declare const userSimulatorAgent: (config?: TestingAgentConfig) => {
|
|
534
625
|
role: AgentRole.USER;
|
|
@@ -538,6 +629,16 @@ declare const userSimulatorAgent: (config?: TestingAgentConfig) => {
|
|
|
538
629
|
}>;
|
|
539
630
|
};
|
|
540
631
|
|
|
632
|
+
type agents_FinishTestArgs = FinishTestArgs;
|
|
633
|
+
type agents_JudgeAgentConfig = JudgeAgentConfig;
|
|
634
|
+
type agents_TestingAgentConfig = TestingAgentConfig;
|
|
635
|
+
type agents_TestingAgentInferenceConfig = TestingAgentInferenceConfig;
|
|
636
|
+
declare const agents_judgeAgent: typeof judgeAgent;
|
|
637
|
+
declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
|
|
638
|
+
declare namespace agents {
|
|
639
|
+
export { type agents_FinishTestArgs as FinishTestArgs, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_TestingAgentConfig as TestingAgentConfig, type agents_TestingAgentInferenceConfig as TestingAgentInferenceConfig, agents_judgeAgent as judgeAgent, agents_userSimulatorAgent as userSimulatorAgent };
|
|
640
|
+
}
|
|
641
|
+
|
|
541
642
|
/**
|
|
542
643
|
* Verdict enum represents the possible outcomes of a test scenario
|
|
543
644
|
*/
|
|
@@ -1107,6 +1208,14 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
|
|
|
1107
1208
|
hasToolCall(toolName: string): boolean;
|
|
1108
1209
|
}
|
|
1109
1210
|
|
|
1211
|
+
type execution_ScenarioExecution = ScenarioExecution;
|
|
1212
|
+
declare const execution_ScenarioExecution: typeof ScenarioExecution;
|
|
1213
|
+
type execution_ScenarioExecutionState = ScenarioExecutionState;
|
|
1214
|
+
declare const execution_ScenarioExecutionState: typeof ScenarioExecutionState;
|
|
1215
|
+
declare namespace execution {
|
|
1216
|
+
export { execution_ScenarioExecution as ScenarioExecution, execution_ScenarioExecutionState as ScenarioExecutionState };
|
|
1217
|
+
}
|
|
1218
|
+
|
|
1110
1219
|
/**
|
|
1111
1220
|
* High-level interface for running a scenario test.
|
|
1112
1221
|
*
|
|
@@ -1158,6 +1267,11 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
|
|
|
1158
1267
|
*/
|
|
1159
1268
|
declare function run(cfg: ScenarioConfig): Promise<ScenarioResult>;
|
|
1160
1269
|
|
|
1270
|
+
declare const runner_run: typeof run;
|
|
1271
|
+
declare namespace runner {
|
|
1272
|
+
export { runner_run as run };
|
|
1273
|
+
}
|
|
1274
|
+
|
|
1161
1275
|
/**
|
|
1162
1276
|
* Scenario script DSL (Domain Specific Language) module.
|
|
1163
1277
|
*
|
|
@@ -1247,74 +1361,18 @@ declare const succeed: (reasoning?: string) => ScriptStep;
|
|
|
1247
1361
|
*/
|
|
1248
1362
|
declare const fail: (reasoning?: string) => ScriptStep;
|
|
1249
1363
|
|
|
1250
|
-
declare const
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
defaultModel: zod.ZodOptional<zod.ZodObject<{
|
|
1264
|
-
model: zod.ZodType<ai.LanguageModelV1, zod.ZodTypeDef, ai.LanguageModelV1>;
|
|
1265
|
-
temperature: zod.ZodDefault<zod.ZodOptional<zod.ZodNumber>>;
|
|
1266
|
-
maxTokens: zod.ZodOptional<zod.ZodNumber>;
|
|
1267
|
-
}, "strip", zod.ZodTypeAny, {
|
|
1268
|
-
model: ai.LanguageModelV1;
|
|
1269
|
-
temperature: number;
|
|
1270
|
-
maxTokens?: number | undefined;
|
|
1271
|
-
}, {
|
|
1272
|
-
model: ai.LanguageModelV1;
|
|
1273
|
-
temperature?: number | undefined;
|
|
1274
|
-
maxTokens?: number | undefined;
|
|
1275
|
-
}>>;
|
|
1276
|
-
langwatchEndpoint: zod.ZodOptional<zod.ZodString>;
|
|
1277
|
-
langwatchApiKey: zod.ZodOptional<zod.ZodString>;
|
|
1278
|
-
}, "strict", zod.ZodTypeAny, {
|
|
1279
|
-
defaultModel?: {
|
|
1280
|
-
model: ai.LanguageModelV1;
|
|
1281
|
-
temperature: number;
|
|
1282
|
-
maxTokens?: number | undefined;
|
|
1283
|
-
} | undefined;
|
|
1284
|
-
langwatchEndpoint?: string | undefined;
|
|
1285
|
-
langwatchApiKey?: string | undefined;
|
|
1286
|
-
}, {
|
|
1287
|
-
defaultModel?: {
|
|
1288
|
-
model: ai.LanguageModelV1;
|
|
1289
|
-
temperature?: number | undefined;
|
|
1290
|
-
maxTokens?: number | undefined;
|
|
1291
|
-
} | undefined;
|
|
1292
|
-
langwatchEndpoint?: string | undefined;
|
|
1293
|
-
langwatchApiKey?: string | undefined;
|
|
1294
|
-
}>;
|
|
1295
|
-
AgentRole: typeof AgentRole;
|
|
1296
|
-
allAgentRoles: readonly [AgentRole.USER, AgentRole.AGENT, AgentRole.JUDGE];
|
|
1297
|
-
AgentAdapter: typeof AgentAdapter;
|
|
1298
|
-
UserSimulatorAgentAdapter: typeof UserSimulatorAgentAdapter;
|
|
1299
|
-
JudgeAgentAdapter: typeof JudgeAgentAdapter;
|
|
1300
|
-
judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
1301
|
-
role: AgentRole.JUDGE;
|
|
1302
|
-
criteria: string[];
|
|
1303
|
-
call: (input: AgentInput) => Promise<never[] | {
|
|
1304
|
-
success: boolean;
|
|
1305
|
-
messages: ai.CoreMessage[];
|
|
1306
|
-
reasoning: string;
|
|
1307
|
-
metCriteria: string[];
|
|
1308
|
-
unmetCriteria: string[];
|
|
1309
|
-
}>;
|
|
1310
|
-
};
|
|
1311
|
-
userSimulatorAgent: (config?: TestingAgentConfig) => {
|
|
1312
|
-
role: AgentRole.USER;
|
|
1313
|
-
call: (input: AgentInput) => Promise<{
|
|
1314
|
-
role: "user";
|
|
1315
|
-
content: string;
|
|
1316
|
-
}>;
|
|
1317
|
-
};
|
|
1318
|
-
};
|
|
1364
|
+
declare const script_agent: typeof agent;
|
|
1365
|
+
declare const script_fail: typeof fail;
|
|
1366
|
+
declare const script_judge: typeof judge;
|
|
1367
|
+
declare const script_message: typeof message;
|
|
1368
|
+
declare const script_proceed: typeof proceed;
|
|
1369
|
+
declare const script_succeed: typeof succeed;
|
|
1370
|
+
declare const script_user: typeof user;
|
|
1371
|
+
declare namespace script {
|
|
1372
|
+
export { script_agent as agent, script_fail as fail, script_judge as judge, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
|
|
1373
|
+
}
|
|
1374
|
+
|
|
1375
|
+
type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
|
|
1376
|
+
declare const scenario: ScenarioApi;
|
|
1319
1377
|
|
|
1320
|
-
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
|
|
1378
|
+
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, DEFAULT_MAX_TURNS, DEFAULT_TEMPERATURE, DEFAULT_VERBOSE, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
import * as zod from 'zod';
|
|
2
|
-
import { z } from 'zod';
|
|
3
1
|
import * as ai from 'ai';
|
|
4
2
|
import { CoreMessage, CoreToolMessage, LanguageModel } from 'ai';
|
|
3
|
+
import { z } from 'zod';
|
|
5
4
|
import { Observable } from 'rxjs';
|
|
6
5
|
|
|
7
6
|
declare enum AgentRole {
|
|
@@ -121,6 +120,8 @@ declare abstract class JudgeAgentAdapter implements AgentAdapter {
|
|
|
121
120
|
abstract call(input: AgentInput): Promise<AgentReturnTypes>;
|
|
122
121
|
}
|
|
123
122
|
|
|
123
|
+
declare const DEFAULT_MAX_TURNS = 10;
|
|
124
|
+
declare const DEFAULT_VERBOSE = false;
|
|
124
125
|
/**
|
|
125
126
|
* Configuration for a scenario.
|
|
126
127
|
*/
|
|
@@ -147,11 +148,19 @@ interface ScenarioConfig {
|
|
|
147
148
|
*/
|
|
148
149
|
script?: ScriptStep[];
|
|
149
150
|
/**
|
|
150
|
-
* Whether to output verbose logging.
|
|
151
|
+
* Whether to output verbose logging.
|
|
152
|
+
*
|
|
153
|
+
* If no value is provided, this defaults to {@link DEFAULT_VERBOSE}.
|
|
154
|
+
*
|
|
155
|
+
* @default {@link DEFAULT_VERBOSE}
|
|
151
156
|
*/
|
|
152
157
|
verbose?: boolean;
|
|
153
158
|
/**
|
|
154
|
-
* The maximum number of turns to execute.
|
|
159
|
+
* The maximum number of turns to execute.
|
|
160
|
+
*
|
|
161
|
+
* If no value is provided, this defaults to {@link DEFAULT_MAX_TURNS}.
|
|
162
|
+
*
|
|
163
|
+
* @default {@link DEFAULT_MAX_TURNS}
|
|
155
164
|
*/
|
|
156
165
|
maxTurns?: number;
|
|
157
166
|
/**
|
|
@@ -337,6 +346,8 @@ interface ScenarioExecutionStateLike {
|
|
|
337
346
|
hasToolCall(toolName: string): boolean;
|
|
338
347
|
}
|
|
339
348
|
|
|
349
|
+
/** Default temperature for language model inference */
|
|
350
|
+
declare const DEFAULT_TEMPERATURE = 0;
|
|
340
351
|
declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
341
352
|
defaultModel: z.ZodOptional<z.ZodObject<{
|
|
342
353
|
model: z.ZodType<ai.LanguageModelV1, z.ZodTypeDef, ai.LanguageModelV1>;
|
|
@@ -351,28 +362,49 @@ declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
|
351
362
|
temperature?: number | undefined;
|
|
352
363
|
maxTokens?: number | undefined;
|
|
353
364
|
}>>;
|
|
354
|
-
langwatchEndpoint: z.ZodOptional<z.ZodString>;
|
|
355
|
-
langwatchApiKey: z.ZodOptional<z.ZodString>;
|
|
356
365
|
}, "strict", z.ZodTypeAny, {
|
|
357
366
|
defaultModel?: {
|
|
358
367
|
model: ai.LanguageModelV1;
|
|
359
368
|
temperature: number;
|
|
360
369
|
maxTokens?: number | undefined;
|
|
361
370
|
} | undefined;
|
|
362
|
-
langwatchEndpoint?: string | undefined;
|
|
363
|
-
langwatchApiKey?: string | undefined;
|
|
364
371
|
}, {
|
|
365
372
|
defaultModel?: {
|
|
366
373
|
model: ai.LanguageModelV1;
|
|
367
374
|
temperature?: number | undefined;
|
|
368
375
|
maxTokens?: number | undefined;
|
|
369
376
|
} | undefined;
|
|
370
|
-
langwatchEndpoint?: string | undefined;
|
|
371
|
-
langwatchApiKey?: string | undefined;
|
|
372
377
|
}>;
|
|
373
378
|
type ScenarioProjectConfig = z.infer<typeof scenarioProjectConfigSchema>;
|
|
374
379
|
declare function defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
|
|
375
380
|
|
|
381
|
+
type domain_AgentAdapter = AgentAdapter;
|
|
382
|
+
declare const domain_AgentAdapter: typeof AgentAdapter;
|
|
383
|
+
type domain_AgentInput = AgentInput;
|
|
384
|
+
type domain_AgentReturnTypes = AgentReturnTypes;
|
|
385
|
+
type domain_AgentRole = AgentRole;
|
|
386
|
+
declare const domain_AgentRole: typeof AgentRole;
|
|
387
|
+
declare const domain_DEFAULT_MAX_TURNS: typeof DEFAULT_MAX_TURNS;
|
|
388
|
+
declare const domain_DEFAULT_TEMPERATURE: typeof DEFAULT_TEMPERATURE;
|
|
389
|
+
declare const domain_DEFAULT_VERBOSE: typeof DEFAULT_VERBOSE;
|
|
390
|
+
type domain_JudgeAgentAdapter = JudgeAgentAdapter;
|
|
391
|
+
declare const domain_JudgeAgentAdapter: typeof JudgeAgentAdapter;
|
|
392
|
+
type domain_ScenarioConfig = ScenarioConfig;
|
|
393
|
+
type domain_ScenarioConfigFinal = ScenarioConfigFinal;
|
|
394
|
+
type domain_ScenarioExecutionLike = ScenarioExecutionLike;
|
|
395
|
+
type domain_ScenarioExecutionStateLike = ScenarioExecutionStateLike;
|
|
396
|
+
type domain_ScenarioProjectConfig = ScenarioProjectConfig;
|
|
397
|
+
type domain_ScenarioResult = ScenarioResult;
|
|
398
|
+
type domain_ScriptStep = ScriptStep;
|
|
399
|
+
type domain_UserSimulatorAgentAdapter = UserSimulatorAgentAdapter;
|
|
400
|
+
declare const domain_UserSimulatorAgentAdapter: typeof UserSimulatorAgentAdapter;
|
|
401
|
+
declare const domain_allAgentRoles: typeof allAgentRoles;
|
|
402
|
+
declare const domain_defineConfig: typeof defineConfig;
|
|
403
|
+
declare const domain_scenarioProjectConfigSchema: typeof scenarioProjectConfigSchema;
|
|
404
|
+
declare namespace domain {
|
|
405
|
+
export { domain_AgentAdapter as AgentAdapter, type domain_AgentInput as AgentInput, type domain_AgentReturnTypes as AgentReturnTypes, domain_AgentRole as AgentRole, domain_DEFAULT_MAX_TURNS as DEFAULT_MAX_TURNS, domain_DEFAULT_TEMPERATURE as DEFAULT_TEMPERATURE, domain_DEFAULT_VERBOSE as DEFAULT_VERBOSE, domain_JudgeAgentAdapter as JudgeAgentAdapter, type domain_ScenarioConfig as ScenarioConfig, type domain_ScenarioConfigFinal as ScenarioConfigFinal, type domain_ScenarioExecutionLike as ScenarioExecutionLike, type domain_ScenarioExecutionStateLike as ScenarioExecutionStateLike, type domain_ScenarioProjectConfig as ScenarioProjectConfig, type domain_ScenarioResult as ScenarioResult, type domain_ScriptStep as ScriptStep, domain_UserSimulatorAgentAdapter as UserSimulatorAgentAdapter, domain_allAgentRoles as allAgentRoles, domain_defineConfig as defineConfig, domain_scenarioProjectConfigSchema as scenarioProjectConfigSchema };
|
|
406
|
+
}
|
|
407
|
+
|
|
376
408
|
/**
|
|
377
409
|
* Configuration for the inference parameters of a testing agent.
|
|
378
410
|
*/
|
|
@@ -400,6 +432,14 @@ interface TestingAgentConfig extends TestingAgentInferenceConfig {
|
|
|
400
432
|
* The name of the agent.
|
|
401
433
|
*/
|
|
402
434
|
name?: string;
|
|
435
|
+
/**
|
|
436
|
+
* System prompt to use for the agent.
|
|
437
|
+
*
|
|
438
|
+
* Useful in more complex scenarios where you want to set the system prompt
|
|
439
|
+
* for the agent directly. If left blank, this will be automatically generated
|
|
440
|
+
* from the scenario description.
|
|
441
|
+
*/
|
|
442
|
+
systemPrompt?: string;
|
|
403
443
|
}
|
|
404
444
|
/**
|
|
405
445
|
* The arguments for finishing a test, used by the judge agent's tool.
|
|
@@ -502,8 +542,17 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
|
502
542
|
*
|
|
503
543
|
* @param config Optional configuration for the agent.
|
|
504
544
|
* @param config.model The language model to use for generating responses.
|
|
505
|
-
*
|
|
545
|
+
* If not provided, a default model will be used.
|
|
546
|
+
* @param config.temperature The temperature for the language model (0.0-1.0).
|
|
547
|
+
* Lower values make responses more deterministic.
|
|
548
|
+
* Defaults to {@link DEFAULT_TEMPERATURE}.
|
|
506
549
|
* @param config.maxTokens The maximum number of tokens to generate.
|
|
550
|
+
* If not provided, uses model defaults.
|
|
551
|
+
* @param config.name The name of the agent.
|
|
552
|
+
* @param config.systemPrompt Custom system prompt to override default user simulation behavior.
|
|
553
|
+
* Use this to create specialized user personas or behaviors.
|
|
554
|
+
*
|
|
555
|
+
* @throws {Error} If no model is configured either in parameters or global config.
|
|
507
556
|
*
|
|
508
557
|
* @example
|
|
509
558
|
* ```typescript
|
|
@@ -517,7 +566,8 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
|
517
566
|
* };
|
|
518
567
|
*
|
|
519
568
|
* async function main() {
|
|
520
|
-
*
|
|
569
|
+
* // Basic user simulator with default behavior
|
|
570
|
+
* const basicResult = await run({
|
|
521
571
|
* name: "User Simulator Test",
|
|
522
572
|
* description: "A simple test to see if the user simulator works.",
|
|
523
573
|
* agents: [myAgent, userSimulatorAgent()],
|
|
@@ -526,9 +576,50 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
|
526
576
|
* agent(),
|
|
527
577
|
* ],
|
|
528
578
|
* });
|
|
579
|
+
*
|
|
580
|
+
* // Customized user simulator
|
|
581
|
+
* const customResult = await run({
|
|
582
|
+
* name: "Expert User Test",
|
|
583
|
+
* description: "User seeks help with TypeScript programming",
|
|
584
|
+
* agents: [
|
|
585
|
+
* myAgent,
|
|
586
|
+
* userSimulatorAgent({
|
|
587
|
+
* model: openai("gpt-4"),
|
|
588
|
+
* temperature: 0.3,
|
|
589
|
+
* systemPrompt: "You are a technical user who asks detailed questions"
|
|
590
|
+
* })
|
|
591
|
+
* ],
|
|
592
|
+
* script: [
|
|
593
|
+
* user(),
|
|
594
|
+
* agent(),
|
|
595
|
+
* ],
|
|
596
|
+
* });
|
|
597
|
+
*
|
|
598
|
+
* // User simulator with custom persona
|
|
599
|
+
* const expertResult = await run({
|
|
600
|
+
* name: "Expert Developer Test",
|
|
601
|
+
* description: "Testing with a technical expert user persona.",
|
|
602
|
+
* agents: [
|
|
603
|
+
* myAgent,
|
|
604
|
+
* userSimulatorAgent({
|
|
605
|
+
* systemPrompt: `
|
|
606
|
+
* You are an expert software developer testing an AI coding assistant.
|
|
607
|
+
* Ask challenging, technical questions and be demanding about code quality.
|
|
608
|
+
* Use technical jargon and expect detailed, accurate responses.
|
|
609
|
+
* `
|
|
610
|
+
* })
|
|
611
|
+
* ],
|
|
612
|
+
* script: [
|
|
613
|
+
* user(),
|
|
614
|
+
* agent(),
|
|
615
|
+
* ],
|
|
616
|
+
* });
|
|
529
617
|
* }
|
|
530
618
|
* main();
|
|
531
619
|
* ```
|
|
620
|
+
*
|
|
621
|
+
* @note
|
|
622
|
+
* - Uses role reversal internally to work around LLM biases toward assistant roles
|
|
532
623
|
*/
|
|
533
624
|
declare const userSimulatorAgent: (config?: TestingAgentConfig) => {
|
|
534
625
|
role: AgentRole.USER;
|
|
@@ -538,6 +629,16 @@ declare const userSimulatorAgent: (config?: TestingAgentConfig) => {
|
|
|
538
629
|
}>;
|
|
539
630
|
};
|
|
540
631
|
|
|
632
|
+
type agents_FinishTestArgs = FinishTestArgs;
|
|
633
|
+
type agents_JudgeAgentConfig = JudgeAgentConfig;
|
|
634
|
+
type agents_TestingAgentConfig = TestingAgentConfig;
|
|
635
|
+
type agents_TestingAgentInferenceConfig = TestingAgentInferenceConfig;
|
|
636
|
+
declare const agents_judgeAgent: typeof judgeAgent;
|
|
637
|
+
declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
|
|
638
|
+
declare namespace agents {
|
|
639
|
+
export { type agents_FinishTestArgs as FinishTestArgs, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_TestingAgentConfig as TestingAgentConfig, type agents_TestingAgentInferenceConfig as TestingAgentInferenceConfig, agents_judgeAgent as judgeAgent, agents_userSimulatorAgent as userSimulatorAgent };
|
|
640
|
+
}
|
|
641
|
+
|
|
541
642
|
/**
|
|
542
643
|
* Verdict enum represents the possible outcomes of a test scenario
|
|
543
644
|
*/
|
|
@@ -1107,6 +1208,14 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
|
|
|
1107
1208
|
hasToolCall(toolName: string): boolean;
|
|
1108
1209
|
}
|
|
1109
1210
|
|
|
1211
|
+
type execution_ScenarioExecution = ScenarioExecution;
|
|
1212
|
+
declare const execution_ScenarioExecution: typeof ScenarioExecution;
|
|
1213
|
+
type execution_ScenarioExecutionState = ScenarioExecutionState;
|
|
1214
|
+
declare const execution_ScenarioExecutionState: typeof ScenarioExecutionState;
|
|
1215
|
+
declare namespace execution {
|
|
1216
|
+
export { execution_ScenarioExecution as ScenarioExecution, execution_ScenarioExecutionState as ScenarioExecutionState };
|
|
1217
|
+
}
|
|
1218
|
+
|
|
1110
1219
|
/**
|
|
1111
1220
|
* High-level interface for running a scenario test.
|
|
1112
1221
|
*
|
|
@@ -1158,6 +1267,11 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
|
|
|
1158
1267
|
*/
|
|
1159
1268
|
declare function run(cfg: ScenarioConfig): Promise<ScenarioResult>;
|
|
1160
1269
|
|
|
1270
|
+
declare const runner_run: typeof run;
|
|
1271
|
+
declare namespace runner {
|
|
1272
|
+
export { runner_run as run };
|
|
1273
|
+
}
|
|
1274
|
+
|
|
1161
1275
|
/**
|
|
1162
1276
|
* Scenario script DSL (Domain Specific Language) module.
|
|
1163
1277
|
*
|
|
@@ -1247,74 +1361,18 @@ declare const succeed: (reasoning?: string) => ScriptStep;
|
|
|
1247
1361
|
*/
|
|
1248
1362
|
declare const fail: (reasoning?: string) => ScriptStep;
|
|
1249
1363
|
|
|
1250
|
-
declare const
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
defaultModel: zod.ZodOptional<zod.ZodObject<{
|
|
1264
|
-
model: zod.ZodType<ai.LanguageModelV1, zod.ZodTypeDef, ai.LanguageModelV1>;
|
|
1265
|
-
temperature: zod.ZodDefault<zod.ZodOptional<zod.ZodNumber>>;
|
|
1266
|
-
maxTokens: zod.ZodOptional<zod.ZodNumber>;
|
|
1267
|
-
}, "strip", zod.ZodTypeAny, {
|
|
1268
|
-
model: ai.LanguageModelV1;
|
|
1269
|
-
temperature: number;
|
|
1270
|
-
maxTokens?: number | undefined;
|
|
1271
|
-
}, {
|
|
1272
|
-
model: ai.LanguageModelV1;
|
|
1273
|
-
temperature?: number | undefined;
|
|
1274
|
-
maxTokens?: number | undefined;
|
|
1275
|
-
}>>;
|
|
1276
|
-
langwatchEndpoint: zod.ZodOptional<zod.ZodString>;
|
|
1277
|
-
langwatchApiKey: zod.ZodOptional<zod.ZodString>;
|
|
1278
|
-
}, "strict", zod.ZodTypeAny, {
|
|
1279
|
-
defaultModel?: {
|
|
1280
|
-
model: ai.LanguageModelV1;
|
|
1281
|
-
temperature: number;
|
|
1282
|
-
maxTokens?: number | undefined;
|
|
1283
|
-
} | undefined;
|
|
1284
|
-
langwatchEndpoint?: string | undefined;
|
|
1285
|
-
langwatchApiKey?: string | undefined;
|
|
1286
|
-
}, {
|
|
1287
|
-
defaultModel?: {
|
|
1288
|
-
model: ai.LanguageModelV1;
|
|
1289
|
-
temperature?: number | undefined;
|
|
1290
|
-
maxTokens?: number | undefined;
|
|
1291
|
-
} | undefined;
|
|
1292
|
-
langwatchEndpoint?: string | undefined;
|
|
1293
|
-
langwatchApiKey?: string | undefined;
|
|
1294
|
-
}>;
|
|
1295
|
-
AgentRole: typeof AgentRole;
|
|
1296
|
-
allAgentRoles: readonly [AgentRole.USER, AgentRole.AGENT, AgentRole.JUDGE];
|
|
1297
|
-
AgentAdapter: typeof AgentAdapter;
|
|
1298
|
-
UserSimulatorAgentAdapter: typeof UserSimulatorAgentAdapter;
|
|
1299
|
-
JudgeAgentAdapter: typeof JudgeAgentAdapter;
|
|
1300
|
-
judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
1301
|
-
role: AgentRole.JUDGE;
|
|
1302
|
-
criteria: string[];
|
|
1303
|
-
call: (input: AgentInput) => Promise<never[] | {
|
|
1304
|
-
success: boolean;
|
|
1305
|
-
messages: ai.CoreMessage[];
|
|
1306
|
-
reasoning: string;
|
|
1307
|
-
metCriteria: string[];
|
|
1308
|
-
unmetCriteria: string[];
|
|
1309
|
-
}>;
|
|
1310
|
-
};
|
|
1311
|
-
userSimulatorAgent: (config?: TestingAgentConfig) => {
|
|
1312
|
-
role: AgentRole.USER;
|
|
1313
|
-
call: (input: AgentInput) => Promise<{
|
|
1314
|
-
role: "user";
|
|
1315
|
-
content: string;
|
|
1316
|
-
}>;
|
|
1317
|
-
};
|
|
1318
|
-
};
|
|
1364
|
+
declare const script_agent: typeof agent;
|
|
1365
|
+
declare const script_fail: typeof fail;
|
|
1366
|
+
declare const script_judge: typeof judge;
|
|
1367
|
+
declare const script_message: typeof message;
|
|
1368
|
+
declare const script_proceed: typeof proceed;
|
|
1369
|
+
declare const script_succeed: typeof succeed;
|
|
1370
|
+
declare const script_user: typeof user;
|
|
1371
|
+
declare namespace script {
|
|
1372
|
+
export { script_agent as agent, script_fail as fail, script_judge as judge, script_message as message, script_proceed as proceed, script_succeed as succeed, script_user as user };
|
|
1373
|
+
}
|
|
1374
|
+
|
|
1375
|
+
type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
|
|
1376
|
+
declare const scenario: ScenarioApi;
|
|
1319
1377
|
|
|
1320
|
-
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
|
|
1378
|
+
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, DEFAULT_MAX_TURNS, DEFAULT_TEMPERATURE, DEFAULT_VERBOSE, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
|
package/dist/index.js
CHANGED
|
@@ -32,6 +32,9 @@ var index_exports = {};
|
|
|
32
32
|
__export(index_exports, {
|
|
33
33
|
AgentAdapter: () => AgentAdapter,
|
|
34
34
|
AgentRole: () => AgentRole,
|
|
35
|
+
DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
|
|
36
|
+
DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
|
|
37
|
+
DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
|
|
35
38
|
JudgeAgentAdapter: () => JudgeAgentAdapter,
|
|
36
39
|
ScenarioExecution: () => ScenarioExecution,
|
|
37
40
|
ScenarioExecutionState: () => ScenarioExecutionState,
|
|
@@ -70,6 +73,9 @@ var domain_exports = {};
|
|
|
70
73
|
__export(domain_exports, {
|
|
71
74
|
AgentAdapter: () => AgentAdapter,
|
|
72
75
|
AgentRole: () => AgentRole,
|
|
76
|
+
DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
|
|
77
|
+
DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
|
|
78
|
+
DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
|
|
73
79
|
JudgeAgentAdapter: () => JudgeAgentAdapter,
|
|
74
80
|
UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
|
|
75
81
|
allAgentRoles: () => allAgentRoles,
|
|
@@ -79,14 +85,13 @@ __export(domain_exports, {
|
|
|
79
85
|
|
|
80
86
|
// src/domain/core/config.ts
|
|
81
87
|
var import_zod = require("zod");
|
|
88
|
+
var DEFAULT_TEMPERATURE = 0;
|
|
82
89
|
var scenarioProjectConfigSchema = import_zod.z.object({
|
|
83
90
|
defaultModel: import_zod.z.object({
|
|
84
91
|
model: import_zod.z.custom(),
|
|
85
|
-
temperature: import_zod.z.number().min(0).max(1).optional().default(
|
|
92
|
+
temperature: import_zod.z.number().min(0).max(1).optional().default(DEFAULT_TEMPERATURE),
|
|
86
93
|
maxTokens: import_zod.z.number().optional()
|
|
87
|
-
}).optional()
|
|
88
|
-
langwatchEndpoint: import_zod.z.string().optional(),
|
|
89
|
-
langwatchApiKey: import_zod.z.string().optional()
|
|
94
|
+
}).optional()
|
|
90
95
|
}).strict();
|
|
91
96
|
function defineConfig(config2) {
|
|
92
97
|
return config2;
|
|
@@ -119,6 +124,10 @@ var JudgeAgentAdapter = class {
|
|
|
119
124
|
}
|
|
120
125
|
};
|
|
121
126
|
|
|
127
|
+
// src/domain/scenarios/index.ts
|
|
128
|
+
var DEFAULT_MAX_TURNS = 10;
|
|
129
|
+
var DEFAULT_VERBOSE = false;
|
|
130
|
+
|
|
122
131
|
// src/agents/utils.ts
|
|
123
132
|
var toolMessageRole = "tool";
|
|
124
133
|
var assistantMessageRole = "assistant";
|
|
@@ -339,7 +348,7 @@ async function loadProjectConfig() {
|
|
|
339
348
|
configLoadPromise = (async () => {
|
|
340
349
|
try {
|
|
341
350
|
config = await loadScenarioProjectConfig();
|
|
342
|
-
logger.
|
|
351
|
+
logger.debug("loaded scenario project config", { config });
|
|
343
352
|
} catch (error) {
|
|
344
353
|
logger.error("error loading scenario project config", { error });
|
|
345
354
|
} finally {
|
|
@@ -534,7 +543,7 @@ var userSimulatorAgent = (config2) => {
|
|
|
534
543
|
return {
|
|
535
544
|
role: "User" /* USER */,
|
|
536
545
|
call: async (input) => {
|
|
537
|
-
const systemPrompt = buildSystemPrompt2(input.scenarioConfig.description);
|
|
546
|
+
const systemPrompt = (config2 == null ? void 0 : config2.systemPrompt) ?? buildSystemPrompt2(input.scenarioConfig.description);
|
|
538
547
|
const messages = [
|
|
539
548
|
{ role: "system", content: systemPrompt },
|
|
540
549
|
{ role: "assistant", content: "Hello, how can I help you today" },
|
|
@@ -549,7 +558,7 @@ var userSimulatorAgent = (config2) => {
|
|
|
549
558
|
const completion = await (0, import_ai2.generateText)({
|
|
550
559
|
model: mergedConfig.model,
|
|
551
560
|
messages: reversedMessages,
|
|
552
|
-
temperature: mergedConfig.temperature ??
|
|
561
|
+
temperature: mergedConfig.temperature ?? DEFAULT_TEMPERATURE,
|
|
553
562
|
maxTokens: mergedConfig.maxTokens
|
|
554
563
|
});
|
|
555
564
|
const messageContent = completion.text;
|
|
@@ -735,6 +744,77 @@ var stateSchema = import_zod4.z.object({
|
|
|
735
744
|
var runsSchema = import_zod4.z.object({ runs: import_zod4.z.array(import_zod4.z.string()) });
|
|
736
745
|
var eventsSchema = import_zod4.z.object({ events: import_zod4.z.array(scenarioEventSchema) });
|
|
737
746
|
|
|
747
|
+
// src/utils/message-conversion.ts
|
|
748
|
+
function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
749
|
+
const aguiMessages = [];
|
|
750
|
+
for (const msg of coreMessages) {
|
|
751
|
+
const id = "id" in msg && typeof msg.id === "string" ? msg.id : generateMessageId();
|
|
752
|
+
switch (true) {
|
|
753
|
+
case msg.role === "system":
|
|
754
|
+
aguiMessages.push({
|
|
755
|
+
id,
|
|
756
|
+
role: "system",
|
|
757
|
+
content: msg.content
|
|
758
|
+
});
|
|
759
|
+
break;
|
|
760
|
+
case (msg.role === "user" && typeof msg.content === "string"):
|
|
761
|
+
aguiMessages.push({
|
|
762
|
+
id,
|
|
763
|
+
role: "user",
|
|
764
|
+
content: msg.content
|
|
765
|
+
});
|
|
766
|
+
break;
|
|
767
|
+
// Handle any other user message content format
|
|
768
|
+
case (msg.role === "user" && Array.isArray(msg.content)):
|
|
769
|
+
aguiMessages.push({
|
|
770
|
+
id,
|
|
771
|
+
role: "user",
|
|
772
|
+
content: JSON.stringify(msg.content)
|
|
773
|
+
});
|
|
774
|
+
break;
|
|
775
|
+
case (msg.role === "assistant" && typeof msg.content === "string"):
|
|
776
|
+
aguiMessages.push({
|
|
777
|
+
id,
|
|
778
|
+
role: "assistant",
|
|
779
|
+
content: msg.content
|
|
780
|
+
});
|
|
781
|
+
break;
|
|
782
|
+
case (msg.role === "assistant" && Array.isArray(msg.content)): {
|
|
783
|
+
const toolCalls = msg.content.filter((p) => p.type === "tool-call");
|
|
784
|
+
const nonToolCalls = msg.content.filter((p) => p.type !== "tool-call");
|
|
785
|
+
aguiMessages.push({
|
|
786
|
+
id,
|
|
787
|
+
role: "assistant",
|
|
788
|
+
content: JSON.stringify(nonToolCalls),
|
|
789
|
+
toolCalls: toolCalls.map((c) => ({
|
|
790
|
+
id: c.toolCallId,
|
|
791
|
+
type: "function",
|
|
792
|
+
function: {
|
|
793
|
+
name: c.toolName,
|
|
794
|
+
arguments: JSON.stringify(c.args)
|
|
795
|
+
}
|
|
796
|
+
}))
|
|
797
|
+
});
|
|
798
|
+
break;
|
|
799
|
+
}
|
|
800
|
+
case msg.role === "tool":
|
|
801
|
+
msg.content.map((p, i) => {
|
|
802
|
+
aguiMessages.push({
|
|
803
|
+
id: `${id}-${i}`,
|
|
804
|
+
role: "tool",
|
|
805
|
+
toolCallId: p.toolCallId,
|
|
806
|
+
content: JSON.stringify(p.result)
|
|
807
|
+
});
|
|
808
|
+
});
|
|
809
|
+
break;
|
|
810
|
+
default:
|
|
811
|
+
throw new Error(`Unsupported message role: ${msg.role}`);
|
|
812
|
+
}
|
|
813
|
+
}
|
|
814
|
+
return aguiMessages;
|
|
815
|
+
}
|
|
816
|
+
var message_conversion_default = convertCoreMessagesToAguiMessages;
|
|
817
|
+
|
|
738
818
|
// src/execution/scenario-execution.ts
|
|
739
819
|
var batchRunId = getBatchRunId();
|
|
740
820
|
var ScenarioExecution = class {
|
|
@@ -766,8 +846,8 @@ var ScenarioExecution = class {
|
|
|
766
846
|
description: config2.description,
|
|
767
847
|
agents: config2.agents,
|
|
768
848
|
script,
|
|
769
|
-
verbose: config2.verbose ??
|
|
770
|
-
maxTurns: config2.maxTurns ??
|
|
849
|
+
verbose: config2.verbose ?? DEFAULT_VERBOSE,
|
|
850
|
+
maxTurns: config2.maxTurns ?? DEFAULT_MAX_TURNS,
|
|
771
851
|
threadId: config2.threadId ?? generateThreadId(),
|
|
772
852
|
setId: config2.setId
|
|
773
853
|
};
|
|
@@ -819,12 +899,14 @@ var ScenarioExecution = class {
|
|
|
819
899
|
}
|
|
820
900
|
}
|
|
821
901
|
this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
|
|
822
|
-
return this.reachedMaxTurns(
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
902
|
+
return this.reachedMaxTurns(
|
|
903
|
+
[
|
|
904
|
+
"Reached end of script without conclusion, add one of the following to the end of the script:",
|
|
905
|
+
"- `Scenario.proceed()` to let the simulation continue to play out",
|
|
906
|
+
"- `Scenario.judge()` to force criteria judgement",
|
|
907
|
+
"- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
|
|
908
|
+
].join("\n")
|
|
909
|
+
);
|
|
828
910
|
} catch (error) {
|
|
829
911
|
const errorResult = {
|
|
830
912
|
success: false,
|
|
@@ -959,8 +1041,7 @@ var ScenarioExecution = class {
|
|
|
959
1041
|
while (true) {
|
|
960
1042
|
const goToNextTurn = turns === void 0 || initialTurn === null || this.state.currentTurn != null && this.state.currentTurn + 1 < initialTurn + turns;
|
|
961
1043
|
const nextMessage = await this._step(goToNextTurn, onTurn);
|
|
962
|
-
if (initialTurn === null)
|
|
963
|
-
initialTurn = this.state.currentTurn;
|
|
1044
|
+
if (initialTurn === null) initialTurn = this.state.currentTurn;
|
|
964
1045
|
if (nextMessage === null) {
|
|
965
1046
|
return null;
|
|
966
1047
|
}
|
|
@@ -1046,7 +1127,10 @@ var ScenarioExecution = class {
|
|
|
1046
1127
|
agent2 = nextAgent.agent;
|
|
1047
1128
|
this.removePendingAgent(agent2);
|
|
1048
1129
|
if (content) {
|
|
1049
|
-
const message2 = typeof content === "string" ? {
|
|
1130
|
+
const message2 = typeof content === "string" ? {
|
|
1131
|
+
role: role === "User" /* USER */ ? "user" : "assistant",
|
|
1132
|
+
content
|
|
1133
|
+
} : content;
|
|
1050
1134
|
this.state.addMessage(message2);
|
|
1051
1135
|
this.broadcastMessage(message2, index);
|
|
1052
1136
|
return null;
|
|
@@ -1119,7 +1203,9 @@ var ScenarioExecution = class {
|
|
|
1119
1203
|
reachedMaxTurns(errorMessage) {
|
|
1120
1204
|
var _a;
|
|
1121
1205
|
const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
|
|
1122
|
-
const agentTimes = agentRoleAgentsIdx.map(
|
|
1206
|
+
const agentTimes = agentRoleAgentsIdx.map(
|
|
1207
|
+
(i) => this.agentTimes.get(i) || 0
|
|
1208
|
+
);
|
|
1123
1209
|
const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
|
|
1124
1210
|
return {
|
|
1125
1211
|
success: false,
|
|
@@ -1174,7 +1260,7 @@ var ScenarioExecution = class {
|
|
|
1174
1260
|
this.emitEvent({
|
|
1175
1261
|
...this.makeBaseEvent({ scenarioRunId }),
|
|
1176
1262
|
type: "SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */,
|
|
1177
|
-
messages: this.state.messages
|
|
1263
|
+
messages: message_conversion_default(this.state.messages)
|
|
1178
1264
|
// Add any other required fields from MessagesSnapshotEventSchema
|
|
1179
1265
|
});
|
|
1180
1266
|
}
|
|
@@ -1221,10 +1307,8 @@ var ScenarioExecution = class {
|
|
|
1221
1307
|
function convertAgentReturnTypesToMessages(response, role) {
|
|
1222
1308
|
if (typeof response === "string")
|
|
1223
1309
|
return [{ role, content: response }];
|
|
1224
|
-
if (Array.isArray(response))
|
|
1225
|
-
|
|
1226
|
-
if (typeof response === "object" && "role" in response)
|
|
1227
|
-
return [response];
|
|
1310
|
+
if (Array.isArray(response)) return response;
|
|
1311
|
+
if (typeof response === "object" && "role" in response) return [response];
|
|
1228
1312
|
return [];
|
|
1229
1313
|
}
|
|
1230
1314
|
|
|
@@ -1324,17 +1408,20 @@ var EventReporter = class {
|
|
|
1324
1408
|
eventsEndpoint;
|
|
1325
1409
|
eventAlertMessageLogger;
|
|
1326
1410
|
logger = new Logger("scenario.events.EventReporter");
|
|
1411
|
+
isEnabled;
|
|
1327
1412
|
constructor(config2) {
|
|
1328
1413
|
this.apiKey = config2.apiKey ?? "";
|
|
1329
1414
|
this.eventsEndpoint = new URL("/api/scenario-events", config2.endpoint);
|
|
1330
1415
|
this.eventAlertMessageLogger = new EventAlertMessageLogger();
|
|
1331
1416
|
this.eventAlertMessageLogger.handleGreeting();
|
|
1417
|
+
this.isEnabled = this.apiKey.length > 0 && this.eventsEndpoint.href.length > 0;
|
|
1332
1418
|
}
|
|
1333
1419
|
/**
|
|
1334
1420
|
* Posts an event to the configured endpoint.
|
|
1335
1421
|
* Logs success/failure but doesn't throw - event posting shouldn't break scenario execution.
|
|
1336
1422
|
*/
|
|
1337
1423
|
async postEvent(event) {
|
|
1424
|
+
if (!this.isEnabled) return {};
|
|
1338
1425
|
const result = {};
|
|
1339
1426
|
this.logger.debug(`[${event.type}] Posting event`, { event });
|
|
1340
1427
|
const processedEvent = this.processEventForApi(event);
|
|
@@ -1559,10 +1646,9 @@ async function run(cfg) {
|
|
|
1559
1646
|
let eventBus = null;
|
|
1560
1647
|
let subscription = null;
|
|
1561
1648
|
try {
|
|
1562
|
-
const projectConfig = await loadScenarioProjectConfig();
|
|
1563
1649
|
eventBus = new EventBus({
|
|
1564
|
-
endpoint:
|
|
1565
|
-
apiKey:
|
|
1650
|
+
endpoint: env.LANGWATCH_ENDPOINT,
|
|
1651
|
+
apiKey: env.LANGWATCH_API_KEY
|
|
1566
1652
|
});
|
|
1567
1653
|
eventBus.listen();
|
|
1568
1654
|
subscription = eventBus.subscribeTo(execution.events$);
|
|
@@ -1638,6 +1724,9 @@ var index_default = scenario;
|
|
|
1638
1724
|
0 && (module.exports = {
|
|
1639
1725
|
AgentAdapter,
|
|
1640
1726
|
AgentRole,
|
|
1727
|
+
DEFAULT_MAX_TURNS,
|
|
1728
|
+
DEFAULT_TEMPERATURE,
|
|
1729
|
+
DEFAULT_VERBOSE,
|
|
1641
1730
|
JudgeAgentAdapter,
|
|
1642
1731
|
ScenarioExecution,
|
|
1643
1732
|
ScenarioExecutionState,
|
package/dist/index.mjs
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import {
|
|
2
2
|
AgentAdapter,
|
|
3
3
|
AgentRole,
|
|
4
|
+
DEFAULT_MAX_TURNS,
|
|
5
|
+
DEFAULT_TEMPERATURE,
|
|
6
|
+
DEFAULT_VERBOSE,
|
|
4
7
|
EventBus,
|
|
5
8
|
JudgeAgentAdapter,
|
|
6
9
|
Logger,
|
|
@@ -8,15 +11,15 @@ import {
|
|
|
8
11
|
allAgentRoles,
|
|
9
12
|
defineConfig,
|
|
10
13
|
domain_exports,
|
|
14
|
+
env,
|
|
11
15
|
generateMessageId,
|
|
12
16
|
generateScenarioId,
|
|
13
17
|
generateScenarioRunId,
|
|
14
18
|
generateThreadId,
|
|
15
19
|
getBatchRunId,
|
|
16
20
|
getProjectConfig,
|
|
17
|
-
loadScenarioProjectConfig,
|
|
18
21
|
scenarioProjectConfigSchema
|
|
19
|
-
} from "./chunk-
|
|
22
|
+
} from "./chunk-MOOKAYIE.mjs";
|
|
20
23
|
import {
|
|
21
24
|
__export
|
|
22
25
|
} from "./chunk-7P6ASYW6.mjs";
|
|
@@ -268,7 +271,7 @@ var userSimulatorAgent = (config) => {
|
|
|
268
271
|
return {
|
|
269
272
|
role: "User" /* USER */,
|
|
270
273
|
call: async (input) => {
|
|
271
|
-
const systemPrompt = buildSystemPrompt2(input.scenarioConfig.description);
|
|
274
|
+
const systemPrompt = (config == null ? void 0 : config.systemPrompt) ?? buildSystemPrompt2(input.scenarioConfig.description);
|
|
272
275
|
const messages = [
|
|
273
276
|
{ role: "system", content: systemPrompt },
|
|
274
277
|
{ role: "assistant", content: "Hello, how can I help you today" },
|
|
@@ -283,7 +286,7 @@ var userSimulatorAgent = (config) => {
|
|
|
283
286
|
const completion = await generateText2({
|
|
284
287
|
model: mergedConfig.model,
|
|
285
288
|
messages: reversedMessages,
|
|
286
|
-
temperature: mergedConfig.temperature ??
|
|
289
|
+
temperature: mergedConfig.temperature ?? DEFAULT_TEMPERATURE,
|
|
287
290
|
maxTokens: mergedConfig.maxTokens
|
|
288
291
|
});
|
|
289
292
|
const messageContent = completion.text;
|
|
@@ -376,6 +379,77 @@ var ScenarioExecutionState = class {
|
|
|
376
379
|
}
|
|
377
380
|
};
|
|
378
381
|
|
|
382
|
+
// src/utils/message-conversion.ts
|
|
383
|
+
function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
384
|
+
const aguiMessages = [];
|
|
385
|
+
for (const msg of coreMessages) {
|
|
386
|
+
const id = "id" in msg && typeof msg.id === "string" ? msg.id : generateMessageId();
|
|
387
|
+
switch (true) {
|
|
388
|
+
case msg.role === "system":
|
|
389
|
+
aguiMessages.push({
|
|
390
|
+
id,
|
|
391
|
+
role: "system",
|
|
392
|
+
content: msg.content
|
|
393
|
+
});
|
|
394
|
+
break;
|
|
395
|
+
case (msg.role === "user" && typeof msg.content === "string"):
|
|
396
|
+
aguiMessages.push({
|
|
397
|
+
id,
|
|
398
|
+
role: "user",
|
|
399
|
+
content: msg.content
|
|
400
|
+
});
|
|
401
|
+
break;
|
|
402
|
+
// Handle any other user message content format
|
|
403
|
+
case (msg.role === "user" && Array.isArray(msg.content)):
|
|
404
|
+
aguiMessages.push({
|
|
405
|
+
id,
|
|
406
|
+
role: "user",
|
|
407
|
+
content: JSON.stringify(msg.content)
|
|
408
|
+
});
|
|
409
|
+
break;
|
|
410
|
+
case (msg.role === "assistant" && typeof msg.content === "string"):
|
|
411
|
+
aguiMessages.push({
|
|
412
|
+
id,
|
|
413
|
+
role: "assistant",
|
|
414
|
+
content: msg.content
|
|
415
|
+
});
|
|
416
|
+
break;
|
|
417
|
+
case (msg.role === "assistant" && Array.isArray(msg.content)): {
|
|
418
|
+
const toolCalls = msg.content.filter((p) => p.type === "tool-call");
|
|
419
|
+
const nonToolCalls = msg.content.filter((p) => p.type !== "tool-call");
|
|
420
|
+
aguiMessages.push({
|
|
421
|
+
id,
|
|
422
|
+
role: "assistant",
|
|
423
|
+
content: JSON.stringify(nonToolCalls),
|
|
424
|
+
toolCalls: toolCalls.map((c) => ({
|
|
425
|
+
id: c.toolCallId,
|
|
426
|
+
type: "function",
|
|
427
|
+
function: {
|
|
428
|
+
name: c.toolName,
|
|
429
|
+
arguments: JSON.stringify(c.args)
|
|
430
|
+
}
|
|
431
|
+
}))
|
|
432
|
+
});
|
|
433
|
+
break;
|
|
434
|
+
}
|
|
435
|
+
case msg.role === "tool":
|
|
436
|
+
msg.content.map((p, i) => {
|
|
437
|
+
aguiMessages.push({
|
|
438
|
+
id: `${id}-${i}`,
|
|
439
|
+
role: "tool",
|
|
440
|
+
toolCallId: p.toolCallId,
|
|
441
|
+
content: JSON.stringify(p.result)
|
|
442
|
+
});
|
|
443
|
+
});
|
|
444
|
+
break;
|
|
445
|
+
default:
|
|
446
|
+
throw new Error(`Unsupported message role: ${msg.role}`);
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
return aguiMessages;
|
|
450
|
+
}
|
|
451
|
+
var message_conversion_default = convertCoreMessagesToAguiMessages;
|
|
452
|
+
|
|
379
453
|
// src/execution/scenario-execution.ts
|
|
380
454
|
var batchRunId = getBatchRunId();
|
|
381
455
|
var ScenarioExecution = class {
|
|
@@ -407,8 +481,8 @@ var ScenarioExecution = class {
|
|
|
407
481
|
description: config.description,
|
|
408
482
|
agents: config.agents,
|
|
409
483
|
script,
|
|
410
|
-
verbose: config.verbose ??
|
|
411
|
-
maxTurns: config.maxTurns ??
|
|
484
|
+
verbose: config.verbose ?? DEFAULT_VERBOSE,
|
|
485
|
+
maxTurns: config.maxTurns ?? DEFAULT_MAX_TURNS,
|
|
412
486
|
threadId: config.threadId ?? generateThreadId(),
|
|
413
487
|
setId: config.setId
|
|
414
488
|
};
|
|
@@ -460,12 +534,14 @@ var ScenarioExecution = class {
|
|
|
460
534
|
}
|
|
461
535
|
}
|
|
462
536
|
this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
|
|
463
|
-
return this.reachedMaxTurns(
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
537
|
+
return this.reachedMaxTurns(
|
|
538
|
+
[
|
|
539
|
+
"Reached end of script without conclusion, add one of the following to the end of the script:",
|
|
540
|
+
"- `Scenario.proceed()` to let the simulation continue to play out",
|
|
541
|
+
"- `Scenario.judge()` to force criteria judgement",
|
|
542
|
+
"- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
|
|
543
|
+
].join("\n")
|
|
544
|
+
);
|
|
469
545
|
} catch (error) {
|
|
470
546
|
const errorResult = {
|
|
471
547
|
success: false,
|
|
@@ -600,8 +676,7 @@ var ScenarioExecution = class {
|
|
|
600
676
|
while (true) {
|
|
601
677
|
const goToNextTurn = turns === void 0 || initialTurn === null || this.state.currentTurn != null && this.state.currentTurn + 1 < initialTurn + turns;
|
|
602
678
|
const nextMessage = await this._step(goToNextTurn, onTurn);
|
|
603
|
-
if (initialTurn === null)
|
|
604
|
-
initialTurn = this.state.currentTurn;
|
|
679
|
+
if (initialTurn === null) initialTurn = this.state.currentTurn;
|
|
605
680
|
if (nextMessage === null) {
|
|
606
681
|
return null;
|
|
607
682
|
}
|
|
@@ -687,7 +762,10 @@ var ScenarioExecution = class {
|
|
|
687
762
|
agent2 = nextAgent.agent;
|
|
688
763
|
this.removePendingAgent(agent2);
|
|
689
764
|
if (content) {
|
|
690
|
-
const message2 = typeof content === "string" ? {
|
|
765
|
+
const message2 = typeof content === "string" ? {
|
|
766
|
+
role: role === "User" /* USER */ ? "user" : "assistant",
|
|
767
|
+
content
|
|
768
|
+
} : content;
|
|
691
769
|
this.state.addMessage(message2);
|
|
692
770
|
this.broadcastMessage(message2, index);
|
|
693
771
|
return null;
|
|
@@ -760,7 +838,9 @@ var ScenarioExecution = class {
|
|
|
760
838
|
reachedMaxTurns(errorMessage) {
|
|
761
839
|
var _a;
|
|
762
840
|
const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
|
|
763
|
-
const agentTimes = agentRoleAgentsIdx.map(
|
|
841
|
+
const agentTimes = agentRoleAgentsIdx.map(
|
|
842
|
+
(i) => this.agentTimes.get(i) || 0
|
|
843
|
+
);
|
|
764
844
|
const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
|
|
765
845
|
return {
|
|
766
846
|
success: false,
|
|
@@ -815,7 +895,7 @@ var ScenarioExecution = class {
|
|
|
815
895
|
this.emitEvent({
|
|
816
896
|
...this.makeBaseEvent({ scenarioRunId }),
|
|
817
897
|
type: "SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */,
|
|
818
|
-
messages: this.state.messages
|
|
898
|
+
messages: message_conversion_default(this.state.messages)
|
|
819
899
|
// Add any other required fields from MessagesSnapshotEventSchema
|
|
820
900
|
});
|
|
821
901
|
}
|
|
@@ -862,10 +942,8 @@ var ScenarioExecution = class {
|
|
|
862
942
|
function convertAgentReturnTypesToMessages(response, role) {
|
|
863
943
|
if (typeof response === "string")
|
|
864
944
|
return [{ role, content: response }];
|
|
865
|
-
if (Array.isArray(response))
|
|
866
|
-
|
|
867
|
-
if (typeof response === "object" && "role" in response)
|
|
868
|
-
return [response];
|
|
945
|
+
if (Array.isArray(response)) return response;
|
|
946
|
+
if (typeof response === "object" && "role" in response) return [response];
|
|
869
947
|
return [];
|
|
870
948
|
}
|
|
871
949
|
|
|
@@ -938,10 +1016,9 @@ async function run(cfg) {
|
|
|
938
1016
|
let eventBus = null;
|
|
939
1017
|
let subscription = null;
|
|
940
1018
|
try {
|
|
941
|
-
const projectConfig = await loadScenarioProjectConfig();
|
|
942
1019
|
eventBus = new EventBus({
|
|
943
|
-
endpoint:
|
|
944
|
-
apiKey:
|
|
1020
|
+
endpoint: env.LANGWATCH_ENDPOINT,
|
|
1021
|
+
apiKey: env.LANGWATCH_API_KEY
|
|
945
1022
|
});
|
|
946
1023
|
eventBus.listen();
|
|
947
1024
|
subscription = eventBus.subscribeTo(execution.events$);
|
|
@@ -1016,6 +1093,9 @@ var index_default = scenario;
|
|
|
1016
1093
|
export {
|
|
1017
1094
|
AgentAdapter,
|
|
1018
1095
|
AgentRole,
|
|
1096
|
+
DEFAULT_MAX_TURNS,
|
|
1097
|
+
DEFAULT_TEMPERATURE,
|
|
1098
|
+
DEFAULT_VERBOSE,
|
|
1019
1099
|
JudgeAgentAdapter,
|
|
1020
1100
|
ScenarioExecution,
|
|
1021
1101
|
ScenarioExecutionState,
|
|
@@ -37,14 +37,13 @@ var import_node_url = require("url");
|
|
|
37
37
|
|
|
38
38
|
// src/domain/core/config.ts
|
|
39
39
|
var import_zod = require("zod");
|
|
40
|
+
var DEFAULT_TEMPERATURE = 0;
|
|
40
41
|
var scenarioProjectConfigSchema = import_zod.z.object({
|
|
41
42
|
defaultModel: import_zod.z.object({
|
|
42
43
|
model: import_zod.z.custom(),
|
|
43
|
-
temperature: import_zod.z.number().min(0).max(1).optional().default(
|
|
44
|
+
temperature: import_zod.z.number().min(0).max(1).optional().default(DEFAULT_TEMPERATURE),
|
|
44
45
|
maxTokens: import_zod.z.number().optional()
|
|
45
|
-
}).optional()
|
|
46
|
-
langwatchEndpoint: import_zod.z.string().optional(),
|
|
47
|
-
langwatchApiKey: import_zod.z.string().optional()
|
|
46
|
+
}).optional()
|
|
48
47
|
}).strict();
|
|
49
48
|
|
|
50
49
|
// src/utils/logger.ts
|
|
@@ -335,17 +334,20 @@ var EventReporter = class {
|
|
|
335
334
|
eventsEndpoint;
|
|
336
335
|
eventAlertMessageLogger;
|
|
337
336
|
logger = new Logger("scenario.events.EventReporter");
|
|
337
|
+
isEnabled;
|
|
338
338
|
constructor(config) {
|
|
339
339
|
this.apiKey = config.apiKey ?? "";
|
|
340
340
|
this.eventsEndpoint = new URL("/api/scenario-events", config.endpoint);
|
|
341
341
|
this.eventAlertMessageLogger = new EventAlertMessageLogger();
|
|
342
342
|
this.eventAlertMessageLogger.handleGreeting();
|
|
343
|
+
this.isEnabled = this.apiKey.length > 0 && this.eventsEndpoint.href.length > 0;
|
|
343
344
|
}
|
|
344
345
|
/**
|
|
345
346
|
* Posts an event to the configured endpoint.
|
|
346
347
|
* Logs success/failure but doesn't throw - event posting shouldn't break scenario execution.
|
|
347
348
|
*/
|
|
348
349
|
async postEvent(event) {
|
|
350
|
+
if (!this.isEnabled) return {};
|
|
349
351
|
const result = {};
|
|
350
352
|
this.logger.debug(`[${event.type}] Posting event`, { event });
|
|
351
353
|
const processedEvent = this.processEventForApi(event);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@langwatch/scenario",
|
|
3
|
-
"version": "0.2.
|
|
3
|
+
"version": "0.2.6",
|
|
4
4
|
"description": "A TypeScript library for testing AI agents using scenarios",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|
|
@@ -84,7 +84,7 @@
|
|
|
84
84
|
"test": "vitest",
|
|
85
85
|
"test:ci": "vitest run",
|
|
86
86
|
"lint": "eslint .",
|
|
87
|
-
"examples:vitest:run": "export SCENARIO_BATCH_ID=scenariobatch_$(uuidgen) &&
|
|
88
|
-
"generate:api-reference": "npx typedoc src --out api-reference-docs"
|
|
87
|
+
"examples:vitest:run": "export SCENARIO_BATCH_ID=scenariobatch_$(uuidgen) && (cd examples/vitest && pnpm install) && pnpm -F vitest-example run test",
|
|
88
|
+
"generate:api-reference": "npx typedoc src --out api-reference-docs && rm -rf ../docs/docs/public/reference/javascript/scenario && mv api-reference-docs ../docs/docs/public/reference/javascript/scenario"
|
|
89
89
|
}
|
|
90
90
|
}
|