npm - @langwatch/scenario - Versions diffs - 0.2.0 → 0.2.2 - Mend

@langwatch/scenario 0.2.0 → 0.2.2

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (9)

package/dist/chunk-NUZZAQV2.mjs +622 -0
package/dist/index.d.mts +65 -15
package/dist/index.d.ts +65 -15
package/dist/index.js +317 -97
package/dist/index.mjs +122 -173
package/dist/integrations/vitest/setup.js +282 -105
package/dist/integrations/vitest/setup.mjs +1 -1
package/package.json +4 -3
package/dist/chunk-ORWSJC5F.mjs +0 -309

package/dist/index.d.mts CHANGED Viewed

@@ -337,6 +337,8 @@ interface ScenarioExecutionStateLike {
     hasToolCall(toolName: string): boolean;
 }
+/** Default temperature for language model inference */
+declare const DEFAULT_TEMPERATURE = 0;
 declare const scenarioProjectConfigSchema: z.ZodObject<{
     defaultModel: z.ZodOptional<z.ZodObject<{
         model: z.ZodType<ai.LanguageModelV1, z.ZodTypeDef, ai.LanguageModelV1>;
@@ -351,24 +353,18 @@ declare const scenarioProjectConfigSchema: z.ZodObject<{
         temperature?: number | undefined;
         maxTokens?: number | undefined;
     }>>;
-    langwatchEndpoint: z.ZodOptional<z.ZodString>;
-    langwatchApiKey: z.ZodOptional<z.ZodString>;
 }, "strict", z.ZodTypeAny, {
     defaultModel?: {
         model: ai.LanguageModelV1;
         temperature: number;
         maxTokens?: number | undefined;
     } | undefined;
-    langwatchEndpoint?: string | undefined;
-    langwatchApiKey?: string | undefined;
 }, {
     defaultModel?: {
         model: ai.LanguageModelV1;
         temperature?: number | undefined;
         maxTokens?: number | undefined;
     } | undefined;
-    langwatchEndpoint?: string | undefined;
-    langwatchApiKey?: string | undefined;
 }>;
 type ScenarioProjectConfig = z.infer<typeof scenarioProjectConfigSchema>;
 declare function defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
@@ -400,6 +396,14 @@ interface TestingAgentConfig extends TestingAgentInferenceConfig {
      * The name of the agent.
      */
     name?: string;
+    /**
+     * System prompt to use for the agent.
+     *
+     * Useful in more complex scenarios where you want to set the system prompt
+     * for the agent directly. If left blank, this will be automatically generated
+     * from the scenario description.
+     */
+    systemPrompt?: string;
 }
 /**
  * The arguments for finishing a test, used by the judge agent's tool.
@@ -502,8 +506,17 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
  *
  * @param config Optional configuration for the agent.
  * @param config.model The language model to use for generating responses.
- * @param config.temperature The temperature to use for the model.
+ *                     If not provided, a default model will be used.
+ * @param config.temperature The temperature for the language model (0.0-1.0).
+ *                          Lower values make responses more deterministic.
+ *                          Defaults to {@link DEFAULT_TEMPERATURE}.
  * @param config.maxTokens The maximum number of tokens to generate.
+ *                        If not provided, uses model defaults.
+ * @param config.name The name of the agent.
+ * @param config.systemPrompt Custom system prompt to override default user simulation behavior.
+ *                           Use this to create specialized user personas or behaviors.
+ *
+ * @throws {Error} If no model is configured either in parameters or global config.
  *
  * @example
  * ```typescript
@@ -517,7 +530,8 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
  * };
  *
  * async function main() {
- *   const result = await run({
+ *   // Basic user simulator with default behavior
+ *   const basicResult = await run({
  *     name: "User Simulator Test",
  *     description: "A simple test to see if the user simulator works.",
  *     agents: [myAgent, userSimulatorAgent()],
@@ -526,9 +540,50 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
  *       agent(),
  *     ],
  *   });
+ *
+ *   // Customized user simulator
+ *   const customResult = await run({
+ *     name: "Expert User Test",
+ *     description: "User seeks help with TypeScript programming",
+ *     agents: [
+ *       myAgent,
+ *       userSimulatorAgent({
+ *         model: openai("gpt-4"),
+ *         temperature: 0.3,
+ *         systemPrompt: "You are a technical user who asks detailed questions"
+ *       })
+ *     ],
+ *     script: [
+ *       user(),
+ *       agent(),
+ *     ],
+ *   });
+ *
+ *   // User simulator with custom persona
+ *   const expertResult = await run({
+ *     name: "Expert Developer Test",
+ *     description: "Testing with a technical expert user persona.",
+ *     agents: [
+ *       myAgent,
+ *       userSimulatorAgent({
+ *         systemPrompt: `
+ *           You are an expert software developer testing an AI coding assistant.
+ *           Ask challenging, technical questions and be demanding about code quality.
+ *           Use technical jargon and expect detailed, accurate responses.
+ *         `
+ *       })
+ *     ],
+ *     script: [
+ *       user(),
+ *       agent(),
+ *     ],
+ *   });
  * }
  * main();
  * ```
+ *
+ * @note
+ * - Uses role reversal internally to work around LLM biases toward assistant roles
  */
 declare const userSimulatorAgent: (config?: TestingAgentConfig) => {
     role: AgentRole.USER;
@@ -1259,6 +1314,7 @@ declare const scenario: {
     ScenarioExecution: typeof ScenarioExecution;
     ScenarioExecutionState: typeof ScenarioExecutionState;
     defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
+    DEFAULT_TEMPERATURE: 0;
     scenarioProjectConfigSchema: zod.ZodObject<{
         defaultModel: zod.ZodOptional<zod.ZodObject<{
             model: zod.ZodType<ai.LanguageModelV1, zod.ZodTypeDef, ai.LanguageModelV1>;
@@ -1273,24 +1329,18 @@ declare const scenario: {
             temperature?: number | undefined;
             maxTokens?: number | undefined;
         }>>;
-        langwatchEndpoint: zod.ZodOptional<zod.ZodString>;
-        langwatchApiKey: zod.ZodOptional<zod.ZodString>;
     }, "strict", zod.ZodTypeAny, {
         defaultModel?: {
             model: ai.LanguageModelV1;
             temperature: number;
             maxTokens?: number | undefined;
         } | undefined;
-        langwatchEndpoint?: string | undefined;
-        langwatchApiKey?: string | undefined;
     }, {
         defaultModel?: {
             model: ai.LanguageModelV1;
             temperature?: number | undefined;
             maxTokens?: number | undefined;
         } | undefined;
-        langwatchEndpoint?: string | undefined;
-        langwatchApiKey?: string | undefined;
     }>;
     AgentRole: typeof AgentRole;
     allAgentRoles: readonly [AgentRole.USER, AgentRole.AGENT, AgentRole.JUDGE];
@@ -1317,4 +1367,4 @@ declare const scenario: {
     };
 };
-export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
+export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, DEFAULT_TEMPERATURE, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };

package/dist/index.d.ts CHANGED Viewed

@@ -337,6 +337,8 @@ interface ScenarioExecutionStateLike {
     hasToolCall(toolName: string): boolean;
 }
+/** Default temperature for language model inference */
+declare const DEFAULT_TEMPERATURE = 0;
 declare const scenarioProjectConfigSchema: z.ZodObject<{
     defaultModel: z.ZodOptional<z.ZodObject<{
         model: z.ZodType<ai.LanguageModelV1, z.ZodTypeDef, ai.LanguageModelV1>;
@@ -351,24 +353,18 @@ declare const scenarioProjectConfigSchema: z.ZodObject<{
         temperature?: number | undefined;
         maxTokens?: number | undefined;
     }>>;
-    langwatchEndpoint: z.ZodOptional<z.ZodString>;
-    langwatchApiKey: z.ZodOptional<z.ZodString>;
 }, "strict", z.ZodTypeAny, {
     defaultModel?: {
         model: ai.LanguageModelV1;
         temperature: number;
         maxTokens?: number | undefined;
     } | undefined;
-    langwatchEndpoint?: string | undefined;
-    langwatchApiKey?: string | undefined;
 }, {
     defaultModel?: {
         model: ai.LanguageModelV1;
         temperature?: number | undefined;
         maxTokens?: number | undefined;
     } | undefined;
-    langwatchEndpoint?: string | undefined;
-    langwatchApiKey?: string | undefined;
 }>;
 type ScenarioProjectConfig = z.infer<typeof scenarioProjectConfigSchema>;
 declare function defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
@@ -400,6 +396,14 @@ interface TestingAgentConfig extends TestingAgentInferenceConfig {
      * The name of the agent.
      */
     name?: string;
+    /**
+     * System prompt to use for the agent.
+     *
+     * Useful in more complex scenarios where you want to set the system prompt
+     * for the agent directly. If left blank, this will be automatically generated
+     * from the scenario description.
+     */
+    systemPrompt?: string;
 }
 /**
  * The arguments for finishing a test, used by the judge agent's tool.
@@ -502,8 +506,17 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
  *
  * @param config Optional configuration for the agent.
  * @param config.model The language model to use for generating responses.
- * @param config.temperature The temperature to use for the model.
+ *                     If not provided, a default model will be used.
+ * @param config.temperature The temperature for the language model (0.0-1.0).
+ *                          Lower values make responses more deterministic.
+ *                          Defaults to {@link DEFAULT_TEMPERATURE}.
  * @param config.maxTokens The maximum number of tokens to generate.
+ *                        If not provided, uses model defaults.
+ * @param config.name The name of the agent.
+ * @param config.systemPrompt Custom system prompt to override default user simulation behavior.
+ *                           Use this to create specialized user personas or behaviors.
+ *
+ * @throws {Error} If no model is configured either in parameters or global config.
  *
  * @example
  * ```typescript
@@ -517,7 +530,8 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
  * };
  *
  * async function main() {
- *   const result = await run({
+ *   // Basic user simulator with default behavior
+ *   const basicResult = await run({
  *     name: "User Simulator Test",
  *     description: "A simple test to see if the user simulator works.",
  *     agents: [myAgent, userSimulatorAgent()],
@@ -526,9 +540,50 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
  *       agent(),
  *     ],
  *   });
+ *
+ *   // Customized user simulator
+ *   const customResult = await run({
+ *     name: "Expert User Test",
+ *     description: "User seeks help with TypeScript programming",
+ *     agents: [
+ *       myAgent,
+ *       userSimulatorAgent({
+ *         model: openai("gpt-4"),
+ *         temperature: 0.3,
+ *         systemPrompt: "You are a technical user who asks detailed questions"
+ *       })
+ *     ],
+ *     script: [
+ *       user(),
+ *       agent(),
+ *     ],
+ *   });
+ *
+ *   // User simulator with custom persona
+ *   const expertResult = await run({
+ *     name: "Expert Developer Test",
+ *     description: "Testing with a technical expert user persona.",
+ *     agents: [
+ *       myAgent,
+ *       userSimulatorAgent({
+ *         systemPrompt: `
+ *           You are an expert software developer testing an AI coding assistant.
+ *           Ask challenging, technical questions and be demanding about code quality.
+ *           Use technical jargon and expect detailed, accurate responses.
+ *         `
+ *       })
+ *     ],
+ *     script: [
+ *       user(),
+ *       agent(),
+ *     ],
+ *   });
  * }
  * main();
  * ```
+ *
+ * @note
+ * - Uses role reversal internally to work around LLM biases toward assistant roles
  */
 declare const userSimulatorAgent: (config?: TestingAgentConfig) => {
     role: AgentRole.USER;
@@ -1259,6 +1314,7 @@ declare const scenario: {
     ScenarioExecution: typeof ScenarioExecution;
     ScenarioExecutionState: typeof ScenarioExecutionState;
     defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
+    DEFAULT_TEMPERATURE: 0;
     scenarioProjectConfigSchema: zod.ZodObject<{
         defaultModel: zod.ZodOptional<zod.ZodObject<{
             model: zod.ZodType<ai.LanguageModelV1, zod.ZodTypeDef, ai.LanguageModelV1>;
@@ -1273,24 +1329,18 @@ declare const scenario: {
             temperature?: number | undefined;
             maxTokens?: number | undefined;
         }>>;
-        langwatchEndpoint: zod.ZodOptional<zod.ZodString>;
-        langwatchApiKey: zod.ZodOptional<zod.ZodString>;
     }, "strict", zod.ZodTypeAny, {
         defaultModel?: {
             model: ai.LanguageModelV1;
             temperature: number;
             maxTokens?: number | undefined;
         } | undefined;
-        langwatchEndpoint?: string | undefined;
-        langwatchApiKey?: string | undefined;
     }, {
         defaultModel?: {
             model: ai.LanguageModelV1;
             temperature?: number | undefined;
             maxTokens?: number | undefined;
         } | undefined;
-        langwatchEndpoint?: string | undefined;
-        langwatchApiKey?: string | undefined;
     }>;
     AgentRole: typeof AgentRole;
     allAgentRoles: readonly [AgentRole.USER, AgentRole.AGENT, AgentRole.JUDGE];
@@ -1317,4 +1367,4 @@ declare const scenario: {
     };
 };
-export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
+export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, DEFAULT_TEMPERATURE, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };