@langwatch/scenario 0.4.0 → 0.4.2

This diff shows the changes between publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
package/dist/index.mjs CHANGED
@@ -131,7 +131,7 @@ var DEFAULT_TEMPERATURE = 0;
131
131
  var modelSchema = z2.object({
132
132
  model: z2.custom((val) => Boolean(val), {
133
133
  message: "A model is required. Configure it in scenario.config.js defaultModel or pass directly to the agent."
134
- }).describe("The OpenAI Language Model to use for generating responses."),
134
+ }).describe("Language model that is used by the AI SDK Core functions."),
135
135
  temperature: z2.number().min(0).max(1).optional().describe("The temperature for the language model.").default(DEFAULT_TEMPERATURE),
136
136
  maxTokens: z2.number().optional().describe("The maximum number of tokens to generate.")
137
137
  });
@@ -397,7 +397,7 @@ var JudgeUtils = {
397
397
  /**
398
398
  * Builds a minimal transcript from messages for judge evaluation.
399
399
  * Truncates base64 media to reduce token usage.
400
- * @param messages - Array of CoreMessage from conversation
400
+ * @param messages - Array of ModelMessage from conversation
401
401
  * @returns Plain text transcript with one message per line
402
402
  */
403
403
  buildTranscriptFromMessages(messages) {
@@ -428,52 +428,68 @@ var createLLMInvoker = (logger2) => {
428
428
  var toolMessageRole = "tool";
429
429
  var assistantMessageRole = "assistant";
430
430
  var userMessageRole = "user";
431
- var groupMessagesByToolBoundaries = (messages) => {
432
- const segments = [];
433
- let currentSegment = [];
434
- for (const message2 of messages) {
435
- currentSegment.push(message2);
436
- if (message2.role === toolMessageRole) {
437
- segments.push(currentSegment);
438
- currentSegment = [];
439
- }
440
- }
441
- if (currentSegment.length > 0) {
442
- segments.push(currentSegment);
431
+ var hasToolContent = (message2) => {
432
+ if (message2.role === toolMessageRole) return true;
433
+ if (!Array.isArray(message2.content)) return false;
434
+ return message2.content.some((part) => {
435
+ if (!part || typeof part !== "object") return false;
436
+ const partType = "type" in part ? part.type : void 0;
437
+ return partType === "tool-call" || partType === "tool-result";
438
+ });
439
+ };
440
+ var stringifyValue = (value) => {
441
+ if (typeof value === "string") return value;
442
+ if (value === void 0) return "undefined";
443
+ try {
444
+ const serialized = JSON.stringify(value);
445
+ return serialized === void 0 ? String(value) : serialized;
446
+ } catch {
447
+ return String(value);
443
448
  }
444
- return segments;
445
449
  };
446
- var segmentHasToolMessages = (segment) => {
447
- return segment.some((message2) => {
448
- if (message2.role === toolMessageRole) return true;
449
- if (message2.role === assistantMessageRole && Array.isArray(message2.content)) {
450
- return message2.content.some((part) => part.type === "tool-call");
451
- }
452
- return false;
450
+ var summarizeToolMessage = (message2) => {
451
+ if (message2.role === toolMessageRole && !Array.isArray(message2.content)) {
452
+ return `[Tool message: ${stringifyValue(message2.content)}]`;
453
+ }
454
+ if (message2.role === toolMessageRole) {
455
+ const toolResults = message2.content.filter((part) => part.type === "tool-result").map((part) => {
456
+ const contentPart = part;
457
+ const name = contentPart.toolName ?? "unknown tool";
458
+ const output = contentPart.output;
459
+ const value = output && typeof output === "object" && "value" in output && typeof output.value === "string" ? output.value : output ?? contentPart.result;
460
+ return `[Tool result from ${name}: ${stringifyValue(value)}]`;
461
+ });
462
+ return toolResults.length > 0 ? toolResults.join("\n") : null;
463
+ }
464
+ if (!Array.isArray(message2.content)) return null;
465
+ const toolCalls = message2.content.filter((part) => part.type === "tool-call").map((part) => {
466
+ const contentPart = part;
467
+ const name = contentPart.toolName ?? "unknown tool";
468
+ return `[Called tool ${name} with: ${stringifyValue(contentPart.input)}]`;
453
469
  });
470
+ return toolCalls.length > 0 ? toolCalls.join("\n") : null;
454
471
  };
455
- var reverseSegmentRoles = (segment) => {
456
- return segment.map((message2) => {
457
- const hasStringContent = typeof message2.content === "string";
458
- if (!hasStringContent) return message2;
459
- const roleMap = {
460
- [userMessageRole]: assistantMessageRole,
461
- [assistantMessageRole]: userMessageRole
462
- };
472
+ var messageRoleReversal = (messages) => {
473
+ const roleMap = {
474
+ [userMessageRole]: assistantMessageRole,
475
+ [assistantMessageRole]: userMessageRole
476
+ };
477
+ return messages.map((message2) => {
478
+ if (hasToolContent(message2)) {
479
+ const summary = summarizeToolMessage(message2);
480
+ if (!summary) return null;
481
+ return {
482
+ role: userMessageRole,
483
+ content: summary
484
+ };
485
+ }
463
486
  const newRole = roleMap[message2.role];
464
487
  if (!newRole) return message2;
465
488
  return {
466
- role: newRole,
467
- content: message2.content
489
+ ...message2,
490
+ role: newRole
468
491
  };
469
- });
470
- };
471
- var messageRoleReversal = (messages) => {
472
- const segments = groupMessagesByToolBoundaries(messages);
473
- const processedSegments = segments.map(
474
- (segment) => segmentHasToolMessages(segment) ? segment : reverseSegmentRoles(segment)
475
- );
476
- return processedSegments.flat();
492
+ }).filter((message2) => message2 !== null);
477
493
  };
478
494
  var criterionToParamName = (criterion) => {
479
495
  return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
@@ -835,7 +851,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
835
851
  constructor(cfg) {
836
852
  super();
837
853
  this.cfg = cfg;
838
- this.criteria = cfg.criteria;
854
+ this.criteria = cfg.criteria ?? [];
839
855
  this.spanCollector = cfg.spanCollector ?? judgeSpanCollector;
840
856
  }
841
857
  logger = new Logger("JudgeAgent");
@@ -847,7 +863,8 @@ var JudgeAgent = class extends JudgeAgentAdapter {
847
863
  */
848
864
  invokeLLM = createLLMInvoker(this.logger);
849
865
  async call(input) {
850
- var _a, _b, _c;
866
+ var _a, _b, _c, _d;
867
+ const criteria = ((_a = input.judgmentRequest) == null ? void 0 : _a.criteria) ?? this.criteria;
851
868
  this.logger.debug("call() invoked", {
852
869
  threadId: input.threadId,
853
870
  currentTurn: input.scenarioState.currentTurn,
@@ -866,7 +883,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
866
883
  </opentelemetry_traces>
867
884
  `;
868
885
  const cfg = this.cfg;
869
- const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(cfg.criteria, input.scenarioConfig.description);
886
+ const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(criteria, input.scenarioConfig.description);
870
887
  const messages = [
871
888
  { role: "system", content: systemPrompt },
872
889
  { role: "user", content: contentForJudge }
@@ -879,10 +896,10 @@ var JudgeAgent = class extends JudgeAgentAdapter {
879
896
  });
880
897
  const tools = {
881
898
  continue_test: buildContinueTestTool(),
882
- finish_test: buildFinishTestTool(cfg.criteria)
899
+ finish_test: buildFinishTestTool(criteria)
883
900
  };
884
- const enforceJudgement = input.judgmentRequest;
885
- const hasCriteria = cfg.criteria.length && cfg.criteria.length > 0;
901
+ const enforceJudgement = input.judgmentRequest != null;
902
+ const hasCriteria = criteria.length && criteria.length > 0;
886
903
  if (enforceJudgement && !hasCriteria) {
887
904
  return {
888
905
  success: false,
@@ -907,26 +924,26 @@ var JudgeAgent = class extends JudgeAgentAdapter {
907
924
  toolChoice
908
925
  });
909
926
  this.logger.debug("LLM response received", {
910
- toolCallCount: ((_a = completion.toolCalls) == null ? void 0 : _a.length) ?? 0,
911
- toolCalls: (_b = completion.toolCalls) == null ? void 0 : _b.map((tc) => ({
927
+ toolCallCount: ((_b = completion.toolCalls) == null ? void 0 : _b.length) ?? 0,
928
+ toolCalls: (_c = completion.toolCalls) == null ? void 0 : _c.map((tc) => ({
912
929
  toolName: tc.toolName,
913
930
  args: tc.input
914
931
  }))
915
932
  });
916
933
  let args;
917
- if ((_c = completion.toolCalls) == null ? void 0 : _c.length) {
934
+ if ((_d = completion.toolCalls) == null ? void 0 : _d.length) {
918
935
  const toolCall = completion.toolCalls[0];
919
936
  switch (toolCall.toolName) {
920
937
  case "finish_test": {
921
938
  args = toolCall.input;
922
939
  const verdict = args.verdict || "inconclusive";
923
940
  const reasoning = args.reasoning || "No reasoning provided";
924
- const criteria = args.criteria || {};
925
- const criteriaValues = Object.values(criteria);
926
- const metCriteria = cfg.criteria.filter(
941
+ const criteriaArgs = args.criteria || {};
942
+ const criteriaValues = Object.values(criteriaArgs);
943
+ const metCriteria = criteria.filter(
927
944
  (_, i) => criteriaValues[i] === "true"
928
945
  );
929
- const unmetCriteria = cfg.criteria.filter(
946
+ const unmetCriteria = criteria.filter(
930
947
  (_, i) => criteriaValues[i] !== "true"
931
948
  );
932
949
  const result = {
@@ -946,7 +963,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
946
963
  success: false,
947
964
  reasoning: `JudgeAgent: Unknown tool call: ${toolCall.toolName}`,
948
965
  metCriteria: [],
949
- unmetCriteria: cfg.criteria
966
+ unmetCriteria: criteria
950
967
  };
951
968
  }
952
969
  }
@@ -954,7 +971,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
954
971
  success: false,
955
972
  reasoning: `JudgeAgent: No tool call found in LLM output`,
956
973
  metCriteria: [],
957
- unmetCriteria: cfg.criteria
974
+ unmetCriteria: criteria
958
975
  };
959
976
  }
960
977
  getOpenTelemetryTracesDigest(threadId) {
@@ -964,7 +981,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
964
981
  }
965
982
  };
966
983
  var judgeAgent = (cfg) => {
967
- return new JudgeAgent(cfg);
984
+ return new JudgeAgent(cfg ?? {});
968
985
  };
969
986
 
970
987
  // src/agents/user-simulator-agent.ts
@@ -2408,13 +2425,15 @@ function convertModelMessagesToAguiMessages(modelMessages) {
2408
2425
  }
2409
2426
  case msg.role === "tool":
2410
2427
  msg.content.map((p, i) => {
2411
- var _a;
2428
+ if ("type" in p && p.type !== "tool-result") return;
2412
2429
  aguiMessages.push({
2413
2430
  trace_id: msg.traceId,
2414
2431
  id: `${id}-${i}`,
2415
2432
  role: "tool",
2416
2433
  toolCallId: p.toolCallId,
2417
- content: JSON.stringify((_a = p.output) == null ? void 0 : _a.value)
2434
+ content: JSON.stringify(
2435
+ p.output && "value" in p.output ? p.output.value : p.output
2436
+ )
2418
2437
  });
2419
2438
  });
2420
2439
  break;
@@ -2458,6 +2477,8 @@ var ScenarioExecution = class {
2458
2477
  currentTurnSpan;
2459
2478
  /** Timestamp when execution started (for total time calculation) */
2460
2479
  totalStartTime = 0;
2480
+ /** Accumulated results from inline judge checkpoints */
2481
+ checkpointResults = [];
2461
2482
  /** Event stream for monitoring scenario progress */
2462
2483
  eventSubject = new Subject2();
2463
2484
  /**
@@ -2535,6 +2556,7 @@ var ScenarioExecution = class {
2535
2556
  totalTime: this.totalTime,
2536
2557
  agentTime: totalAgentTime
2537
2558
  };
2559
+ return this._result;
2538
2560
  this.logger.debug(`[${this.config.id}] Result set`, {
2539
2561
  success: result.success,
2540
2562
  reasoning: result.reasoning,
@@ -2595,6 +2617,8 @@ var ScenarioExecution = class {
2595
2617
  const scriptStep = this.config.script[i];
2596
2618
  await this.executeScriptStep(scriptStep, i);
2597
2619
  if (this.result) {
2620
+ const cp = this.compiledCheckpoints;
2621
+ this.result.metCriteria = [...cp.metCriteria, ...this.result.metCriteria];
2598
2622
  this.emitRunFinished({
2599
2623
  scenarioRunId,
2600
2624
  status: this.result.success ? "SUCCESS" /* SUCCESS */ : "FAILED" /* FAILED */,
@@ -2603,7 +2627,22 @@ var ScenarioExecution = class {
2603
2627
  return this.result;
2604
2628
  }
2605
2629
  }
2606
- this.reachedMaxTurns(
2630
+ if (this.checkpointResults.length > 0) {
2631
+ const cp = this.compiledCheckpoints;
2632
+ const result2 = this.setResult({
2633
+ success: cp.unmetCriteria.length === 0,
2634
+ reasoning: "All inline criteria checkpoints passed",
2635
+ metCriteria: cp.metCriteria,
2636
+ unmetCriteria: cp.unmetCriteria
2637
+ });
2638
+ this.emitRunFinished({
2639
+ scenarioRunId,
2640
+ status: result2.success ? "SUCCESS" /* SUCCESS */ : "FAILED" /* FAILED */,
2641
+ result: result2
2642
+ });
2643
+ return result2;
2644
+ }
2645
+ const result = this.reachedMaxTurns(
2607
2646
  [
2608
2647
  "Reached end of script without conclusion, add one of the following to the end of the script:",
2609
2648
  "- `Scenario.proceed()` to let the simulation continue to play out",
@@ -2611,11 +2650,11 @@ var ScenarioExecution = class {
2611
2650
  "- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
2612
2651
  ].join("\n")
2613
2652
  );
2614
- this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
2615
- return this.result;
2653
+ this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */, result });
2654
+ return result;
2616
2655
  } catch (error) {
2617
2656
  const errorInfo = extractErrorInfo(error);
2618
- this.setResult({
2657
+ const result = this.setResult({
2619
2658
  success: false,
2620
2659
  reasoning: `Scenario failed with error: ${errorInfo.message}`,
2621
2660
  metCriteria: [],
@@ -2625,7 +2664,7 @@ var ScenarioExecution = class {
2625
2664
  this.emitRunFinished({
2626
2665
  scenarioRunId,
2627
2666
  status: "ERROR" /* ERROR */,
2628
- result: this.result
2667
+ result
2629
2668
  });
2630
2669
  throw error;
2631
2670
  } finally {
@@ -2729,7 +2768,7 @@ var ScenarioExecution = class {
2729
2768
  * @param judgmentRequest - Whether this is a judgment request (for judge agents)
2730
2769
  * @throws Error if the agent call fails
2731
2770
  */
2732
- async callAgent(idx, role, judgmentRequest = false) {
2771
+ async callAgent(idx, role, judgmentRequest) {
2733
2772
  var _a;
2734
2773
  const agent2 = this.agents[idx];
2735
2774
  const agentName = agent2.name ?? agent2.constructor.name;
@@ -2920,25 +2959,26 @@ var ScenarioExecution = class {
2920
2959
  *
2921
2960
  * This method is part of the ScenarioExecutionLike interface used by script steps.
2922
2961
  *
2923
- * @param content - Optional message to pass to the judge agent for additional context
2962
+ * @param options - Optional options with inline criteria to evaluate as a checkpoint.
2924
2963
  * @returns A promise that resolves with:
2925
2964
  * - ScenarioResult if the judge makes a final decision, or
2926
2965
  * - Null if the conversation should continue
2927
2966
  *
2928
2967
  * @example
2929
2968
  * ```typescript
2930
- * // Let judge evaluate current state
2969
+ * // Let judge evaluate with its configured criteria
2931
2970
  * const result = await execution.judge();
2932
- * if (result) {
2933
- * console.log(`Judge decided: ${result.success ? 'pass' : 'fail'}`);
2934
- * }
2935
2971
  *
2936
- * // Provide additional context to judge
2937
- * const result = await execution.judge("Please consider the user's satisfaction level");
2972
+ * // Evaluate inline criteria as a checkpoint
2973
+ * const result = await execution.judge({ criteria: ["Agent responded helpfully"] });
2938
2974
  * ```
2939
2975
  */
2940
- async judge(content) {
2941
- return await this.scriptCallAgent("Judge" /* JUDGE */, content, true);
2976
+ async judge(options) {
2977
+ return await this.scriptCallAgent(
2978
+ "Judge" /* JUDGE */,
2979
+ void 0,
2980
+ { criteria: options == null ? void 0 : options.criteria }
2981
+ );
2942
2982
  }
2943
2983
  /**
2944
2984
  * Lets the scenario proceed automatically for a specified number of turns.
@@ -3023,13 +3063,12 @@ var ScenarioExecution = class {
3023
3063
  * ```
3024
3064
  */
3025
3065
  async succeed(reasoning) {
3026
- this.setResult({
3066
+ return this.setResult({
3027
3067
  success: true,
3028
3068
  reasoning: reasoning || "Scenario marked as successful with Scenario.succeed()",
3029
3069
  metCriteria: [],
3030
3070
  unmetCriteria: []
3031
3071
  });
3032
- return this.result;
3033
3072
  }
3034
3073
  /**
3035
3074
  * Immediately ends the scenario with a failure verdict.
@@ -3055,13 +3094,12 @@ var ScenarioExecution = class {
3055
3094
  * ```
3056
3095
  */
3057
3096
  async fail(reasoning) {
3058
- this.setResult({
3097
+ return this.setResult({
3059
3098
  success: false,
3060
3099
  reasoning: reasoning || "Scenario marked as failed with Scenario.fail()",
3061
3100
  metCriteria: [],
3062
3101
  unmetCriteria: []
3063
3102
  });
3064
- return this.result;
3065
3103
  }
3066
3104
  /**
3067
3105
  * Adds execution time for a specific agent to the performance tracking.
@@ -3105,15 +3143,14 @@ var ScenarioExecution = class {
3105
3143
  * decision, or null if the conversation should continue
3106
3144
  * @throws Error if no agent is found for the specified role
3107
3145
  */
3108
- async scriptCallAgent(role, content, judgmentRequest = false) {
3146
+ async scriptCallAgent(role, content, judgmentRequest) {
3109
3147
  this.logger.debug(`[${this.config.id}] scriptCallAgent`, {
3110
3148
  role,
3111
3149
  hasContent: content !== void 0,
3112
- judgmentRequest
3150
+ judgmentRequest: judgmentRequest != null,
3151
+ hasInlineCriteria: (judgmentRequest == null ? void 0 : judgmentRequest.criteria) != null
3113
3152
  });
3114
3153
  this.consumeUntilRole(role);
3115
- let index = -1;
3116
- let agent2 = null;
3117
3154
  let nextAgent = this.getNextAgentForRole(role);
3118
3155
  if (!nextAgent) {
3119
3156
  this.newTurn();
@@ -3143,8 +3180,8 @@ var ScenarioExecution = class {
3143
3180
  `Cannot generate a message for role \`${role}\` because no agent with this role was found, please add ${roleClass} to the scenario \`agents\` list`
3144
3181
  );
3145
3182
  }
3146
- index = nextAgent.index;
3147
- agent2 = nextAgent.agent;
3183
+ const index = nextAgent.index;
3184
+ const agent2 = nextAgent.agent;
3148
3185
  this.removePendingAgent(agent2);
3149
3186
  if (content) {
3150
3187
  const message2 = typeof content === "string" ? {
@@ -3156,6 +3193,25 @@ var ScenarioExecution = class {
3156
3193
  return null;
3157
3194
  }
3158
3195
  await this.callAgent(index, role, judgmentRequest);
3196
+ if (this.result && (judgmentRequest == null ? void 0 : judgmentRequest.criteria) != null) {
3197
+ this.checkpointResults.push({
3198
+ metCriteria: this.result.metCriteria,
3199
+ unmetCriteria: this.result.unmetCriteria
3200
+ });
3201
+ if (this.result.success) {
3202
+ this._result = void 0;
3203
+ return null;
3204
+ } else {
3205
+ const cp = this.compiledCheckpoints;
3206
+ this.result.metCriteria = cp.metCriteria;
3207
+ this.result.unmetCriteria = cp.unmetCriteria;
3208
+ return this.result;
3209
+ }
3210
+ }
3211
+ if (this.result) {
3212
+ const cp = this.compiledCheckpoints;
3213
+ this.result.metCriteria = [...cp.metCriteria, ...this.result.metCriteria];
3214
+ }
3159
3215
  return this.result ?? null;
3160
3216
  }
3161
3217
  /**
@@ -3188,11 +3244,22 @@ var ScenarioExecution = class {
3188
3244
  this.totalStartTime = Date.now();
3189
3245
  this.pendingMessages.clear();
3190
3246
  this._result = void 0;
3247
+ this.checkpointResults = [];
3191
3248
  this.logger.debug(`[${this.config.id}] Reset complete`, {
3192
3249
  threadId: this.state.threadId,
3193
3250
  agentCount: this.agents.length
3194
3251
  });
3195
3252
  }
3253
+ /** Compiles all accumulated checkpoint results into aggregated met/unmet criteria. */
3254
+ get compiledCheckpoints() {
3255
+ const metCriteria = [];
3256
+ const unmetCriteria = [];
3257
+ for (const cp of this.checkpointResults) {
3258
+ metCriteria.push(...cp.metCriteria);
3259
+ unmetCriteria.push(...cp.unmetCriteria);
3260
+ }
3261
+ return { metCriteria, unmetCriteria };
3262
+ }
3196
3263
  nextAgentForRole(role) {
3197
3264
  for (const agent2 of this.agents) {
3198
3265
  if (agent2.role === role && this.pendingAgentsOnTurn.has(agent2) && this.pendingRolesOnTurn.includes(role)) {
@@ -3289,7 +3356,7 @@ var ScenarioExecution = class {
3289
3356
  */
3290
3357
  reachedMaxTurns(errorMessage) {
3291
3358
  var _a;
3292
- this.setResult({
3359
+ return this.setResult({
3293
3360
  success: false,
3294
3361
  reasoning: errorMessage || `Reached maximum turns (${this.config.maxTurns || 10}) without conclusion`,
3295
3362
  metCriteria: [],
@@ -3797,9 +3864,9 @@ var message = (message2) => {
3797
3864
  var agent = (content) => {
3798
3865
  return (_state, executor) => executor.agent(content);
3799
3866
  };
3800
- var judge = (content) => {
3867
+ var judge = (options) => {
3801
3868
  return async (_state, executor) => {
3802
- await executor.judge(content);
3869
+ await executor.judge(options);
3803
3870
  };
3804
3871
  };
3805
3872
  var user = (content) => {
@@ -3911,7 +3978,6 @@ function formatPart(part) {
3911
3978
  case "reasoning":
3912
3979
  return `(reasoning): ${part.text}`;
3913
3980
  default:
3914
- part;
3915
3981
  return `Unknown content: ${JSON.stringify(part)}`;
3916
3982
  }
3917
3983
  }
@@ -104,7 +104,7 @@ var DEFAULT_TEMPERATURE = 0;
104
104
  var modelSchema = import_v42.z.object({
105
105
  model: import_v42.z.custom((val) => Boolean(val), {
106
106
  message: "A model is required. Configure it in scenario.config.js defaultModel or pass directly to the agent."
107
- }).describe("The OpenAI Language Model to use for generating responses."),
107
+ }).describe("Language model that is used by the AI SDK Core functions."),
108
108
  temperature: import_v42.z.number().min(0).max(1).optional().describe("The temperature for the language model.").default(DEFAULT_TEMPERATURE),
109
109
  maxTokens: import_v42.z.number().optional().describe("The maximum number of tokens to generate.")
110
110
  });
@@ -87,7 +87,7 @@ var DEFAULT_TEMPERATURE = 0;
87
87
  var modelSchema = z2.object({
88
88
  model: z2.custom((val) => Boolean(val), {
89
89
  message: "A model is required. Configure it in scenario.config.js defaultModel or pass directly to the agent."
90
- }).describe("The OpenAI Language Model to use for generating responses."),
90
+ }).describe("Language model that is used by the AI SDK Core functions."),
91
91
  temperature: z2.number().min(0).max(1).optional().describe("The temperature for the language model.").default(DEFAULT_TEMPERATURE),
92
92
  maxTokens: z2.number().optional().describe("The maximum number of tokens to generate.")
93
93
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@langwatch/scenario",
3
- "version": "0.4.0",
3
+ "version": "0.4.2",
4
4
  "description": "A TypeScript library for testing AI agents using scenarios",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
@@ -29,9 +29,9 @@
29
29
  },
30
30
  "dependencies": {
31
31
  "@ag-ui/core": "^0.0.28",
32
- "@ai-sdk/openai": "^2.0.74",
32
+ "@ai-sdk/openai": "^3.0.26",
33
33
  "@openai/agents": "^0.3.3",
34
- "ai": "5.0.104",
34
+ "ai": "^6.0.0",
35
35
  "chalk": "^5.6.2",
36
36
  "langwatch": "0.9.0",
37
37
  "open": "11.0.0",
@@ -88,7 +88,7 @@
88
88
  }
89
89
  },
90
90
  "peerDependencies": {
91
- "ai": ">=5.0.0",
91
+ "ai": ">=6.0.0",
92
92
  "vitest": ">=3.2.4"
93
93
  },
94
94
  "scripts": {