@agentv/core 3.9.1 → 3.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -1196,6 +1196,8 @@ interface EvaluatorResult {
1196
1196
  readonly assertions: readonly AssertionEntry[];
1197
1197
  readonly rawRequest?: JsonObject;
1198
1198
  readonly input?: JsonObject;
1199
+ /** Target name used for grading (e.g., the LLM provider name). */
1200
+ readonly target?: string;
1199
1201
  readonly scores?: readonly EvaluatorResult[];
1200
1202
  /** Optional structured details from code graders (e.g., TP/TN/FP/FN counts). */
1201
1203
  readonly details?: JsonObject;
@@ -2057,6 +2059,8 @@ interface EvaluationScore {
2057
2059
  readonly details?: JsonObject;
2058
2060
  /** Token usage from LLM calls made by this evaluator (optional). */
2059
2061
  readonly tokenUsage?: TokenUsage;
2062
+ /** Target name used for grading (e.g., the LLM provider). */
2063
+ readonly graderTarget?: string;
2060
2064
  }
2061
2065
  interface ChildEvaluatorResult {
2062
2066
  readonly name: string;
@@ -2660,6 +2664,8 @@ interface RunEvalCaseOptions {
2660
2664
  readonly repoManager?: RepoManager;
2661
2665
  /** Directory containing the eval YAML file. Used as default cwd for workspace scripts. */
2662
2666
  readonly evalDir?: string;
2667
+ /** Include verbose request details in results (e.g. agent input text) */
2668
+ readonly verbose?: boolean;
2663
2669
  }
2664
2670
  interface ProgressEvent {
2665
2671
  readonly workerId: number;
package/dist/index.d.ts CHANGED
@@ -1196,6 +1196,8 @@ interface EvaluatorResult {
1196
1196
  readonly assertions: readonly AssertionEntry[];
1197
1197
  readonly rawRequest?: JsonObject;
1198
1198
  readonly input?: JsonObject;
1199
+ /** Target name used for grading (e.g., the LLM provider name). */
1200
+ readonly target?: string;
1199
1201
  readonly scores?: readonly EvaluatorResult[];
1200
1202
  /** Optional structured details from code graders (e.g., TP/TN/FP/FN counts). */
1201
1203
  readonly details?: JsonObject;
@@ -2057,6 +2059,8 @@ interface EvaluationScore {
2057
2059
  readonly details?: JsonObject;
2058
2060
  /** Token usage from LLM calls made by this evaluator (optional). */
2059
2061
  readonly tokenUsage?: TokenUsage;
2062
+ /** Target name used for grading (e.g., the LLM provider). */
2063
+ readonly graderTarget?: string;
2060
2064
  }
2061
2065
  interface ChildEvaluatorResult {
2062
2066
  readonly name: string;
@@ -2660,6 +2664,8 @@ interface RunEvalCaseOptions {
2660
2664
  readonly repoManager?: RepoManager;
2661
2665
  /** Directory containing the eval YAML file. Used as default cwd for workspace scripts. */
2662
2666
  readonly evalDir?: string;
2667
+ /** Include verbose request details in results (e.g. agent input text) */
2668
+ readonly verbose?: boolean;
2663
2669
  }
2664
2670
  interface ProgressEvent {
2665
2671
  readonly workerId: number;
package/dist/index.js CHANGED
@@ -19,7 +19,7 @@ import {
19
19
  readTextFile,
20
20
  resolveFileReference,
21
21
  resolveTargetDefinition
22
- } from "./chunk-PC5TLJF6.js";
22
+ } from "./chunk-K7JCJIXA.js";
23
23
  import {
24
24
  AgentvProvider
25
25
  } from "./chunk-W5YDZWT4.js";
@@ -6112,11 +6112,7 @@ var CopilotCliProvider = class {
6112
6112
  }
6113
6113
  }
6114
6114
  if (sessionUpdate === "usage_update") {
6115
- if (tokenUsage) {
6116
- tokenUsage = { input: update.used, output: tokenUsage.output };
6117
- } else {
6118
- tokenUsage = { input: update.used, output: 0 };
6119
- }
6115
+ tokenUsage = { input: update.used, output: 0 };
6120
6116
  if (update.cost && update.cost.currency === "USD") {
6121
6117
  costUsd = (costUsd ?? 0) + update.cost.amount;
6122
6118
  }
@@ -6150,21 +6146,32 @@ var CopilotCliProvider = class {
6150
6146
  sessionId: session.sessionId,
6151
6147
  prompt: promptMessages
6152
6148
  });
6149
+ let promptResponse;
6153
6150
  if (request.signal) {
6154
6151
  const abortHandler = () => {
6155
6152
  killProcess(agentProcess);
6156
6153
  };
6157
6154
  request.signal.addEventListener("abort", abortHandler, { once: true });
6158
6155
  try {
6159
- await this.raceWithTimeout(sendPromise, agentProcess);
6156
+ promptResponse = await this.raceWithTimeout(sendPromise, agentProcess);
6160
6157
  } finally {
6161
6158
  request.signal.removeEventListener("abort", abortHandler);
6162
6159
  }
6163
6160
  } else {
6164
- await this.raceWithTimeout(sendPromise, agentProcess);
6161
+ promptResponse = await this.raceWithTimeout(sendPromise, agentProcess);
6165
6162
  }
6166
6163
  const endTime = (/* @__PURE__ */ new Date()).toISOString();
6167
6164
  const durationMs = Date.now() - startMs;
6165
+ const responseUsage = promptResponse.usage;
6166
+ if (responseUsage && responseUsage.totalTokens > 0) {
6167
+ tokenUsage = {
6168
+ input: responseUsage.inputTokens,
6169
+ output: responseUsage.outputTokens,
6170
+ ...responseUsage.thoughtTokens != null ? { reasoning: responseUsage.thoughtTokens } : {},
6171
+ ...responseUsage.cachedReadTokens != null ? { cached: responseUsage.cachedReadTokens } : {}
6172
+ };
6173
+ request.streamCallbacks?.onLlmCallEnd?.("copilot", tokenUsage);
6174
+ }
6168
6175
  const rejectedCalls = completedToolCalls.filter((tc) => {
6169
6176
  const out = tc.output;
6170
6177
  return out && (out.code === "rejected" || out.code === "denied");
@@ -6222,8 +6229,7 @@ var CopilotCliProvider = class {
6222
6229
  async raceWithTimeout(sendPromise, agentProcess) {
6223
6230
  const timeoutMs = this.config.timeoutMs;
6224
6231
  if (!timeoutMs) {
6225
- await sendPromise;
6226
- return;
6232
+ return sendPromise;
6227
6233
  }
6228
6234
  let timer;
6229
6235
  const timeoutPromise = new Promise((_, reject) => {
@@ -6234,7 +6240,7 @@ var CopilotCliProvider = class {
6234
6240
  timer.unref?.();
6235
6241
  });
6236
6242
  try {
6237
- await Promise.race([sendPromise, timeoutPromise]);
6243
+ return await Promise.race([sendPromise, timeoutPromise]);
6238
6244
  } finally {
6239
6245
  if (timer) clearTimeout(timer);
6240
6246
  }
@@ -9287,7 +9293,7 @@ async function readTargetDefinitions(filePath) {
9287
9293
  throw new Error(`targets.yaml not found at ${absolutePath}`);
9288
9294
  }
9289
9295
  const raw = await readFile9(absolutePath, "utf8");
9290
- const parsed = parse4(raw);
9296
+ const parsed = interpolateEnv(parse4(raw), process.env);
9291
9297
  if (!isRecord(parsed)) {
9292
9298
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
9293
9299
  }
@@ -10172,8 +10178,7 @@ ${context.fileChanges}`;
10172
10178
  }
10173
10179
  const evaluatorRawRequest = {
10174
10180
  userPrompt,
10175
- systemPrompt,
10176
- target: graderProvider.targetName
10181
+ systemPrompt
10177
10182
  };
10178
10183
  try {
10179
10184
  const { data, tokenUsage } = await this.runWithRetry({
@@ -10191,6 +10196,7 @@ ${context.fileChanges}`;
10191
10196
  assertions,
10192
10197
  expectedAspectCount: Math.max(assertions.length, 1),
10193
10198
  evaluatorRawRequest,
10199
+ graderTarget: graderProvider.targetName,
10194
10200
  tokenUsage
10195
10201
  };
10196
10202
  } catch (e) {
@@ -10202,7 +10208,8 @@ ${context.fileChanges}`;
10202
10208
  verdict: "skip",
10203
10209
  assertions: [{ text: `Grader parse failure after 3 attempts: ${message}`, passed: false }],
10204
10210
  expectedAspectCount: 1,
10205
- evaluatorRawRequest
10211
+ evaluatorRawRequest,
10212
+ graderTarget: graderProvider.targetName
10206
10213
  };
10207
10214
  }
10208
10215
  }
@@ -10220,8 +10227,7 @@ ${context.fileChanges}`;
10220
10227
  const systemPrompt = buildRubricOutputSchema();
10221
10228
  const evaluatorRawRequest = {
10222
10229
  userPrompt: prompt,
10223
- systemPrompt,
10224
- target: graderProvider.targetName
10230
+ systemPrompt
10225
10231
  };
10226
10232
  try {
10227
10233
  const { data, tokenUsage } = await this.runWithRetry({
@@ -10238,6 +10244,7 @@ ${context.fileChanges}`;
10238
10244
  assertions,
10239
10245
  expectedAspectCount: rubrics.length,
10240
10246
  evaluatorRawRequest,
10247
+ graderTarget: graderProvider.targetName,
10241
10248
  tokenUsage
10242
10249
  };
10243
10250
  } catch (e) {
@@ -10249,7 +10256,8 @@ ${context.fileChanges}`;
10249
10256
  verdict: "skip",
10250
10257
  assertions: [{ text: `Grader parse failure after 3 attempts: ${message}`, passed: false }],
10251
10258
  expectedAspectCount: rubrics.length,
10252
- evaluatorRawRequest
10259
+ evaluatorRawRequest,
10260
+ graderTarget: graderProvider.targetName
10253
10261
  };
10254
10262
  }
10255
10263
  }
@@ -10262,8 +10270,7 @@ ${context.fileChanges}`;
10262
10270
  const systemPrompt = buildScoreRangeOutputSchema();
10263
10271
  const evaluatorRawRequest = {
10264
10272
  userPrompt: prompt,
10265
- systemPrompt,
10266
- target: graderProvider.targetName
10273
+ systemPrompt
10267
10274
  };
10268
10275
  try {
10269
10276
  const { data, tokenUsage } = await this.runWithRetry({
@@ -10280,6 +10287,7 @@ ${context.fileChanges}`;
10280
10287
  assertions,
10281
10288
  expectedAspectCount: rubrics.length,
10282
10289
  evaluatorRawRequest,
10290
+ graderTarget: graderProvider.targetName,
10283
10291
  details,
10284
10292
  tokenUsage
10285
10293
  };
@@ -10292,7 +10300,8 @@ ${context.fileChanges}`;
10292
10300
  verdict: "skip",
10293
10301
  assertions: [{ text: `Grader parse failure after 3 attempts: ${message}`, passed: false }],
10294
10302
  expectedAspectCount: rubrics.length,
10295
- evaluatorRawRequest
10303
+ evaluatorRawRequest,
10304
+ graderTarget: graderProvider.targetName
10296
10305
  };
10297
10306
  }
10298
10307
  }
@@ -10324,7 +10333,6 @@ ${context.fileChanges}`;
10324
10333
  mode: "built-in",
10325
10334
  systemPrompt,
10326
10335
  userPrompt,
10327
- target: graderProvider.targetName,
10328
10336
  maxSteps: this.maxSteps
10329
10337
  };
10330
10338
  try {
@@ -10342,7 +10350,13 @@ ${context.fileChanges}`;
10342
10350
  steps: steps.length,
10343
10351
  tool_calls: toolCallCount
10344
10352
  };
10345
- return this.parseAgentResult(text, rubrics, evaluatorRawRequest, details);
10353
+ return this.parseAgentResult(
10354
+ text,
10355
+ rubrics,
10356
+ evaluatorRawRequest,
10357
+ details,
10358
+ graderProvider.targetName
10359
+ );
10346
10360
  } catch (error) {
10347
10361
  const message = error instanceof Error ? error.message : String(error);
10348
10362
  return {
@@ -10351,6 +10365,7 @@ ${context.fileChanges}`;
10351
10365
  assertions: [{ text: `llm-grader built-in evaluation failed: ${message}`, passed: false }],
10352
10366
  expectedAspectCount: 1,
10353
10367
  evaluatorRawRequest,
10368
+ graderTarget: graderProvider.targetName,
10354
10369
  details: { mode: "built-in", error: message }
10355
10370
  };
10356
10371
  }
@@ -10403,6 +10418,7 @@ ${context.fileChanges}`;
10403
10418
  ],
10404
10419
  expectedAspectCount: 1,
10405
10420
  evaluatorRawRequest,
10421
+ graderTarget: provider.targetName,
10406
10422
  details: { mode: modeLabel, grader_target: provider.targetName }
10407
10423
  };
10408
10424
  }
@@ -10412,7 +10428,13 @@ ${context.fileChanges}`;
10412
10428
  mode: modeLabel,
10413
10429
  grader_target: provider.targetName
10414
10430
  };
10415
- return this.parseAgentResult(assistantContent, rubrics, evaluatorRawRequest, details);
10431
+ return this.parseAgentResult(
10432
+ assistantContent,
10433
+ rubrics,
10434
+ evaluatorRawRequest,
10435
+ details,
10436
+ provider.targetName
10437
+ );
10416
10438
  } catch (error) {
10417
10439
  const message = error instanceof Error ? error.message : String(error);
10418
10440
  return {
@@ -10423,6 +10445,7 @@ ${context.fileChanges}`;
10423
10445
  ],
10424
10446
  expectedAspectCount: 1,
10425
10447
  evaluatorRawRequest,
10448
+ graderTarget: provider.targetName,
10426
10449
  details: {
10427
10450
  mode: modeLabel,
10428
10451
  grader_target: provider.targetName,
@@ -10567,7 +10590,7 @@ ${outputSchema}`;
10567
10590
  * Parse the agent's response text into an EvaluationScore.
10568
10591
  * Supports both freeform and rubric modes.
10569
10592
  */
10570
- parseAgentResult(text, rubrics, evaluatorRawRequest, details) {
10593
+ parseAgentResult(text, rubrics, evaluatorRawRequest, details, graderTarget) {
10571
10594
  try {
10572
10595
  const parsed = parseJsonFromText(text);
10573
10596
  if (rubrics && rubrics.length > 0) {
@@ -10579,6 +10602,7 @@ ${outputSchema}`;
10579
10602
  assertions: assertions2,
10580
10603
  expectedAspectCount: rubrics.length,
10581
10604
  evaluatorRawRequest,
10605
+ graderTarget,
10582
10606
  details
10583
10607
  };
10584
10608
  }
@@ -10591,6 +10615,7 @@ ${outputSchema}`;
10591
10615
  assertions,
10592
10616
  expectedAspectCount: Math.max(assertions.length, 1),
10593
10617
  evaluatorRawRequest,
10618
+ graderTarget,
10594
10619
  details
10595
10620
  };
10596
10621
  } catch {
@@ -10605,6 +10630,7 @@ ${outputSchema}`;
10605
10630
  ],
10606
10631
  expectedAspectCount: 1,
10607
10632
  evaluatorRawRequest,
10633
+ graderTarget,
10608
10634
  details
10609
10635
  };
10610
10636
  }
@@ -14916,7 +14942,8 @@ async function runEvaluation(options) {
14916
14942
  streamCallbacks,
14917
14943
  typeRegistry,
14918
14944
  repoManager,
14919
- evalDir
14945
+ evalDir,
14946
+ verbose
14920
14947
  };
14921
14948
  let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
14922
14949
  if (totalBudgetUsd !== void 0) {
@@ -14996,7 +15023,8 @@ async function runEvaluation(options) {
14996
15023
  promptInputs,
14997
15024
  primaryProvider,
14998
15025
  "agent",
14999
- "provider_error"
15026
+ "provider_error",
15027
+ verbose
15000
15028
  );
15001
15029
  results.push(errorResult);
15002
15030
  if (onResult) {
@@ -15069,6 +15097,7 @@ async function runBatchEvaluation(options) {
15069
15097
  nowFn,
15070
15098
  onProgress,
15071
15099
  onResult,
15100
+ verbose,
15072
15101
  resolveGraderProvider,
15073
15102
  agentTimeoutMs,
15074
15103
  targetResolver,
@@ -15156,7 +15185,8 @@ async function runBatchEvaluation(options) {
15156
15185
  startTime,
15157
15186
  endTime,
15158
15187
  targetResolver,
15159
- availableTargets
15188
+ availableTargets,
15189
+ verbose
15160
15190
  });
15161
15191
  if (providerError) {
15162
15192
  result = {
@@ -15177,7 +15207,8 @@ async function runBatchEvaluation(options) {
15177
15207
  promptInputs,
15178
15208
  provider,
15179
15209
  "evaluator",
15180
- "evaluator_error"
15210
+ "evaluator_error",
15211
+ verbose
15181
15212
  );
15182
15213
  results.push(errorResult);
15183
15214
  if (onResult) {
@@ -15240,7 +15271,8 @@ async function runEvalCase(options) {
15240
15271
  suiteWorkspaceFile,
15241
15272
  typeRegistry: providedTypeRegistry,
15242
15273
  repoManager,
15243
- evalDir
15274
+ evalDir,
15275
+ verbose
15244
15276
  } = options;
15245
15277
  const setupDebug = process.env.AGENTV_SETUP_DEBUG === "1";
15246
15278
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
@@ -15277,7 +15309,8 @@ async function runEvalCase(options) {
15277
15309
  promptInputs,
15278
15310
  provider,
15279
15311
  "setup",
15280
- "template_error"
15312
+ "template_error",
15313
+ verbose
15281
15314
  );
15282
15315
  }
15283
15316
  if (caseWorkspaceFile && workspacePath) {
@@ -15306,7 +15339,8 @@ async function runEvalCase(options) {
15306
15339
  promptInputs,
15307
15340
  provider,
15308
15341
  "repo_setup",
15309
- "local_path_not_found"
15342
+ "local_path_not_found",
15343
+ verbose
15310
15344
  );
15311
15345
  }
15312
15346
  }
@@ -15332,7 +15366,8 @@ async function runEvalCase(options) {
15332
15366
  promptInputs,
15333
15367
  provider,
15334
15368
  "repo_setup",
15335
- "clone_error"
15369
+ "clone_error",
15370
+ verbose
15336
15371
  );
15337
15372
  }
15338
15373
  }
@@ -15358,7 +15393,8 @@ async function runEvalCase(options) {
15358
15393
  promptInputs,
15359
15394
  provider,
15360
15395
  "setup",
15361
- "file_copy_error"
15396
+ "file_copy_error",
15397
+ verbose
15362
15398
  );
15363
15399
  }
15364
15400
  }
@@ -15403,7 +15439,8 @@ async function runEvalCase(options) {
15403
15439
  promptInputs,
15404
15440
  provider,
15405
15441
  "setup",
15406
- "script_error"
15442
+ "script_error",
15443
+ verbose
15407
15444
  );
15408
15445
  }
15409
15446
  }
@@ -15434,7 +15471,8 @@ async function runEvalCase(options) {
15434
15471
  promptInputs,
15435
15472
  provider,
15436
15473
  "setup",
15437
- "script_error"
15474
+ "script_error",
15475
+ verbose
15438
15476
  );
15439
15477
  }
15440
15478
  }
@@ -15478,7 +15516,8 @@ async function runEvalCase(options) {
15478
15516
  promptInputs,
15479
15517
  provider,
15480
15518
  "agent",
15481
- "provider_error"
15519
+ "provider_error",
15520
+ verbose
15482
15521
  );
15483
15522
  if (workspacePath) {
15484
15523
  if (forceCleanup) {
@@ -15499,7 +15538,8 @@ async function runEvalCase(options) {
15499
15538
  promptInputs,
15500
15539
  provider,
15501
15540
  "agent",
15502
- "provider_error"
15541
+ "provider_error",
15542
+ verbose
15503
15543
  );
15504
15544
  if (workspacePath) {
15505
15545
  if (forceCleanup) {
@@ -15594,7 +15634,8 @@ async function runEvalCase(options) {
15594
15634
  targetResolver,
15595
15635
  availableTargets,
15596
15636
  fileChanges,
15597
- workspacePath
15637
+ workspacePath,
15638
+ verbose
15598
15639
  });
15599
15640
  const totalDurationMs = Date.now() - caseStartMs;
15600
15641
  const graderTokens = aggregateEvaluatorTokenUsage(result.scores);
@@ -15649,7 +15690,8 @@ async function runEvalCase(options) {
15649
15690
  promptInputs,
15650
15691
  provider,
15651
15692
  "evaluator",
15652
- "evaluator_error"
15693
+ "evaluator_error",
15694
+ verbose
15653
15695
  );
15654
15696
  if (workspacePath && !isSharedWorkspace) {
15655
15697
  if (forceCleanup || (retainOnFailure ?? "keep") === "cleanup") {
@@ -15791,7 +15833,7 @@ async function evaluateCandidate(options) {
15791
15833
  let lmRequest;
15792
15834
  if (isAgentProvider(provider)) {
15793
15835
  agentRequest = {
15794
- question: promptInputs.question
15836
+ ...options.verbose ? { input: promptInputs.question } : {}
15795
15837
  };
15796
15838
  } else {
15797
15839
  if (promptInputs.chatPrompt) {
@@ -15805,8 +15847,9 @@ async function evaluateCandidate(options) {
15805
15847
  }
15806
15848
  }
15807
15849
  const evaluatorRequest = scores ? void 0 : score.evaluatorRawRequest;
15808
- const requests = agentRequest || lmRequest || evaluatorRequest ? {
15809
- ...agentRequest ? { agent: agentRequest } : {},
15850
+ const effectiveAgentRequest = agentRequest && Object.keys(agentRequest).length > 0 ? agentRequest : void 0;
15851
+ const requests = effectiveAgentRequest || lmRequest || evaluatorRequest ? {
15852
+ ...effectiveAgentRequest ? { agent: effectiveAgentRequest } : {},
15810
15853
  ...lmRequest ? { lm: lmRequest } : {},
15811
15854
  ...evaluatorRequest ? { evaluator: evaluatorRequest } : {}
15812
15855
  } : void 0;
@@ -15826,9 +15869,9 @@ async function evaluateCandidate(options) {
15826
15869
  endTime,
15827
15870
  requests,
15828
15871
  input,
15872
+ output: output ?? [{ role: "assistant", content: candidate }],
15829
15873
  scores,
15830
15874
  trace,
15831
- output: output ?? [{ role: "assistant", content: candidate }],
15832
15875
  fileChanges,
15833
15876
  executionStatus: classifyQualityStatus(score.score)
15834
15877
  };
@@ -15994,6 +16037,7 @@ async function runEvaluatorList(options) {
15994
16037
  verdict: score2.verdict,
15995
16038
  assertions: score2.assertions,
15996
16039
  input: score2.evaluatorRawRequest,
16040
+ target: score2.graderTarget,
15997
16041
  details: score2.details,
15998
16042
  scores: mapChildResults(score2.scores),
15999
16043
  tokenUsage: score2.tokenUsage,
@@ -16133,13 +16177,13 @@ async function invokeProvider(provider, options) {
16133
16177
  }
16134
16178
  }
16135
16179
  }
16136
- function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider, failureStage, failureReasonCode) {
16180
+ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider, failureStage, failureReasonCode, verbose) {
16137
16181
  const message = error instanceof Error ? error.message : String(error);
16138
16182
  let agentRequest;
16139
16183
  let lmRequest;
16140
16184
  if (isAgentProvider(provider)) {
16141
16185
  agentRequest = {
16142
- question: promptInputs.question,
16186
+ ...verbose ? { input: promptInputs.question } : {},
16143
16187
  error: message
16144
16188
  };
16145
16189
  } else {
@@ -16167,10 +16211,10 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
16167
16211
  conversationId: evalCase.conversation_id,
16168
16212
  score: 0,
16169
16213
  assertions: [{ text: `Error: ${message}`, passed: false }],
16170
- output: [{ role: "assistant", content: `Error occurred: ${message}` }],
16171
16214
  target: targetName,
16172
16215
  requests,
16173
16216
  input,
16217
+ output: [{ role: "assistant", content: `Error occurred: ${message}` }],
16174
16218
  error: message,
16175
16219
  executionStatus: "execution_error",
16176
16220
  failureStage,