@agentv/core 3.9.1 → 3.10.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -8068,11 +8068,7 @@ var CopilotCliProvider = class {
8068
8068
  }
8069
8069
  }
8070
8070
  if (sessionUpdate === "usage_update") {
8071
- if (tokenUsage) {
8072
- tokenUsage = { input: update.used, output: tokenUsage.output };
8073
- } else {
8074
- tokenUsage = { input: update.used, output: 0 };
8075
- }
8071
+ tokenUsage = { input: update.used, output: 0 };
8076
8072
  if (update.cost && update.cost.currency === "USD") {
8077
8073
  costUsd = (costUsd ?? 0) + update.cost.amount;
8078
8074
  }
@@ -8106,21 +8102,32 @@ var CopilotCliProvider = class {
8106
8102
  sessionId: session.sessionId,
8107
8103
  prompt: promptMessages
8108
8104
  });
8105
+ let promptResponse;
8109
8106
  if (request.signal) {
8110
8107
  const abortHandler = () => {
8111
8108
  killProcess(agentProcess);
8112
8109
  };
8113
8110
  request.signal.addEventListener("abort", abortHandler, { once: true });
8114
8111
  try {
8115
- await this.raceWithTimeout(sendPromise, agentProcess);
8112
+ promptResponse = await this.raceWithTimeout(sendPromise, agentProcess);
8116
8113
  } finally {
8117
8114
  request.signal.removeEventListener("abort", abortHandler);
8118
8115
  }
8119
8116
  } else {
8120
- await this.raceWithTimeout(sendPromise, agentProcess);
8117
+ promptResponse = await this.raceWithTimeout(sendPromise, agentProcess);
8121
8118
  }
8122
8119
  const endTime = (/* @__PURE__ */ new Date()).toISOString();
8123
8120
  const durationMs = Date.now() - startMs;
8121
+ const responseUsage = promptResponse.usage;
8122
+ if (responseUsage && responseUsage.totalTokens > 0) {
8123
+ tokenUsage = {
8124
+ input: responseUsage.inputTokens,
8125
+ output: responseUsage.outputTokens,
8126
+ ...responseUsage.thoughtTokens != null ? { reasoning: responseUsage.thoughtTokens } : {},
8127
+ ...responseUsage.cachedReadTokens != null ? { cached: responseUsage.cachedReadTokens } : {}
8128
+ };
8129
+ request.streamCallbacks?.onLlmCallEnd?.("copilot", tokenUsage);
8130
+ }
8124
8131
  const rejectedCalls = completedToolCalls.filter((tc) => {
8125
8132
  const out = tc.output;
8126
8133
  return out && (out.code === "rejected" || out.code === "denied");
@@ -8178,8 +8185,7 @@ var CopilotCliProvider = class {
8178
8185
  async raceWithTimeout(sendPromise, agentProcess) {
8179
8186
  const timeoutMs = this.config.timeoutMs;
8180
8187
  if (!timeoutMs) {
8181
- await sendPromise;
8182
- return;
8188
+ return sendPromise;
8183
8189
  }
8184
8190
  let timer;
8185
8191
  const timeoutPromise = new Promise((_, reject) => {
@@ -8190,7 +8196,7 @@ var CopilotCliProvider = class {
8190
8196
  timer.unref?.();
8191
8197
  });
8192
8198
  try {
8193
- await Promise.race([sendPromise, timeoutPromise]);
8199
+ return await Promise.race([sendPromise, timeoutPromise]);
8194
8200
  } finally {
8195
8201
  if (timer) clearTimeout(timer);
8196
8202
  }
@@ -12440,7 +12446,7 @@ async function readTargetDefinitions(filePath) {
12440
12446
  throw new Error(`targets.yaml not found at ${absolutePath}`);
12441
12447
  }
12442
12448
  const raw = await (0, import_promises24.readFile)(absolutePath, "utf8");
12443
- const parsed = (0, import_yaml6.parse)(raw);
12449
+ const parsed = interpolateEnv((0, import_yaml6.parse)(raw), process.env);
12444
12450
  if (!isRecord(parsed)) {
12445
12451
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
12446
12452
  }
@@ -13357,8 +13363,7 @@ ${context2.fileChanges}`;
13357
13363
  }
13358
13364
  const evaluatorRawRequest = {
13359
13365
  userPrompt,
13360
- systemPrompt,
13361
- target: graderProvider.targetName
13366
+ systemPrompt
13362
13367
  };
13363
13368
  try {
13364
13369
  const { data, tokenUsage } = await this.runWithRetry({
@@ -13376,6 +13381,7 @@ ${context2.fileChanges}`;
13376
13381
  assertions,
13377
13382
  expectedAspectCount: Math.max(assertions.length, 1),
13378
13383
  evaluatorRawRequest,
13384
+ graderTarget: graderProvider.targetName,
13379
13385
  tokenUsage
13380
13386
  };
13381
13387
  } catch (e) {
@@ -13387,7 +13393,8 @@ ${context2.fileChanges}`;
13387
13393
  verdict: "skip",
13388
13394
  assertions: [{ text: `Grader parse failure after 3 attempts: ${message}`, passed: false }],
13389
13395
  expectedAspectCount: 1,
13390
- evaluatorRawRequest
13396
+ evaluatorRawRequest,
13397
+ graderTarget: graderProvider.targetName
13391
13398
  };
13392
13399
  }
13393
13400
  }
@@ -13405,8 +13412,7 @@ ${context2.fileChanges}`;
13405
13412
  const systemPrompt = buildRubricOutputSchema();
13406
13413
  const evaluatorRawRequest = {
13407
13414
  userPrompt: prompt,
13408
- systemPrompt,
13409
- target: graderProvider.targetName
13415
+ systemPrompt
13410
13416
  };
13411
13417
  try {
13412
13418
  const { data, tokenUsage } = await this.runWithRetry({
@@ -13423,6 +13429,7 @@ ${context2.fileChanges}`;
13423
13429
  assertions,
13424
13430
  expectedAspectCount: rubrics.length,
13425
13431
  evaluatorRawRequest,
13432
+ graderTarget: graderProvider.targetName,
13426
13433
  tokenUsage
13427
13434
  };
13428
13435
  } catch (e) {
@@ -13434,7 +13441,8 @@ ${context2.fileChanges}`;
13434
13441
  verdict: "skip",
13435
13442
  assertions: [{ text: `Grader parse failure after 3 attempts: ${message}`, passed: false }],
13436
13443
  expectedAspectCount: rubrics.length,
13437
- evaluatorRawRequest
13444
+ evaluatorRawRequest,
13445
+ graderTarget: graderProvider.targetName
13438
13446
  };
13439
13447
  }
13440
13448
  }
@@ -13447,8 +13455,7 @@ ${context2.fileChanges}`;
13447
13455
  const systemPrompt = buildScoreRangeOutputSchema();
13448
13456
  const evaluatorRawRequest = {
13449
13457
  userPrompt: prompt,
13450
- systemPrompt,
13451
- target: graderProvider.targetName
13458
+ systemPrompt
13452
13459
  };
13453
13460
  try {
13454
13461
  const { data, tokenUsage } = await this.runWithRetry({
@@ -13465,6 +13472,7 @@ ${context2.fileChanges}`;
13465
13472
  assertions,
13466
13473
  expectedAspectCount: rubrics.length,
13467
13474
  evaluatorRawRequest,
13475
+ graderTarget: graderProvider.targetName,
13468
13476
  details,
13469
13477
  tokenUsage
13470
13478
  };
@@ -13477,7 +13485,8 @@ ${context2.fileChanges}`;
13477
13485
  verdict: "skip",
13478
13486
  assertions: [{ text: `Grader parse failure after 3 attempts: ${message}`, passed: false }],
13479
13487
  expectedAspectCount: rubrics.length,
13480
- evaluatorRawRequest
13488
+ evaluatorRawRequest,
13489
+ graderTarget: graderProvider.targetName
13481
13490
  };
13482
13491
  }
13483
13492
  }
@@ -13509,7 +13518,6 @@ ${context2.fileChanges}`;
13509
13518
  mode: "built-in",
13510
13519
  systemPrompt,
13511
13520
  userPrompt,
13512
- target: graderProvider.targetName,
13513
13521
  maxSteps: this.maxSteps
13514
13522
  };
13515
13523
  try {
@@ -13527,7 +13535,13 @@ ${context2.fileChanges}`;
13527
13535
  steps: steps.length,
13528
13536
  tool_calls: toolCallCount
13529
13537
  };
13530
- return this.parseAgentResult(text, rubrics, evaluatorRawRequest, details);
13538
+ return this.parseAgentResult(
13539
+ text,
13540
+ rubrics,
13541
+ evaluatorRawRequest,
13542
+ details,
13543
+ graderProvider.targetName
13544
+ );
13531
13545
  } catch (error) {
13532
13546
  const message = error instanceof Error ? error.message : String(error);
13533
13547
  return {
@@ -13536,6 +13550,7 @@ ${context2.fileChanges}`;
13536
13550
  assertions: [{ text: `llm-grader built-in evaluation failed: ${message}`, passed: false }],
13537
13551
  expectedAspectCount: 1,
13538
13552
  evaluatorRawRequest,
13553
+ graderTarget: graderProvider.targetName,
13539
13554
  details: { mode: "built-in", error: message }
13540
13555
  };
13541
13556
  }
@@ -13588,6 +13603,7 @@ ${context2.fileChanges}`;
13588
13603
  ],
13589
13604
  expectedAspectCount: 1,
13590
13605
  evaluatorRawRequest,
13606
+ graderTarget: provider.targetName,
13591
13607
  details: { mode: modeLabel, grader_target: provider.targetName }
13592
13608
  };
13593
13609
  }
@@ -13597,7 +13613,13 @@ ${context2.fileChanges}`;
13597
13613
  mode: modeLabel,
13598
13614
  grader_target: provider.targetName
13599
13615
  };
13600
- return this.parseAgentResult(assistantContent, rubrics, evaluatorRawRequest, details);
13616
+ return this.parseAgentResult(
13617
+ assistantContent,
13618
+ rubrics,
13619
+ evaluatorRawRequest,
13620
+ details,
13621
+ provider.targetName
13622
+ );
13601
13623
  } catch (error) {
13602
13624
  const message = error instanceof Error ? error.message : String(error);
13603
13625
  return {
@@ -13608,6 +13630,7 @@ ${context2.fileChanges}`;
13608
13630
  ],
13609
13631
  expectedAspectCount: 1,
13610
13632
  evaluatorRawRequest,
13633
+ graderTarget: provider.targetName,
13611
13634
  details: {
13612
13635
  mode: modeLabel,
13613
13636
  grader_target: provider.targetName,
@@ -13752,7 +13775,7 @@ ${outputSchema}`;
13752
13775
  * Parse the agent's response text into an EvaluationScore.
13753
13776
  * Supports both freeform and rubric modes.
13754
13777
  */
13755
- parseAgentResult(text, rubrics, evaluatorRawRequest, details) {
13778
+ parseAgentResult(text, rubrics, evaluatorRawRequest, details, graderTarget) {
13756
13779
  try {
13757
13780
  const parsed = parseJsonFromText(text);
13758
13781
  if (rubrics && rubrics.length > 0) {
@@ -13764,6 +13787,7 @@ ${outputSchema}`;
13764
13787
  assertions: assertions2,
13765
13788
  expectedAspectCount: rubrics.length,
13766
13789
  evaluatorRawRequest,
13790
+ graderTarget,
13767
13791
  details
13768
13792
  };
13769
13793
  }
@@ -13776,6 +13800,7 @@ ${outputSchema}`;
13776
13800
  assertions,
13777
13801
  expectedAspectCount: Math.max(assertions.length, 1),
13778
13802
  evaluatorRawRequest,
13803
+ graderTarget,
13779
13804
  details
13780
13805
  };
13781
13806
  } catch {
@@ -13790,6 +13815,7 @@ ${outputSchema}`;
13790
13815
  ],
13791
13816
  expectedAspectCount: 1,
13792
13817
  evaluatorRawRequest,
13818
+ graderTarget,
13793
13819
  details
13794
13820
  };
13795
13821
  }
@@ -18101,7 +18127,8 @@ async function runEvaluation(options) {
18101
18127
  streamCallbacks,
18102
18128
  typeRegistry,
18103
18129
  repoManager,
18104
- evalDir
18130
+ evalDir,
18131
+ verbose
18105
18132
  };
18106
18133
  let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
18107
18134
  if (totalBudgetUsd !== void 0) {
@@ -18181,7 +18208,8 @@ async function runEvaluation(options) {
18181
18208
  promptInputs,
18182
18209
  primaryProvider,
18183
18210
  "agent",
18184
- "provider_error"
18211
+ "provider_error",
18212
+ verbose
18185
18213
  );
18186
18214
  results.push(errorResult);
18187
18215
  if (onResult) {
@@ -18254,6 +18282,7 @@ async function runBatchEvaluation(options) {
18254
18282
  nowFn,
18255
18283
  onProgress,
18256
18284
  onResult,
18285
+ verbose,
18257
18286
  resolveGraderProvider,
18258
18287
  agentTimeoutMs,
18259
18288
  targetResolver,
@@ -18341,7 +18370,8 @@ async function runBatchEvaluation(options) {
18341
18370
  startTime,
18342
18371
  endTime,
18343
18372
  targetResolver,
18344
- availableTargets
18373
+ availableTargets,
18374
+ verbose
18345
18375
  });
18346
18376
  if (providerError) {
18347
18377
  result = {
@@ -18362,7 +18392,8 @@ async function runBatchEvaluation(options) {
18362
18392
  promptInputs,
18363
18393
  provider,
18364
18394
  "evaluator",
18365
- "evaluator_error"
18395
+ "evaluator_error",
18396
+ verbose
18366
18397
  );
18367
18398
  results.push(errorResult);
18368
18399
  if (onResult) {
@@ -18425,7 +18456,8 @@ async function runEvalCase(options) {
18425
18456
  suiteWorkspaceFile,
18426
18457
  typeRegistry: providedTypeRegistry,
18427
18458
  repoManager,
18428
- evalDir
18459
+ evalDir,
18460
+ verbose
18429
18461
  } = options;
18430
18462
  const setupDebug = process.env.AGENTV_SETUP_DEBUG === "1";
18431
18463
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
@@ -18462,7 +18494,8 @@ async function runEvalCase(options) {
18462
18494
  promptInputs,
18463
18495
  provider,
18464
18496
  "setup",
18465
- "template_error"
18497
+ "template_error",
18498
+ verbose
18466
18499
  );
18467
18500
  }
18468
18501
  if (caseWorkspaceFile && workspacePath) {
@@ -18491,7 +18524,8 @@ async function runEvalCase(options) {
18491
18524
  promptInputs,
18492
18525
  provider,
18493
18526
  "repo_setup",
18494
- "local_path_not_found"
18527
+ "local_path_not_found",
18528
+ verbose
18495
18529
  );
18496
18530
  }
18497
18531
  }
@@ -18517,7 +18551,8 @@ async function runEvalCase(options) {
18517
18551
  promptInputs,
18518
18552
  provider,
18519
18553
  "repo_setup",
18520
- "clone_error"
18554
+ "clone_error",
18555
+ verbose
18521
18556
  );
18522
18557
  }
18523
18558
  }
@@ -18543,7 +18578,8 @@ async function runEvalCase(options) {
18543
18578
  promptInputs,
18544
18579
  provider,
18545
18580
  "setup",
18546
- "file_copy_error"
18581
+ "file_copy_error",
18582
+ verbose
18547
18583
  );
18548
18584
  }
18549
18585
  }
@@ -18588,7 +18624,8 @@ async function runEvalCase(options) {
18588
18624
  promptInputs,
18589
18625
  provider,
18590
18626
  "setup",
18591
- "script_error"
18627
+ "script_error",
18628
+ verbose
18592
18629
  );
18593
18630
  }
18594
18631
  }
@@ -18619,7 +18656,8 @@ async function runEvalCase(options) {
18619
18656
  promptInputs,
18620
18657
  provider,
18621
18658
  "setup",
18622
- "script_error"
18659
+ "script_error",
18660
+ verbose
18623
18661
  );
18624
18662
  }
18625
18663
  }
@@ -18663,7 +18701,8 @@ async function runEvalCase(options) {
18663
18701
  promptInputs,
18664
18702
  provider,
18665
18703
  "agent",
18666
- "provider_error"
18704
+ "provider_error",
18705
+ verbose
18667
18706
  );
18668
18707
  if (workspacePath) {
18669
18708
  if (forceCleanup) {
@@ -18684,7 +18723,8 @@ async function runEvalCase(options) {
18684
18723
  promptInputs,
18685
18724
  provider,
18686
18725
  "agent",
18687
- "provider_error"
18726
+ "provider_error",
18727
+ verbose
18688
18728
  );
18689
18729
  if (workspacePath) {
18690
18730
  if (forceCleanup) {
@@ -18779,7 +18819,8 @@ async function runEvalCase(options) {
18779
18819
  targetResolver,
18780
18820
  availableTargets,
18781
18821
  fileChanges,
18782
- workspacePath
18822
+ workspacePath,
18823
+ verbose
18783
18824
  });
18784
18825
  const totalDurationMs = Date.now() - caseStartMs;
18785
18826
  const graderTokens = aggregateEvaluatorTokenUsage(result.scores);
@@ -18834,7 +18875,8 @@ async function runEvalCase(options) {
18834
18875
  promptInputs,
18835
18876
  provider,
18836
18877
  "evaluator",
18837
- "evaluator_error"
18878
+ "evaluator_error",
18879
+ verbose
18838
18880
  );
18839
18881
  if (workspacePath && !isSharedWorkspace) {
18840
18882
  if (forceCleanup || (retainOnFailure ?? "keep") === "cleanup") {
@@ -18976,7 +19018,7 @@ async function evaluateCandidate(options) {
18976
19018
  let lmRequest;
18977
19019
  if (isAgentProvider(provider)) {
18978
19020
  agentRequest = {
18979
- question: promptInputs.question
19021
+ ...options.verbose ? { input: promptInputs.question } : {}
18980
19022
  };
18981
19023
  } else {
18982
19024
  if (promptInputs.chatPrompt) {
@@ -18990,8 +19032,9 @@ async function evaluateCandidate(options) {
18990
19032
  }
18991
19033
  }
18992
19034
  const evaluatorRequest = scores ? void 0 : score.evaluatorRawRequest;
18993
- const requests = agentRequest || lmRequest || evaluatorRequest ? {
18994
- ...agentRequest ? { agent: agentRequest } : {},
19035
+ const effectiveAgentRequest = agentRequest && Object.keys(agentRequest).length > 0 ? agentRequest : void 0;
19036
+ const requests = effectiveAgentRequest || lmRequest || evaluatorRequest ? {
19037
+ ...effectiveAgentRequest ? { agent: effectiveAgentRequest } : {},
18995
19038
  ...lmRequest ? { lm: lmRequest } : {},
18996
19039
  ...evaluatorRequest ? { evaluator: evaluatorRequest } : {}
18997
19040
  } : void 0;
@@ -19011,9 +19054,9 @@ async function evaluateCandidate(options) {
19011
19054
  endTime,
19012
19055
  requests,
19013
19056
  input,
19057
+ output: output ?? [{ role: "assistant", content: candidate }],
19014
19058
  scores,
19015
19059
  trace: trace2,
19016
- output: output ?? [{ role: "assistant", content: candidate }],
19017
19060
  fileChanges,
19018
19061
  executionStatus: classifyQualityStatus(score.score)
19019
19062
  };
@@ -19179,6 +19222,7 @@ async function runEvaluatorList(options) {
19179
19222
  verdict: score2.verdict,
19180
19223
  assertions: score2.assertions,
19181
19224
  input: score2.evaluatorRawRequest,
19225
+ target: score2.graderTarget,
19182
19226
  details: score2.details,
19183
19227
  scores: mapChildResults(score2.scores),
19184
19228
  tokenUsage: score2.tokenUsage,
@@ -19318,13 +19362,13 @@ async function invokeProvider(provider, options) {
19318
19362
  }
19319
19363
  }
19320
19364
  }
19321
- function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider, failureStage, failureReasonCode) {
19365
+ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider, failureStage, failureReasonCode, verbose) {
19322
19366
  const message = error instanceof Error ? error.message : String(error);
19323
19367
  let agentRequest;
19324
19368
  let lmRequest;
19325
19369
  if (isAgentProvider(provider)) {
19326
19370
  agentRequest = {
19327
- question: promptInputs.question,
19371
+ ...verbose ? { input: promptInputs.question } : {},
19328
19372
  error: message
19329
19373
  };
19330
19374
  } else {
@@ -19352,10 +19396,10 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
19352
19396
  conversationId: evalCase.conversation_id,
19353
19397
  score: 0,
19354
19398
  assertions: [{ text: `Error: ${message}`, passed: false }],
19355
- output: [{ role: "assistant", content: `Error occurred: ${message}` }],
19356
19399
  target: targetName,
19357
19400
  requests,
19358
19401
  input,
19402
+ output: [{ role: "assistant", content: `Error occurred: ${message}` }],
19359
19403
  error: message,
19360
19404
  executionStatus: "execution_error",
19361
19405
  failureStage,