agentv 3.9.1 → 3.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
301
301
  }
302
302
  });
303
303
 
304
- // ../../packages/core/dist/chunk-PC5TLJF6.js
304
+ // ../../packages/core/dist/chunk-K7JCJIXA.js
305
305
  import { constants } from "node:fs";
306
306
  import { access, readFile } from "node:fs/promises";
307
307
  import path from "node:path";
@@ -419,7 +419,7 @@ __export(external_exports2, {
419
419
  void: () => voidType
420
420
  });
421
421
 
422
- // ../../packages/core/dist/chunk-PC5TLJF6.js
422
+ // ../../packages/core/dist/chunk-K7JCJIXA.js
423
423
  import { readFile as readFile2 } from "node:fs/promises";
424
424
  import path3 from "node:path";
425
425
  import fg from "fast-glob";
@@ -23978,8 +23978,7 @@ ${context2.fileChanges}`;
23978
23978
  }
23979
23979
  const evaluatorRawRequest = {
23980
23980
  userPrompt,
23981
- systemPrompt,
23982
- target: graderProvider.targetName
23981
+ systemPrompt
23983
23982
  };
23984
23983
  try {
23985
23984
  const { data, tokenUsage } = await this.runWithRetry({
@@ -23997,6 +23996,7 @@ ${context2.fileChanges}`;
23997
23996
  assertions,
23998
23997
  expectedAspectCount: Math.max(assertions.length, 1),
23999
23998
  evaluatorRawRequest,
23999
+ graderTarget: graderProvider.targetName,
24000
24000
  tokenUsage
24001
24001
  };
24002
24002
  } catch (e) {
@@ -24008,7 +24008,8 @@ ${context2.fileChanges}`;
24008
24008
  verdict: "skip",
24009
24009
  assertions: [{ text: `Grader parse failure after 3 attempts: ${message}`, passed: false }],
24010
24010
  expectedAspectCount: 1,
24011
- evaluatorRawRequest
24011
+ evaluatorRawRequest,
24012
+ graderTarget: graderProvider.targetName
24012
24013
  };
24013
24014
  }
24014
24015
  }
@@ -24026,8 +24027,7 @@ ${context2.fileChanges}`;
24026
24027
  const systemPrompt = buildRubricOutputSchema();
24027
24028
  const evaluatorRawRequest = {
24028
24029
  userPrompt: prompt,
24029
- systemPrompt,
24030
- target: graderProvider.targetName
24030
+ systemPrompt
24031
24031
  };
24032
24032
  try {
24033
24033
  const { data, tokenUsage } = await this.runWithRetry({
@@ -24044,6 +24044,7 @@ ${context2.fileChanges}`;
24044
24044
  assertions,
24045
24045
  expectedAspectCount: rubrics.length,
24046
24046
  evaluatorRawRequest,
24047
+ graderTarget: graderProvider.targetName,
24047
24048
  tokenUsage
24048
24049
  };
24049
24050
  } catch (e) {
@@ -24055,7 +24056,8 @@ ${context2.fileChanges}`;
24055
24056
  verdict: "skip",
24056
24057
  assertions: [{ text: `Grader parse failure after 3 attempts: ${message}`, passed: false }],
24057
24058
  expectedAspectCount: rubrics.length,
24058
- evaluatorRawRequest
24059
+ evaluatorRawRequest,
24060
+ graderTarget: graderProvider.targetName
24059
24061
  };
24060
24062
  }
24061
24063
  }
@@ -24068,8 +24070,7 @@ ${context2.fileChanges}`;
24068
24070
  const systemPrompt = buildScoreRangeOutputSchema();
24069
24071
  const evaluatorRawRequest = {
24070
24072
  userPrompt: prompt,
24071
- systemPrompt,
24072
- target: graderProvider.targetName
24073
+ systemPrompt
24073
24074
  };
24074
24075
  try {
24075
24076
  const { data, tokenUsage } = await this.runWithRetry({
@@ -24086,6 +24087,7 @@ ${context2.fileChanges}`;
24086
24087
  assertions,
24087
24088
  expectedAspectCount: rubrics.length,
24088
24089
  evaluatorRawRequest,
24090
+ graderTarget: graderProvider.targetName,
24089
24091
  details,
24090
24092
  tokenUsage
24091
24093
  };
@@ -24098,7 +24100,8 @@ ${context2.fileChanges}`;
24098
24100
  verdict: "skip",
24099
24101
  assertions: [{ text: `Grader parse failure after 3 attempts: ${message}`, passed: false }],
24100
24102
  expectedAspectCount: rubrics.length,
24101
- evaluatorRawRequest
24103
+ evaluatorRawRequest,
24104
+ graderTarget: graderProvider.targetName
24102
24105
  };
24103
24106
  }
24104
24107
  }
@@ -24130,7 +24133,6 @@ ${context2.fileChanges}`;
24130
24133
  mode: "built-in",
24131
24134
  systemPrompt,
24132
24135
  userPrompt,
24133
- target: graderProvider.targetName,
24134
24136
  maxSteps: this.maxSteps
24135
24137
  };
24136
24138
  try {
@@ -24148,7 +24150,13 @@ ${context2.fileChanges}`;
24148
24150
  steps: steps.length,
24149
24151
  tool_calls: toolCallCount
24150
24152
  };
24151
- return this.parseAgentResult(text2, rubrics, evaluatorRawRequest, details);
24153
+ return this.parseAgentResult(
24154
+ text2,
24155
+ rubrics,
24156
+ evaluatorRawRequest,
24157
+ details,
24158
+ graderProvider.targetName
24159
+ );
24152
24160
  } catch (error) {
24153
24161
  const message = error instanceof Error ? error.message : String(error);
24154
24162
  return {
@@ -24157,6 +24165,7 @@ ${context2.fileChanges}`;
24157
24165
  assertions: [{ text: `llm-grader built-in evaluation failed: ${message}`, passed: false }],
24158
24166
  expectedAspectCount: 1,
24159
24167
  evaluatorRawRequest,
24168
+ graderTarget: graderProvider.targetName,
24160
24169
  details: { mode: "built-in", error: message }
24161
24170
  };
24162
24171
  }
@@ -24209,6 +24218,7 @@ ${context2.fileChanges}`;
24209
24218
  ],
24210
24219
  expectedAspectCount: 1,
24211
24220
  evaluatorRawRequest,
24221
+ graderTarget: provider.targetName,
24212
24222
  details: { mode: modeLabel, grader_target: provider.targetName }
24213
24223
  };
24214
24224
  }
@@ -24218,7 +24228,13 @@ ${context2.fileChanges}`;
24218
24228
  mode: modeLabel,
24219
24229
  grader_target: provider.targetName
24220
24230
  };
24221
- return this.parseAgentResult(assistantContent, rubrics, evaluatorRawRequest, details);
24231
+ return this.parseAgentResult(
24232
+ assistantContent,
24233
+ rubrics,
24234
+ evaluatorRawRequest,
24235
+ details,
24236
+ provider.targetName
24237
+ );
24222
24238
  } catch (error) {
24223
24239
  const message = error instanceof Error ? error.message : String(error);
24224
24240
  return {
@@ -24229,6 +24245,7 @@ ${context2.fileChanges}`;
24229
24245
  ],
24230
24246
  expectedAspectCount: 1,
24231
24247
  evaluatorRawRequest,
24248
+ graderTarget: provider.targetName,
24232
24249
  details: {
24233
24250
  mode: modeLabel,
24234
24251
  grader_target: provider.targetName,
@@ -24373,7 +24390,7 @@ ${outputSchema2}`;
24373
24390
  * Parse the agent's response text into an EvaluationScore.
24374
24391
  * Supports both freeform and rubric modes.
24375
24392
  */
24376
- parseAgentResult(text2, rubrics, evaluatorRawRequest, details) {
24393
+ parseAgentResult(text2, rubrics, evaluatorRawRequest, details, graderTarget) {
24377
24394
  try {
24378
24395
  const parsed = parseJsonFromText(text2);
24379
24396
  if (rubrics && rubrics.length > 0) {
@@ -24385,6 +24402,7 @@ ${outputSchema2}`;
24385
24402
  assertions: assertions2,
24386
24403
  expectedAspectCount: rubrics.length,
24387
24404
  evaluatorRawRequest,
24405
+ graderTarget,
24388
24406
  details
24389
24407
  };
24390
24408
  }
@@ -24397,6 +24415,7 @@ ${outputSchema2}`;
24397
24415
  assertions,
24398
24416
  expectedAspectCount: Math.max(assertions.length, 1),
24399
24417
  evaluatorRawRequest,
24418
+ graderTarget,
24400
24419
  details
24401
24420
  };
24402
24421
  } catch {
@@ -24411,6 +24430,7 @@ ${outputSchema2}`;
24411
24430
  ],
24412
24431
  expectedAspectCount: 1,
24413
24432
  evaluatorRawRequest,
24433
+ graderTarget,
24414
24434
  details
24415
24435
  };
24416
24436
  }
@@ -28641,7 +28661,8 @@ async function runEvaluation(options) {
28641
28661
  streamCallbacks,
28642
28662
  typeRegistry,
28643
28663
  repoManager,
28644
- evalDir
28664
+ evalDir,
28665
+ verbose
28645
28666
  };
28646
28667
  let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
28647
28668
  if (totalBudgetUsd !== void 0) {
@@ -28721,7 +28742,8 @@ async function runEvaluation(options) {
28721
28742
  promptInputs,
28722
28743
  primaryProvider,
28723
28744
  "agent",
28724
- "provider_error"
28745
+ "provider_error",
28746
+ verbose
28725
28747
  );
28726
28748
  results.push(errorResult);
28727
28749
  if (onResult) {
@@ -28794,6 +28816,7 @@ async function runBatchEvaluation(options) {
28794
28816
  nowFn,
28795
28817
  onProgress,
28796
28818
  onResult,
28819
+ verbose,
28797
28820
  resolveGraderProvider,
28798
28821
  agentTimeoutMs,
28799
28822
  targetResolver,
@@ -28881,7 +28904,8 @@ async function runBatchEvaluation(options) {
28881
28904
  startTime,
28882
28905
  endTime,
28883
28906
  targetResolver,
28884
- availableTargets
28907
+ availableTargets,
28908
+ verbose
28885
28909
  });
28886
28910
  if (providerError) {
28887
28911
  result = {
@@ -28902,7 +28926,8 @@ async function runBatchEvaluation(options) {
28902
28926
  promptInputs,
28903
28927
  provider,
28904
28928
  "evaluator",
28905
- "evaluator_error"
28929
+ "evaluator_error",
28930
+ verbose
28906
28931
  );
28907
28932
  results.push(errorResult);
28908
28933
  if (onResult) {
@@ -28965,7 +28990,8 @@ async function runEvalCase(options) {
28965
28990
  suiteWorkspaceFile,
28966
28991
  typeRegistry: providedTypeRegistry,
28967
28992
  repoManager,
28968
- evalDir
28993
+ evalDir,
28994
+ verbose
28969
28995
  } = options;
28970
28996
  const setupDebug = process.env.AGENTV_SETUP_DEBUG === "1";
28971
28997
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
@@ -29002,7 +29028,8 @@ async function runEvalCase(options) {
29002
29028
  promptInputs,
29003
29029
  provider,
29004
29030
  "setup",
29005
- "template_error"
29031
+ "template_error",
29032
+ verbose
29006
29033
  );
29007
29034
  }
29008
29035
  if (caseWorkspaceFile && workspacePath) {
@@ -29031,7 +29058,8 @@ async function runEvalCase(options) {
29031
29058
  promptInputs,
29032
29059
  provider,
29033
29060
  "repo_setup",
29034
- "local_path_not_found"
29061
+ "local_path_not_found",
29062
+ verbose
29035
29063
  );
29036
29064
  }
29037
29065
  }
@@ -29057,7 +29085,8 @@ async function runEvalCase(options) {
29057
29085
  promptInputs,
29058
29086
  provider,
29059
29087
  "repo_setup",
29060
- "clone_error"
29088
+ "clone_error",
29089
+ verbose
29061
29090
  );
29062
29091
  }
29063
29092
  }
@@ -29083,7 +29112,8 @@ async function runEvalCase(options) {
29083
29112
  promptInputs,
29084
29113
  provider,
29085
29114
  "setup",
29086
- "file_copy_error"
29115
+ "file_copy_error",
29116
+ verbose
29087
29117
  );
29088
29118
  }
29089
29119
  }
@@ -29128,7 +29158,8 @@ async function runEvalCase(options) {
29128
29158
  promptInputs,
29129
29159
  provider,
29130
29160
  "setup",
29131
- "script_error"
29161
+ "script_error",
29162
+ verbose
29132
29163
  );
29133
29164
  }
29134
29165
  }
@@ -29159,7 +29190,8 @@ async function runEvalCase(options) {
29159
29190
  promptInputs,
29160
29191
  provider,
29161
29192
  "setup",
29162
- "script_error"
29193
+ "script_error",
29194
+ verbose
29163
29195
  );
29164
29196
  }
29165
29197
  }
@@ -29203,7 +29235,8 @@ async function runEvalCase(options) {
29203
29235
  promptInputs,
29204
29236
  provider,
29205
29237
  "agent",
29206
- "provider_error"
29238
+ "provider_error",
29239
+ verbose
29207
29240
  );
29208
29241
  if (workspacePath) {
29209
29242
  if (forceCleanup) {
@@ -29224,7 +29257,8 @@ async function runEvalCase(options) {
29224
29257
  promptInputs,
29225
29258
  provider,
29226
29259
  "agent",
29227
- "provider_error"
29260
+ "provider_error",
29261
+ verbose
29228
29262
  );
29229
29263
  if (workspacePath) {
29230
29264
  if (forceCleanup) {
@@ -29319,7 +29353,8 @@ async function runEvalCase(options) {
29319
29353
  targetResolver,
29320
29354
  availableTargets,
29321
29355
  fileChanges,
29322
- workspacePath
29356
+ workspacePath,
29357
+ verbose
29323
29358
  });
29324
29359
  const totalDurationMs = Date.now() - caseStartMs;
29325
29360
  const graderTokens = aggregateEvaluatorTokenUsage(result.scores);
@@ -29374,7 +29409,8 @@ async function runEvalCase(options) {
29374
29409
  promptInputs,
29375
29410
  provider,
29376
29411
  "evaluator",
29377
- "evaluator_error"
29412
+ "evaluator_error",
29413
+ verbose
29378
29414
  );
29379
29415
  if (workspacePath && !isSharedWorkspace) {
29380
29416
  if (forceCleanup || (retainOnFailure ?? "keep") === "cleanup") {
@@ -29516,7 +29552,7 @@ async function evaluateCandidate(options) {
29516
29552
  let lmRequest;
29517
29553
  if (isAgentProvider(provider)) {
29518
29554
  agentRequest = {
29519
- question: promptInputs.question
29555
+ ...options.verbose ? { input: promptInputs.question } : {}
29520
29556
  };
29521
29557
  } else {
29522
29558
  if (promptInputs.chatPrompt) {
@@ -29530,8 +29566,9 @@ async function evaluateCandidate(options) {
29530
29566
  }
29531
29567
  }
29532
29568
  const evaluatorRequest = scores ? void 0 : score.evaluatorRawRequest;
29533
- const requests = agentRequest || lmRequest || evaluatorRequest ? {
29534
- ...agentRequest ? { agent: agentRequest } : {},
29569
+ const effectiveAgentRequest = agentRequest && Object.keys(agentRequest).length > 0 ? agentRequest : void 0;
29570
+ const requests = effectiveAgentRequest || lmRequest || evaluatorRequest ? {
29571
+ ...effectiveAgentRequest ? { agent: effectiveAgentRequest } : {},
29535
29572
  ...lmRequest ? { lm: lmRequest } : {},
29536
29573
  ...evaluatorRequest ? { evaluator: evaluatorRequest } : {}
29537
29574
  } : void 0;
@@ -29551,9 +29588,9 @@ async function evaluateCandidate(options) {
29551
29588
  endTime,
29552
29589
  requests,
29553
29590
  input,
29591
+ output: output ?? [{ role: "assistant", content: candidate }],
29554
29592
  scores,
29555
29593
  trace: trace2,
29556
- output: output ?? [{ role: "assistant", content: candidate }],
29557
29594
  fileChanges,
29558
29595
  executionStatus: classifyQualityStatus(score.score)
29559
29596
  };
@@ -29719,6 +29756,7 @@ async function runEvaluatorList(options) {
29719
29756
  verdict: score2.verdict,
29720
29757
  assertions: score2.assertions,
29721
29758
  input: score2.evaluatorRawRequest,
29759
+ target: score2.graderTarget,
29722
29760
  details: score2.details,
29723
29761
  scores: mapChildResults(score2.scores),
29724
29762
  tokenUsage: score2.tokenUsage,
@@ -29858,13 +29896,13 @@ async function invokeProvider(provider, options) {
29858
29896
  }
29859
29897
  }
29860
29898
  }
29861
- function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider, failureStage, failureReasonCode) {
29899
+ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider, failureStage, failureReasonCode, verbose) {
29862
29900
  const message = error instanceof Error ? error.message : String(error);
29863
29901
  let agentRequest;
29864
29902
  let lmRequest;
29865
29903
  if (isAgentProvider(provider)) {
29866
29904
  agentRequest = {
29867
- question: promptInputs.question,
29905
+ ...verbose ? { input: promptInputs.question } : {},
29868
29906
  error: message
29869
29907
  };
29870
29908
  } else {
@@ -29892,10 +29930,10 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
29892
29930
  conversationId: evalCase.conversation_id,
29893
29931
  score: 0,
29894
29932
  assertions: [{ text: `Error: ${message}`, passed: false }],
29895
- output: [{ role: "assistant", content: `Error occurred: ${message}` }],
29896
29933
  target: targetName,
29897
29934
  requests,
29898
29935
  input,
29936
+ output: [{ role: "assistant", content: `Error occurred: ${message}` }],
29899
29937
  error: message,
29900
29938
  executionStatus: "execution_error",
29901
29939
  failureStage,
@@ -31013,4 +31051,4 @@ export {
31013
31051
  OtelStreamingObserver,
31014
31052
  createAgentKernel
31015
31053
  };
31016
- //# sourceMappingURL=chunk-X24J6HCV.js.map
31054
+ //# sourceMappingURL=chunk-OIVGGWJ3.js.map