@agentv/core 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -54,6 +54,7 @@ __export(index_exports, {
54
54
  loadEvalCases: () => loadEvalCases,
55
55
  normalizeLineEndings: () => normalizeLineEndings,
56
56
  readTargetDefinitions: () => readTargetDefinitions,
57
+ readTestSuiteMetadata: () => readTestSuiteMetadata,
57
58
  readTextFile: () => readTextFile,
58
59
  resolveAndCreateProvider: () => resolveAndCreateProvider,
59
60
  resolveFileReference: () => resolveFileReference,
@@ -239,6 +240,33 @@ var ANSI_YELLOW = "\x1B[33m";
239
240
  var ANSI_RESET = "\x1B[0m";
240
241
  var SCHEMA_EVAL_V2 = "agentv-eval-v2";
241
242
  var SCHEMA_CONFIG_V2 = "agentv-config-v2";
// readTestSuiteMetadata(testFilePath): reads a test-suite file and returns
// the metadata this module extracts from it.
//   - testFilePath: path to the suite file; resolved to an absolute path
//     before reading.
//   - Resolves to `{ target }`, where `target` is whatever
//     extractTargetFromSuite returns for the parsed document (possibly
//     undefined), or to `{}` when the parsed document is not a JSON object.
//   - Never rejects: every error raised while reading or parsing is caught
//     and turned into `{}`.
243
+ async function readTestSuiteMetadata(testFilePath) {
244
+ try {
245
+ const absolutePath = import_node_path2.default.resolve(testFilePath);
246
+ const content = await (0, import_promises2.readFile)(absolutePath, "utf8");
// The file content is parsed with the YAML parser's `parse`.
247
+ const parsed = (0, import_yaml.parse)(content);
// Anything other than an object at the top level carries no metadata.
248
+ if (!isJsonObject(parsed)) {
249
+ return {};
250
+ }
251
+ return { target: extractTargetFromSuite(parsed) };
// Best-effort lookup: failures are swallowed without logging, so callers
// cannot distinguish "file unreadable" from "no metadata present".
252
+ } catch {
253
+ return {};
254
+ }
255
+ }
// extractTargetFromSuite(suite): picks the target name declared by a parsed
// suite object.
//   - Precedence: `suite.execution.target` is used first, but only when
//     `execution` is a non-array object and the target is a string that is
//     non-empty after trimming; otherwise `suite.target` is used under the
//     same string check.
//   - Returns the trimmed target string, or undefined when neither location
//     holds a usable value.
256
+ function extractTargetFromSuite(suite) {
257
+ const execution = suite.execution;
// Arrays also report typeof "object", so they are excluded explicitly.
258
+ if (execution && typeof execution === "object" && !Array.isArray(execution)) {
259
+ const executionTarget = execution.target;
260
+ if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
261
+ return executionTarget.trim();
262
+ }
263
+ }
// Fall back to the top-level `target` field.
264
+ const targetValue = suite.target;
265
+ if (typeof targetValue === "string" && targetValue.trim().length > 0) {
266
+ return targetValue.trim();
267
+ }
268
+ return void 0;
269
+ }
242
270
  async function loadConfig(evalFilePath, repoRoot) {
243
271
  const directories = buildDirectoryChain(evalFilePath, repoRoot);
244
272
  for (const directory of directories) {
@@ -415,6 +443,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
415
443
  throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
416
444
  }
417
445
  const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
446
+ const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
447
+ const globalTarget = asString(globalExecution?.target) ?? asString(suite.target);
418
448
  const results = [];
419
449
  for (const rawEvalcase of rawTestcases) {
420
450
  if (!isJsonObject(rawEvalcase)) {
@@ -469,7 +499,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
469
499
  const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
470
500
  const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
471
501
  const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
472
- const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
502
+ const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
473
503
  const userFilePaths = [];
474
504
  for (const segment of inputSegments) {
475
505
  if (segment.type === "file" && typeof segment.resolvedPath === "string") {
@@ -555,14 +585,13 @@ function formatSegment(segment) {
555
585
  const text = asString(segment.text);
556
586
  const filePath = asString(segment.path);
557
587
  if (text && filePath) {
558
- return `=== ${filePath} ===
559
- ${text}`;
588
+ return formatFileContents([{ content: text.trim(), isFile: true, displayPath: filePath }]);
560
589
  }
561
590
  }
562
591
  return void 0;
563
592
  }
564
593
  async function buildPromptInputs(testCase) {
565
- const guidelineContents = [];
594
+ const guidelineParts = [];
566
595
  for (const rawPath of testCase.guideline_paths) {
567
596
  const absolutePath = import_node_path2.default.resolve(rawPath);
568
597
  if (!await fileExists2(absolutePath)) {
@@ -570,14 +599,17 @@ async function buildPromptInputs(testCase) {
570
599
  continue;
571
600
  }
572
601
  try {
573
- const content = (await (0, import_promises2.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n");
574
- guidelineContents.push(`=== ${import_node_path2.default.basename(absolutePath)} ===
575
- ${content}`);
602
+ const content = (await (0, import_promises2.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
603
+ guidelineParts.push({
604
+ content,
605
+ isFile: true,
606
+ displayPath: import_node_path2.default.basename(absolutePath)
607
+ });
576
608
  } catch (error) {
577
609
  logWarning(`Could not read guideline file ${absolutePath}: ${error.message}`);
578
610
  }
579
611
  }
580
- const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
612
+ const guidelines = formatFileContents(guidelineParts);
581
613
  const segmentsByMessage = [];
582
614
  const fileContentsByPath = /* @__PURE__ */ new Map();
583
615
  for (const segment of testCase.input_segments) {
@@ -779,6 +811,20 @@ function cloneJsonValue(value) {
779
811
  }
780
812
  return cloneJsonObject(value);
781
813
  }
// formatFileContents(parts): joins content parts into a single string.
//   - parts: array of objects read here as `{ content, isFile, displayPath }`.
//   - When at least one part has a truthy `isFile`, parts that have both
//     `isFile` and `displayPath` are wrapped in a `<file path="...">` element
//     with the content on its own line, every other part is emitted as its
//     raw `content`, and the pieces are joined with a blank line ("\n\n").
//   - When no part is a file, the contents are joined with a single space.
//   - An empty `parts` array yields an empty string via the final join.
// NOTE(review): `displayPath` and `content` are interpolated without any
// escaping, so a `"` in the path or a literal `</file>` in the content would
// produce ambiguous markup -- confirm whether consumers parse this output.
814
+ function formatFileContents(parts) {
815
+ const fileCount = parts.filter((p) => p.isFile).length;
816
+ if (fileCount > 0) {
817
+ return parts.map((part) => {
// A file part without a displayPath falls through to the raw-content branch.
818
+ if (part.isFile && part.displayPath) {
819
+ return `<file path="${part.displayPath}">
820
+ ${part.content}
821
+ </file>`;
822
+ }
823
+ return part.content;
824
+ }).join("\n\n");
825
+ }
// No file parts at all: plain space-separated text.
826
+ return parts.map((p) => p.content).join(" ");
827
+ }
782
828
  async function resolveAssistantContent(content, searchRoots, verbose) {
783
829
  if (typeof content === "string") {
784
830
  return content;
@@ -789,7 +835,7 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
789
835
  const parts = [];
790
836
  for (const entry of content) {
791
837
  if (typeof entry === "string") {
792
- parts.push(entry);
838
+ parts.push({ content: entry, isFile: false });
793
839
  continue;
794
840
  }
795
841
  if (!isJsonObject(entry)) {
@@ -811,8 +857,8 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
811
857
  continue;
812
858
  }
813
859
  try {
814
- const fileContent = (await (0, import_promises2.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
815
- parts.push(fileContent);
860
+ const fileContent = (await (0, import_promises2.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
861
+ parts.push({ content: fileContent, isFile: true, displayPath });
816
862
  if (verbose) {
817
863
  console.log(` [Expected Assistant File] Found: ${displayPath}`);
818
864
  console.log(` Resolved to: ${resolvedPath}`);
@@ -824,21 +870,21 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
824
870
  }
825
871
  const textValue = asString(entry.text);
826
872
  if (typeof textValue === "string") {
827
- parts.push(textValue);
873
+ parts.push({ content: textValue, isFile: false });
828
874
  continue;
829
875
  }
830
876
  const valueValue = asString(entry.value);
831
877
  if (typeof valueValue === "string") {
832
- parts.push(valueValue);
878
+ parts.push({ content: valueValue, isFile: false });
833
879
  continue;
834
880
  }
835
- parts.push(JSON.stringify(entry));
881
+ parts.push({ content: JSON.stringify(entry), isFile: false });
836
882
  }
837
- return parts.join(" ");
883
+ return formatFileContents(parts);
838
884
  }
839
- async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
885
+ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
840
886
  const execution = rawEvalCase.execution;
841
- const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators;
887
+ const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
842
888
  if (candidateEvaluators === void 0) {
843
889
  return void 0;
844
890
  }
@@ -876,6 +922,8 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
876
922
  resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
877
923
  );
878
924
  }
925
+ } else {
926
+ resolvedCwd = searchRoots[0];
879
927
  }
880
928
  evaluators.push({
881
929
  name,
@@ -904,8 +952,7 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
904
952
  name,
905
953
  type: "llm_judge",
906
954
  prompt,
907
- promptPath,
908
- model
955
+ promptPath
909
956
  });
910
957
  }
911
958
  return evaluators.length > 0 ? evaluators : void 0;
@@ -3222,10 +3269,7 @@ var LlmJudgeEvaluator = class {
3222
3269
  prompt = substituteVariables(systemPrompt, variables);
3223
3270
  systemPrompt = buildSystemPrompt(hasReferenceAnswer);
3224
3271
  }
3225
- const metadata = {
3226
- ...systemPrompt !== void 0 ? { systemPrompt } : {},
3227
- ...context.judgeModel !== void 0 ? { model: context.judgeModel } : {}
3228
- };
3272
+ const metadata = systemPrompt !== void 0 ? { systemPrompt } : {};
3229
3273
  const response = await judgeProvider.invoke({
3230
3274
  question: prompt,
3231
3275
  metadata,
@@ -3245,8 +3289,7 @@ var LlmJudgeEvaluator = class {
3245
3289
  provider: judgeProvider.id,
3246
3290
  prompt,
3247
3291
  target: context.target.name,
3248
- ...systemPrompt !== void 0 ? { systemPrompt } : {},
3249
- ...context.judgeModel !== void 0 ? { model: context.judgeModel } : {}
3292
+ ...systemPrompt !== void 0 && { systemPrompt }
3250
3293
  };
3251
3294
  return {
3252
3295
  score,
@@ -4240,8 +4283,7 @@ async function runLlmJudgeEvaluator(options) {
4240
4283
  now,
4241
4284
  judgeProvider,
4242
4285
  systemPrompt: customPrompt,
4243
- evaluator: config,
4244
- judgeModel: config.model
4286
+ evaluator: config
4245
4287
  });
4246
4288
  }
4247
4289
  async function resolveCustomPrompt(config) {
@@ -4427,6 +4469,7 @@ function createAgentKernel() {
4427
4469
  loadEvalCases,
4428
4470
  normalizeLineEndings,
4429
4471
  readTargetDefinitions,
4472
+ readTestSuiteMetadata,
4430
4473
  readTextFile,
4431
4474
  resolveAndCreateProvider,
4432
4475
  resolveFileReference,