@agentv/core 0.10.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -54,6 +54,7 @@ __export(index_exports, {
   loadEvalCases: () => loadEvalCases,
   normalizeLineEndings: () => normalizeLineEndings,
   readTargetDefinitions: () => readTargetDefinitions,
+  readTestSuiteMetadata: () => readTestSuiteMetadata,
   readTextFile: () => readTextFile,
   resolveAndCreateProvider: () => resolveAndCreateProvider,
   resolveFileReference: () => resolveFileReference,
@@ -239,6 +240,33 @@ var ANSI_YELLOW = "\x1B[33m";
 var ANSI_RESET = "\x1B[0m";
 var SCHEMA_EVAL_V2 = "agentv-eval-v2";
 var SCHEMA_CONFIG_V2 = "agentv-config-v2";
+async function readTestSuiteMetadata(testFilePath) {
+  try {
+    const absolutePath = import_node_path2.default.resolve(testFilePath);
+    const content = await (0, import_promises2.readFile)(absolutePath, "utf8");
+    const parsed = (0, import_yaml.parse)(content);
+    if (!isJsonObject(parsed)) {
+      return {};
+    }
+    return { target: extractTargetFromSuite(parsed) };
+  } catch {
+    return {};
+  }
+}
+function extractTargetFromSuite(suite) {
+  const execution = suite.execution;
+  if (execution && typeof execution === "object" && !Array.isArray(execution)) {
+    const executionTarget = execution.target;
+    if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
+      return executionTarget.trim();
+    }
+  }
+  const targetValue = suite.target;
+  if (typeof targetValue === "string" && targetValue.trim().length > 0) {
+    return targetValue.trim();
+  }
+  return void 0;
+}
 async function loadConfig(evalFilePath, repoRoot) {
   const directories = buildDirectoryChain(evalFilePath, repoRoot);
   for (const directory of directories) {
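
A minimal usage sketch (not part of the diff) of the newly exported readTestSuiteMetadata, assuming the package's main entry exposes it as shown in the export map above; the file names and YAML contents are hypothetical. The return shape follows the function body in the hunk: an empty object when the file is missing or not valid YAML, otherwise { target } with execution.target taking precedence over the top-level target.

const { readTestSuiteMetadata } = require("@agentv/core");

async function main() {
  // Hypothetical suite.yaml:
  //   $schema: agentv-eval-v2
  //   target: fallback-target
  //   execution:
  //     target: primary-target
  const metadata = await readTestSuiteMetadata("suite.yaml");
  console.log(metadata.target); // "primary-target": execution.target wins over the top-level target

  // Unreadable or unparseable files resolve to an empty object instead of throwing.
  console.log(await readTestSuiteMetadata("does-not-exist.yaml")); // {}
}

main();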
@@ -415,6 +443,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
   }
   const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
+  const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
+  const globalTarget = asString(globalExecution?.target) ?? asString(suite.target);
   const results = [];
   for (const rawEvalcase of rawTestcases) {
     if (!isJsonObject(rawEvalcase)) {
@@ -469,7 +499,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
     const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
-    const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
+    const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
     const userFilePaths = [];
     for (const segment of inputSegments) {
       if (segment.type === "file" && typeof segment.resolvedPath === "string") {
@@ -836,9 +866,9 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
   }
   return parts.join(" ");
 }
-async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
+async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
   const execution = rawEvalCase.execution;
-  const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators;
+  const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
   if (candidateEvaluators === void 0) {
     return void 0;
   }
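
A small sketch (not part of the diff, with hypothetical evaluator names) of the fallback order implied by the changed line: a case-level execution.evaluators wins, then the case's own evaluators, and only then the suite-level execution.evaluators that loadEvalCases now passes in as globalExecution.

const globalExecution = { evaluators: [{ name: "suite-default", type: "llm_judge" }] };

// Mirrors: isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators
//                                  : rawEvalCase.evaluators ?? globalExecution?.evaluators
function pickCandidateEvaluators(rawEvalCase) {
  const execution = rawEvalCase.execution;
  return execution && typeof execution === "object" && !Array.isArray(execution)
    ? execution.evaluators ?? rawEvalCase.evaluators
    : rawEvalCase.evaluators ?? globalExecution?.evaluators;
}

console.log(pickCandidateEvaluators({ evaluators: [{ name: "case-specific", type: "llm_judge" }] })[0].name); // "case-specific"
console.log(pickCandidateEvaluators({})[0].name); // "suite-default"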
@@ -876,6 +906,8 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
           resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
         );
       }
+    } else {
+      resolvedCwd = searchRoots[0];
     }
     evaluators.push({
       name,
@@ -904,8 +936,7 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
       name,
       type: "llm_judge",
       prompt,
-      promptPath,
-      model
+      promptPath
     });
   }
   return evaluators.length > 0 ? evaluators : void 0;
@@ -3222,10 +3253,7 @@ var LlmJudgeEvaluator = class {
       prompt = substituteVariables(systemPrompt, variables);
       systemPrompt = buildSystemPrompt(hasReferenceAnswer);
     }
-    const metadata = {
-      ...systemPrompt !== void 0 ? { systemPrompt } : {},
-      ...context.judgeModel !== void 0 ? { model: context.judgeModel } : {}
-    };
+    const metadata = systemPrompt !== void 0 ? { systemPrompt } : {};
     const response = await judgeProvider.invoke({
       question: prompt,
       metadata,
@@ -3245,8 +3273,7 @@ var LlmJudgeEvaluator = class {
       provider: judgeProvider.id,
       prompt,
       target: context.target.name,
-      ...systemPrompt !== void 0 ? { systemPrompt } : {},
-      ...context.judgeModel !== void 0 ? { model: context.judgeModel } : {}
+      ...systemPrompt !== void 0 && { systemPrompt }
     };
     return {
       score,
@@ -4240,8 +4267,7 @@ async function runLlmJudgeEvaluator(options) {
     now,
     judgeProvider,
     systemPrompt: customPrompt,
-    evaluator: config,
-    judgeModel: config.model
+    evaluator: config
   });
 }
 async function resolveCustomPrompt(config) {
@@ -4427,6 +4453,7 @@ function createAgentKernel() {
     loadEvalCases,
     normalizeLineEndings,
     readTargetDefinitions,
+    readTestSuiteMetadata,
     readTextFile,
     resolveAndCreateProvider,
     resolveFileReference,