@agentv/core 0.10.0 → 0.10.1

package/dist/index.d.cts CHANGED
@@ -91,7 +91,6 @@ type LlmJudgeEvaluatorConfig = {
     readonly type: "llm_judge";
     readonly prompt?: string;
     readonly promptPath?: string;
-    readonly model?: string;
 };
 type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig;
 /**
@@ -264,6 +263,13 @@ interface TargetDefinition {
     readonly retryStatusCodes?: unknown | undefined;
 }
 
+/**
+ * Read metadata from a test suite file (like target name).
+ * This is a convenience function for CLI tools that need metadata without loading all eval cases.
+ */
+declare function readTestSuiteMetadata(testFilePath: string): Promise<{
+    target?: string;
+}>;
 /**
  * Determine whether a path references guideline content (instructions or prompts).
  */
@@ -496,7 +502,6 @@ interface EvaluationContext {
     readonly judgeProvider?: Provider;
     readonly systemPrompt?: string;
     readonly evaluator?: EvaluatorConfig;
-    readonly judgeModel?: string;
 }
 interface EvaluationScore {
     readonly score: number;
@@ -599,4 +604,4 @@ type AgentKernel = {
 };
 declare function createAgentKernel(): AgentKernel;
 
-export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
+export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
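The new readTestSuiteMetadata export gives CLI tools a cheap way to read a suite's target without loading every eval case. A minimal usage sketch (the suite path is hypothetical; per the implementation in dist/index.js below, the function resolves the path, parses the file, and returns an empty object rather than throwing on a missing or malformed file):

    import { readTestSuiteMetadata } from "@agentv/core";

    // Hypothetical suite path; a missing or unparseable file
    // yields {} instead of an exception.
    const { target } = await readTestSuiteMetadata("./evals/suite.yaml");
    console.log(target ?? "(no target specified in suite)");
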
package/dist/index.d.ts CHANGED
@@ -91,7 +91,6 @@ type LlmJudgeEvaluatorConfig = {
     readonly type: "llm_judge";
     readonly prompt?: string;
     readonly promptPath?: string;
-    readonly model?: string;
 };
 type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig;
 /**
@@ -264,6 +263,13 @@ interface TargetDefinition {
     readonly retryStatusCodes?: unknown | undefined;
 }
 
+/**
+ * Read metadata from a test suite file (like target name).
+ * This is a convenience function for CLI tools that need metadata without loading all eval cases.
+ */
+declare function readTestSuiteMetadata(testFilePath: string): Promise<{
+    target?: string;
+}>;
 /**
  * Determine whether a path references guideline content (instructions or prompts).
  */
@@ -496,7 +502,6 @@ interface EvaluationContext {
     readonly judgeProvider?: Provider;
     readonly systemPrompt?: string;
     readonly evaluator?: EvaluatorConfig;
-    readonly judgeModel?: string;
 }
 interface EvaluationScore {
     readonly score: number;
@@ -599,4 +604,4 @@ type AgentKernel = {
 };
 declare function createAgentKernel(): AgentKernel;
 
-export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
+export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
package/dist/index.js CHANGED
@@ -73,6 +73,33 @@ var ANSI_YELLOW = "\x1B[33m";
 var ANSI_RESET = "\x1B[0m";
 var SCHEMA_EVAL_V2 = "agentv-eval-v2";
 var SCHEMA_CONFIG_V2 = "agentv-config-v2";
+async function readTestSuiteMetadata(testFilePath) {
+  try {
+    const absolutePath = path.resolve(testFilePath);
+    const content = await readFile(absolutePath, "utf8");
+    const parsed = parse(content);
+    if (!isJsonObject(parsed)) {
+      return {};
+    }
+    return { target: extractTargetFromSuite(parsed) };
+  } catch {
+    return {};
+  }
+}
+function extractTargetFromSuite(suite) {
+  const execution = suite.execution;
+  if (execution && typeof execution === "object" && !Array.isArray(execution)) {
+    const executionTarget = execution.target;
+    if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
+      return executionTarget.trim();
+    }
+  }
+  const targetValue = suite.target;
+  if (typeof targetValue === "string" && targetValue.trim().length > 0) {
+    return targetValue.trim();
+  }
+  return void 0;
+}
 async function loadConfig(evalFilePath, repoRoot) {
   const directories = buildDirectoryChain(evalFilePath, repoRoot);
   for (const directory of directories) {
@@ -249,6 +276,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
   }
   const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
+  const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
+  const globalTarget = asString(globalExecution?.target) ?? asString(suite.target);
   const results = [];
   for (const rawEvalcase of rawTestcases) {
     if (!isJsonObject(rawEvalcase)) {
@@ -303,7 +332,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
     const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
-    const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
+    const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
     const userFilePaths = [];
     for (const segment of inputSegments) {
       if (segment.type === "file" && typeof segment.resolvedPath === "string") {
@@ -670,9 +699,9 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
   }
   return parts.join(" ");
 }
-async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
+async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
   const execution = rawEvalCase.execution;
-  const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators;
+  const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
   if (candidateEvaluators === void 0) {
     return void 0;
   }
@@ -710,6 +739,8 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
          resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
        );
      }
+  } else {
+    resolvedCwd = searchRoots[0];
   }
   evaluators.push({
     name,
@@ -738,8 +769,7 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
       name,
       type: "llm_judge",
       prompt,
-      promptPath,
-      model
+      promptPath
     });
   }
   return evaluators.length > 0 ? evaluators : void 0;
@@ -2532,10 +2562,7 @@ var LlmJudgeEvaluator = class {
       prompt = substituteVariables(systemPrompt, variables);
       systemPrompt = buildSystemPrompt(hasReferenceAnswer);
     }
-    const metadata = {
-      ...systemPrompt !== void 0 ? { systemPrompt } : {},
-      ...context.judgeModel !== void 0 ? { model: context.judgeModel } : {}
-    };
+    const metadata = systemPrompt !== void 0 ? { systemPrompt } : {};
     const response = await judgeProvider.invoke({
       question: prompt,
       metadata,
@@ -2555,8 +2582,7 @@ var LlmJudgeEvaluator = class {
       provider: judgeProvider.id,
       prompt,
       target: context.target.name,
-      ...systemPrompt !== void 0 ? { systemPrompt } : {},
-      ...context.judgeModel !== void 0 ? { model: context.judgeModel } : {}
+      ...systemPrompt !== void 0 && { systemPrompt }
     };
     return {
       score,
@@ -3550,8 +3576,7 @@ async function runLlmJudgeEvaluator(options) {
     now,
     judgeProvider,
     systemPrompt: customPrompt,
-    evaluator: config,
-    judgeModel: config.model
+    evaluator: config
   });
 }
 async function resolveCustomPrompt(config) {
@@ -3736,6 +3761,7 @@ export {
   loadEvalCases,
   normalizeLineEndings,
   readTargetDefinitions,
+  readTestSuiteMetadata,
   readTextFile,
   resolveAndCreateProvider,
   resolveFileReference,
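
The precedence implemented by the new (internal, unexported) extractTargetFromSuite helper above: a suite-level execution.target wins over a top-level target, both are trimmed, and empty or non-string values are ignored. A self-contained restatement of that logic (targetOf and Suite are illustrative names, not package exports):

    type Suite = { execution?: { target?: unknown }; target?: unknown };

    function targetOf(suite: Suite): string | undefined {
      // execution.target takes precedence over the top-level target.
      const nested = suite.execution?.target;
      if (typeof nested === "string" && nested.trim().length > 0) return nested.trim();
      const top = suite.target;
      if (typeof top === "string" && top.trim().length > 0) return top.trim();
      return undefined;
    }

    targetOf({ execution: { target: " prod " }, target: "dev" }); // "prod"
    targetOf({ target: "dev" });                                  // "dev"
    targetOf({});                                                 // undefined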