@agentv/core 0.26.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -178,7 +178,7 @@ declare function isJsonValue(value: unknown): value is JsonValue;
178
178
  * Guard validating raw test messages.
179
179
  */
180
180
  declare function isTestMessage(value: unknown): value is TestMessage;
181
- declare const EVALUATOR_KIND_VALUES: readonly ["code_judge", "llm_judge", "rubric", "composite", "tool_trajectory", "expected_messages"];
181
+ declare const EVALUATOR_KIND_VALUES: readonly ["code_judge", "llm_judge", "rubric", "composite", "tool_trajectory", "expected_tool_calls"];
182
182
  type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
183
183
  declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
184
184
  type CodeEvaluatorConfig = {
@@ -224,12 +224,12 @@ type CompositeEvaluatorConfig = {
224
224
  readonly aggregator: CompositeAggregatorConfig;
225
225
  readonly weight?: number;
226
226
  };
227
- type ExpectedMessagesEvaluatorConfig = {
227
+ type ExpectedToolCallsEvaluatorConfig = {
228
228
  readonly name: string;
229
- readonly type: 'expected_messages';
229
+ readonly type: 'expected_tool_calls';
230
230
  readonly weight?: number;
231
231
  };
232
- type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig | CompositeEvaluatorConfig | ToolTrajectoryEvaluatorConfig | ExpectedMessagesEvaluatorConfig;
232
+ type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig | CompositeEvaluatorConfig | ToolTrajectoryEvaluatorConfig | ExpectedToolCallsEvaluatorConfig;
233
233
  /**
234
234
  * Eval case definition sourced from AgentV specs.
235
235
  */
@@ -770,8 +770,8 @@ declare class ToolTrajectoryEvaluator implements Evaluator {
770
770
  * Extracts tool_calls from assistant messages in expected_messages and compares them
771
771
  * sequentially against tool_call events in the trace.
772
772
  */
773
- declare class ExpectedMessagesEvaluator implements Evaluator {
774
- readonly kind = "expected_messages";
773
+ declare class ExpectedToolCallsEvaluator implements Evaluator {
774
+ readonly kind = "expected_tool_calls";
775
775
  evaluate(context: EvaluationContext): EvaluationScore;
776
776
  private extractExpectedToolCalls;
777
777
  private validateToolCalls;
@@ -867,4 +867,4 @@ type AgentKernel = {
867
867
  };
868
868
  declare function createAgentKernel(): AgentKernel;
869
869
 
870
- export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, ExpectedMessagesEvaluator, type ExpectedMessagesEvaluatorConfig, type ExpectedToolCall, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TestMessageToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceEvent, type TraceEventType, type TraceSummary, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, computeTraceSummary, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, generateRubrics, getHitCount, isEvaluatorKind, isExpectedToolCall, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, isTraceEvent, isTraceEventType, listTargetNames, loadEvalCases, normalizeLineEndings, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
870
+ export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type ExpectedToolCall, ExpectedToolCallsEvaluator, type ExpectedToolCallsEvaluatorConfig, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TestMessageToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceEvent, type TraceEventType, type TraceSummary, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, computeTraceSummary, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, generateRubrics, getHitCount, isEvaluatorKind, isExpectedToolCall, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, isTraceEvent, isTraceEventType, listTargetNames, loadEvalCases, normalizeLineEndings, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
package/dist/index.d.ts CHANGED
@@ -178,7 +178,7 @@ declare function isJsonValue(value: unknown): value is JsonValue;
178
178
  * Guard validating raw test messages.
179
179
  */
180
180
  declare function isTestMessage(value: unknown): value is TestMessage;
181
- declare const EVALUATOR_KIND_VALUES: readonly ["code_judge", "llm_judge", "rubric", "composite", "tool_trajectory", "expected_messages"];
181
+ declare const EVALUATOR_KIND_VALUES: readonly ["code_judge", "llm_judge", "rubric", "composite", "tool_trajectory", "expected_tool_calls"];
182
182
  type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
183
183
  declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
184
184
  type CodeEvaluatorConfig = {
@@ -224,12 +224,12 @@ type CompositeEvaluatorConfig = {
224
224
  readonly aggregator: CompositeAggregatorConfig;
225
225
  readonly weight?: number;
226
226
  };
227
- type ExpectedMessagesEvaluatorConfig = {
227
+ type ExpectedToolCallsEvaluatorConfig = {
228
228
  readonly name: string;
229
- readonly type: 'expected_messages';
229
+ readonly type: 'expected_tool_calls';
230
230
  readonly weight?: number;
231
231
  };
232
- type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig | CompositeEvaluatorConfig | ToolTrajectoryEvaluatorConfig | ExpectedMessagesEvaluatorConfig;
232
+ type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig | CompositeEvaluatorConfig | ToolTrajectoryEvaluatorConfig | ExpectedToolCallsEvaluatorConfig;
233
233
  /**
234
234
  * Eval case definition sourced from AgentV specs.
235
235
  */
@@ -770,8 +770,8 @@ declare class ToolTrajectoryEvaluator implements Evaluator {
770
770
  * Extracts tool_calls from assistant messages in expected_messages and compares them
771
771
  * sequentially against tool_call events in the trace.
772
772
  */
773
- declare class ExpectedMessagesEvaluator implements Evaluator {
774
- readonly kind = "expected_messages";
773
+ declare class ExpectedToolCallsEvaluator implements Evaluator {
774
+ readonly kind = "expected_tool_calls";
775
775
  evaluate(context: EvaluationContext): EvaluationScore;
776
776
  private extractExpectedToolCalls;
777
777
  private validateToolCalls;
@@ -867,4 +867,4 @@ type AgentKernel = {
867
867
  };
868
868
  declare function createAgentKernel(): AgentKernel;
869
869
 
870
- export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, ExpectedMessagesEvaluator, type ExpectedMessagesEvaluatorConfig, type ExpectedToolCall, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TestMessageToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceEvent, type TraceEventType, type TraceSummary, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, computeTraceSummary, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, generateRubrics, getHitCount, isEvaluatorKind, isExpectedToolCall, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, isTraceEvent, isTraceEventType, listTargetNames, loadEvalCases, normalizeLineEndings, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
870
+ export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type ExpectedToolCall, ExpectedToolCallsEvaluator, type ExpectedToolCallsEvaluatorConfig, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TestMessageToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceEvent, type TraceEventType, type TraceSummary, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, computeTraceSummary, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, generateRubrics, getHitCount, isEvaluatorKind, isExpectedToolCall, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, isTraceEvent, isTraceEventType, listTargetNames, loadEvalCases, normalizeLineEndings, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
package/dist/index.js CHANGED
@@ -9,7 +9,7 @@ import {
9
9
  readTextFile,
10
10
  resolveFileReference,
11
11
  resolveTargetDefinition
12
- } from "./chunk-NDEN3H2B.js";
12
+ } from "./chunk-V3JCB3HI.js";
13
13
 
14
14
  // src/evaluation/types.ts
15
15
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
@@ -58,7 +58,7 @@ var EVALUATOR_KIND_VALUES = [
58
58
  "rubric",
59
59
  "composite",
60
60
  "tool_trajectory",
61
- "expected_messages"
61
+ "expected_tool_calls"
62
62
  ];
63
63
  var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
64
64
  function isEvaluatorKind(value) {
@@ -582,11 +582,11 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
582
582
  });
583
583
  continue;
584
584
  }
585
- if (typeValue === "expected_messages") {
585
+ if (typeValue === "expected_tool_calls") {
586
586
  const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
587
587
  evaluators.push({
588
588
  name,
589
- type: "expected_messages",
589
+ type: "expected_tool_calls",
590
590
  ...weight2 !== void 0 ? { weight: weight2 } : {}
591
591
  });
592
592
  continue;
@@ -3813,8 +3813,8 @@ var ToolTrajectoryEvaluator = class {
3813
3813
  };
3814
3814
  }
3815
3815
  };
3816
- var ExpectedMessagesEvaluator = class {
3817
- kind = "expected_messages";
3816
+ var ExpectedToolCallsEvaluator = class {
3817
+ kind = "expected_tool_calls";
3818
3818
  evaluate(context) {
3819
3819
  const { candidateTrace, evalCase } = context;
3820
3820
  const expectedSegments = evalCase.expected_segments;
@@ -4897,8 +4897,8 @@ async function runEvaluatorList(options) {
4897
4897
  return new ToolTrajectoryEvaluator({
4898
4898
  config: memberConfig
4899
4899
  });
4900
- case "expected_messages":
4901
- return new ExpectedMessagesEvaluator();
4900
+ case "expected_tool_calls":
4901
+ return new ExpectedToolCallsEvaluator();
4902
4902
  default: {
4903
4903
  const unknownConfig = memberConfig;
4904
4904
  throw new Error(`Unsupported evaluator type in composite: ${unknownConfig.type}`);
@@ -4963,9 +4963,9 @@ async function runEvaluatorList(options) {
4963
4963
  reasoning: score2.reasoning
4964
4964
  });
4965
4965
  }
4966
- if (evaluator.type === "expected_messages") {
4967
- const expectedMessagesEvaluator = new ExpectedMessagesEvaluator();
4968
- const score2 = expectedMessagesEvaluator.evaluate({
4966
+ if (evaluator.type === "expected_tool_calls") {
4967
+ const expectedToolCallsEvaluator = new ExpectedToolCallsEvaluator();
4968
+ const score2 = expectedToolCallsEvaluator.evaluate({
4969
4969
  evalCase,
4970
4970
  candidate,
4971
4971
  target,
@@ -5345,7 +5345,7 @@ function createAgentKernel() {
5345
5345
  export {
5346
5346
  CodeEvaluator,
5347
5347
  CompositeEvaluator,
5348
- ExpectedMessagesEvaluator,
5348
+ ExpectedToolCallsEvaluator,
5349
5349
  LlmJudgeEvaluator,
5350
5350
  TEST_MESSAGE_ROLES,
5351
5351
  ToolTrajectoryEvaluator,