npm - @agentv/core - Versions diffs - 0.2.6 → 0.2.11 - Mend

@agentv/core 0.2.6 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/dist/{chunk-QVS4OL44.js → chunk-P4GOYWYH.js} +27 -1
package/dist/chunk-P4GOYWYH.js.map +1 -0
package/dist/chunk-XXNQA4EW.js +140 -0
package/dist/chunk-XXNQA4EW.js.map +1 -0
package/dist/evaluation/validation/index.cjs +93 -8
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.d.cts +7 -2
package/dist/evaluation/validation/index.d.ts +7 -2
package/dist/evaluation/validation/index.js +91 -7
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +533 -187
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +53 -10
package/dist/index.d.ts +53 -10
package/dist/index.js +502 -193
package/dist/index.js.map +1 -1
package/package.json +6 -2
package/dist/chunk-QVS4OL44.js.map +0 -1

package/dist/index.d.cts CHANGED

@@ -91,13 +91,16 @@ declare function isGraderKind(value: unknown): value is GraderKind;
 /**
  * Test case definition sourced from AgentV specs.
  */
-interface TestCase {
+interface EvalCase {
     readonly id: string;
     readonly conversation_id?: string;
     readonly task: string;
     readonly user_segments: readonly JsonObject[];
+    readonly system_message?: string;
     readonly expected_assistant_raw: string;
     readonly guideline_paths: readonly string[];
+    readonly guideline_patterns?: readonly string[];
+    readonly file_paths: readonly string[];
     readonly code_snippets: readonly string[];
     readonly outcome: string;
     readonly grader: GraderKind;
@@ -128,7 +131,7 @@ declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
 /**
  * Determine whether a path references guideline content (instructions or prompts).
  */
-declare function isGuidelineFile(filePath: string): boolean;
+declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
 /**
  * Extract fenced code blocks from AgentV user segments.
  */
@@ -139,13 +142,38 @@ type LoadOptions = {
 /**
  * Load eval cases from a AgentV YAML specification file.
  */
-declare function loadTestCases(testFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly TestCase[]>;
+declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
 /**
  * Build prompt inputs by consolidating user request context and guideline content.
  */
-declare function buildPromptInputs(testCase: TestCase): Promise<{
+declare function buildPromptInputs(testCase: EvalCase): Promise<{
     request: string;
     guidelines: string;
+    systemMessage?: string;
+}>;
+declare function fileExists(filePath: string): Promise<boolean>;
+/**
+ * Find git repository root by walking up the directory tree.
+ */
+declare function findGitRoot(startPath: string): Promise<string | null>;
+/**
+ * Build a chain of directories walking from a file's location up to repo root.
+ * Used for discovering configuration files like targets.yaml or config.yaml.
+ */
+declare function buildDirectoryChain(filePath: string, repoRoot: string): readonly string[];
+/**
+ * Build search roots for file resolution, matching yaml-parser behavior.
+ * Searches from eval file directory up to repo root.
+ */
+declare function buildSearchRoots(evalPath: string, repoRoot: string): readonly string[];
+/**
+ * Resolve a file reference using search roots, matching yaml-parser behavior.
+ */
+declare function resolveFileReference(rawValue: string, searchRoots: readonly string[]): Promise<{
+    readonly displayPath: string;
+    readonly resolvedPath?: string;
+    readonly attempted: readonly string[];
 }>;
 type ChatPrompt = AxChatRequest["chatPrompt"];
@@ -153,9 +181,10 @@ type ProviderKind = "azure" | "anthropic" | "gemini" | "mock" | "vscode" | "vsco
 interface ProviderRequest {
     readonly prompt: string;
     readonly guidelines?: string;
+    readonly guideline_patterns?: readonly string[];
     readonly chatPrompt?: ChatPrompt;
     readonly attachments?: readonly string[];
-    readonly testCaseId?: string;
+    readonly evalCaseId?: string;
     readonly attempt?: number;
     readonly maxOutputTokens?: number;
     readonly temperature?: number;
@@ -173,6 +202,15 @@ interface Provider {
     readonly kind: ProviderKind;
     readonly targetName: string;
     invoke(request: ProviderRequest): Promise<ProviderResponse>;
+    /**
+     * Optional capability marker for provider-managed batching (single session handling multiple requests).
+     */
+    readonly supportsBatch?: boolean;
+    /**
+     * Optional batch invocation hook. When defined alongside supportsBatch=true,
+     * the orchestrator may send multiple requests in a single provider session.
+     */
+    invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
 }
 type EnvLookup = Readonly<Record<string, string | undefined>>;
 interface TargetDefinition {
@@ -222,30 +260,35 @@ type ResolvedTarget = {
     readonly name: string;
     readonly judgeTarget?: string;
     readonly workers?: number;
+    readonly providerBatching?: boolean;
     readonly config: AzureResolvedConfig;
 } | {
     readonly kind: "anthropic";
     readonly name: string;
     readonly judgeTarget?: string;
     readonly workers?: number;
+    readonly providerBatching?: boolean;
     readonly config: AnthropicResolvedConfig;
 } | {
     readonly kind: "gemini";
     readonly name: string;
     readonly judgeTarget?: string;
     readonly workers?: number;
+    readonly providerBatching?: boolean;
     readonly config: GeminiResolvedConfig;
 } | {
     readonly kind: "mock";
     readonly name: string;
     readonly judgeTarget?: string;
     readonly workers?: number;
+    readonly providerBatching?: boolean;
     readonly config: MockResolvedConfig;
 } | {
     readonly kind: "vscode" | "vscode-insiders";
     readonly name: string;
     readonly judgeTarget?: string;
     readonly workers?: number;
+    readonly providerBatching?: boolean;
     readonly config: VSCodeResolvedConfig;
 };
 declare function resolveTargetDefinition(definition: TargetDefinition, env?: EnvLookup): ResolvedTarget;
@@ -304,7 +347,7 @@ declare function scoreCandidateResponse(candidateResponse: string, expectedAspec
 declare function isErrorLike(text: string | undefined | null): boolean;
 interface GradeContext {
-    readonly testCase: TestCase;
+    readonly evalCase: EvalCase;
     readonly candidate: string;
     readonly target: ResolvedTarget;
     readonly provider: Provider;
@@ -353,8 +396,8 @@ interface EvaluationCache {
     get(key: string): MaybePromise<ProviderResponse | undefined>;
     set(key: string, value: ProviderResponse): MaybePromise<void>;
 }
-interface RunTestCaseOptions {
-    readonly testCase: TestCase;
+interface RunEvalCaseOptions {
+    readonly evalCase: EvalCase;
     readonly provider: Provider;
     readonly target: ResolvedTarget;
     readonly graders: Partial<Record<string, Grader>>;
@@ -396,11 +439,11 @@ interface RunEvaluationOptions {
     readonly onProgress?: (event: ProgressEvent) => MaybePromise<void>;
 }
 declare function runEvaluation(options: RunEvaluationOptions): Promise<readonly EvaluationResult[]>;
-declare function runTestCase(options: RunTestCaseOptions): Promise<EvaluationResult>;
+declare function runEvalCase(options: RunEvalCaseOptions): Promise<EvaluationResult>;
 type AgentKernel = {
     status: string;
 };
 declare function createAgentKernel(): AgentKernel;
-export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type RunEvaluationOptions, type RunTestCaseOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestCase, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildPromptInputs, calculateHits, calculateMisses, createAgentKernel, createProvider, ensureVSCodeSubagents, extractAspects, extractCodeBlocks, getHitCount, isErrorLike, isGraderKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadTestCases, readTargetDefinitions, resolveAndCreateProvider, resolveTargetDefinition, runEvaluation, runTestCase, scoreCandidateResponse };
+export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, calculateHits, calculateMisses, createAgentKernel, createProvider, ensureVSCodeSubagents, extractAspects, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isErrorLike, isGraderKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreCandidateResponse };

package/dist/index.d.ts CHANGED

@@ -91,13 +91,16 @@ declare function isGraderKind(value: unknown): value is GraderKind;
 /**
  * Test case definition sourced from AgentV specs.
  */
-interface TestCase {
+interface EvalCase {
     readonly id: string;
     readonly conversation_id?: string;
     readonly task: string;
     readonly user_segments: readonly JsonObject[];
+    readonly system_message?: string;
     readonly expected_assistant_raw: string;
     readonly guideline_paths: readonly string[];
+    readonly guideline_patterns?: readonly string[];
+    readonly file_paths: readonly string[];
     readonly code_snippets: readonly string[];
     readonly outcome: string;
     readonly grader: GraderKind;
@@ -128,7 +131,7 @@ declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
 /**
  * Determine whether a path references guideline content (instructions or prompts).
  */
-declare function isGuidelineFile(filePath: string): boolean;
+declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
 /**
  * Extract fenced code blocks from AgentV user segments.
  */
@@ -139,13 +142,38 @@ type LoadOptions = {
 /**
  * Load eval cases from a AgentV YAML specification file.
  */
-declare function loadTestCases(testFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly TestCase[]>;
+declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
 /**
  * Build prompt inputs by consolidating user request context and guideline content.
  */
-declare function buildPromptInputs(testCase: TestCase): Promise<{
+declare function buildPromptInputs(testCase: EvalCase): Promise<{
     request: string;
     guidelines: string;
+    systemMessage?: string;
+}>;
+declare function fileExists(filePath: string): Promise<boolean>;
+/**
+ * Find git repository root by walking up the directory tree.
+ */
+declare function findGitRoot(startPath: string): Promise<string | null>;
+/**
+ * Build a chain of directories walking from a file's location up to repo root.
+ * Used for discovering configuration files like targets.yaml or config.yaml.
+ */
+declare function buildDirectoryChain(filePath: string, repoRoot: string): readonly string[];
+/**
+ * Build search roots for file resolution, matching yaml-parser behavior.
+ * Searches from eval file directory up to repo root.
+ */
+declare function buildSearchRoots(evalPath: string, repoRoot: string): readonly string[];
+/**
+ * Resolve a file reference using search roots, matching yaml-parser behavior.
+ */
+declare function resolveFileReference(rawValue: string, searchRoots: readonly string[]): Promise<{
+    readonly displayPath: string;
+    readonly resolvedPath?: string;
+    readonly attempted: readonly string[];
 }>;
 type ChatPrompt = AxChatRequest["chatPrompt"];
@@ -153,9 +181,10 @@ type ProviderKind = "azure" | "anthropic" | "gemini" | "mock" | "vscode" | "vsco
 interface ProviderRequest {
     readonly prompt: string;
     readonly guidelines?: string;
+    readonly guideline_patterns?: readonly string[];
     readonly chatPrompt?: ChatPrompt;
     readonly attachments?: readonly string[];
-    readonly testCaseId?: string;
+    readonly evalCaseId?: string;
     readonly attempt?: number;
     readonly maxOutputTokens?: number;
     readonly temperature?: number;
@@ -173,6 +202,15 @@ interface Provider {
     readonly kind: ProviderKind;
     readonly targetName: string;
     invoke(request: ProviderRequest): Promise<ProviderResponse>;
+    /**
+     * Optional capability marker for provider-managed batching (single session handling multiple requests).
+     */
+    readonly supportsBatch?: boolean;
+    /**
+     * Optional batch invocation hook. When defined alongside supportsBatch=true,
+     * the orchestrator may send multiple requests in a single provider session.
+     */
+    invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
 }
 type EnvLookup = Readonly<Record<string, string | undefined>>;
 interface TargetDefinition {
@@ -222,30 +260,35 @@ type ResolvedTarget = {
     readonly name: string;
     readonly judgeTarget?: string;
     readonly workers?: number;
+    readonly providerBatching?: boolean;
     readonly config: AzureResolvedConfig;
 } | {
     readonly kind: "anthropic";
     readonly name: string;
     readonly judgeTarget?: string;
     readonly workers?: number;
+    readonly providerBatching?: boolean;
     readonly config: AnthropicResolvedConfig;
 } | {
     readonly kind: "gemini";
     readonly name: string;
     readonly judgeTarget?: string;
     readonly workers?: number;
+    readonly providerBatching?: boolean;
     readonly config: GeminiResolvedConfig;
 } | {
     readonly kind: "mock";
     readonly name: string;
     readonly judgeTarget?: string;
     readonly workers?: number;
+    readonly providerBatching?: boolean;
     readonly config: MockResolvedConfig;
 } | {
     readonly kind: "vscode" | "vscode-insiders";
     readonly name: string;
     readonly judgeTarget?: string;
     readonly workers?: number;
+    readonly providerBatching?: boolean;
     readonly config: VSCodeResolvedConfig;
 };
 declare function resolveTargetDefinition(definition: TargetDefinition, env?: EnvLookup): ResolvedTarget;
@@ -304,7 +347,7 @@ declare function scoreCandidateResponse(candidateResponse: string, expectedAspec
 declare function isErrorLike(text: string | undefined | null): boolean;
 interface GradeContext {
-    readonly testCase: TestCase;
+    readonly evalCase: EvalCase;
     readonly candidate: string;
     readonly target: ResolvedTarget;
     readonly provider: Provider;
@@ -353,8 +396,8 @@ interface EvaluationCache {
     get(key: string): MaybePromise<ProviderResponse | undefined>;
     set(key: string, value: ProviderResponse): MaybePromise<void>;
 }
-interface RunTestCaseOptions {
-    readonly testCase: TestCase;
+interface RunEvalCaseOptions {
+    readonly evalCase: EvalCase;
     readonly provider: Provider;
     readonly target: ResolvedTarget;
     readonly graders: Partial<Record<string, Grader>>;
@@ -396,11 +439,11 @@ interface RunEvaluationOptions {
     readonly onProgress?: (event: ProgressEvent) => MaybePromise<void>;
 }
 declare function runEvaluation(options: RunEvaluationOptions): Promise<readonly EvaluationResult[]>;
-declare function runTestCase(options: RunTestCaseOptions): Promise<EvaluationResult>;
+declare function runEvalCase(options: RunEvalCaseOptions): Promise<EvaluationResult>;
 type AgentKernel = {
     status: string;
 };
 declare function createAgentKernel(): AgentKernel;
-export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type RunEvaluationOptions, type RunTestCaseOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestCase, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildPromptInputs, calculateHits, calculateMisses, createAgentKernel, createProvider, ensureVSCodeSubagents, extractAspects, extractCodeBlocks, getHitCount, isErrorLike, isGraderKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadTestCases, readTargetDefinitions, resolveAndCreateProvider, resolveTargetDefinition, runEvaluation, runTestCase, scoreCandidateResponse };
+export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, calculateHits, calculateMisses, createAgentKernel, createProvider, ensureVSCodeSubagents, extractAspects, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isErrorLike, isGraderKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreCandidateResponse };