@agentv/core 0.2.6 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -91,13 +91,16 @@ declare function isGraderKind(value: unknown): value is GraderKind;
91
91
  /**
92
92
  * Test case definition sourced from AgentV specs.
93
93
  */
94
- interface TestCase {
94
+ interface EvalCase {
95
95
  readonly id: string;
96
96
  readonly conversation_id?: string;
97
97
  readonly task: string;
98
98
  readonly user_segments: readonly JsonObject[];
99
+ readonly system_message?: string;
99
100
  readonly expected_assistant_raw: string;
100
101
  readonly guideline_paths: readonly string[];
102
+ readonly guideline_patterns?: readonly string[];
103
+ readonly file_paths: readonly string[];
101
104
  readonly code_snippets: readonly string[];
102
105
  readonly outcome: string;
103
106
  readonly grader: GraderKind;
@@ -128,7 +131,7 @@ declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
128
131
  /**
129
132
  * Determine whether a path references guideline content (instructions or prompts).
130
133
  */
131
- declare function isGuidelineFile(filePath: string): boolean;
134
+ declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
132
135
  /**
133
136
  * Extract fenced code blocks from AgentV user segments.
134
137
  */
@@ -139,13 +142,38 @@ type LoadOptions = {
139
142
  /**
140
143
  * Load eval cases from an AgentV YAML specification file.
141
144
  */
142
- declare function loadTestCases(testFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly TestCase[]>;
145
+ declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
143
146
  /**
144
147
  * Build prompt inputs by consolidating user request context and guideline content.
145
148
  */
146
- declare function buildPromptInputs(testCase: TestCase): Promise<{
149
+ declare function buildPromptInputs(testCase: EvalCase): Promise<{
147
150
  request: string;
148
151
  guidelines: string;
152
+ systemMessage?: string;
153
+ }>;
154
+
155
+ declare function fileExists(filePath: string): Promise<boolean>;
156
+ /**
157
+ * Find git repository root by walking up the directory tree.
158
+ */
159
+ declare function findGitRoot(startPath: string): Promise<string | null>;
160
+ /**
161
+ * Build a chain of directories walking from a file's location up to repo root.
162
+ * Used for discovering configuration files like targets.yaml or config.yaml.
163
+ */
164
+ declare function buildDirectoryChain(filePath: string, repoRoot: string): readonly string[];
165
+ /**
166
+ * Build search roots for file resolution, matching yaml-parser behavior.
167
+ * Searches from eval file directory up to repo root.
168
+ */
169
+ declare function buildSearchRoots(evalPath: string, repoRoot: string): readonly string[];
170
+ /**
171
+ * Resolve a file reference using search roots, matching yaml-parser behavior.
172
+ */
173
+ declare function resolveFileReference(rawValue: string, searchRoots: readonly string[]): Promise<{
174
+ readonly displayPath: string;
175
+ readonly resolvedPath?: string;
176
+ readonly attempted: readonly string[];
149
177
  }>;
150
178
 
151
179
  type ChatPrompt = AxChatRequest["chatPrompt"];
@@ -153,9 +181,10 @@ type ProviderKind = "azure" | "anthropic" | "gemini" | "mock" | "vscode" | "vsco
153
181
  interface ProviderRequest {
154
182
  readonly prompt: string;
155
183
  readonly guidelines?: string;
184
+ readonly guideline_patterns?: readonly string[];
156
185
  readonly chatPrompt?: ChatPrompt;
157
186
  readonly attachments?: readonly string[];
158
- readonly testCaseId?: string;
187
+ readonly evalCaseId?: string;
159
188
  readonly attempt?: number;
160
189
  readonly maxOutputTokens?: number;
161
190
  readonly temperature?: number;
@@ -304,7 +333,7 @@ declare function scoreCandidateResponse(candidateResponse: string, expectedAspec
304
333
  declare function isErrorLike(text: string | undefined | null): boolean;
305
334
 
306
335
  interface GradeContext {
307
- readonly testCase: TestCase;
336
+ readonly evalCase: EvalCase;
308
337
  readonly candidate: string;
309
338
  readonly target: ResolvedTarget;
310
339
  readonly provider: Provider;
@@ -353,8 +382,8 @@ interface EvaluationCache {
353
382
  get(key: string): MaybePromise<ProviderResponse | undefined>;
354
383
  set(key: string, value: ProviderResponse): MaybePromise<void>;
355
384
  }
356
- interface RunTestCaseOptions {
357
- readonly testCase: TestCase;
385
+ interface RunEvalCaseOptions {
386
+ readonly evalCase: EvalCase;
358
387
  readonly provider: Provider;
359
388
  readonly target: ResolvedTarget;
360
389
  readonly graders: Partial<Record<string, Grader>>;
@@ -396,11 +425,11 @@ interface RunEvaluationOptions {
396
425
  readonly onProgress?: (event: ProgressEvent) => MaybePromise<void>;
397
426
  }
398
427
  declare function runEvaluation(options: RunEvaluationOptions): Promise<readonly EvaluationResult[]>;
399
- declare function runTestCase(options: RunTestCaseOptions): Promise<EvaluationResult>;
428
+ declare function runEvalCase(options: RunEvalCaseOptions): Promise<EvaluationResult>;
400
429
 
401
430
  type AgentKernel = {
402
431
  status: string;
403
432
  };
404
433
  declare function createAgentKernel(): AgentKernel;
405
434
 
406
- export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type RunEvaluationOptions, type RunTestCaseOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestCase, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildPromptInputs, calculateHits, calculateMisses, createAgentKernel, createProvider, ensureVSCodeSubagents, extractAspects, extractCodeBlocks, getHitCount, isErrorLike, isGraderKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadTestCases, readTargetDefinitions, resolveAndCreateProvider, resolveTargetDefinition, runEvaluation, runTestCase, scoreCandidateResponse };
435
+ export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, calculateHits, calculateMisses, createAgentKernel, createProvider, ensureVSCodeSubagents, extractAspects, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isErrorLike, isGraderKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreCandidateResponse };
package/dist/index.d.ts CHANGED
@@ -91,13 +91,16 @@ declare function isGraderKind(value: unknown): value is GraderKind;
91
91
  /**
92
92
  * Test case definition sourced from AgentV specs.
93
93
  */
94
- interface TestCase {
94
+ interface EvalCase {
95
95
  readonly id: string;
96
96
  readonly conversation_id?: string;
97
97
  readonly task: string;
98
98
  readonly user_segments: readonly JsonObject[];
99
+ readonly system_message?: string;
99
100
  readonly expected_assistant_raw: string;
100
101
  readonly guideline_paths: readonly string[];
102
+ readonly guideline_patterns?: readonly string[];
103
+ readonly file_paths: readonly string[];
101
104
  readonly code_snippets: readonly string[];
102
105
  readonly outcome: string;
103
106
  readonly grader: GraderKind;
@@ -128,7 +131,7 @@ declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
128
131
  /**
129
132
  * Determine whether a path references guideline content (instructions or prompts).
130
133
  */
131
- declare function isGuidelineFile(filePath: string): boolean;
134
+ declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
132
135
  /**
133
136
  * Extract fenced code blocks from AgentV user segments.
134
137
  */
@@ -139,13 +142,38 @@ type LoadOptions = {
139
142
  /**
140
143
  * Load eval cases from an AgentV YAML specification file.
141
144
  */
142
- declare function loadTestCases(testFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly TestCase[]>;
145
+ declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
143
146
  /**
144
147
  * Build prompt inputs by consolidating user request context and guideline content.
145
148
  */
146
- declare function buildPromptInputs(testCase: TestCase): Promise<{
149
+ declare function buildPromptInputs(testCase: EvalCase): Promise<{
147
150
  request: string;
148
151
  guidelines: string;
152
+ systemMessage?: string;
153
+ }>;
154
+
155
+ declare function fileExists(filePath: string): Promise<boolean>;
156
+ /**
157
+ * Find git repository root by walking up the directory tree.
158
+ */
159
+ declare function findGitRoot(startPath: string): Promise<string | null>;
160
+ /**
161
+ * Build a chain of directories walking from a file's location up to repo root.
162
+ * Used for discovering configuration files like targets.yaml or config.yaml.
163
+ */
164
+ declare function buildDirectoryChain(filePath: string, repoRoot: string): readonly string[];
165
+ /**
166
+ * Build search roots for file resolution, matching yaml-parser behavior.
167
+ * Searches from eval file directory up to repo root.
168
+ */
169
+ declare function buildSearchRoots(evalPath: string, repoRoot: string): readonly string[];
170
+ /**
171
+ * Resolve a file reference using search roots, matching yaml-parser behavior.
172
+ */
173
+ declare function resolveFileReference(rawValue: string, searchRoots: readonly string[]): Promise<{
174
+ readonly displayPath: string;
175
+ readonly resolvedPath?: string;
176
+ readonly attempted: readonly string[];
149
177
  }>;
150
178
 
151
179
  type ChatPrompt = AxChatRequest["chatPrompt"];
@@ -153,9 +181,10 @@ type ProviderKind = "azure" | "anthropic" | "gemini" | "mock" | "vscode" | "vsco
153
181
  interface ProviderRequest {
154
182
  readonly prompt: string;
155
183
  readonly guidelines?: string;
184
+ readonly guideline_patterns?: readonly string[];
156
185
  readonly chatPrompt?: ChatPrompt;
157
186
  readonly attachments?: readonly string[];
158
- readonly testCaseId?: string;
187
+ readonly evalCaseId?: string;
159
188
  readonly attempt?: number;
160
189
  readonly maxOutputTokens?: number;
161
190
  readonly temperature?: number;
@@ -304,7 +333,7 @@ declare function scoreCandidateResponse(candidateResponse: string, expectedAspec
304
333
  declare function isErrorLike(text: string | undefined | null): boolean;
305
334
 
306
335
  interface GradeContext {
307
- readonly testCase: TestCase;
336
+ readonly evalCase: EvalCase;
308
337
  readonly candidate: string;
309
338
  readonly target: ResolvedTarget;
310
339
  readonly provider: Provider;
@@ -353,8 +382,8 @@ interface EvaluationCache {
353
382
  get(key: string): MaybePromise<ProviderResponse | undefined>;
354
383
  set(key: string, value: ProviderResponse): MaybePromise<void>;
355
384
  }
356
- interface RunTestCaseOptions {
357
- readonly testCase: TestCase;
385
+ interface RunEvalCaseOptions {
386
+ readonly evalCase: EvalCase;
358
387
  readonly provider: Provider;
359
388
  readonly target: ResolvedTarget;
360
389
  readonly graders: Partial<Record<string, Grader>>;
@@ -396,11 +425,11 @@ interface RunEvaluationOptions {
396
425
  readonly onProgress?: (event: ProgressEvent) => MaybePromise<void>;
397
426
  }
398
427
  declare function runEvaluation(options: RunEvaluationOptions): Promise<readonly EvaluationResult[]>;
399
- declare function runTestCase(options: RunTestCaseOptions): Promise<EvaluationResult>;
428
+ declare function runEvalCase(options: RunEvalCaseOptions): Promise<EvaluationResult>;
400
429
 
401
430
  type AgentKernel = {
402
431
  status: string;
403
432
  };
404
433
  declare function createAgentKernel(): AgentKernel;
405
434
 
406
- export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type RunEvaluationOptions, type RunTestCaseOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestCase, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildPromptInputs, calculateHits, calculateMisses, createAgentKernel, createProvider, ensureVSCodeSubagents, extractAspects, extractCodeBlocks, getHitCount, isErrorLike, isGraderKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadTestCases, readTargetDefinitions, resolveAndCreateProvider, resolveTargetDefinition, runEvaluation, runTestCase, scoreCandidateResponse };
435
+ export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, calculateHits, calculateMisses, createAgentKernel, createProvider, ensureVSCodeSubagents, extractAspects, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isErrorLike, isGraderKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreCandidateResponse };