@agentv/core 0.2.6 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -91,13 +91,16 @@ declare function isGraderKind(value: unknown): value is GraderKind;
91
91
  /**
92
92
  * Test case definition sourced from AgentV specs.
93
93
  */
94
- interface TestCase {
94
+ interface EvalCase {
95
95
  readonly id: string;
96
96
  readonly conversation_id?: string;
97
97
  readonly task: string;
98
98
  readonly user_segments: readonly JsonObject[];
99
+ readonly system_message?: string;
99
100
  readonly expected_assistant_raw: string;
100
101
  readonly guideline_paths: readonly string[];
102
+ readonly guideline_patterns?: readonly string[];
103
+ readonly file_paths: readonly string[];
101
104
  readonly code_snippets: readonly string[];
102
105
  readonly outcome: string;
103
106
  readonly grader: GraderKind;
@@ -128,7 +131,7 @@ declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
128
131
  /**
129
132
  * Determine whether a path references guideline content (instructions or prompts).
130
133
  */
131
- declare function isGuidelineFile(filePath: string): boolean;
134
+ declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
132
135
  /**
133
136
  * Extract fenced code blocks from AgentV user segments.
134
137
  */
@@ -139,13 +142,38 @@ type LoadOptions = {
139
142
  /**
140
143
  * Load eval cases from an AgentV YAML specification file.
141
144
  */
142
- declare function loadTestCases(testFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly TestCase[]>;
145
+ declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
143
146
  /**
144
147
  * Build prompt inputs by consolidating user request context and guideline content.
145
148
  */
146
- declare function buildPromptInputs(testCase: TestCase): Promise<{
149
+ declare function buildPromptInputs(testCase: EvalCase): Promise<{
147
150
  request: string;
148
151
  guidelines: string;
152
+ systemMessage?: string;
153
+ }>;
154
+
155
+ declare function fileExists(filePath: string): Promise<boolean>;
156
+ /**
157
+ * Find git repository root by walking up the directory tree.
158
+ */
159
+ declare function findGitRoot(startPath: string): Promise<string | null>;
160
+ /**
161
+ * Build a chain of directories walking from a file's location up to repo root.
162
+ * Used for discovering configuration files like targets.yaml or config.yaml.
163
+ */
164
+ declare function buildDirectoryChain(filePath: string, repoRoot: string): readonly string[];
165
+ /**
166
+ * Build search roots for file resolution, matching yaml-parser behavior.
167
+ * Searches from eval file directory up to repo root.
168
+ */
169
+ declare function buildSearchRoots(evalPath: string, repoRoot: string): readonly string[];
170
+ /**
171
+ * Resolve a file reference using search roots, matching yaml-parser behavior.
172
+ */
173
+ declare function resolveFileReference(rawValue: string, searchRoots: readonly string[]): Promise<{
174
+ readonly displayPath: string;
175
+ readonly resolvedPath?: string;
176
+ readonly attempted: readonly string[];
149
177
  }>;
150
178
 
151
179
  type ChatPrompt = AxChatRequest["chatPrompt"];
@@ -153,9 +181,10 @@ type ProviderKind = "azure" | "anthropic" | "gemini" | "mock" | "vscode" | "vsco
153
181
  interface ProviderRequest {
154
182
  readonly prompt: string;
155
183
  readonly guidelines?: string;
184
+ readonly guideline_patterns?: readonly string[];
156
185
  readonly chatPrompt?: ChatPrompt;
157
186
  readonly attachments?: readonly string[];
158
- readonly testCaseId?: string;
187
+ readonly evalCaseId?: string;
159
188
  readonly attempt?: number;
160
189
  readonly maxOutputTokens?: number;
161
190
  readonly temperature?: number;
@@ -173,6 +202,15 @@ interface Provider {
173
202
  readonly kind: ProviderKind;
174
203
  readonly targetName: string;
175
204
  invoke(request: ProviderRequest): Promise<ProviderResponse>;
205
+ /**
206
+ * Optional capability marker for provider-managed batching (single session handling multiple requests).
207
+ */
208
+ readonly supportsBatch?: boolean;
209
+ /**
210
+ * Optional batch invocation hook. When defined alongside supportsBatch=true,
211
+ * the orchestrator may send multiple requests in a single provider session.
212
+ */
213
+ invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
176
214
  }
177
215
  type EnvLookup = Readonly<Record<string, string | undefined>>;
178
216
  interface TargetDefinition {
@@ -222,30 +260,35 @@ type ResolvedTarget = {
222
260
  readonly name: string;
223
261
  readonly judgeTarget?: string;
224
262
  readonly workers?: number;
263
+ readonly providerBatching?: boolean;
225
264
  readonly config: AzureResolvedConfig;
226
265
  } | {
227
266
  readonly kind: "anthropic";
228
267
  readonly name: string;
229
268
  readonly judgeTarget?: string;
230
269
  readonly workers?: number;
270
+ readonly providerBatching?: boolean;
231
271
  readonly config: AnthropicResolvedConfig;
232
272
  } | {
233
273
  readonly kind: "gemini";
234
274
  readonly name: string;
235
275
  readonly judgeTarget?: string;
236
276
  readonly workers?: number;
277
+ readonly providerBatching?: boolean;
237
278
  readonly config: GeminiResolvedConfig;
238
279
  } | {
239
280
  readonly kind: "mock";
240
281
  readonly name: string;
241
282
  readonly judgeTarget?: string;
242
283
  readonly workers?: number;
284
+ readonly providerBatching?: boolean;
243
285
  readonly config: MockResolvedConfig;
244
286
  } | {
245
287
  readonly kind: "vscode" | "vscode-insiders";
246
288
  readonly name: string;
247
289
  readonly judgeTarget?: string;
248
290
  readonly workers?: number;
291
+ readonly providerBatching?: boolean;
249
292
  readonly config: VSCodeResolvedConfig;
250
293
  };
251
294
  declare function resolveTargetDefinition(definition: TargetDefinition, env?: EnvLookup): ResolvedTarget;
@@ -304,7 +347,7 @@ declare function scoreCandidateResponse(candidateResponse: string, expectedAspec
304
347
  declare function isErrorLike(text: string | undefined | null): boolean;
305
348
 
306
349
  interface GradeContext {
307
- readonly testCase: TestCase;
350
+ readonly evalCase: EvalCase;
308
351
  readonly candidate: string;
309
352
  readonly target: ResolvedTarget;
310
353
  readonly provider: Provider;
@@ -353,8 +396,8 @@ interface EvaluationCache {
353
396
  get(key: string): MaybePromise<ProviderResponse | undefined>;
354
397
  set(key: string, value: ProviderResponse): MaybePromise<void>;
355
398
  }
356
- interface RunTestCaseOptions {
357
- readonly testCase: TestCase;
399
+ interface RunEvalCaseOptions {
400
+ readonly evalCase: EvalCase;
358
401
  readonly provider: Provider;
359
402
  readonly target: ResolvedTarget;
360
403
  readonly graders: Partial<Record<string, Grader>>;
@@ -396,11 +439,11 @@ interface RunEvaluationOptions {
396
439
  readonly onProgress?: (event: ProgressEvent) => MaybePromise<void>;
397
440
  }
398
441
  declare function runEvaluation(options: RunEvaluationOptions): Promise<readonly EvaluationResult[]>;
399
- declare function runTestCase(options: RunTestCaseOptions): Promise<EvaluationResult>;
442
+ declare function runEvalCase(options: RunEvalCaseOptions): Promise<EvaluationResult>;
400
443
 
401
444
  type AgentKernel = {
402
445
  status: string;
403
446
  };
404
447
  declare function createAgentKernel(): AgentKernel;
405
448
 
406
- export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type RunEvaluationOptions, type RunTestCaseOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestCase, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildPromptInputs, calculateHits, calculateMisses, createAgentKernel, createProvider, ensureVSCodeSubagents, extractAspects, extractCodeBlocks, getHitCount, isErrorLike, isGraderKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadTestCases, readTargetDefinitions, resolveAndCreateProvider, resolveTargetDefinition, runEvaluation, runTestCase, scoreCandidateResponse };
449
+ export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, calculateHits, calculateMisses, createAgentKernel, createProvider, ensureVSCodeSubagents, extractAspects, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isErrorLike, isGraderKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreCandidateResponse };
package/dist/index.d.ts CHANGED
@@ -91,13 +91,16 @@ declare function isGraderKind(value: unknown): value is GraderKind;
91
91
  /**
92
92
  * Test case definition sourced from AgentV specs.
93
93
  */
94
- interface TestCase {
94
+ interface EvalCase {
95
95
  readonly id: string;
96
96
  readonly conversation_id?: string;
97
97
  readonly task: string;
98
98
  readonly user_segments: readonly JsonObject[];
99
+ readonly system_message?: string;
99
100
  readonly expected_assistant_raw: string;
100
101
  readonly guideline_paths: readonly string[];
102
+ readonly guideline_patterns?: readonly string[];
103
+ readonly file_paths: readonly string[];
101
104
  readonly code_snippets: readonly string[];
102
105
  readonly outcome: string;
103
106
  readonly grader: GraderKind;
@@ -128,7 +131,7 @@ declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
128
131
  /**
129
132
  * Determine whether a path references guideline content (instructions or prompts).
130
133
  */
131
- declare function isGuidelineFile(filePath: string): boolean;
134
+ declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
132
135
  /**
133
136
  * Extract fenced code blocks from AgentV user segments.
134
137
  */
@@ -139,13 +142,38 @@ type LoadOptions = {
139
142
  /**
140
143
  * Load eval cases from an AgentV YAML specification file.
141
144
  */
142
- declare function loadTestCases(testFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly TestCase[]>;
145
+ declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
143
146
  /**
144
147
  * Build prompt inputs by consolidating user request context and guideline content.
145
148
  */
146
- declare function buildPromptInputs(testCase: TestCase): Promise<{
149
+ declare function buildPromptInputs(testCase: EvalCase): Promise<{
147
150
  request: string;
148
151
  guidelines: string;
152
+ systemMessage?: string;
153
+ }>;
154
+
155
+ declare function fileExists(filePath: string): Promise<boolean>;
156
+ /**
157
+ * Find git repository root by walking up the directory tree.
158
+ */
159
+ declare function findGitRoot(startPath: string): Promise<string | null>;
160
+ /**
161
+ * Build a chain of directories walking from a file's location up to repo root.
162
+ * Used for discovering configuration files like targets.yaml or config.yaml.
163
+ */
164
+ declare function buildDirectoryChain(filePath: string, repoRoot: string): readonly string[];
165
+ /**
166
+ * Build search roots for file resolution, matching yaml-parser behavior.
167
+ * Searches from eval file directory up to repo root.
168
+ */
169
+ declare function buildSearchRoots(evalPath: string, repoRoot: string): readonly string[];
170
+ /**
171
+ * Resolve a file reference using search roots, matching yaml-parser behavior.
172
+ */
173
+ declare function resolveFileReference(rawValue: string, searchRoots: readonly string[]): Promise<{
174
+ readonly displayPath: string;
175
+ readonly resolvedPath?: string;
176
+ readonly attempted: readonly string[];
149
177
  }>;
150
178
 
151
179
  type ChatPrompt = AxChatRequest["chatPrompt"];
@@ -153,9 +181,10 @@ type ProviderKind = "azure" | "anthropic" | "gemini" | "mock" | "vscode" | "vsco
153
181
  interface ProviderRequest {
154
182
  readonly prompt: string;
155
183
  readonly guidelines?: string;
184
+ readonly guideline_patterns?: readonly string[];
156
185
  readonly chatPrompt?: ChatPrompt;
157
186
  readonly attachments?: readonly string[];
158
- readonly testCaseId?: string;
187
+ readonly evalCaseId?: string;
159
188
  readonly attempt?: number;
160
189
  readonly maxOutputTokens?: number;
161
190
  readonly temperature?: number;
@@ -173,6 +202,15 @@ interface Provider {
173
202
  readonly kind: ProviderKind;
174
203
  readonly targetName: string;
175
204
  invoke(request: ProviderRequest): Promise<ProviderResponse>;
205
+ /**
206
+ * Optional capability marker for provider-managed batching (single session handling multiple requests).
207
+ */
208
+ readonly supportsBatch?: boolean;
209
+ /**
210
+ * Optional batch invocation hook. When defined alongside supportsBatch=true,
211
+ * the orchestrator may send multiple requests in a single provider session.
212
+ */
213
+ invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
176
214
  }
177
215
  type EnvLookup = Readonly<Record<string, string | undefined>>;
178
216
  interface TargetDefinition {
@@ -222,30 +260,35 @@ type ResolvedTarget = {
222
260
  readonly name: string;
223
261
  readonly judgeTarget?: string;
224
262
  readonly workers?: number;
263
+ readonly providerBatching?: boolean;
225
264
  readonly config: AzureResolvedConfig;
226
265
  } | {
227
266
  readonly kind: "anthropic";
228
267
  readonly name: string;
229
268
  readonly judgeTarget?: string;
230
269
  readonly workers?: number;
270
+ readonly providerBatching?: boolean;
231
271
  readonly config: AnthropicResolvedConfig;
232
272
  } | {
233
273
  readonly kind: "gemini";
234
274
  readonly name: string;
235
275
  readonly judgeTarget?: string;
236
276
  readonly workers?: number;
277
+ readonly providerBatching?: boolean;
237
278
  readonly config: GeminiResolvedConfig;
238
279
  } | {
239
280
  readonly kind: "mock";
240
281
  readonly name: string;
241
282
  readonly judgeTarget?: string;
242
283
  readonly workers?: number;
284
+ readonly providerBatching?: boolean;
243
285
  readonly config: MockResolvedConfig;
244
286
  } | {
245
287
  readonly kind: "vscode" | "vscode-insiders";
246
288
  readonly name: string;
247
289
  readonly judgeTarget?: string;
248
290
  readonly workers?: number;
291
+ readonly providerBatching?: boolean;
249
292
  readonly config: VSCodeResolvedConfig;
250
293
  };
251
294
  declare function resolveTargetDefinition(definition: TargetDefinition, env?: EnvLookup): ResolvedTarget;
@@ -304,7 +347,7 @@ declare function scoreCandidateResponse(candidateResponse: string, expectedAspec
304
347
  declare function isErrorLike(text: string | undefined | null): boolean;
305
348
 
306
349
  interface GradeContext {
307
- readonly testCase: TestCase;
350
+ readonly evalCase: EvalCase;
308
351
  readonly candidate: string;
309
352
  readonly target: ResolvedTarget;
310
353
  readonly provider: Provider;
@@ -353,8 +396,8 @@ interface EvaluationCache {
353
396
  get(key: string): MaybePromise<ProviderResponse | undefined>;
354
397
  set(key: string, value: ProviderResponse): MaybePromise<void>;
355
398
  }
356
- interface RunTestCaseOptions {
357
- readonly testCase: TestCase;
399
+ interface RunEvalCaseOptions {
400
+ readonly evalCase: EvalCase;
358
401
  readonly provider: Provider;
359
402
  readonly target: ResolvedTarget;
360
403
  readonly graders: Partial<Record<string, Grader>>;
@@ -396,11 +439,11 @@ interface RunEvaluationOptions {
396
439
  readonly onProgress?: (event: ProgressEvent) => MaybePromise<void>;
397
440
  }
398
441
  declare function runEvaluation(options: RunEvaluationOptions): Promise<readonly EvaluationResult[]>;
399
- declare function runTestCase(options: RunTestCaseOptions): Promise<EvaluationResult>;
442
+ declare function runEvalCase(options: RunEvalCaseOptions): Promise<EvaluationResult>;
400
443
 
401
444
  type AgentKernel = {
402
445
  status: string;
403
446
  };
404
447
  declare function createAgentKernel(): AgentKernel;
405
448
 
406
- export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type RunEvaluationOptions, type RunTestCaseOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestCase, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildPromptInputs, calculateHits, calculateMisses, createAgentKernel, createProvider, ensureVSCodeSubagents, extractAspects, extractCodeBlocks, getHitCount, isErrorLike, isGraderKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadTestCases, readTargetDefinitions, resolveAndCreateProvider, resolveTargetDefinition, runEvaluation, runTestCase, scoreCandidateResponse };
449
+ export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, calculateHits, calculateMisses, createAgentKernel, createProvider, ensureVSCodeSubagents, extractAspects, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isErrorLike, isGraderKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreCandidateResponse };