@agentv/core 0.2.6 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-QVS4OL44.js → chunk-XXNQA4EW.js} +27 -1
- package/dist/chunk-XXNQA4EW.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +93 -8
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.d.cts +7 -2
- package/dist/evaluation/validation/index.d.ts +7 -2
- package/dist/evaluation/validation/index.js +91 -7
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +274 -182
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +39 -10
- package/dist/index.d.ts +39 -10
- package/dist/index.js +237 -187
- package/dist/index.js.map +1 -1
- package/package.json +6 -2
- package/dist/chunk-QVS4OL44.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -91,13 +91,16 @@ declare function isGraderKind(value: unknown): value is GraderKind;
|
|
|
91
91
|
/**
|
|
92
92
|
* Test case definition sourced from AgentV specs.
|
|
93
93
|
*/
|
|
94
|
-
interface
|
|
94
|
+
interface EvalCase {
|
|
95
95
|
readonly id: string;
|
|
96
96
|
readonly conversation_id?: string;
|
|
97
97
|
readonly task: string;
|
|
98
98
|
readonly user_segments: readonly JsonObject[];
|
|
99
|
+
readonly system_message?: string;
|
|
99
100
|
readonly expected_assistant_raw: string;
|
|
100
101
|
readonly guideline_paths: readonly string[];
|
|
102
|
+
readonly guideline_patterns?: readonly string[];
|
|
103
|
+
readonly file_paths: readonly string[];
|
|
101
104
|
readonly code_snippets: readonly string[];
|
|
102
105
|
readonly outcome: string;
|
|
103
106
|
readonly grader: GraderKind;
|
|
@@ -128,7 +131,7 @@ declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
|
|
|
128
131
|
/**
|
|
129
132
|
* Determine whether a path references guideline content (instructions or prompts).
|
|
130
133
|
*/
|
|
131
|
-
declare function isGuidelineFile(filePath: string): boolean;
|
|
134
|
+
declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
|
|
132
135
|
/**
|
|
133
136
|
* Extract fenced code blocks from AgentV user segments.
|
|
134
137
|
*/
|
|
@@ -139,13 +142,38 @@ type LoadOptions = {
|
|
|
139
142
|
/**
|
|
140
143
|
* Load eval cases from an AgentV YAML specification file.
|
|
141
144
|
*/
|
|
142
|
-
declare function
|
|
145
|
+
declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
|
|
143
146
|
/**
|
|
144
147
|
* Build prompt inputs by consolidating user request context and guideline content.
|
|
145
148
|
*/
|
|
146
|
-
declare function buildPromptInputs(testCase:
|
|
149
|
+
declare function buildPromptInputs(testCase: EvalCase): Promise<{
|
|
147
150
|
request: string;
|
|
148
151
|
guidelines: string;
|
|
152
|
+
systemMessage?: string;
|
|
153
|
+
}>;
|
|
154
|
+
|
|
155
|
+
declare function fileExists(filePath: string): Promise<boolean>;
|
|
156
|
+
/**
|
|
157
|
+
* Find git repository root by walking up the directory tree.
|
|
158
|
+
*/
|
|
159
|
+
declare function findGitRoot(startPath: string): Promise<string | null>;
|
|
160
|
+
/**
|
|
161
|
+
* Build a chain of directories walking from a file's location up to repo root.
|
|
162
|
+
* Used for discovering configuration files like targets.yaml or config.yaml.
|
|
163
|
+
*/
|
|
164
|
+
declare function buildDirectoryChain(filePath: string, repoRoot: string): readonly string[];
|
|
165
|
+
/**
|
|
166
|
+
* Build search roots for file resolution, matching yaml-parser behavior.
|
|
167
|
+
* Searches from eval file directory up to repo root.
|
|
168
|
+
*/
|
|
169
|
+
declare function buildSearchRoots(evalPath: string, repoRoot: string): readonly string[];
|
|
170
|
+
/**
|
|
171
|
+
* Resolve a file reference using search roots, matching yaml-parser behavior.
|
|
172
|
+
*/
|
|
173
|
+
declare function resolveFileReference(rawValue: string, searchRoots: readonly string[]): Promise<{
|
|
174
|
+
readonly displayPath: string;
|
|
175
|
+
readonly resolvedPath?: string;
|
|
176
|
+
readonly attempted: readonly string[];
|
|
149
177
|
}>;
|
|
150
178
|
|
|
151
179
|
type ChatPrompt = AxChatRequest["chatPrompt"];
|
|
@@ -153,9 +181,10 @@ type ProviderKind = "azure" | "anthropic" | "gemini" | "mock" | "vscode" | "vsco
|
|
|
153
181
|
interface ProviderRequest {
|
|
154
182
|
readonly prompt: string;
|
|
155
183
|
readonly guidelines?: string;
|
|
184
|
+
readonly guideline_patterns?: readonly string[];
|
|
156
185
|
readonly chatPrompt?: ChatPrompt;
|
|
157
186
|
readonly attachments?: readonly string[];
|
|
158
|
-
readonly
|
|
187
|
+
readonly evalCaseId?: string;
|
|
159
188
|
readonly attempt?: number;
|
|
160
189
|
readonly maxOutputTokens?: number;
|
|
161
190
|
readonly temperature?: number;
|
|
@@ -304,7 +333,7 @@ declare function scoreCandidateResponse(candidateResponse: string, expectedAspec
|
|
|
304
333
|
declare function isErrorLike(text: string | undefined | null): boolean;
|
|
305
334
|
|
|
306
335
|
interface GradeContext {
|
|
307
|
-
readonly
|
|
336
|
+
readonly evalCase: EvalCase;
|
|
308
337
|
readonly candidate: string;
|
|
309
338
|
readonly target: ResolvedTarget;
|
|
310
339
|
readonly provider: Provider;
|
|
@@ -353,8 +382,8 @@ interface EvaluationCache {
|
|
|
353
382
|
get(key: string): MaybePromise<ProviderResponse | undefined>;
|
|
354
383
|
set(key: string, value: ProviderResponse): MaybePromise<void>;
|
|
355
384
|
}
|
|
356
|
-
interface
|
|
357
|
-
readonly
|
|
385
|
+
interface RunEvalCaseOptions {
|
|
386
|
+
readonly evalCase: EvalCase;
|
|
358
387
|
readonly provider: Provider;
|
|
359
388
|
readonly target: ResolvedTarget;
|
|
360
389
|
readonly graders: Partial<Record<string, Grader>>;
|
|
@@ -396,11 +425,11 @@ interface RunEvaluationOptions {
|
|
|
396
425
|
readonly onProgress?: (event: ProgressEvent) => MaybePromise<void>;
|
|
397
426
|
}
|
|
398
427
|
declare function runEvaluation(options: RunEvaluationOptions): Promise<readonly EvaluationResult[]>;
|
|
399
|
-
declare function
|
|
428
|
+
declare function runEvalCase(options: RunEvalCaseOptions): Promise<EvaluationResult>;
|
|
400
429
|
|
|
401
430
|
type AgentKernel = {
|
|
402
431
|
status: string;
|
|
403
432
|
};
|
|
404
433
|
declare function createAgentKernel(): AgentKernel;
|
|
405
434
|
|
|
406
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type
|
|
435
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, calculateHits, calculateMisses, createAgentKernel, createProvider, ensureVSCodeSubagents, extractAspects, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isErrorLike, isGraderKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreCandidateResponse };
|
package/dist/index.d.ts
CHANGED
|
@@ -91,13 +91,16 @@ declare function isGraderKind(value: unknown): value is GraderKind;
|
|
|
91
91
|
/**
|
|
92
92
|
* Test case definition sourced from AgentV specs.
|
|
93
93
|
*/
|
|
94
|
-
interface
|
|
94
|
+
interface EvalCase {
|
|
95
95
|
readonly id: string;
|
|
96
96
|
readonly conversation_id?: string;
|
|
97
97
|
readonly task: string;
|
|
98
98
|
readonly user_segments: readonly JsonObject[];
|
|
99
|
+
readonly system_message?: string;
|
|
99
100
|
readonly expected_assistant_raw: string;
|
|
100
101
|
readonly guideline_paths: readonly string[];
|
|
102
|
+
readonly guideline_patterns?: readonly string[];
|
|
103
|
+
readonly file_paths: readonly string[];
|
|
101
104
|
readonly code_snippets: readonly string[];
|
|
102
105
|
readonly outcome: string;
|
|
103
106
|
readonly grader: GraderKind;
|
|
@@ -128,7 +131,7 @@ declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
|
|
|
128
131
|
/**
|
|
129
132
|
* Determine whether a path references guideline content (instructions or prompts).
|
|
130
133
|
*/
|
|
131
|
-
declare function isGuidelineFile(filePath: string): boolean;
|
|
134
|
+
declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
|
|
132
135
|
/**
|
|
133
136
|
* Extract fenced code blocks from AgentV user segments.
|
|
134
137
|
*/
|
|
@@ -139,13 +142,38 @@ type LoadOptions = {
|
|
|
139
142
|
/**
|
|
140
143
|
* Load eval cases from an AgentV YAML specification file.
|
|
141
144
|
*/
|
|
142
|
-
declare function
|
|
145
|
+
declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
|
|
143
146
|
/**
|
|
144
147
|
* Build prompt inputs by consolidating user request context and guideline content.
|
|
145
148
|
*/
|
|
146
|
-
declare function buildPromptInputs(testCase:
|
|
149
|
+
declare function buildPromptInputs(testCase: EvalCase): Promise<{
|
|
147
150
|
request: string;
|
|
148
151
|
guidelines: string;
|
|
152
|
+
systemMessage?: string;
|
|
153
|
+
}>;
|
|
154
|
+
|
|
155
|
+
declare function fileExists(filePath: string): Promise<boolean>;
|
|
156
|
+
/**
|
|
157
|
+
* Find git repository root by walking up the directory tree.
|
|
158
|
+
*/
|
|
159
|
+
declare function findGitRoot(startPath: string): Promise<string | null>;
|
|
160
|
+
/**
|
|
161
|
+
* Build a chain of directories walking from a file's location up to repo root.
|
|
162
|
+
* Used for discovering configuration files like targets.yaml or config.yaml.
|
|
163
|
+
*/
|
|
164
|
+
declare function buildDirectoryChain(filePath: string, repoRoot: string): readonly string[];
|
|
165
|
+
/**
|
|
166
|
+
* Build search roots for file resolution, matching yaml-parser behavior.
|
|
167
|
+
* Searches from eval file directory up to repo root.
|
|
168
|
+
*/
|
|
169
|
+
declare function buildSearchRoots(evalPath: string, repoRoot: string): readonly string[];
|
|
170
|
+
/**
|
|
171
|
+
* Resolve a file reference using search roots, matching yaml-parser behavior.
|
|
172
|
+
*/
|
|
173
|
+
declare function resolveFileReference(rawValue: string, searchRoots: readonly string[]): Promise<{
|
|
174
|
+
readonly displayPath: string;
|
|
175
|
+
readonly resolvedPath?: string;
|
|
176
|
+
readonly attempted: readonly string[];
|
|
149
177
|
}>;
|
|
150
178
|
|
|
151
179
|
type ChatPrompt = AxChatRequest["chatPrompt"];
|
|
@@ -153,9 +181,10 @@ type ProviderKind = "azure" | "anthropic" | "gemini" | "mock" | "vscode" | "vsco
|
|
|
153
181
|
interface ProviderRequest {
|
|
154
182
|
readonly prompt: string;
|
|
155
183
|
readonly guidelines?: string;
|
|
184
|
+
readonly guideline_patterns?: readonly string[];
|
|
156
185
|
readonly chatPrompt?: ChatPrompt;
|
|
157
186
|
readonly attachments?: readonly string[];
|
|
158
|
-
readonly
|
|
187
|
+
readonly evalCaseId?: string;
|
|
159
188
|
readonly attempt?: number;
|
|
160
189
|
readonly maxOutputTokens?: number;
|
|
161
190
|
readonly temperature?: number;
|
|
@@ -304,7 +333,7 @@ declare function scoreCandidateResponse(candidateResponse: string, expectedAspec
|
|
|
304
333
|
declare function isErrorLike(text: string | undefined | null): boolean;
|
|
305
334
|
|
|
306
335
|
interface GradeContext {
|
|
307
|
-
readonly
|
|
336
|
+
readonly evalCase: EvalCase;
|
|
308
337
|
readonly candidate: string;
|
|
309
338
|
readonly target: ResolvedTarget;
|
|
310
339
|
readonly provider: Provider;
|
|
@@ -353,8 +382,8 @@ interface EvaluationCache {
|
|
|
353
382
|
get(key: string): MaybePromise<ProviderResponse | undefined>;
|
|
354
383
|
set(key: string, value: ProviderResponse): MaybePromise<void>;
|
|
355
384
|
}
|
|
356
|
-
interface
|
|
357
|
-
readonly
|
|
385
|
+
interface RunEvalCaseOptions {
|
|
386
|
+
readonly evalCase: EvalCase;
|
|
358
387
|
readonly provider: Provider;
|
|
359
388
|
readonly target: ResolvedTarget;
|
|
360
389
|
readonly graders: Partial<Record<string, Grader>>;
|
|
@@ -396,11 +425,11 @@ interface RunEvaluationOptions {
|
|
|
396
425
|
readonly onProgress?: (event: ProgressEvent) => MaybePromise<void>;
|
|
397
426
|
}
|
|
398
427
|
declare function runEvaluation(options: RunEvaluationOptions): Promise<readonly EvaluationResult[]>;
|
|
399
|
-
declare function
|
|
428
|
+
declare function runEvalCase(options: RunEvalCaseOptions): Promise<EvaluationResult>;
|
|
400
429
|
|
|
401
430
|
type AgentKernel = {
|
|
402
431
|
status: string;
|
|
403
432
|
};
|
|
404
433
|
declare function createAgentKernel(): AgentKernel;
|
|
405
434
|
|
|
406
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type
|
|
435
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationResult, GRADER_KINDS, type GeminiResolvedConfig, type GradeContext, type GradeResult, type Grader, type GraderKind, HeuristicGrader, type HeuristicScore, type JsonObject, type JsonPrimitive, type JsonValue, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, QualityGrader, type QualityGraderOptions, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, calculateHits, calculateMisses, createAgentKernel, createProvider, ensureVSCodeSubagents, extractAspects, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isErrorLike, isGraderKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreCandidateResponse };
|