@agentv/core 0.5.3 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-NL7K4CAK.js → chunk-L7I5UTJU.js} +7 -2
- package/dist/chunk-L7I5UTJU.js.map +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +260 -114
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +34 -10
- package/dist/index.d.ts +34 -10
- package/dist/index.js +255 -115
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/dist/chunk-NL7K4CAK.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { AxChatRequest } from '@ax-llm/ax';
|
|
1
|
+
import { AxChatRequest, AxAI } from '@ax-llm/ax';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* JSON primitive values appearing in AgentV payloads.
|
|
@@ -99,16 +99,18 @@ type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig;
|
|
|
99
99
|
*/
|
|
100
100
|
interface EvalCase {
|
|
101
101
|
readonly id: string;
|
|
102
|
+
readonly dataset?: string;
|
|
102
103
|
readonly conversation_id?: string;
|
|
103
|
-
readonly
|
|
104
|
-
readonly
|
|
104
|
+
readonly question: string;
|
|
105
|
+
readonly input_segments: readonly JsonObject[];
|
|
106
|
+
readonly output_segments: readonly JsonObject[];
|
|
105
107
|
readonly system_message?: string;
|
|
106
|
-
readonly
|
|
108
|
+
readonly reference_answer: string;
|
|
107
109
|
readonly guideline_paths: readonly string[];
|
|
108
110
|
readonly guideline_patterns?: readonly string[];
|
|
109
111
|
readonly file_paths: readonly string[];
|
|
110
112
|
readonly code_snippets: readonly string[];
|
|
111
|
-
readonly
|
|
113
|
+
readonly expected_outcome: string;
|
|
112
114
|
readonly evaluator?: EvaluatorKind;
|
|
113
115
|
readonly evaluators?: readonly EvaluatorConfig[];
|
|
114
116
|
}
|
|
@@ -117,11 +119,12 @@ interface EvalCase {
|
|
|
117
119
|
*/
|
|
118
120
|
interface EvaluationResult {
|
|
119
121
|
readonly eval_id: string;
|
|
122
|
+
readonly dataset?: string;
|
|
120
123
|
readonly conversation_id?: string;
|
|
121
124
|
readonly score: number;
|
|
122
125
|
readonly hits: readonly string[];
|
|
123
126
|
readonly misses: readonly string[];
|
|
124
|
-
readonly
|
|
127
|
+
readonly candidate_answer: string;
|
|
125
128
|
readonly expected_aspect_count: number;
|
|
126
129
|
readonly target: string;
|
|
127
130
|
readonly timestamp: string;
|
|
@@ -165,12 +168,17 @@ declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, opt
|
|
|
165
168
|
* Build prompt inputs by consolidating user request context and guideline content.
|
|
166
169
|
*/
|
|
167
170
|
declare function buildPromptInputs(testCase: EvalCase): Promise<{
|
|
168
|
-
|
|
171
|
+
question: string;
|
|
169
172
|
guidelines: string;
|
|
170
173
|
systemMessage?: string;
|
|
171
174
|
}>;
|
|
172
175
|
|
|
173
176
|
declare function fileExists(filePath: string): Promise<boolean>;
|
|
177
|
+
/**
|
|
178
|
+
* Read a text file and normalize line endings to LF (\n).
|
|
179
|
+
* This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
|
|
180
|
+
*/
|
|
181
|
+
declare function readTextFile(filePath: string): Promise<string>;
|
|
174
182
|
/**
|
|
175
183
|
* Find git repository root by walking up the directory tree.
|
|
176
184
|
*/
|
|
@@ -197,7 +205,7 @@ declare function resolveFileReference(rawValue: string, searchRoots: readonly st
|
|
|
197
205
|
type ChatPrompt = AxChatRequest["chatPrompt"];
|
|
198
206
|
type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
|
|
199
207
|
interface ProviderRequest {
|
|
200
|
-
readonly
|
|
208
|
+
readonly question: string;
|
|
201
209
|
readonly guidelines?: string;
|
|
202
210
|
readonly guideline_patterns?: readonly string[];
|
|
203
211
|
readonly chatPrompt?: ChatPrompt;
|
|
@@ -229,6 +237,11 @@ interface Provider {
|
|
|
229
237
|
* the orchestrator may send multiple requests in a single provider session.
|
|
230
238
|
*/
|
|
231
239
|
invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
|
|
240
|
+
/**
|
|
241
|
+
* Optional access to the underlying AxAI instance.
|
|
242
|
+
* This enables using advanced Ax features like structured output signatures.
|
|
243
|
+
*/
|
|
244
|
+
getAxAI?(): AxAI;
|
|
232
245
|
}
|
|
233
246
|
type EnvLookup = Readonly<Record<string, string | undefined>>;
|
|
234
247
|
interface TargetDefinition {
|
|
@@ -372,6 +385,16 @@ interface EnsureSubagentsResult {
|
|
|
372
385
|
*/
|
|
373
386
|
declare function ensureVSCodeSubagents(options: EnsureSubagentsOptions): Promise<EnsureSubagentsResult>;
|
|
374
387
|
|
|
388
|
+
type CodexLogEntry = {
|
|
389
|
+
readonly filePath: string;
|
|
390
|
+
readonly evalCaseId?: string;
|
|
391
|
+
readonly targetName: string;
|
|
392
|
+
readonly attempt?: number;
|
|
393
|
+
};
|
|
394
|
+
type CodexLogListener = (entry: CodexLogEntry) => void;
|
|
395
|
+
declare function consumeCodexLogEntries(): CodexLogEntry[];
|
|
396
|
+
declare function subscribeToCodexLogEntries(listener: CodexLogListener): () => void;
|
|
397
|
+
|
|
375
398
|
declare function createProvider(target: ResolvedTarget): Provider;
|
|
376
399
|
declare function resolveAndCreateProvider(definition: TargetDefinition, env?: EnvLookup): Provider;
|
|
377
400
|
|
|
@@ -382,7 +405,7 @@ interface EvaluationContext {
|
|
|
382
405
|
readonly provider: Provider;
|
|
383
406
|
readonly attempt: number;
|
|
384
407
|
readonly promptInputs: {
|
|
385
|
-
readonly
|
|
408
|
+
readonly question: string;
|
|
386
409
|
readonly guidelines: string;
|
|
387
410
|
readonly systemMessage?: string;
|
|
388
411
|
};
|
|
@@ -420,6 +443,7 @@ declare class LlmJudgeEvaluator implements Evaluator {
|
|
|
420
443
|
private readonly customPrompt?;
|
|
421
444
|
constructor(options: LlmJudgeEvaluatorOptions);
|
|
422
445
|
evaluate(context: EvaluationContext): Promise<EvaluationScore>;
|
|
446
|
+
private evaluateWithPrompt;
|
|
423
447
|
}
|
|
424
448
|
interface CodeEvaluatorOptions {
|
|
425
449
|
readonly script: string;
|
|
@@ -492,4 +516,4 @@ type AgentKernel = {
|
|
|
492
516
|
};
|
|
493
517
|
declare function createAgentKernel(): AgentKernel;
|
|
494
518
|
|
|
495
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation };
|
|
519
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { AxChatRequest } from '@ax-llm/ax';
|
|
1
|
+
import { AxChatRequest, AxAI } from '@ax-llm/ax';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* JSON primitive values appearing in AgentV payloads.
|
|
@@ -99,16 +99,18 @@ type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig;
|
|
|
99
99
|
*/
|
|
100
100
|
interface EvalCase {
|
|
101
101
|
readonly id: string;
|
|
102
|
+
readonly dataset?: string;
|
|
102
103
|
readonly conversation_id?: string;
|
|
103
|
-
readonly
|
|
104
|
-
readonly
|
|
104
|
+
readonly question: string;
|
|
105
|
+
readonly input_segments: readonly JsonObject[];
|
|
106
|
+
readonly output_segments: readonly JsonObject[];
|
|
105
107
|
readonly system_message?: string;
|
|
106
|
-
readonly
|
|
108
|
+
readonly reference_answer: string;
|
|
107
109
|
readonly guideline_paths: readonly string[];
|
|
108
110
|
readonly guideline_patterns?: readonly string[];
|
|
109
111
|
readonly file_paths: readonly string[];
|
|
110
112
|
readonly code_snippets: readonly string[];
|
|
111
|
-
readonly
|
|
113
|
+
readonly expected_outcome: string;
|
|
112
114
|
readonly evaluator?: EvaluatorKind;
|
|
113
115
|
readonly evaluators?: readonly EvaluatorConfig[];
|
|
114
116
|
}
|
|
@@ -117,11 +119,12 @@ interface EvalCase {
|
|
|
117
119
|
*/
|
|
118
120
|
interface EvaluationResult {
|
|
119
121
|
readonly eval_id: string;
|
|
122
|
+
readonly dataset?: string;
|
|
120
123
|
readonly conversation_id?: string;
|
|
121
124
|
readonly score: number;
|
|
122
125
|
readonly hits: readonly string[];
|
|
123
126
|
readonly misses: readonly string[];
|
|
124
|
-
readonly
|
|
127
|
+
readonly candidate_answer: string;
|
|
125
128
|
readonly expected_aspect_count: number;
|
|
126
129
|
readonly target: string;
|
|
127
130
|
readonly timestamp: string;
|
|
@@ -165,12 +168,17 @@ declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, opt
|
|
|
165
168
|
* Build prompt inputs by consolidating user request context and guideline content.
|
|
166
169
|
*/
|
|
167
170
|
declare function buildPromptInputs(testCase: EvalCase): Promise<{
|
|
168
|
-
|
|
171
|
+
question: string;
|
|
169
172
|
guidelines: string;
|
|
170
173
|
systemMessage?: string;
|
|
171
174
|
}>;
|
|
172
175
|
|
|
173
176
|
declare function fileExists(filePath: string): Promise<boolean>;
|
|
177
|
+
/**
|
|
178
|
+
* Read a text file and normalize line endings to LF (\n).
|
|
179
|
+
* This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
|
|
180
|
+
*/
|
|
181
|
+
declare function readTextFile(filePath: string): Promise<string>;
|
|
174
182
|
/**
|
|
175
183
|
* Find git repository root by walking up the directory tree.
|
|
176
184
|
*/
|
|
@@ -197,7 +205,7 @@ declare function resolveFileReference(rawValue: string, searchRoots: readonly st
|
|
|
197
205
|
type ChatPrompt = AxChatRequest["chatPrompt"];
|
|
198
206
|
type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
|
|
199
207
|
interface ProviderRequest {
|
|
200
|
-
readonly
|
|
208
|
+
readonly question: string;
|
|
201
209
|
readonly guidelines?: string;
|
|
202
210
|
readonly guideline_patterns?: readonly string[];
|
|
203
211
|
readonly chatPrompt?: ChatPrompt;
|
|
@@ -229,6 +237,11 @@ interface Provider {
|
|
|
229
237
|
* the orchestrator may send multiple requests in a single provider session.
|
|
230
238
|
*/
|
|
231
239
|
invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
|
|
240
|
+
/**
|
|
241
|
+
* Optional access to the underlying AxAI instance.
|
|
242
|
+
* This enables using advanced Ax features like structured output signatures.
|
|
243
|
+
*/
|
|
244
|
+
getAxAI?(): AxAI;
|
|
232
245
|
}
|
|
233
246
|
type EnvLookup = Readonly<Record<string, string | undefined>>;
|
|
234
247
|
interface TargetDefinition {
|
|
@@ -372,6 +385,16 @@ interface EnsureSubagentsResult {
|
|
|
372
385
|
*/
|
|
373
386
|
declare function ensureVSCodeSubagents(options: EnsureSubagentsOptions): Promise<EnsureSubagentsResult>;
|
|
374
387
|
|
|
388
|
+
type CodexLogEntry = {
|
|
389
|
+
readonly filePath: string;
|
|
390
|
+
readonly evalCaseId?: string;
|
|
391
|
+
readonly targetName: string;
|
|
392
|
+
readonly attempt?: number;
|
|
393
|
+
};
|
|
394
|
+
type CodexLogListener = (entry: CodexLogEntry) => void;
|
|
395
|
+
declare function consumeCodexLogEntries(): CodexLogEntry[];
|
|
396
|
+
declare function subscribeToCodexLogEntries(listener: CodexLogListener): () => void;
|
|
397
|
+
|
|
375
398
|
declare function createProvider(target: ResolvedTarget): Provider;
|
|
376
399
|
declare function resolveAndCreateProvider(definition: TargetDefinition, env?: EnvLookup): Provider;
|
|
377
400
|
|
|
@@ -382,7 +405,7 @@ interface EvaluationContext {
|
|
|
382
405
|
readonly provider: Provider;
|
|
383
406
|
readonly attempt: number;
|
|
384
407
|
readonly promptInputs: {
|
|
385
|
-
readonly
|
|
408
|
+
readonly question: string;
|
|
386
409
|
readonly guidelines: string;
|
|
387
410
|
readonly systemMessage?: string;
|
|
388
411
|
};
|
|
@@ -420,6 +443,7 @@ declare class LlmJudgeEvaluator implements Evaluator {
|
|
|
420
443
|
private readonly customPrompt?;
|
|
421
444
|
constructor(options: LlmJudgeEvaluatorOptions);
|
|
422
445
|
evaluate(context: EvaluationContext): Promise<EvaluationScore>;
|
|
446
|
+
private evaluateWithPrompt;
|
|
423
447
|
}
|
|
424
448
|
interface CodeEvaluatorOptions {
|
|
425
449
|
readonly script: string;
|
|
@@ -492,4 +516,4 @@ type AgentKernel = {
|
|
|
492
516
|
};
|
|
493
517
|
declare function createAgentKernel(): AgentKernel;
|
|
494
518
|
|
|
495
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation };
|
|
519
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|