@agentv/core 0.7.5 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-7XM7HYRS.js → chunk-YQBJAT5I.js} +97 -67
- package/dist/chunk-YQBJAT5I.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +61 -69
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +51 -58
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +538 -192
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +136 -58
- package/dist/index.d.ts +136 -58
- package/dist/index.js +443 -127
- package/dist/index.js.map +1 -1
- package/package.json +1 -2
- package/dist/chunk-7XM7HYRS.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -102,9 +102,10 @@ interface EvalCase {
|
|
|
102
102
|
readonly dataset?: string;
|
|
103
103
|
readonly conversation_id?: string;
|
|
104
104
|
readonly question: string;
|
|
105
|
+
readonly input_messages: readonly TestMessage[];
|
|
105
106
|
readonly input_segments: readonly JsonObject[];
|
|
106
107
|
readonly output_segments: readonly JsonObject[];
|
|
107
|
-
readonly reference_answer
|
|
108
|
+
readonly reference_answer?: string;
|
|
108
109
|
readonly guideline_paths: readonly string[];
|
|
109
110
|
readonly guideline_patterns?: readonly string[];
|
|
110
111
|
readonly file_paths: readonly string[];
|
|
@@ -129,9 +130,11 @@ interface EvaluationResult {
|
|
|
129
130
|
readonly timestamp: string;
|
|
130
131
|
readonly reasoning?: string;
|
|
131
132
|
readonly raw_aspects?: readonly string[];
|
|
132
|
-
readonly
|
|
133
|
+
readonly agent_provider_request?: JsonObject;
|
|
134
|
+
readonly lm_provider_request?: JsonObject;
|
|
133
135
|
readonly evaluator_raw_request?: JsonObject;
|
|
134
136
|
readonly evaluator_results?: readonly EvaluatorResult[];
|
|
137
|
+
readonly error?: string;
|
|
135
138
|
}
|
|
136
139
|
interface EvaluatorResult {
|
|
137
140
|
readonly name: string;
|
|
@@ -148,6 +151,119 @@ interface EvaluatorResult {
|
|
|
148
151
|
*/
|
|
149
152
|
declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
|
|
150
153
|
|
|
154
|
+
type ChatPrompt = AxChatRequest["chatPrompt"];
|
|
155
|
+
type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
|
|
156
|
+
interface ProviderRequest {
|
|
157
|
+
readonly question: string;
|
|
158
|
+
readonly guidelines?: string;
|
|
159
|
+
readonly guideline_patterns?: readonly string[];
|
|
160
|
+
readonly chatPrompt?: ChatPrompt;
|
|
161
|
+
readonly inputFiles?: readonly string[];
|
|
162
|
+
readonly evalCaseId?: string;
|
|
163
|
+
readonly attempt?: number;
|
|
164
|
+
readonly maxOutputTokens?: number;
|
|
165
|
+
readonly temperature?: number;
|
|
166
|
+
readonly metadata?: JsonObject;
|
|
167
|
+
readonly signal?: AbortSignal;
|
|
168
|
+
}
|
|
169
|
+
interface ProviderResponse {
|
|
170
|
+
readonly text: string;
|
|
171
|
+
readonly reasoning?: string;
|
|
172
|
+
readonly raw?: unknown;
|
|
173
|
+
readonly usage?: JsonObject;
|
|
174
|
+
}
|
|
175
|
+
interface Provider {
|
|
176
|
+
readonly id: string;
|
|
177
|
+
readonly kind: ProviderKind;
|
|
178
|
+
readonly targetName: string;
|
|
179
|
+
invoke(request: ProviderRequest): Promise<ProviderResponse>;
|
|
180
|
+
/**
|
|
181
|
+
* Optional capability marker for provider-managed batching (single session handling multiple requests).
|
|
182
|
+
*/
|
|
183
|
+
readonly supportsBatch?: boolean;
|
|
184
|
+
/**
|
|
185
|
+
* Optional batch invocation hook. When defined alongside supportsBatch=true,
|
|
186
|
+
* the orchestrator may send multiple requests in a single provider session.
|
|
187
|
+
*/
|
|
188
|
+
invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
|
|
189
|
+
/**
|
|
190
|
+
* Optional access to the underlying AxAI instance.
|
|
191
|
+
* This enables using advanced Ax features like structured output signatures.
|
|
192
|
+
*/
|
|
193
|
+
getAxAI?(): AxAI;
|
|
194
|
+
}
|
|
195
|
+
type EnvLookup = Readonly<Record<string, string | undefined>>;
|
|
196
|
+
interface TargetDefinition {
|
|
197
|
+
readonly name: string;
|
|
198
|
+
readonly provider: ProviderKind | string;
|
|
199
|
+
readonly judge_target?: string | undefined;
|
|
200
|
+
readonly workers?: number | undefined;
|
|
201
|
+
readonly provider_batching?: boolean | undefined;
|
|
202
|
+
readonly providerBatching?: boolean | undefined;
|
|
203
|
+
readonly endpoint?: string | unknown | undefined;
|
|
204
|
+
readonly resource?: string | unknown | undefined;
|
|
205
|
+
readonly resourceName?: string | unknown | undefined;
|
|
206
|
+
readonly api_key?: string | unknown | undefined;
|
|
207
|
+
readonly apiKey?: string | unknown | undefined;
|
|
208
|
+
readonly deployment?: string | unknown | undefined;
|
|
209
|
+
readonly deploymentName?: string | unknown | undefined;
|
|
210
|
+
readonly model?: string | unknown | undefined;
|
|
211
|
+
readonly version?: string | unknown | undefined;
|
|
212
|
+
readonly api_version?: string | unknown | undefined;
|
|
213
|
+
readonly variant?: string | unknown | undefined;
|
|
214
|
+
readonly thinking_budget?: number | unknown | undefined;
|
|
215
|
+
readonly thinkingBudget?: number | unknown | undefined;
|
|
216
|
+
readonly temperature?: number | unknown | undefined;
|
|
217
|
+
readonly max_output_tokens?: number | unknown | undefined;
|
|
218
|
+
readonly maxTokens?: number | unknown | undefined;
|
|
219
|
+
readonly executable?: string | unknown | undefined;
|
|
220
|
+
readonly command?: string | unknown | undefined;
|
|
221
|
+
readonly binary?: string | unknown | undefined;
|
|
222
|
+
readonly args?: unknown | undefined;
|
|
223
|
+
readonly arguments?: unknown | undefined;
|
|
224
|
+
readonly cwd?: string | unknown | undefined;
|
|
225
|
+
readonly timeout_seconds?: number | unknown | undefined;
|
|
226
|
+
readonly timeoutSeconds?: number | unknown | undefined;
|
|
227
|
+
readonly log_dir?: string | unknown | undefined;
|
|
228
|
+
readonly logDir?: string | unknown | undefined;
|
|
229
|
+
readonly log_directory?: string | unknown | undefined;
|
|
230
|
+
readonly logDirectory?: string | unknown | undefined;
|
|
231
|
+
readonly log_format?: string | unknown | undefined;
|
|
232
|
+
readonly logFormat?: string | unknown | undefined;
|
|
233
|
+
readonly log_output_format?: string | unknown | undefined;
|
|
234
|
+
readonly logOutputFormat?: string | unknown | undefined;
|
|
235
|
+
readonly response?: string | unknown | undefined;
|
|
236
|
+
readonly delayMs?: number | unknown | undefined;
|
|
237
|
+
readonly delayMinMs?: number | unknown | undefined;
|
|
238
|
+
readonly delayMaxMs?: number | unknown | undefined;
|
|
239
|
+
readonly vscode_cmd?: string | unknown | undefined;
|
|
240
|
+
readonly wait?: boolean | unknown | undefined;
|
|
241
|
+
readonly dry_run?: boolean | unknown | undefined;
|
|
242
|
+
readonly dryRun?: boolean | unknown | undefined;
|
|
243
|
+
readonly subagent_root?: string | unknown | undefined;
|
|
244
|
+
readonly subagentRoot?: string | unknown | undefined;
|
|
245
|
+
readonly workspace_template?: string | unknown | undefined;
|
|
246
|
+
readonly workspaceTemplate?: string | unknown | undefined;
|
|
247
|
+
readonly command_template?: string | unknown | undefined;
|
|
248
|
+
readonly commandTemplate?: string | unknown | undefined;
|
|
249
|
+
readonly files_format?: string | unknown | undefined;
|
|
250
|
+
readonly filesFormat?: string | unknown | undefined;
|
|
251
|
+
readonly attachments_format?: string | unknown | undefined;
|
|
252
|
+
readonly attachmentsFormat?: string | unknown | undefined;
|
|
253
|
+
readonly env?: unknown | undefined;
|
|
254
|
+
readonly healthcheck?: unknown | undefined;
|
|
255
|
+
readonly max_retries?: number | unknown | undefined;
|
|
256
|
+
readonly maxRetries?: number | unknown | undefined;
|
|
257
|
+
readonly retry_initial_delay_ms?: number | unknown | undefined;
|
|
258
|
+
readonly retryInitialDelayMs?: number | unknown | undefined;
|
|
259
|
+
readonly retry_max_delay_ms?: number | unknown | undefined;
|
|
260
|
+
readonly retryMaxDelayMs?: number | unknown | undefined;
|
|
261
|
+
readonly retry_backoff_factor?: number | unknown | undefined;
|
|
262
|
+
readonly retryBackoffFactor?: number | unknown | undefined;
|
|
263
|
+
readonly retry_status_codes?: unknown | undefined;
|
|
264
|
+
readonly retryStatusCodes?: unknown | undefined;
|
|
265
|
+
}
|
|
266
|
+
|
|
151
267
|
/**
|
|
152
268
|
* Determine whether a path references guideline content (instructions or prompts).
|
|
153
269
|
*/
|
|
@@ -167,11 +283,13 @@ declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, opt
|
|
|
167
283
|
/**
|
|
168
284
|
* Build prompt inputs by consolidating user request context and guideline content.
|
|
169
285
|
*/
|
|
170
|
-
|
|
171
|
-
question: string;
|
|
172
|
-
guidelines: string;
|
|
173
|
-
|
|
174
|
-
|
|
286
|
+
interface PromptInputs {
|
|
287
|
+
readonly question: string;
|
|
288
|
+
readonly guidelines: string;
|
|
289
|
+
readonly chatPrompt?: ChatPrompt;
|
|
290
|
+
readonly systemMessage?: string;
|
|
291
|
+
}
|
|
292
|
+
declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
|
|
175
293
|
|
|
176
294
|
declare function fileExists(filePath: string): Promise<boolean>;
|
|
177
295
|
/**
|
|
@@ -207,56 +325,13 @@ declare function resolveFileReference(rawValue: string, searchRoots: readonly st
|
|
|
207
325
|
readonly attempted: readonly string[];
|
|
208
326
|
}>;
|
|
209
327
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
readonly
|
|
214
|
-
readonly
|
|
215
|
-
readonly
|
|
216
|
-
readonly chatPrompt?: ChatPrompt;
|
|
217
|
-
readonly inputFiles?: readonly string[];
|
|
218
|
-
readonly evalCaseId?: string;
|
|
219
|
-
readonly attempt?: number;
|
|
220
|
-
readonly maxOutputTokens?: number;
|
|
221
|
-
readonly temperature?: number;
|
|
222
|
-
readonly metadata?: JsonObject;
|
|
223
|
-
readonly signal?: AbortSignal;
|
|
224
|
-
}
|
|
225
|
-
interface ProviderResponse {
|
|
226
|
-
readonly text: string;
|
|
227
|
-
readonly reasoning?: string;
|
|
228
|
-
readonly raw?: unknown;
|
|
229
|
-
readonly usage?: JsonObject;
|
|
230
|
-
}
|
|
231
|
-
interface Provider {
|
|
232
|
-
readonly id: string;
|
|
233
|
-
readonly kind: ProviderKind;
|
|
234
|
-
readonly targetName: string;
|
|
235
|
-
invoke(request: ProviderRequest): Promise<ProviderResponse>;
|
|
236
|
-
/**
|
|
237
|
-
* Optional capability marker for provider-managed batching (single session handling multiple requests).
|
|
238
|
-
*/
|
|
239
|
-
readonly supportsBatch?: boolean;
|
|
240
|
-
/**
|
|
241
|
-
* Optional batch invocation hook. When defined alongside supportsBatch=true,
|
|
242
|
-
* the orchestrator may send multiple requests in a single provider session.
|
|
243
|
-
*/
|
|
244
|
-
invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
|
|
245
|
-
/**
|
|
246
|
-
* Optional access to the underlying AxAI instance.
|
|
247
|
-
* This enables using advanced Ax features like structured output signatures.
|
|
248
|
-
*/
|
|
249
|
-
getAxAI?(): AxAI;
|
|
250
|
-
}
|
|
251
|
-
type EnvLookup = Readonly<Record<string, string | undefined>>;
|
|
252
|
-
interface TargetDefinition {
|
|
253
|
-
readonly name: string;
|
|
254
|
-
readonly provider: ProviderKind | string;
|
|
255
|
-
readonly settings?: Record<string, unknown> | undefined;
|
|
256
|
-
readonly judge_target?: string | undefined;
|
|
257
|
-
readonly workers?: number | undefined;
|
|
328
|
+
interface RetryConfig {
|
|
329
|
+
readonly maxRetries?: number;
|
|
330
|
+
readonly initialDelayMs?: number;
|
|
331
|
+
readonly maxDelayMs?: number;
|
|
332
|
+
readonly backoffFactor?: number;
|
|
333
|
+
readonly retryableStatusCodes?: readonly number[];
|
|
258
334
|
}
|
|
259
|
-
|
|
260
335
|
interface AzureResolvedConfig {
|
|
261
336
|
readonly resourceName: string;
|
|
262
337
|
readonly deploymentName: string;
|
|
@@ -264,6 +339,7 @@ interface AzureResolvedConfig {
|
|
|
264
339
|
readonly version?: string;
|
|
265
340
|
readonly temperature?: number;
|
|
266
341
|
readonly maxOutputTokens?: number;
|
|
342
|
+
readonly retry?: RetryConfig;
|
|
267
343
|
}
|
|
268
344
|
interface AnthropicResolvedConfig {
|
|
269
345
|
readonly apiKey: string;
|
|
@@ -271,12 +347,14 @@ interface AnthropicResolvedConfig {
|
|
|
271
347
|
readonly temperature?: number;
|
|
272
348
|
readonly maxOutputTokens?: number;
|
|
273
349
|
readonly thinkingBudget?: number;
|
|
350
|
+
readonly retry?: RetryConfig;
|
|
274
351
|
}
|
|
275
352
|
interface GeminiResolvedConfig {
|
|
276
353
|
readonly apiKey: string;
|
|
277
354
|
readonly model: string;
|
|
278
355
|
readonly temperature?: number;
|
|
279
356
|
readonly maxOutputTokens?: number;
|
|
357
|
+
readonly retry?: RetryConfig;
|
|
280
358
|
}
|
|
281
359
|
interface CodexResolvedConfig {
|
|
282
360
|
readonly executable: string;
|
|
@@ -313,7 +391,6 @@ interface CliResolvedConfig {
|
|
|
313
391
|
readonly commandTemplate: string;
|
|
314
392
|
readonly filesFormat?: string;
|
|
315
393
|
readonly cwd?: string;
|
|
316
|
-
readonly env?: Record<string, string>;
|
|
317
394
|
readonly timeoutMs?: number;
|
|
318
395
|
readonly healthcheck?: CliHealthcheck;
|
|
319
396
|
}
|
|
@@ -413,6 +490,7 @@ interface EvaluationContext {
|
|
|
413
490
|
readonly question: string;
|
|
414
491
|
readonly guidelines: string;
|
|
415
492
|
readonly systemMessage?: string;
|
|
493
|
+
readonly chatPrompt?: ChatPrompt;
|
|
416
494
|
};
|
|
417
495
|
readonly now: Date;
|
|
418
496
|
readonly judgeProvider?: Provider;
|
|
@@ -521,4 +599,4 @@ type AgentKernel = {
|
|
|
521
599
|
};
|
|
522
600
|
declare function createAgentKernel(): AgentKernel;
|
|
523
601
|
|
|
524
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|
|
602
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|
package/dist/index.d.ts
CHANGED
|
@@ -102,9 +102,10 @@ interface EvalCase {
|
|
|
102
102
|
readonly dataset?: string;
|
|
103
103
|
readonly conversation_id?: string;
|
|
104
104
|
readonly question: string;
|
|
105
|
+
readonly input_messages: readonly TestMessage[];
|
|
105
106
|
readonly input_segments: readonly JsonObject[];
|
|
106
107
|
readonly output_segments: readonly JsonObject[];
|
|
107
|
-
readonly reference_answer
|
|
108
|
+
readonly reference_answer?: string;
|
|
108
109
|
readonly guideline_paths: readonly string[];
|
|
109
110
|
readonly guideline_patterns?: readonly string[];
|
|
110
111
|
readonly file_paths: readonly string[];
|
|
@@ -129,9 +130,11 @@ interface EvaluationResult {
|
|
|
129
130
|
readonly timestamp: string;
|
|
130
131
|
readonly reasoning?: string;
|
|
131
132
|
readonly raw_aspects?: readonly string[];
|
|
132
|
-
readonly
|
|
133
|
+
readonly agent_provider_request?: JsonObject;
|
|
134
|
+
readonly lm_provider_request?: JsonObject;
|
|
133
135
|
readonly evaluator_raw_request?: JsonObject;
|
|
134
136
|
readonly evaluator_results?: readonly EvaluatorResult[];
|
|
137
|
+
readonly error?: string;
|
|
135
138
|
}
|
|
136
139
|
interface EvaluatorResult {
|
|
137
140
|
readonly name: string;
|
|
@@ -148,6 +151,119 @@ interface EvaluatorResult {
|
|
|
148
151
|
*/
|
|
149
152
|
declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
|
|
150
153
|
|
|
154
|
+
type ChatPrompt = AxChatRequest["chatPrompt"];
|
|
155
|
+
type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
|
|
156
|
+
interface ProviderRequest {
|
|
157
|
+
readonly question: string;
|
|
158
|
+
readonly guidelines?: string;
|
|
159
|
+
readonly guideline_patterns?: readonly string[];
|
|
160
|
+
readonly chatPrompt?: ChatPrompt;
|
|
161
|
+
readonly inputFiles?: readonly string[];
|
|
162
|
+
readonly evalCaseId?: string;
|
|
163
|
+
readonly attempt?: number;
|
|
164
|
+
readonly maxOutputTokens?: number;
|
|
165
|
+
readonly temperature?: number;
|
|
166
|
+
readonly metadata?: JsonObject;
|
|
167
|
+
readonly signal?: AbortSignal;
|
|
168
|
+
}
|
|
169
|
+
interface ProviderResponse {
|
|
170
|
+
readonly text: string;
|
|
171
|
+
readonly reasoning?: string;
|
|
172
|
+
readonly raw?: unknown;
|
|
173
|
+
readonly usage?: JsonObject;
|
|
174
|
+
}
|
|
175
|
+
interface Provider {
|
|
176
|
+
readonly id: string;
|
|
177
|
+
readonly kind: ProviderKind;
|
|
178
|
+
readonly targetName: string;
|
|
179
|
+
invoke(request: ProviderRequest): Promise<ProviderResponse>;
|
|
180
|
+
/**
|
|
181
|
+
* Optional capability marker for provider-managed batching (single session handling multiple requests).
|
|
182
|
+
*/
|
|
183
|
+
readonly supportsBatch?: boolean;
|
|
184
|
+
/**
|
|
185
|
+
* Optional batch invocation hook. When defined alongside supportsBatch=true,
|
|
186
|
+
* the orchestrator may send multiple requests in a single provider session.
|
|
187
|
+
*/
|
|
188
|
+
invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
|
|
189
|
+
/**
|
|
190
|
+
* Optional access to the underlying AxAI instance.
|
|
191
|
+
* This enables using advanced Ax features like structured output signatures.
|
|
192
|
+
*/
|
|
193
|
+
getAxAI?(): AxAI;
|
|
194
|
+
}
|
|
195
|
+
type EnvLookup = Readonly<Record<string, string | undefined>>;
|
|
196
|
+
interface TargetDefinition {
|
|
197
|
+
readonly name: string;
|
|
198
|
+
readonly provider: ProviderKind | string;
|
|
199
|
+
readonly judge_target?: string | undefined;
|
|
200
|
+
readonly workers?: number | undefined;
|
|
201
|
+
readonly provider_batching?: boolean | undefined;
|
|
202
|
+
readonly providerBatching?: boolean | undefined;
|
|
203
|
+
readonly endpoint?: string | unknown | undefined;
|
|
204
|
+
readonly resource?: string | unknown | undefined;
|
|
205
|
+
readonly resourceName?: string | unknown | undefined;
|
|
206
|
+
readonly api_key?: string | unknown | undefined;
|
|
207
|
+
readonly apiKey?: string | unknown | undefined;
|
|
208
|
+
readonly deployment?: string | unknown | undefined;
|
|
209
|
+
readonly deploymentName?: string | unknown | undefined;
|
|
210
|
+
readonly model?: string | unknown | undefined;
|
|
211
|
+
readonly version?: string | unknown | undefined;
|
|
212
|
+
readonly api_version?: string | unknown | undefined;
|
|
213
|
+
readonly variant?: string | unknown | undefined;
|
|
214
|
+
readonly thinking_budget?: number | unknown | undefined;
|
|
215
|
+
readonly thinkingBudget?: number | unknown | undefined;
|
|
216
|
+
readonly temperature?: number | unknown | undefined;
|
|
217
|
+
readonly max_output_tokens?: number | unknown | undefined;
|
|
218
|
+
readonly maxTokens?: number | unknown | undefined;
|
|
219
|
+
readonly executable?: string | unknown | undefined;
|
|
220
|
+
readonly command?: string | unknown | undefined;
|
|
221
|
+
readonly binary?: string | unknown | undefined;
|
|
222
|
+
readonly args?: unknown | undefined;
|
|
223
|
+
readonly arguments?: unknown | undefined;
|
|
224
|
+
readonly cwd?: string | unknown | undefined;
|
|
225
|
+
readonly timeout_seconds?: number | unknown | undefined;
|
|
226
|
+
readonly timeoutSeconds?: number | unknown | undefined;
|
|
227
|
+
readonly log_dir?: string | unknown | undefined;
|
|
228
|
+
readonly logDir?: string | unknown | undefined;
|
|
229
|
+
readonly log_directory?: string | unknown | undefined;
|
|
230
|
+
readonly logDirectory?: string | unknown | undefined;
|
|
231
|
+
readonly log_format?: string | unknown | undefined;
|
|
232
|
+
readonly logFormat?: string | unknown | undefined;
|
|
233
|
+
readonly log_output_format?: string | unknown | undefined;
|
|
234
|
+
readonly logOutputFormat?: string | unknown | undefined;
|
|
235
|
+
readonly response?: string | unknown | undefined;
|
|
236
|
+
readonly delayMs?: number | unknown | undefined;
|
|
237
|
+
readonly delayMinMs?: number | unknown | undefined;
|
|
238
|
+
readonly delayMaxMs?: number | unknown | undefined;
|
|
239
|
+
readonly vscode_cmd?: string | unknown | undefined;
|
|
240
|
+
readonly wait?: boolean | unknown | undefined;
|
|
241
|
+
readonly dry_run?: boolean | unknown | undefined;
|
|
242
|
+
readonly dryRun?: boolean | unknown | undefined;
|
|
243
|
+
readonly subagent_root?: string | unknown | undefined;
|
|
244
|
+
readonly subagentRoot?: string | unknown | undefined;
|
|
245
|
+
readonly workspace_template?: string | unknown | undefined;
|
|
246
|
+
readonly workspaceTemplate?: string | unknown | undefined;
|
|
247
|
+
readonly command_template?: string | unknown | undefined;
|
|
248
|
+
readonly commandTemplate?: string | unknown | undefined;
|
|
249
|
+
readonly files_format?: string | unknown | undefined;
|
|
250
|
+
readonly filesFormat?: string | unknown | undefined;
|
|
251
|
+
readonly attachments_format?: string | unknown | undefined;
|
|
252
|
+
readonly attachmentsFormat?: string | unknown | undefined;
|
|
253
|
+
readonly env?: unknown | undefined;
|
|
254
|
+
readonly healthcheck?: unknown | undefined;
|
|
255
|
+
readonly max_retries?: number | unknown | undefined;
|
|
256
|
+
readonly maxRetries?: number | unknown | undefined;
|
|
257
|
+
readonly retry_initial_delay_ms?: number | unknown | undefined;
|
|
258
|
+
readonly retryInitialDelayMs?: number | unknown | undefined;
|
|
259
|
+
readonly retry_max_delay_ms?: number | unknown | undefined;
|
|
260
|
+
readonly retryMaxDelayMs?: number | unknown | undefined;
|
|
261
|
+
readonly retry_backoff_factor?: number | unknown | undefined;
|
|
262
|
+
readonly retryBackoffFactor?: number | unknown | undefined;
|
|
263
|
+
readonly retry_status_codes?: unknown | undefined;
|
|
264
|
+
readonly retryStatusCodes?: unknown | undefined;
|
|
265
|
+
}
|
|
266
|
+
|
|
151
267
|
/**
|
|
152
268
|
* Determine whether a path references guideline content (instructions or prompts).
|
|
153
269
|
*/
|
|
@@ -167,11 +283,13 @@ declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, opt
|
|
|
167
283
|
/**
|
|
168
284
|
* Build prompt inputs by consolidating user request context and guideline content.
|
|
169
285
|
*/
|
|
170
|
-
|
|
171
|
-
question: string;
|
|
172
|
-
guidelines: string;
|
|
173
|
-
|
|
174
|
-
|
|
286
|
+
interface PromptInputs {
|
|
287
|
+
readonly question: string;
|
|
288
|
+
readonly guidelines: string;
|
|
289
|
+
readonly chatPrompt?: ChatPrompt;
|
|
290
|
+
readonly systemMessage?: string;
|
|
291
|
+
}
|
|
292
|
+
declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
|
|
175
293
|
|
|
176
294
|
declare function fileExists(filePath: string): Promise<boolean>;
|
|
177
295
|
/**
|
|
@@ -207,56 +325,13 @@ declare function resolveFileReference(rawValue: string, searchRoots: readonly st
|
|
|
207
325
|
readonly attempted: readonly string[];
|
|
208
326
|
}>;
|
|
209
327
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
readonly
|
|
214
|
-
readonly
|
|
215
|
-
readonly
|
|
216
|
-
readonly chatPrompt?: ChatPrompt;
|
|
217
|
-
readonly inputFiles?: readonly string[];
|
|
218
|
-
readonly evalCaseId?: string;
|
|
219
|
-
readonly attempt?: number;
|
|
220
|
-
readonly maxOutputTokens?: number;
|
|
221
|
-
readonly temperature?: number;
|
|
222
|
-
readonly metadata?: JsonObject;
|
|
223
|
-
readonly signal?: AbortSignal;
|
|
224
|
-
}
|
|
225
|
-
interface ProviderResponse {
|
|
226
|
-
readonly text: string;
|
|
227
|
-
readonly reasoning?: string;
|
|
228
|
-
readonly raw?: unknown;
|
|
229
|
-
readonly usage?: JsonObject;
|
|
230
|
-
}
|
|
231
|
-
interface Provider {
|
|
232
|
-
readonly id: string;
|
|
233
|
-
readonly kind: ProviderKind;
|
|
234
|
-
readonly targetName: string;
|
|
235
|
-
invoke(request: ProviderRequest): Promise<ProviderResponse>;
|
|
236
|
-
/**
|
|
237
|
-
* Optional capability marker for provider-managed batching (single session handling multiple requests).
|
|
238
|
-
*/
|
|
239
|
-
readonly supportsBatch?: boolean;
|
|
240
|
-
/**
|
|
241
|
-
* Optional batch invocation hook. When defined alongside supportsBatch=true,
|
|
242
|
-
* the orchestrator may send multiple requests in a single provider session.
|
|
243
|
-
*/
|
|
244
|
-
invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
|
|
245
|
-
/**
|
|
246
|
-
* Optional access to the underlying AxAI instance.
|
|
247
|
-
* This enables using advanced Ax features like structured output signatures.
|
|
248
|
-
*/
|
|
249
|
-
getAxAI?(): AxAI;
|
|
250
|
-
}
|
|
251
|
-
type EnvLookup = Readonly<Record<string, string | undefined>>;
|
|
252
|
-
interface TargetDefinition {
|
|
253
|
-
readonly name: string;
|
|
254
|
-
readonly provider: ProviderKind | string;
|
|
255
|
-
readonly settings?: Record<string, unknown> | undefined;
|
|
256
|
-
readonly judge_target?: string | undefined;
|
|
257
|
-
readonly workers?: number | undefined;
|
|
328
|
+
interface RetryConfig {
|
|
329
|
+
readonly maxRetries?: number;
|
|
330
|
+
readonly initialDelayMs?: number;
|
|
331
|
+
readonly maxDelayMs?: number;
|
|
332
|
+
readonly backoffFactor?: number;
|
|
333
|
+
readonly retryableStatusCodes?: readonly number[];
|
|
258
334
|
}
|
|
259
|
-
|
|
260
335
|
interface AzureResolvedConfig {
|
|
261
336
|
readonly resourceName: string;
|
|
262
337
|
readonly deploymentName: string;
|
|
@@ -264,6 +339,7 @@ interface AzureResolvedConfig {
|
|
|
264
339
|
readonly version?: string;
|
|
265
340
|
readonly temperature?: number;
|
|
266
341
|
readonly maxOutputTokens?: number;
|
|
342
|
+
readonly retry?: RetryConfig;
|
|
267
343
|
}
|
|
268
344
|
interface AnthropicResolvedConfig {
|
|
269
345
|
readonly apiKey: string;
|
|
@@ -271,12 +347,14 @@ interface AnthropicResolvedConfig {
|
|
|
271
347
|
readonly temperature?: number;
|
|
272
348
|
readonly maxOutputTokens?: number;
|
|
273
349
|
readonly thinkingBudget?: number;
|
|
350
|
+
readonly retry?: RetryConfig;
|
|
274
351
|
}
|
|
275
352
|
interface GeminiResolvedConfig {
|
|
276
353
|
readonly apiKey: string;
|
|
277
354
|
readonly model: string;
|
|
278
355
|
readonly temperature?: number;
|
|
279
356
|
readonly maxOutputTokens?: number;
|
|
357
|
+
readonly retry?: RetryConfig;
|
|
280
358
|
}
|
|
281
359
|
interface CodexResolvedConfig {
|
|
282
360
|
readonly executable: string;
|
|
@@ -313,7 +391,6 @@ interface CliResolvedConfig {
|
|
|
313
391
|
readonly commandTemplate: string;
|
|
314
392
|
readonly filesFormat?: string;
|
|
315
393
|
readonly cwd?: string;
|
|
316
|
-
readonly env?: Record<string, string>;
|
|
317
394
|
readonly timeoutMs?: number;
|
|
318
395
|
readonly healthcheck?: CliHealthcheck;
|
|
319
396
|
}
|
|
@@ -413,6 +490,7 @@ interface EvaluationContext {
|
|
|
413
490
|
readonly question: string;
|
|
414
491
|
readonly guidelines: string;
|
|
415
492
|
readonly systemMessage?: string;
|
|
493
|
+
readonly chatPrompt?: ChatPrompt;
|
|
416
494
|
};
|
|
417
495
|
readonly now: Date;
|
|
418
496
|
readonly judgeProvider?: Provider;
|
|
@@ -521,4 +599,4 @@ type AgentKernel = {
|
|
|
521
599
|
};
|
|
522
600
|
declare function createAgentKernel(): AgentKernel;
|
|
523
601
|
|
|
524
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|
|
602
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|