@agentv/core 0.7.4 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-L6RCDZ4Z.js → chunk-SNTZFB24.js} +102 -68
- package/dist/chunk-SNTZFB24.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +32 -57
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +31 -55
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +211 -107
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +81 -3
- package/dist/index.d.ts +81 -3
- package/dist/index.js +112 -41
- package/dist/index.js.map +1 -1
- package/package.json +1 -2
- package/dist/chunk-L6RCDZ4Z.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -132,6 +132,7 @@ interface EvaluationResult {
|
|
|
132
132
|
readonly raw_request?: JsonObject;
|
|
133
133
|
readonly evaluator_raw_request?: JsonObject;
|
|
134
134
|
readonly evaluator_results?: readonly EvaluatorResult[];
|
|
135
|
+
readonly error?: string;
|
|
135
136
|
}
|
|
136
137
|
interface EvaluatorResult {
|
|
137
138
|
readonly name: string;
|
|
@@ -174,6 +175,11 @@ declare function buildPromptInputs(testCase: EvalCase): Promise<{
|
|
|
174
175
|
}>;
|
|
175
176
|
|
|
176
177
|
declare function fileExists(filePath: string): Promise<boolean>;
|
|
178
|
+
/**
|
|
179
|
+
* Normalize line endings to LF (\n).
|
|
180
|
+
* This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
|
|
181
|
+
*/
|
|
182
|
+
declare function normalizeLineEndings(content: string): string;
|
|
177
183
|
/**
|
|
178
184
|
* Read a text file and normalize line endings to LF (\n).
|
|
179
185
|
* This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
|
|
@@ -247,11 +253,81 @@ type EnvLookup = Readonly<Record<string, string | undefined>>;
|
|
|
247
253
|
interface TargetDefinition {
|
|
248
254
|
readonly name: string;
|
|
249
255
|
readonly provider: ProviderKind | string;
|
|
250
|
-
readonly settings?: Record<string, unknown> | undefined;
|
|
251
256
|
readonly judge_target?: string | undefined;
|
|
252
257
|
readonly workers?: number | undefined;
|
|
258
|
+
readonly provider_batching?: boolean | undefined;
|
|
259
|
+
readonly providerBatching?: boolean | undefined;
|
|
260
|
+
readonly endpoint?: string | unknown | undefined;
|
|
261
|
+
readonly resource?: string | unknown | undefined;
|
|
262
|
+
readonly resourceName?: string | unknown | undefined;
|
|
263
|
+
readonly api_key?: string | unknown | undefined;
|
|
264
|
+
readonly apiKey?: string | unknown | undefined;
|
|
265
|
+
readonly deployment?: string | unknown | undefined;
|
|
266
|
+
readonly deploymentName?: string | unknown | undefined;
|
|
267
|
+
readonly model?: string | unknown | undefined;
|
|
268
|
+
readonly version?: string | unknown | undefined;
|
|
269
|
+
readonly api_version?: string | unknown | undefined;
|
|
270
|
+
readonly variant?: string | unknown | undefined;
|
|
271
|
+
readonly thinking_budget?: number | unknown | undefined;
|
|
272
|
+
readonly thinkingBudget?: number | unknown | undefined;
|
|
273
|
+
readonly temperature?: number | unknown | undefined;
|
|
274
|
+
readonly max_output_tokens?: number | unknown | undefined;
|
|
275
|
+
readonly maxTokens?: number | unknown | undefined;
|
|
276
|
+
readonly executable?: string | unknown | undefined;
|
|
277
|
+
readonly command?: string | unknown | undefined;
|
|
278
|
+
readonly binary?: string | unknown | undefined;
|
|
279
|
+
readonly args?: unknown | undefined;
|
|
280
|
+
readonly arguments?: unknown | undefined;
|
|
281
|
+
readonly cwd?: string | unknown | undefined;
|
|
282
|
+
readonly timeout_seconds?: number | unknown | undefined;
|
|
283
|
+
readonly timeoutSeconds?: number | unknown | undefined;
|
|
284
|
+
readonly log_dir?: string | unknown | undefined;
|
|
285
|
+
readonly logDir?: string | unknown | undefined;
|
|
286
|
+
readonly log_directory?: string | unknown | undefined;
|
|
287
|
+
readonly logDirectory?: string | unknown | undefined;
|
|
288
|
+
readonly log_format?: string | unknown | undefined;
|
|
289
|
+
readonly logFormat?: string | unknown | undefined;
|
|
290
|
+
readonly log_output_format?: string | unknown | undefined;
|
|
291
|
+
readonly logOutputFormat?: string | unknown | undefined;
|
|
292
|
+
readonly response?: string | unknown | undefined;
|
|
293
|
+
readonly delayMs?: number | unknown | undefined;
|
|
294
|
+
readonly delayMinMs?: number | unknown | undefined;
|
|
295
|
+
readonly delayMaxMs?: number | unknown | undefined;
|
|
296
|
+
readonly vscode_cmd?: string | unknown | undefined;
|
|
297
|
+
readonly wait?: boolean | unknown | undefined;
|
|
298
|
+
readonly dry_run?: boolean | unknown | undefined;
|
|
299
|
+
readonly dryRun?: boolean | unknown | undefined;
|
|
300
|
+
readonly subagent_root?: string | unknown | undefined;
|
|
301
|
+
readonly subagentRoot?: string | unknown | undefined;
|
|
302
|
+
readonly workspace_template?: string | unknown | undefined;
|
|
303
|
+
readonly workspaceTemplate?: string | unknown | undefined;
|
|
304
|
+
readonly command_template?: string | unknown | undefined;
|
|
305
|
+
readonly commandTemplate?: string | unknown | undefined;
|
|
306
|
+
readonly files_format?: string | unknown | undefined;
|
|
307
|
+
readonly filesFormat?: string | unknown | undefined;
|
|
308
|
+
readonly attachments_format?: string | unknown | undefined;
|
|
309
|
+
readonly attachmentsFormat?: string | unknown | undefined;
|
|
310
|
+
readonly env?: unknown | undefined;
|
|
311
|
+
readonly healthcheck?: unknown | undefined;
|
|
312
|
+
readonly max_retries?: number | unknown | undefined;
|
|
313
|
+
readonly maxRetries?: number | unknown | undefined;
|
|
314
|
+
readonly retry_initial_delay_ms?: number | unknown | undefined;
|
|
315
|
+
readonly retryInitialDelayMs?: number | unknown | undefined;
|
|
316
|
+
readonly retry_max_delay_ms?: number | unknown | undefined;
|
|
317
|
+
readonly retryMaxDelayMs?: number | unknown | undefined;
|
|
318
|
+
readonly retry_backoff_factor?: number | unknown | undefined;
|
|
319
|
+
readonly retryBackoffFactor?: number | unknown | undefined;
|
|
320
|
+
readonly retry_status_codes?: unknown | undefined;
|
|
321
|
+
readonly retryStatusCodes?: unknown | undefined;
|
|
253
322
|
}
|
|
254
323
|
|
|
324
|
+
interface RetryConfig {
|
|
325
|
+
readonly maxRetries?: number;
|
|
326
|
+
readonly initialDelayMs?: number;
|
|
327
|
+
readonly maxDelayMs?: number;
|
|
328
|
+
readonly backoffFactor?: number;
|
|
329
|
+
readonly retryableStatusCodes?: readonly number[];
|
|
330
|
+
}
|
|
255
331
|
interface AzureResolvedConfig {
|
|
256
332
|
readonly resourceName: string;
|
|
257
333
|
readonly deploymentName: string;
|
|
@@ -259,6 +335,7 @@ interface AzureResolvedConfig {
|
|
|
259
335
|
readonly version?: string;
|
|
260
336
|
readonly temperature?: number;
|
|
261
337
|
readonly maxOutputTokens?: number;
|
|
338
|
+
readonly retry?: RetryConfig;
|
|
262
339
|
}
|
|
263
340
|
interface AnthropicResolvedConfig {
|
|
264
341
|
readonly apiKey: string;
|
|
@@ -266,12 +343,14 @@ interface AnthropicResolvedConfig {
|
|
|
266
343
|
readonly temperature?: number;
|
|
267
344
|
readonly maxOutputTokens?: number;
|
|
268
345
|
readonly thinkingBudget?: number;
|
|
346
|
+
readonly retry?: RetryConfig;
|
|
269
347
|
}
|
|
270
348
|
interface GeminiResolvedConfig {
|
|
271
349
|
readonly apiKey: string;
|
|
272
350
|
readonly model: string;
|
|
273
351
|
readonly temperature?: number;
|
|
274
352
|
readonly maxOutputTokens?: number;
|
|
353
|
+
readonly retry?: RetryConfig;
|
|
275
354
|
}
|
|
276
355
|
interface CodexResolvedConfig {
|
|
277
356
|
readonly executable: string;
|
|
@@ -308,7 +387,6 @@ interface CliResolvedConfig {
|
|
|
308
387
|
readonly commandTemplate: string;
|
|
309
388
|
readonly filesFormat?: string;
|
|
310
389
|
readonly cwd?: string;
|
|
311
|
-
readonly env?: Record<string, string>;
|
|
312
390
|
readonly timeoutMs?: number;
|
|
313
391
|
readonly healthcheck?: CliHealthcheck;
|
|
314
392
|
}
|
|
@@ -516,4 +594,4 @@ type AgentKernel = {
|
|
|
516
594
|
};
|
|
517
595
|
declare function createAgentKernel(): AgentKernel;
|
|
518
596
|
|
|
519
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|
|
597
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|
package/dist/index.d.ts
CHANGED
|
@@ -132,6 +132,7 @@ interface EvaluationResult {
|
|
|
132
132
|
readonly raw_request?: JsonObject;
|
|
133
133
|
readonly evaluator_raw_request?: JsonObject;
|
|
134
134
|
readonly evaluator_results?: readonly EvaluatorResult[];
|
|
135
|
+
readonly error?: string;
|
|
135
136
|
}
|
|
136
137
|
interface EvaluatorResult {
|
|
137
138
|
readonly name: string;
|
|
@@ -174,6 +175,11 @@ declare function buildPromptInputs(testCase: EvalCase): Promise<{
|
|
|
174
175
|
}>;
|
|
175
176
|
|
|
176
177
|
declare function fileExists(filePath: string): Promise<boolean>;
|
|
178
|
+
/**
|
|
179
|
+
* Normalize line endings to LF (\n).
|
|
180
|
+
* This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
|
|
181
|
+
*/
|
|
182
|
+
declare function normalizeLineEndings(content: string): string;
|
|
177
183
|
/**
|
|
178
184
|
* Read a text file and normalize line endings to LF (\n).
|
|
179
185
|
* This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
|
|
@@ -247,11 +253,81 @@ type EnvLookup = Readonly<Record<string, string | undefined>>;
|
|
|
247
253
|
interface TargetDefinition {
|
|
248
254
|
readonly name: string;
|
|
249
255
|
readonly provider: ProviderKind | string;
|
|
250
|
-
readonly settings?: Record<string, unknown> | undefined;
|
|
251
256
|
readonly judge_target?: string | undefined;
|
|
252
257
|
readonly workers?: number | undefined;
|
|
258
|
+
readonly provider_batching?: boolean | undefined;
|
|
259
|
+
readonly providerBatching?: boolean | undefined;
|
|
260
|
+
readonly endpoint?: string | unknown | undefined;
|
|
261
|
+
readonly resource?: string | unknown | undefined;
|
|
262
|
+
readonly resourceName?: string | unknown | undefined;
|
|
263
|
+
readonly api_key?: string | unknown | undefined;
|
|
264
|
+
readonly apiKey?: string | unknown | undefined;
|
|
265
|
+
readonly deployment?: string | unknown | undefined;
|
|
266
|
+
readonly deploymentName?: string | unknown | undefined;
|
|
267
|
+
readonly model?: string | unknown | undefined;
|
|
268
|
+
readonly version?: string | unknown | undefined;
|
|
269
|
+
readonly api_version?: string | unknown | undefined;
|
|
270
|
+
readonly variant?: string | unknown | undefined;
|
|
271
|
+
readonly thinking_budget?: number | unknown | undefined;
|
|
272
|
+
readonly thinkingBudget?: number | unknown | undefined;
|
|
273
|
+
readonly temperature?: number | unknown | undefined;
|
|
274
|
+
readonly max_output_tokens?: number | unknown | undefined;
|
|
275
|
+
readonly maxTokens?: number | unknown | undefined;
|
|
276
|
+
readonly executable?: string | unknown | undefined;
|
|
277
|
+
readonly command?: string | unknown | undefined;
|
|
278
|
+
readonly binary?: string | unknown | undefined;
|
|
279
|
+
readonly args?: unknown | undefined;
|
|
280
|
+
readonly arguments?: unknown | undefined;
|
|
281
|
+
readonly cwd?: string | unknown | undefined;
|
|
282
|
+
readonly timeout_seconds?: number | unknown | undefined;
|
|
283
|
+
readonly timeoutSeconds?: number | unknown | undefined;
|
|
284
|
+
readonly log_dir?: string | unknown | undefined;
|
|
285
|
+
readonly logDir?: string | unknown | undefined;
|
|
286
|
+
readonly log_directory?: string | unknown | undefined;
|
|
287
|
+
readonly logDirectory?: string | unknown | undefined;
|
|
288
|
+
readonly log_format?: string | unknown | undefined;
|
|
289
|
+
readonly logFormat?: string | unknown | undefined;
|
|
290
|
+
readonly log_output_format?: string | unknown | undefined;
|
|
291
|
+
readonly logOutputFormat?: string | unknown | undefined;
|
|
292
|
+
readonly response?: string | unknown | undefined;
|
|
293
|
+
readonly delayMs?: number | unknown | undefined;
|
|
294
|
+
readonly delayMinMs?: number | unknown | undefined;
|
|
295
|
+
readonly delayMaxMs?: number | unknown | undefined;
|
|
296
|
+
readonly vscode_cmd?: string | unknown | undefined;
|
|
297
|
+
readonly wait?: boolean | unknown | undefined;
|
|
298
|
+
readonly dry_run?: boolean | unknown | undefined;
|
|
299
|
+
readonly dryRun?: boolean | unknown | undefined;
|
|
300
|
+
readonly subagent_root?: string | unknown | undefined;
|
|
301
|
+
readonly subagentRoot?: string | unknown | undefined;
|
|
302
|
+
readonly workspace_template?: string | unknown | undefined;
|
|
303
|
+
readonly workspaceTemplate?: string | unknown | undefined;
|
|
304
|
+
readonly command_template?: string | unknown | undefined;
|
|
305
|
+
readonly commandTemplate?: string | unknown | undefined;
|
|
306
|
+
readonly files_format?: string | unknown | undefined;
|
|
307
|
+
readonly filesFormat?: string | unknown | undefined;
|
|
308
|
+
readonly attachments_format?: string | unknown | undefined;
|
|
309
|
+
readonly attachmentsFormat?: string | unknown | undefined;
|
|
310
|
+
readonly env?: unknown | undefined;
|
|
311
|
+
readonly healthcheck?: unknown | undefined;
|
|
312
|
+
readonly max_retries?: number | unknown | undefined;
|
|
313
|
+
readonly maxRetries?: number | unknown | undefined;
|
|
314
|
+
readonly retry_initial_delay_ms?: number | unknown | undefined;
|
|
315
|
+
readonly retryInitialDelayMs?: number | unknown | undefined;
|
|
316
|
+
readonly retry_max_delay_ms?: number | unknown | undefined;
|
|
317
|
+
readonly retryMaxDelayMs?: number | unknown | undefined;
|
|
318
|
+
readonly retry_backoff_factor?: number | unknown | undefined;
|
|
319
|
+
readonly retryBackoffFactor?: number | unknown | undefined;
|
|
320
|
+
readonly retry_status_codes?: unknown | undefined;
|
|
321
|
+
readonly retryStatusCodes?: unknown | undefined;
|
|
253
322
|
}
|
|
254
323
|
|
|
324
|
+
interface RetryConfig {
|
|
325
|
+
readonly maxRetries?: number;
|
|
326
|
+
readonly initialDelayMs?: number;
|
|
327
|
+
readonly maxDelayMs?: number;
|
|
328
|
+
readonly backoffFactor?: number;
|
|
329
|
+
readonly retryableStatusCodes?: readonly number[];
|
|
330
|
+
}
|
|
255
331
|
interface AzureResolvedConfig {
|
|
256
332
|
readonly resourceName: string;
|
|
257
333
|
readonly deploymentName: string;
|
|
@@ -259,6 +335,7 @@ interface AzureResolvedConfig {
|
|
|
259
335
|
readonly version?: string;
|
|
260
336
|
readonly temperature?: number;
|
|
261
337
|
readonly maxOutputTokens?: number;
|
|
338
|
+
readonly retry?: RetryConfig;
|
|
262
339
|
}
|
|
263
340
|
interface AnthropicResolvedConfig {
|
|
264
341
|
readonly apiKey: string;
|
|
@@ -266,12 +343,14 @@ interface AnthropicResolvedConfig {
|
|
|
266
343
|
readonly temperature?: number;
|
|
267
344
|
readonly maxOutputTokens?: number;
|
|
268
345
|
readonly thinkingBudget?: number;
|
|
346
|
+
readonly retry?: RetryConfig;
|
|
269
347
|
}
|
|
270
348
|
interface GeminiResolvedConfig {
|
|
271
349
|
readonly apiKey: string;
|
|
272
350
|
readonly model: string;
|
|
273
351
|
readonly temperature?: number;
|
|
274
352
|
readonly maxOutputTokens?: number;
|
|
353
|
+
readonly retry?: RetryConfig;
|
|
275
354
|
}
|
|
276
355
|
interface CodexResolvedConfig {
|
|
277
356
|
readonly executable: string;
|
|
@@ -308,7 +387,6 @@ interface CliResolvedConfig {
|
|
|
308
387
|
readonly commandTemplate: string;
|
|
309
388
|
readonly filesFormat?: string;
|
|
310
389
|
readonly cwd?: string;
|
|
311
|
-
readonly env?: Record<string, string>;
|
|
312
390
|
readonly timeoutMs?: number;
|
|
313
391
|
readonly healthcheck?: CliHealthcheck;
|
|
314
392
|
}
|
|
@@ -516,4 +594,4 @@ type AgentKernel = {
|
|
|
516
594
|
};
|
|
517
595
|
declare function createAgentKernel(): AgentKernel;
|
|
518
596
|
|
|
519
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|
|
597
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|
package/dist/index.js
CHANGED
|
@@ -5,10 +5,11 @@ import {
|
|
|
5
5
|
fileExists,
|
|
6
6
|
findGitRoot,
|
|
7
7
|
isAgentProvider,
|
|
8
|
+
normalizeLineEndings,
|
|
8
9
|
readTextFile,
|
|
9
10
|
resolveFileReference,
|
|
10
11
|
resolveTargetDefinition
|
|
11
|
-
} from "./chunk-L6RCDZ4Z.js";
|
|
12
|
+
} from "./chunk-SNTZFB24.js";
|
|
12
13
|
|
|
13
14
|
// src/evaluation/types.ts
|
|
14
15
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
@@ -661,6 +662,67 @@ function ensureChatResponse(result) {
|
|
|
661
662
|
}
|
|
662
663
|
return result;
|
|
663
664
|
}
|
|
665
|
+
function isRetryableError(error, retryableStatusCodes) {
|
|
666
|
+
if (!error || typeof error !== "object") {
|
|
667
|
+
return false;
|
|
668
|
+
}
|
|
669
|
+
if ("status" in error && typeof error.status === "number") {
|
|
670
|
+
return retryableStatusCodes.includes(error.status);
|
|
671
|
+
}
|
|
672
|
+
if ("message" in error && typeof error.message === "string") {
|
|
673
|
+
const match = error.message.match(/HTTP (\d{3})/);
|
|
674
|
+
if (match) {
|
|
675
|
+
const status = Number.parseInt(match[1], 10);
|
|
676
|
+
return retryableStatusCodes.includes(status);
|
|
677
|
+
}
|
|
678
|
+
}
|
|
679
|
+
if ("name" in error && error.name === "AxAIServiceNetworkError") {
|
|
680
|
+
return true;
|
|
681
|
+
}
|
|
682
|
+
return false;
|
|
683
|
+
}
|
|
684
|
+
function calculateRetryDelay(attempt, config) {
|
|
685
|
+
const delay = Math.min(
|
|
686
|
+
config.maxDelayMs,
|
|
687
|
+
config.initialDelayMs * config.backoffFactor ** attempt
|
|
688
|
+
);
|
|
689
|
+
return delay * (0.75 + Math.random() * 0.5);
|
|
690
|
+
}
|
|
691
|
+
async function sleep(ms) {
|
|
692
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
693
|
+
}
|
|
694
|
+
async function withRetry(fn, retryConfig, signal) {
|
|
695
|
+
const config = {
|
|
696
|
+
maxRetries: retryConfig?.maxRetries ?? 3,
|
|
697
|
+
initialDelayMs: retryConfig?.initialDelayMs ?? 1e3,
|
|
698
|
+
maxDelayMs: retryConfig?.maxDelayMs ?? 6e4,
|
|
699
|
+
backoffFactor: retryConfig?.backoffFactor ?? 2,
|
|
700
|
+
retryableStatusCodes: retryConfig?.retryableStatusCodes ?? [500, 408, 429, 502, 503, 504]
|
|
701
|
+
};
|
|
702
|
+
let lastError;
|
|
703
|
+
for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
|
|
704
|
+
if (signal?.aborted) {
|
|
705
|
+
throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
|
|
706
|
+
}
|
|
707
|
+
try {
|
|
708
|
+
return await fn();
|
|
709
|
+
} catch (error) {
|
|
710
|
+
lastError = error;
|
|
711
|
+
if (attempt >= config.maxRetries) {
|
|
712
|
+
break;
|
|
713
|
+
}
|
|
714
|
+
if (!isRetryableError(error, config.retryableStatusCodes)) {
|
|
715
|
+
throw error;
|
|
716
|
+
}
|
|
717
|
+
const delay = calculateRetryDelay(attempt, config);
|
|
718
|
+
await sleep(delay);
|
|
719
|
+
if (signal?.aborted) {
|
|
720
|
+
throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
}
|
|
724
|
+
throw lastError;
|
|
725
|
+
}
|
|
664
726
|
var AzureProvider = class {
|
|
665
727
|
constructor(targetName, config) {
|
|
666
728
|
this.config = config;
|
|
@@ -670,6 +732,7 @@ var AzureProvider = class {
|
|
|
670
732
|
temperature: config.temperature,
|
|
671
733
|
maxOutputTokens: config.maxOutputTokens
|
|
672
734
|
};
|
|
735
|
+
this.retryConfig = config.retry;
|
|
673
736
|
this.ai = AxAI.create({
|
|
674
737
|
name: "azure-openai",
|
|
675
738
|
apiKey: config.apiKey,
|
|
@@ -686,16 +749,21 @@ var AzureProvider = class {
|
|
|
686
749
|
targetName;
|
|
687
750
|
ai;
|
|
688
751
|
defaults;
|
|
752
|
+
retryConfig;
|
|
689
753
|
async invoke(request) {
|
|
690
754
|
const chatPrompt = buildChatPrompt(request);
|
|
691
755
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
692
|
-
const response = await
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
756
|
+
const response = await withRetry(
|
|
757
|
+
async () => await this.ai.chat(
|
|
758
|
+
{
|
|
759
|
+
chatPrompt,
|
|
760
|
+
model: this.config.deploymentName,
|
|
761
|
+
...modelConfig ? { modelConfig } : {}
|
|
762
|
+
},
|
|
763
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
764
|
+
),
|
|
765
|
+
this.retryConfig,
|
|
766
|
+
request.signal
|
|
699
767
|
);
|
|
700
768
|
return mapResponse(ensureChatResponse(response));
|
|
701
769
|
}
|
|
@@ -713,6 +781,7 @@ var AnthropicProvider = class {
|
|
|
713
781
|
maxOutputTokens: config.maxOutputTokens,
|
|
714
782
|
thinkingBudget: config.thinkingBudget
|
|
715
783
|
};
|
|
784
|
+
this.retryConfig = config.retry;
|
|
716
785
|
this.ai = AxAI.create({
|
|
717
786
|
name: "anthropic",
|
|
718
787
|
apiKey: config.apiKey
|
|
@@ -723,16 +792,21 @@ var AnthropicProvider = class {
|
|
|
723
792
|
targetName;
|
|
724
793
|
ai;
|
|
725
794
|
defaults;
|
|
795
|
+
retryConfig;
|
|
726
796
|
async invoke(request) {
|
|
727
797
|
const chatPrompt = buildChatPrompt(request);
|
|
728
798
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
729
|
-
const response = await
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
799
|
+
const response = await withRetry(
|
|
800
|
+
async () => await this.ai.chat(
|
|
801
|
+
{
|
|
802
|
+
chatPrompt,
|
|
803
|
+
model: this.config.model,
|
|
804
|
+
...modelConfig ? { modelConfig } : {}
|
|
805
|
+
},
|
|
806
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
807
|
+
),
|
|
808
|
+
this.retryConfig,
|
|
809
|
+
request.signal
|
|
736
810
|
);
|
|
737
811
|
return mapResponse(ensureChatResponse(response));
|
|
738
812
|
}
|
|
@@ -749,6 +823,7 @@ var GeminiProvider = class {
|
|
|
749
823
|
temperature: config.temperature,
|
|
750
824
|
maxOutputTokens: config.maxOutputTokens
|
|
751
825
|
};
|
|
826
|
+
this.retryConfig = config.retry;
|
|
752
827
|
this.ai = AxAI.create({
|
|
753
828
|
name: "google-gemini",
|
|
754
829
|
apiKey: config.apiKey
|
|
@@ -759,16 +834,21 @@ var GeminiProvider = class {
|
|
|
759
834
|
targetName;
|
|
760
835
|
ai;
|
|
761
836
|
defaults;
|
|
837
|
+
retryConfig;
|
|
762
838
|
async invoke(request) {
|
|
763
839
|
const chatPrompt = buildChatPrompt(request);
|
|
764
840
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
765
|
-
const response = await
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
841
|
+
const response = await withRetry(
|
|
842
|
+
async () => await this.ai.chat(
|
|
843
|
+
{
|
|
844
|
+
chatPrompt,
|
|
845
|
+
model: this.config.model,
|
|
846
|
+
...modelConfig ? { modelConfig } : {}
|
|
847
|
+
},
|
|
848
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
849
|
+
),
|
|
850
|
+
this.retryConfig,
|
|
851
|
+
request.signal
|
|
772
852
|
);
|
|
773
853
|
return mapResponse(ensureChatResponse(response));
|
|
774
854
|
}
|
|
@@ -796,7 +876,6 @@ async function defaultCommandRunner(command, options) {
|
|
|
796
876
|
};
|
|
797
877
|
try {
|
|
798
878
|
const { stdout, stderr } = await execAsync(command, execOptions);
|
|
799
|
-
console.error(`[CLI DEBUG] SUCCESS - stdout: ${stdout.length} bytes, stderr: ${stderr.length} bytes`);
|
|
800
879
|
return {
|
|
801
880
|
stdout,
|
|
802
881
|
stderr,
|
|
@@ -807,8 +886,6 @@ async function defaultCommandRunner(command, options) {
|
|
|
807
886
|
};
|
|
808
887
|
} catch (error) {
|
|
809
888
|
const execError = error;
|
|
810
|
-
console.error(`[CLI DEBUG] ERROR - code: ${execError.code}, message: ${execError.message}`);
|
|
811
|
-
console.error(`[CLI DEBUG] stdout: ${execError.stdout?.length ?? 0} bytes, stderr: ${execError.stderr?.length ?? 0} bytes`);
|
|
812
889
|
return {
|
|
813
890
|
stdout: execError.stdout ?? "",
|
|
814
891
|
stderr: execError.stderr ?? "",
|
|
@@ -841,10 +918,9 @@ var CliProvider = class {
|
|
|
841
918
|
const outputFilePath = generateOutputFilePath(request.evalCaseId);
|
|
842
919
|
const templateValues = buildTemplateValues(request, this.config, outputFilePath);
|
|
843
920
|
const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
|
|
844
|
-
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
845
921
|
const result = await this.runCommand(renderedCommand, {
|
|
846
922
|
cwd: this.config.cwd,
|
|
847
|
-
env,
|
|
923
|
+
env: process.env,
|
|
848
924
|
timeoutMs: this.config.timeoutMs,
|
|
849
925
|
signal: request.signal
|
|
850
926
|
});
|
|
@@ -876,7 +952,7 @@ var CliProvider = class {
|
|
|
876
952
|
}
|
|
877
953
|
async readAndCleanupOutputFile(filePath) {
|
|
878
954
|
try {
|
|
879
|
-
const content = await
|
|
955
|
+
const content = await readTextFile(filePath);
|
|
880
956
|
return content;
|
|
881
957
|
} catch (error) {
|
|
882
958
|
const errorMsg = error instanceof Error ? error.message : String(error);
|
|
@@ -933,10 +1009,9 @@ var CliProvider = class {
|
|
|
933
1009
|
generateOutputFilePath("healthcheck")
|
|
934
1010
|
)
|
|
935
1011
|
);
|
|
936
|
-
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
937
1012
|
const result = await this.runCommand(renderedCommand, {
|
|
938
1013
|
cwd: healthcheck.cwd ?? this.config.cwd,
|
|
939
|
-
env,
|
|
1014
|
+
env: process.env,
|
|
940
1015
|
timeoutMs,
|
|
941
1016
|
signal
|
|
942
1017
|
});
|
|
@@ -2169,20 +2244,13 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
2169
2244
|
}
|
|
2170
2245
|
const name = value.name;
|
|
2171
2246
|
const provider = value.provider;
|
|
2172
|
-
const settings = value.settings;
|
|
2173
|
-
const judgeTarget = value.judge_target;
|
|
2174
2247
|
if (typeof name !== "string" || name.trim().length === 0) {
|
|
2175
2248
|
throw new Error(`targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`);
|
|
2176
2249
|
}
|
|
2177
2250
|
if (typeof provider !== "string" || provider.trim().length === 0) {
|
|
2178
2251
|
throw new Error(`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider'`);
|
|
2179
2252
|
}
|
|
2180
|
-
return {
|
|
2181
|
-
name,
|
|
2182
|
-
provider,
|
|
2183
|
-
settings: isRecord(settings) ? settings : void 0,
|
|
2184
|
-
judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
|
|
2185
|
-
};
|
|
2253
|
+
return value;
|
|
2186
2254
|
}
|
|
2187
2255
|
async function fileExists3(filePath) {
|
|
2188
2256
|
try {
|
|
@@ -2823,10 +2891,11 @@ async function runEvaluation(options) {
|
|
|
2823
2891
|
await onProgress({
|
|
2824
2892
|
workerId,
|
|
2825
2893
|
evalId: evalCase.id,
|
|
2826
|
-
status: "completed",
|
|
2894
|
+
status: result.error ? "failed" : "completed",
|
|
2827
2895
|
startedAt: 0,
|
|
2828
2896
|
// Not used for completed status
|
|
2829
|
-
completedAt: Date.now()
|
|
2897
|
+
completedAt: Date.now(),
|
|
2898
|
+
error: result.error
|
|
2830
2899
|
});
|
|
2831
2900
|
}
|
|
2832
2901
|
if (onResult) {
|
|
@@ -3364,7 +3433,8 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
3364
3433
|
target: targetName,
|
|
3365
3434
|
timestamp: timestamp.toISOString(),
|
|
3366
3435
|
raw_aspects: [],
|
|
3367
|
-
raw_request: rawRequest
|
|
3436
|
+
raw_request: rawRequest,
|
|
3437
|
+
error: message
|
|
3368
3438
|
};
|
|
3369
3439
|
}
|
|
3370
3440
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
@@ -3420,6 +3490,7 @@ export {
|
|
|
3420
3490
|
isTestMessageRole,
|
|
3421
3491
|
listTargetNames,
|
|
3422
3492
|
loadEvalCases,
|
|
3493
|
+
normalizeLineEndings,
|
|
3423
3494
|
readTargetDefinitions,
|
|
3424
3495
|
readTextFile,
|
|
3425
3496
|
resolveAndCreateProvider,
|