@agentv/core 0.7.5 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-7XM7HYRS.js → chunk-SNTZFB24.js} +97 -67
- package/dist/chunk-SNTZFB24.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +32 -57
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +31 -55
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +204 -102
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +75 -2
- package/dist/index.d.ts +75 -2
- package/dist/index.js +109 -37
- package/dist/index.js.map +1 -1
- package/package.json +1 -2
- package/dist/chunk-7XM7HYRS.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -132,6 +132,7 @@ interface EvaluationResult {
|
|
|
132
132
|
readonly raw_request?: JsonObject;
|
|
133
133
|
readonly evaluator_raw_request?: JsonObject;
|
|
134
134
|
readonly evaluator_results?: readonly EvaluatorResult[];
|
|
135
|
+
readonly error?: string;
|
|
135
136
|
}
|
|
136
137
|
interface EvaluatorResult {
|
|
137
138
|
readonly name: string;
|
|
@@ -252,11 +253,81 @@ type EnvLookup = Readonly<Record<string, string | undefined>>;
|
|
|
252
253
|
interface TargetDefinition {
|
|
253
254
|
readonly name: string;
|
|
254
255
|
readonly provider: ProviderKind | string;
|
|
255
|
-
readonly settings?: Record<string, unknown> | undefined;
|
|
256
256
|
readonly judge_target?: string | undefined;
|
|
257
257
|
readonly workers?: number | undefined;
|
|
258
|
+
readonly provider_batching?: boolean | undefined;
|
|
259
|
+
readonly providerBatching?: boolean | undefined;
|
|
260
|
+
readonly endpoint?: string | unknown | undefined;
|
|
261
|
+
readonly resource?: string | unknown | undefined;
|
|
262
|
+
readonly resourceName?: string | unknown | undefined;
|
|
263
|
+
readonly api_key?: string | unknown | undefined;
|
|
264
|
+
readonly apiKey?: string | unknown | undefined;
|
|
265
|
+
readonly deployment?: string | unknown | undefined;
|
|
266
|
+
readonly deploymentName?: string | unknown | undefined;
|
|
267
|
+
readonly model?: string | unknown | undefined;
|
|
268
|
+
readonly version?: string | unknown | undefined;
|
|
269
|
+
readonly api_version?: string | unknown | undefined;
|
|
270
|
+
readonly variant?: string | unknown | undefined;
|
|
271
|
+
readonly thinking_budget?: number | unknown | undefined;
|
|
272
|
+
readonly thinkingBudget?: number | unknown | undefined;
|
|
273
|
+
readonly temperature?: number | unknown | undefined;
|
|
274
|
+
readonly max_output_tokens?: number | unknown | undefined;
|
|
275
|
+
readonly maxTokens?: number | unknown | undefined;
|
|
276
|
+
readonly executable?: string | unknown | undefined;
|
|
277
|
+
readonly command?: string | unknown | undefined;
|
|
278
|
+
readonly binary?: string | unknown | undefined;
|
|
279
|
+
readonly args?: unknown | undefined;
|
|
280
|
+
readonly arguments?: unknown | undefined;
|
|
281
|
+
readonly cwd?: string | unknown | undefined;
|
|
282
|
+
readonly timeout_seconds?: number | unknown | undefined;
|
|
283
|
+
readonly timeoutSeconds?: number | unknown | undefined;
|
|
284
|
+
readonly log_dir?: string | unknown | undefined;
|
|
285
|
+
readonly logDir?: string | unknown | undefined;
|
|
286
|
+
readonly log_directory?: string | unknown | undefined;
|
|
287
|
+
readonly logDirectory?: string | unknown | undefined;
|
|
288
|
+
readonly log_format?: string | unknown | undefined;
|
|
289
|
+
readonly logFormat?: string | unknown | undefined;
|
|
290
|
+
readonly log_output_format?: string | unknown | undefined;
|
|
291
|
+
readonly logOutputFormat?: string | unknown | undefined;
|
|
292
|
+
readonly response?: string | unknown | undefined;
|
|
293
|
+
readonly delayMs?: number | unknown | undefined;
|
|
294
|
+
readonly delayMinMs?: number | unknown | undefined;
|
|
295
|
+
readonly delayMaxMs?: number | unknown | undefined;
|
|
296
|
+
readonly vscode_cmd?: string | unknown | undefined;
|
|
297
|
+
readonly wait?: boolean | unknown | undefined;
|
|
298
|
+
readonly dry_run?: boolean | unknown | undefined;
|
|
299
|
+
readonly dryRun?: boolean | unknown | undefined;
|
|
300
|
+
readonly subagent_root?: string | unknown | undefined;
|
|
301
|
+
readonly subagentRoot?: string | unknown | undefined;
|
|
302
|
+
readonly workspace_template?: string | unknown | undefined;
|
|
303
|
+
readonly workspaceTemplate?: string | unknown | undefined;
|
|
304
|
+
readonly command_template?: string | unknown | undefined;
|
|
305
|
+
readonly commandTemplate?: string | unknown | undefined;
|
|
306
|
+
readonly files_format?: string | unknown | undefined;
|
|
307
|
+
readonly filesFormat?: string | unknown | undefined;
|
|
308
|
+
readonly attachments_format?: string | unknown | undefined;
|
|
309
|
+
readonly attachmentsFormat?: string | unknown | undefined;
|
|
310
|
+
readonly env?: unknown | undefined;
|
|
311
|
+
readonly healthcheck?: unknown | undefined;
|
|
312
|
+
readonly max_retries?: number | unknown | undefined;
|
|
313
|
+
readonly maxRetries?: number | unknown | undefined;
|
|
314
|
+
readonly retry_initial_delay_ms?: number | unknown | undefined;
|
|
315
|
+
readonly retryInitialDelayMs?: number | unknown | undefined;
|
|
316
|
+
readonly retry_max_delay_ms?: number | unknown | undefined;
|
|
317
|
+
readonly retryMaxDelayMs?: number | unknown | undefined;
|
|
318
|
+
readonly retry_backoff_factor?: number | unknown | undefined;
|
|
319
|
+
readonly retryBackoffFactor?: number | unknown | undefined;
|
|
320
|
+
readonly retry_status_codes?: unknown | undefined;
|
|
321
|
+
readonly retryStatusCodes?: unknown | undefined;
|
|
258
322
|
}
|
|
259
323
|
|
|
324
|
+
interface RetryConfig {
|
|
325
|
+
readonly maxRetries?: number;
|
|
326
|
+
readonly initialDelayMs?: number;
|
|
327
|
+
readonly maxDelayMs?: number;
|
|
328
|
+
readonly backoffFactor?: number;
|
|
329
|
+
readonly retryableStatusCodes?: readonly number[];
|
|
330
|
+
}
|
|
260
331
|
interface AzureResolvedConfig {
|
|
261
332
|
readonly resourceName: string;
|
|
262
333
|
readonly deploymentName: string;
|
|
@@ -264,6 +335,7 @@ interface AzureResolvedConfig {
|
|
|
264
335
|
readonly version?: string;
|
|
265
336
|
readonly temperature?: number;
|
|
266
337
|
readonly maxOutputTokens?: number;
|
|
338
|
+
readonly retry?: RetryConfig;
|
|
267
339
|
}
|
|
268
340
|
interface AnthropicResolvedConfig {
|
|
269
341
|
readonly apiKey: string;
|
|
@@ -271,12 +343,14 @@ interface AnthropicResolvedConfig {
|
|
|
271
343
|
readonly temperature?: number;
|
|
272
344
|
readonly maxOutputTokens?: number;
|
|
273
345
|
readonly thinkingBudget?: number;
|
|
346
|
+
readonly retry?: RetryConfig;
|
|
274
347
|
}
|
|
275
348
|
interface GeminiResolvedConfig {
|
|
276
349
|
readonly apiKey: string;
|
|
277
350
|
readonly model: string;
|
|
278
351
|
readonly temperature?: number;
|
|
279
352
|
readonly maxOutputTokens?: number;
|
|
353
|
+
readonly retry?: RetryConfig;
|
|
280
354
|
}
|
|
281
355
|
interface CodexResolvedConfig {
|
|
282
356
|
readonly executable: string;
|
|
@@ -313,7 +387,6 @@ interface CliResolvedConfig {
|
|
|
313
387
|
readonly commandTemplate: string;
|
|
314
388
|
readonly filesFormat?: string;
|
|
315
389
|
readonly cwd?: string;
|
|
316
|
-
readonly env?: Record<string, string>;
|
|
317
390
|
readonly timeoutMs?: number;
|
|
318
391
|
readonly healthcheck?: CliHealthcheck;
|
|
319
392
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -132,6 +132,7 @@ interface EvaluationResult {
|
|
|
132
132
|
readonly raw_request?: JsonObject;
|
|
133
133
|
readonly evaluator_raw_request?: JsonObject;
|
|
134
134
|
readonly evaluator_results?: readonly EvaluatorResult[];
|
|
135
|
+
readonly error?: string;
|
|
135
136
|
}
|
|
136
137
|
interface EvaluatorResult {
|
|
137
138
|
readonly name: string;
|
|
@@ -252,11 +253,81 @@ type EnvLookup = Readonly<Record<string, string | undefined>>;
|
|
|
252
253
|
interface TargetDefinition {
|
|
253
254
|
readonly name: string;
|
|
254
255
|
readonly provider: ProviderKind | string;
|
|
255
|
-
readonly settings?: Record<string, unknown> | undefined;
|
|
256
256
|
readonly judge_target?: string | undefined;
|
|
257
257
|
readonly workers?: number | undefined;
|
|
258
|
+
readonly provider_batching?: boolean | undefined;
|
|
259
|
+
readonly providerBatching?: boolean | undefined;
|
|
260
|
+
readonly endpoint?: string | unknown | undefined;
|
|
261
|
+
readonly resource?: string | unknown | undefined;
|
|
262
|
+
readonly resourceName?: string | unknown | undefined;
|
|
263
|
+
readonly api_key?: string | unknown | undefined;
|
|
264
|
+
readonly apiKey?: string | unknown | undefined;
|
|
265
|
+
readonly deployment?: string | unknown | undefined;
|
|
266
|
+
readonly deploymentName?: string | unknown | undefined;
|
|
267
|
+
readonly model?: string | unknown | undefined;
|
|
268
|
+
readonly version?: string | unknown | undefined;
|
|
269
|
+
readonly api_version?: string | unknown | undefined;
|
|
270
|
+
readonly variant?: string | unknown | undefined;
|
|
271
|
+
readonly thinking_budget?: number | unknown | undefined;
|
|
272
|
+
readonly thinkingBudget?: number | unknown | undefined;
|
|
273
|
+
readonly temperature?: number | unknown | undefined;
|
|
274
|
+
readonly max_output_tokens?: number | unknown | undefined;
|
|
275
|
+
readonly maxTokens?: number | unknown | undefined;
|
|
276
|
+
readonly executable?: string | unknown | undefined;
|
|
277
|
+
readonly command?: string | unknown | undefined;
|
|
278
|
+
readonly binary?: string | unknown | undefined;
|
|
279
|
+
readonly args?: unknown | undefined;
|
|
280
|
+
readonly arguments?: unknown | undefined;
|
|
281
|
+
readonly cwd?: string | unknown | undefined;
|
|
282
|
+
readonly timeout_seconds?: number | unknown | undefined;
|
|
283
|
+
readonly timeoutSeconds?: number | unknown | undefined;
|
|
284
|
+
readonly log_dir?: string | unknown | undefined;
|
|
285
|
+
readonly logDir?: string | unknown | undefined;
|
|
286
|
+
readonly log_directory?: string | unknown | undefined;
|
|
287
|
+
readonly logDirectory?: string | unknown | undefined;
|
|
288
|
+
readonly log_format?: string | unknown | undefined;
|
|
289
|
+
readonly logFormat?: string | unknown | undefined;
|
|
290
|
+
readonly log_output_format?: string | unknown | undefined;
|
|
291
|
+
readonly logOutputFormat?: string | unknown | undefined;
|
|
292
|
+
readonly response?: string | unknown | undefined;
|
|
293
|
+
readonly delayMs?: number | unknown | undefined;
|
|
294
|
+
readonly delayMinMs?: number | unknown | undefined;
|
|
295
|
+
readonly delayMaxMs?: number | unknown | undefined;
|
|
296
|
+
readonly vscode_cmd?: string | unknown | undefined;
|
|
297
|
+
readonly wait?: boolean | unknown | undefined;
|
|
298
|
+
readonly dry_run?: boolean | unknown | undefined;
|
|
299
|
+
readonly dryRun?: boolean | unknown | undefined;
|
|
300
|
+
readonly subagent_root?: string | unknown | undefined;
|
|
301
|
+
readonly subagentRoot?: string | unknown | undefined;
|
|
302
|
+
readonly workspace_template?: string | unknown | undefined;
|
|
303
|
+
readonly workspaceTemplate?: string | unknown | undefined;
|
|
304
|
+
readonly command_template?: string | unknown | undefined;
|
|
305
|
+
readonly commandTemplate?: string | unknown | undefined;
|
|
306
|
+
readonly files_format?: string | unknown | undefined;
|
|
307
|
+
readonly filesFormat?: string | unknown | undefined;
|
|
308
|
+
readonly attachments_format?: string | unknown | undefined;
|
|
309
|
+
readonly attachmentsFormat?: string | unknown | undefined;
|
|
310
|
+
readonly env?: unknown | undefined;
|
|
311
|
+
readonly healthcheck?: unknown | undefined;
|
|
312
|
+
readonly max_retries?: number | unknown | undefined;
|
|
313
|
+
readonly maxRetries?: number | unknown | undefined;
|
|
314
|
+
readonly retry_initial_delay_ms?: number | unknown | undefined;
|
|
315
|
+
readonly retryInitialDelayMs?: number | unknown | undefined;
|
|
316
|
+
readonly retry_max_delay_ms?: number | unknown | undefined;
|
|
317
|
+
readonly retryMaxDelayMs?: number | unknown | undefined;
|
|
318
|
+
readonly retry_backoff_factor?: number | unknown | undefined;
|
|
319
|
+
readonly retryBackoffFactor?: number | unknown | undefined;
|
|
320
|
+
readonly retry_status_codes?: unknown | undefined;
|
|
321
|
+
readonly retryStatusCodes?: unknown | undefined;
|
|
258
322
|
}
|
|
259
323
|
|
|
324
|
+
interface RetryConfig {
|
|
325
|
+
readonly maxRetries?: number;
|
|
326
|
+
readonly initialDelayMs?: number;
|
|
327
|
+
readonly maxDelayMs?: number;
|
|
328
|
+
readonly backoffFactor?: number;
|
|
329
|
+
readonly retryableStatusCodes?: readonly number[];
|
|
330
|
+
}
|
|
260
331
|
interface AzureResolvedConfig {
|
|
261
332
|
readonly resourceName: string;
|
|
262
333
|
readonly deploymentName: string;
|
|
@@ -264,6 +335,7 @@ interface AzureResolvedConfig {
|
|
|
264
335
|
readonly version?: string;
|
|
265
336
|
readonly temperature?: number;
|
|
266
337
|
readonly maxOutputTokens?: number;
|
|
338
|
+
readonly retry?: RetryConfig;
|
|
267
339
|
}
|
|
268
340
|
interface AnthropicResolvedConfig {
|
|
269
341
|
readonly apiKey: string;
|
|
@@ -271,12 +343,14 @@ interface AnthropicResolvedConfig {
|
|
|
271
343
|
readonly temperature?: number;
|
|
272
344
|
readonly maxOutputTokens?: number;
|
|
273
345
|
readonly thinkingBudget?: number;
|
|
346
|
+
readonly retry?: RetryConfig;
|
|
274
347
|
}
|
|
275
348
|
interface GeminiResolvedConfig {
|
|
276
349
|
readonly apiKey: string;
|
|
277
350
|
readonly model: string;
|
|
278
351
|
readonly temperature?: number;
|
|
279
352
|
readonly maxOutputTokens?: number;
|
|
353
|
+
readonly retry?: RetryConfig;
|
|
280
354
|
}
|
|
281
355
|
interface CodexResolvedConfig {
|
|
282
356
|
readonly executable: string;
|
|
@@ -313,7 +387,6 @@ interface CliResolvedConfig {
|
|
|
313
387
|
readonly commandTemplate: string;
|
|
314
388
|
readonly filesFormat?: string;
|
|
315
389
|
readonly cwd?: string;
|
|
316
|
-
readonly env?: Record<string, string>;
|
|
317
390
|
readonly timeoutMs?: number;
|
|
318
391
|
readonly healthcheck?: CliHealthcheck;
|
|
319
392
|
}
|
package/dist/index.js
CHANGED
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
readTextFile,
|
|
10
10
|
resolveFileReference,
|
|
11
11
|
resolveTargetDefinition
|
|
12
|
-
} from "./chunk-7XM7HYRS.js";
|
|
12
|
+
} from "./chunk-SNTZFB24.js";
|
|
13
13
|
|
|
14
14
|
// src/evaluation/types.ts
|
|
15
15
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
@@ -662,6 +662,67 @@ function ensureChatResponse(result) {
|
|
|
662
662
|
}
|
|
663
663
|
return result;
|
|
664
664
|
}
|
|
665
|
+
function isRetryableError(error, retryableStatusCodes) {
|
|
666
|
+
if (!error || typeof error !== "object") {
|
|
667
|
+
return false;
|
|
668
|
+
}
|
|
669
|
+
if ("status" in error && typeof error.status === "number") {
|
|
670
|
+
return retryableStatusCodes.includes(error.status);
|
|
671
|
+
}
|
|
672
|
+
if ("message" in error && typeof error.message === "string") {
|
|
673
|
+
const match = error.message.match(/HTTP (\d{3})/);
|
|
674
|
+
if (match) {
|
|
675
|
+
const status = Number.parseInt(match[1], 10);
|
|
676
|
+
return retryableStatusCodes.includes(status);
|
|
677
|
+
}
|
|
678
|
+
}
|
|
679
|
+
if ("name" in error && error.name === "AxAIServiceNetworkError") {
|
|
680
|
+
return true;
|
|
681
|
+
}
|
|
682
|
+
return false;
|
|
683
|
+
}
|
|
684
|
+
function calculateRetryDelay(attempt, config) {
|
|
685
|
+
const delay = Math.min(
|
|
686
|
+
config.maxDelayMs,
|
|
687
|
+
config.initialDelayMs * config.backoffFactor ** attempt
|
|
688
|
+
);
|
|
689
|
+
return delay * (0.75 + Math.random() * 0.5);
|
|
690
|
+
}
|
|
691
|
+
async function sleep(ms) {
|
|
692
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
693
|
+
}
|
|
694
|
+
async function withRetry(fn, retryConfig, signal) {
|
|
695
|
+
const config = {
|
|
696
|
+
maxRetries: retryConfig?.maxRetries ?? 3,
|
|
697
|
+
initialDelayMs: retryConfig?.initialDelayMs ?? 1e3,
|
|
698
|
+
maxDelayMs: retryConfig?.maxDelayMs ?? 6e4,
|
|
699
|
+
backoffFactor: retryConfig?.backoffFactor ?? 2,
|
|
700
|
+
retryableStatusCodes: retryConfig?.retryableStatusCodes ?? [500, 408, 429, 502, 503, 504]
|
|
701
|
+
};
|
|
702
|
+
let lastError;
|
|
703
|
+
for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
|
|
704
|
+
if (signal?.aborted) {
|
|
705
|
+
throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
|
|
706
|
+
}
|
|
707
|
+
try {
|
|
708
|
+
return await fn();
|
|
709
|
+
} catch (error) {
|
|
710
|
+
lastError = error;
|
|
711
|
+
if (attempt >= config.maxRetries) {
|
|
712
|
+
break;
|
|
713
|
+
}
|
|
714
|
+
if (!isRetryableError(error, config.retryableStatusCodes)) {
|
|
715
|
+
throw error;
|
|
716
|
+
}
|
|
717
|
+
const delay = calculateRetryDelay(attempt, config);
|
|
718
|
+
await sleep(delay);
|
|
719
|
+
if (signal?.aborted) {
|
|
720
|
+
throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
}
|
|
724
|
+
throw lastError;
|
|
725
|
+
}
|
|
665
726
|
var AzureProvider = class {
|
|
666
727
|
constructor(targetName, config) {
|
|
667
728
|
this.config = config;
|
|
@@ -671,6 +732,7 @@ var AzureProvider = class {
|
|
|
671
732
|
temperature: config.temperature,
|
|
672
733
|
maxOutputTokens: config.maxOutputTokens
|
|
673
734
|
};
|
|
735
|
+
this.retryConfig = config.retry;
|
|
674
736
|
this.ai = AxAI.create({
|
|
675
737
|
name: "azure-openai",
|
|
676
738
|
apiKey: config.apiKey,
|
|
@@ -687,16 +749,21 @@ var AzureProvider = class {
|
|
|
687
749
|
targetName;
|
|
688
750
|
ai;
|
|
689
751
|
defaults;
|
|
752
|
+
retryConfig;
|
|
690
753
|
async invoke(request) {
|
|
691
754
|
const chatPrompt = buildChatPrompt(request);
|
|
692
755
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
693
|
-
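const response = await this.ai.chat(
|
|
694
|
-
{
|
|
695
|
-
chatPrompt,
|
|
696
|
-
model: this.config.deploymentName,
|
|
697
|
-
...modelConfig ? { modelConfig } : {}
|
|
698
|
-
},
|
|
699
|
-
request.signal ? { abortSignal: request.signal } : void 0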
|
|
756
|
+
const response = await withRetry(
|
|
757
|
+
async () => await this.ai.chat(
|
|
758
|
+
{
|
|
759
|
+
chatPrompt,
|
|
760
|
+
model: this.config.deploymentName,
|
|
761
|
+
...modelConfig ? { modelConfig } : {}
|
|
762
|
+
},
|
|
763
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
764
|
+
),
|
|
765
|
+
this.retryConfig,
|
|
766
|
+
request.signal
|
|
700
767
|
);
|
|
701
768
|
return mapResponse(ensureChatResponse(response));
|
|
702
769
|
}
|
|
@@ -714,6 +781,7 @@ var AnthropicProvider = class {
|
|
|
714
781
|
maxOutputTokens: config.maxOutputTokens,
|
|
715
782
|
thinkingBudget: config.thinkingBudget
|
|
716
783
|
};
|
|
784
|
+
this.retryConfig = config.retry;
|
|
717
785
|
this.ai = AxAI.create({
|
|
718
786
|
name: "anthropic",
|
|
719
787
|
apiKey: config.apiKey
|
|
@@ -724,16 +792,21 @@ var AnthropicProvider = class {
|
|
|
724
792
|
targetName;
|
|
725
793
|
ai;
|
|
726
794
|
defaults;
|
|
795
|
+
retryConfig;
|
|
727
796
|
async invoke(request) {
|
|
728
797
|
const chatPrompt = buildChatPrompt(request);
|
|
729
798
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
730
|
-
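const response = await this.ai.chat(
|
|
731
|
-
{
|
|
732
|
-
chatPrompt,
|
|
733
|
-
model: this.config.model,
|
|
734
|
-
...modelConfig ? { modelConfig } : {}
|
|
735
|
-
},
|
|
736
|
-
request.signal ? { abortSignal: request.signal } : void 0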
|
|
799
|
+
const response = await withRetry(
|
|
800
|
+
async () => await this.ai.chat(
|
|
801
|
+
{
|
|
802
|
+
chatPrompt,
|
|
803
|
+
model: this.config.model,
|
|
804
|
+
...modelConfig ? { modelConfig } : {}
|
|
805
|
+
},
|
|
806
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
807
|
+
),
|
|
808
|
+
this.retryConfig,
|
|
809
|
+
request.signal
|
|
737
810
|
);
|
|
738
811
|
return mapResponse(ensureChatResponse(response));
|
|
739
812
|
}
|
|
@@ -750,6 +823,7 @@ var GeminiProvider = class {
|
|
|
750
823
|
temperature: config.temperature,
|
|
751
824
|
maxOutputTokens: config.maxOutputTokens
|
|
752
825
|
};
|
|
826
|
+
this.retryConfig = config.retry;
|
|
753
827
|
this.ai = AxAI.create({
|
|
754
828
|
name: "google-gemini",
|
|
755
829
|
apiKey: config.apiKey
|
|
@@ -760,16 +834,21 @@ var GeminiProvider = class {
|
|
|
760
834
|
targetName;
|
|
761
835
|
ai;
|
|
762
836
|
defaults;
|
|
837
|
+
retryConfig;
|
|
763
838
|
async invoke(request) {
|
|
764
839
|
const chatPrompt = buildChatPrompt(request);
|
|
765
840
|
const modelConfig = extractModelConfig(request, this.defaults);
|
|
766
|
-
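const response = await this.ai.chat(
|
|
767
|
-
{
|
|
768
|
-
chatPrompt,
|
|
769
|
-
model: this.config.model,
|
|
770
|
-
...modelConfig ? { modelConfig } : {}
|
|
771
|
-
},
|
|
772
|
-
request.signal ? { abortSignal: request.signal } : void 0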
|
|
841
|
+
const response = await withRetry(
|
|
842
|
+
async () => await this.ai.chat(
|
|
843
|
+
{
|
|
844
|
+
chatPrompt,
|
|
845
|
+
model: this.config.model,
|
|
846
|
+
...modelConfig ? { modelConfig } : {}
|
|
847
|
+
},
|
|
848
|
+
request.signal ? { abortSignal: request.signal } : void 0
|
|
849
|
+
),
|
|
850
|
+
this.retryConfig,
|
|
851
|
+
request.signal
|
|
773
852
|
);
|
|
774
853
|
return mapResponse(ensureChatResponse(response));
|
|
775
854
|
}
|
|
@@ -839,10 +918,9 @@ var CliProvider = class {
|
|
|
839
918
|
const outputFilePath = generateOutputFilePath(request.evalCaseId);
|
|
840
919
|
const templateValues = buildTemplateValues(request, this.config, outputFilePath);
|
|
841
920
|
const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
|
|
842
|
-
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
843
921
|
const result = await this.runCommand(renderedCommand, {
|
|
844
922
|
cwd: this.config.cwd,
|
|
845
|
-
env,
|
|
923
|
+
env: process.env,
|
|
846
924
|
timeoutMs: this.config.timeoutMs,
|
|
847
925
|
signal: request.signal
|
|
848
926
|
});
|
|
@@ -931,10 +1009,9 @@ var CliProvider = class {
|
|
|
931
1009
|
generateOutputFilePath("healthcheck")
|
|
932
1010
|
)
|
|
933
1011
|
);
|
|
934
|
-
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
935
1012
|
const result = await this.runCommand(renderedCommand, {
|
|
936
1013
|
cwd: healthcheck.cwd ?? this.config.cwd,
|
|
937
|
-
env,
|
|
1014
|
+
env: process.env,
|
|
938
1015
|
timeoutMs,
|
|
939
1016
|
signal
|
|
940
1017
|
});
|
|
@@ -2167,20 +2244,13 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
2167
2244
|
}
|
|
2168
2245
|
const name = value.name;
|
|
2169
2246
|
const provider = value.provider;
|
|
2170
|
-
const settings = value.settings;
|
|
2171
|
-
const judgeTarget = value.judge_target;
|
|
2172
2247
|
if (typeof name !== "string" || name.trim().length === 0) {
|
|
2173
2248
|
throw new Error(`targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`);
|
|
2174
2249
|
}
|
|
2175
2250
|
if (typeof provider !== "string" || provider.trim().length === 0) {
|
|
2176
2251
|
throw new Error(`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider'`);
|
|
2177
2252
|
}
|
|
2178
|
-
return {
|
|
2179
|
-
name,
|
|
2180
|
-
provider,
|
|
2181
|
-
settings: isRecord(settings) ? settings : void 0,
|
|
2182
|
-
judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
|
|
2183
|
-
};
|
|
2253
|
+
return value;
|
|
2184
2254
|
}
|
|
2185
2255
|
async function fileExists3(filePath) {
|
|
2186
2256
|
try {
|
|
@@ -2821,10 +2891,11 @@ async function runEvaluation(options) {
|
|
|
2821
2891
|
await onProgress({
|
|
2822
2892
|
workerId,
|
|
2823
2893
|
evalId: evalCase.id,
|
|
2824
|
-
status: "completed",
|
|
2894
|
+
status: result.error ? "failed" : "completed",
|
|
2825
2895
|
startedAt: 0,
|
|
2826
2896
|
// Not used for completed status
|
|
2827
|
-
completedAt: Date.now()
|
|
2897
|
+
completedAt: Date.now(),
|
|
2898
|
+
error: result.error
|
|
2828
2899
|
});
|
|
2829
2900
|
}
|
|
2830
2901
|
if (onResult) {
|
|
@@ -3362,7 +3433,8 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
3362
3433
|
target: targetName,
|
|
3363
3434
|
timestamp: timestamp.toISOString(),
|
|
3364
3435
|
raw_aspects: [],
|
|
3365
|
-
raw_request: rawRequest
|
|
3436
|
+
raw_request: rawRequest,
|
|
3437
|
+
error: message
|
|
3366
3438
|
};
|
|
3367
3439
|
}
|
|
3368
3440
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|