@agentv/core 0.7.4 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -132,6 +132,7 @@ interface EvaluationResult {
132
132
  readonly raw_request?: JsonObject;
133
133
  readonly evaluator_raw_request?: JsonObject;
134
134
  readonly evaluator_results?: readonly EvaluatorResult[];
135
+ readonly error?: string;
135
136
  }
136
137
  interface EvaluatorResult {
137
138
  readonly name: string;
@@ -174,6 +175,11 @@ declare function buildPromptInputs(testCase: EvalCase): Promise<{
174
175
  }>;
175
176
 
176
177
  declare function fileExists(filePath: string): Promise<boolean>;
178
+ /**
179
+ * Normalize line endings to LF (\n).
180
+ * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
181
+ */
182
+ declare function normalizeLineEndings(content: string): string;
177
183
  /**
178
184
  * Read a text file and normalize line endings to LF (\n).
179
185
  * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
@@ -247,11 +253,81 @@ type EnvLookup = Readonly<Record<string, string | undefined>>;
247
253
  interface TargetDefinition {
248
254
  readonly name: string;
249
255
  readonly provider: ProviderKind | string;
250
- readonly settings?: Record<string, unknown> | undefined;
251
256
  readonly judge_target?: string | undefined;
252
257
  readonly workers?: number | undefined;
258
+ readonly provider_batching?: boolean | undefined;
259
+ readonly providerBatching?: boolean | undefined;
260
+ readonly endpoint?: string | unknown | undefined;
261
+ readonly resource?: string | unknown | undefined;
262
+ readonly resourceName?: string | unknown | undefined;
263
+ readonly api_key?: string | unknown | undefined;
264
+ readonly apiKey?: string | unknown | undefined;
265
+ readonly deployment?: string | unknown | undefined;
266
+ readonly deploymentName?: string | unknown | undefined;
267
+ readonly model?: string | unknown | undefined;
268
+ readonly version?: string | unknown | undefined;
269
+ readonly api_version?: string | unknown | undefined;
270
+ readonly variant?: string | unknown | undefined;
271
+ readonly thinking_budget?: number | unknown | undefined;
272
+ readonly thinkingBudget?: number | unknown | undefined;
273
+ readonly temperature?: number | unknown | undefined;
274
+ readonly max_output_tokens?: number | unknown | undefined;
275
+ readonly maxTokens?: number | unknown | undefined;
276
+ readonly executable?: string | unknown | undefined;
277
+ readonly command?: string | unknown | undefined;
278
+ readonly binary?: string | unknown | undefined;
279
+ readonly args?: unknown | undefined;
280
+ readonly arguments?: unknown | undefined;
281
+ readonly cwd?: string | unknown | undefined;
282
+ readonly timeout_seconds?: number | unknown | undefined;
283
+ readonly timeoutSeconds?: number | unknown | undefined;
284
+ readonly log_dir?: string | unknown | undefined;
285
+ readonly logDir?: string | unknown | undefined;
286
+ readonly log_directory?: string | unknown | undefined;
287
+ readonly logDirectory?: string | unknown | undefined;
288
+ readonly log_format?: string | unknown | undefined;
289
+ readonly logFormat?: string | unknown | undefined;
290
+ readonly log_output_format?: string | unknown | undefined;
291
+ readonly logOutputFormat?: string | unknown | undefined;
292
+ readonly response?: string | unknown | undefined;
293
+ readonly delayMs?: number | unknown | undefined;
294
+ readonly delayMinMs?: number | unknown | undefined;
295
+ readonly delayMaxMs?: number | unknown | undefined;
296
+ readonly vscode_cmd?: string | unknown | undefined;
297
+ readonly wait?: boolean | unknown | undefined;
298
+ readonly dry_run?: boolean | unknown | undefined;
299
+ readonly dryRun?: boolean | unknown | undefined;
300
+ readonly subagent_root?: string | unknown | undefined;
301
+ readonly subagentRoot?: string | unknown | undefined;
302
+ readonly workspace_template?: string | unknown | undefined;
303
+ readonly workspaceTemplate?: string | unknown | undefined;
304
+ readonly command_template?: string | unknown | undefined;
305
+ readonly commandTemplate?: string | unknown | undefined;
306
+ readonly files_format?: string | unknown | undefined;
307
+ readonly filesFormat?: string | unknown | undefined;
308
+ readonly attachments_format?: string | unknown | undefined;
309
+ readonly attachmentsFormat?: string | unknown | undefined;
310
+ readonly env?: unknown | undefined;
311
+ readonly healthcheck?: unknown | undefined;
312
+ readonly max_retries?: number | unknown | undefined;
313
+ readonly maxRetries?: number | unknown | undefined;
314
+ readonly retry_initial_delay_ms?: number | unknown | undefined;
315
+ readonly retryInitialDelayMs?: number | unknown | undefined;
316
+ readonly retry_max_delay_ms?: number | unknown | undefined;
317
+ readonly retryMaxDelayMs?: number | unknown | undefined;
318
+ readonly retry_backoff_factor?: number | unknown | undefined;
319
+ readonly retryBackoffFactor?: number | unknown | undefined;
320
+ readonly retry_status_codes?: unknown | undefined;
321
+ readonly retryStatusCodes?: unknown | undefined;
253
322
  }
254
323
 
324
+ interface RetryConfig {
325
+ readonly maxRetries?: number;
326
+ readonly initialDelayMs?: number;
327
+ readonly maxDelayMs?: number;
328
+ readonly backoffFactor?: number;
329
+ readonly retryableStatusCodes?: readonly number[];
330
+ }
255
331
  interface AzureResolvedConfig {
256
332
  readonly resourceName: string;
257
333
  readonly deploymentName: string;
@@ -259,6 +335,7 @@ interface AzureResolvedConfig {
259
335
  readonly version?: string;
260
336
  readonly temperature?: number;
261
337
  readonly maxOutputTokens?: number;
338
+ readonly retry?: RetryConfig;
262
339
  }
263
340
  interface AnthropicResolvedConfig {
264
341
  readonly apiKey: string;
@@ -266,12 +343,14 @@ interface AnthropicResolvedConfig {
266
343
  readonly temperature?: number;
267
344
  readonly maxOutputTokens?: number;
268
345
  readonly thinkingBudget?: number;
346
+ readonly retry?: RetryConfig;
269
347
  }
270
348
  interface GeminiResolvedConfig {
271
349
  readonly apiKey: string;
272
350
  readonly model: string;
273
351
  readonly temperature?: number;
274
352
  readonly maxOutputTokens?: number;
353
+ readonly retry?: RetryConfig;
275
354
  }
276
355
  interface CodexResolvedConfig {
277
356
  readonly executable: string;
@@ -308,7 +387,6 @@ interface CliResolvedConfig {
308
387
  readonly commandTemplate: string;
309
388
  readonly filesFormat?: string;
310
389
  readonly cwd?: string;
311
- readonly env?: Record<string, string>;
312
390
  readonly timeoutMs?: number;
313
391
  readonly healthcheck?: CliHealthcheck;
314
392
  }
@@ -516,4 +594,4 @@ type AgentKernel = {
516
594
  };
517
595
  declare function createAgentKernel(): AgentKernel;
518
596
 
519
- export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
597
+ export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
package/dist/index.d.ts CHANGED
@@ -132,6 +132,7 @@ interface EvaluationResult {
132
132
  readonly raw_request?: JsonObject;
133
133
  readonly evaluator_raw_request?: JsonObject;
134
134
  readonly evaluator_results?: readonly EvaluatorResult[];
135
+ readonly error?: string;
135
136
  }
136
137
  interface EvaluatorResult {
137
138
  readonly name: string;
@@ -174,6 +175,11 @@ declare function buildPromptInputs(testCase: EvalCase): Promise<{
174
175
  }>;
175
176
 
176
177
  declare function fileExists(filePath: string): Promise<boolean>;
178
+ /**
179
+ * Normalize line endings to LF (\n).
180
+ * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
181
+ */
182
+ declare function normalizeLineEndings(content: string): string;
177
183
  /**
178
184
  * Read a text file and normalize line endings to LF (\n).
179
185
  * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
@@ -247,11 +253,81 @@ type EnvLookup = Readonly<Record<string, string | undefined>>;
247
253
  interface TargetDefinition {
248
254
  readonly name: string;
249
255
  readonly provider: ProviderKind | string;
250
- readonly settings?: Record<string, unknown> | undefined;
251
256
  readonly judge_target?: string | undefined;
252
257
  readonly workers?: number | undefined;
258
+ readonly provider_batching?: boolean | undefined;
259
+ readonly providerBatching?: boolean | undefined;
260
+ readonly endpoint?: string | unknown | undefined;
261
+ readonly resource?: string | unknown | undefined;
262
+ readonly resourceName?: string | unknown | undefined;
263
+ readonly api_key?: string | unknown | undefined;
264
+ readonly apiKey?: string | unknown | undefined;
265
+ readonly deployment?: string | unknown | undefined;
266
+ readonly deploymentName?: string | unknown | undefined;
267
+ readonly model?: string | unknown | undefined;
268
+ readonly version?: string | unknown | undefined;
269
+ readonly api_version?: string | unknown | undefined;
270
+ readonly variant?: string | unknown | undefined;
271
+ readonly thinking_budget?: number | unknown | undefined;
272
+ readonly thinkingBudget?: number | unknown | undefined;
273
+ readonly temperature?: number | unknown | undefined;
274
+ readonly max_output_tokens?: number | unknown | undefined;
275
+ readonly maxTokens?: number | unknown | undefined;
276
+ readonly executable?: string | unknown | undefined;
277
+ readonly command?: string | unknown | undefined;
278
+ readonly binary?: string | unknown | undefined;
279
+ readonly args?: unknown | undefined;
280
+ readonly arguments?: unknown | undefined;
281
+ readonly cwd?: string | unknown | undefined;
282
+ readonly timeout_seconds?: number | unknown | undefined;
283
+ readonly timeoutSeconds?: number | unknown | undefined;
284
+ readonly log_dir?: string | unknown | undefined;
285
+ readonly logDir?: string | unknown | undefined;
286
+ readonly log_directory?: string | unknown | undefined;
287
+ readonly logDirectory?: string | unknown | undefined;
288
+ readonly log_format?: string | unknown | undefined;
289
+ readonly logFormat?: string | unknown | undefined;
290
+ readonly log_output_format?: string | unknown | undefined;
291
+ readonly logOutputFormat?: string | unknown | undefined;
292
+ readonly response?: string | unknown | undefined;
293
+ readonly delayMs?: number | unknown | undefined;
294
+ readonly delayMinMs?: number | unknown | undefined;
295
+ readonly delayMaxMs?: number | unknown | undefined;
296
+ readonly vscode_cmd?: string | unknown | undefined;
297
+ readonly wait?: boolean | unknown | undefined;
298
+ readonly dry_run?: boolean | unknown | undefined;
299
+ readonly dryRun?: boolean | unknown | undefined;
300
+ readonly subagent_root?: string | unknown | undefined;
301
+ readonly subagentRoot?: string | unknown | undefined;
302
+ readonly workspace_template?: string | unknown | undefined;
303
+ readonly workspaceTemplate?: string | unknown | undefined;
304
+ readonly command_template?: string | unknown | undefined;
305
+ readonly commandTemplate?: string | unknown | undefined;
306
+ readonly files_format?: string | unknown | undefined;
307
+ readonly filesFormat?: string | unknown | undefined;
308
+ readonly attachments_format?: string | unknown | undefined;
309
+ readonly attachmentsFormat?: string | unknown | undefined;
310
+ readonly env?: unknown | undefined;
311
+ readonly healthcheck?: unknown | undefined;
312
+ readonly max_retries?: number | unknown | undefined;
313
+ readonly maxRetries?: number | unknown | undefined;
314
+ readonly retry_initial_delay_ms?: number | unknown | undefined;
315
+ readonly retryInitialDelayMs?: number | unknown | undefined;
316
+ readonly retry_max_delay_ms?: number | unknown | undefined;
317
+ readonly retryMaxDelayMs?: number | unknown | undefined;
318
+ readonly retry_backoff_factor?: number | unknown | undefined;
319
+ readonly retryBackoffFactor?: number | unknown | undefined;
320
+ readonly retry_status_codes?: unknown | undefined;
321
+ readonly retryStatusCodes?: unknown | undefined;
253
322
  }
254
323
 
324
+ interface RetryConfig {
325
+ readonly maxRetries?: number;
326
+ readonly initialDelayMs?: number;
327
+ readonly maxDelayMs?: number;
328
+ readonly backoffFactor?: number;
329
+ readonly retryableStatusCodes?: readonly number[];
330
+ }
255
331
  interface AzureResolvedConfig {
256
332
  readonly resourceName: string;
257
333
  readonly deploymentName: string;
@@ -259,6 +335,7 @@ interface AzureResolvedConfig {
259
335
  readonly version?: string;
260
336
  readonly temperature?: number;
261
337
  readonly maxOutputTokens?: number;
338
+ readonly retry?: RetryConfig;
262
339
  }
263
340
  interface AnthropicResolvedConfig {
264
341
  readonly apiKey: string;
@@ -266,12 +343,14 @@ interface AnthropicResolvedConfig {
266
343
  readonly temperature?: number;
267
344
  readonly maxOutputTokens?: number;
268
345
  readonly thinkingBudget?: number;
346
+ readonly retry?: RetryConfig;
269
347
  }
270
348
  interface GeminiResolvedConfig {
271
349
  readonly apiKey: string;
272
350
  readonly model: string;
273
351
  readonly temperature?: number;
274
352
  readonly maxOutputTokens?: number;
353
+ readonly retry?: RetryConfig;
275
354
  }
276
355
  interface CodexResolvedConfig {
277
356
  readonly executable: string;
@@ -308,7 +387,6 @@ interface CliResolvedConfig {
308
387
  readonly commandTemplate: string;
309
388
  readonly filesFormat?: string;
310
389
  readonly cwd?: string;
311
- readonly env?: Record<string, string>;
312
390
  readonly timeoutMs?: number;
313
391
  readonly healthcheck?: CliHealthcheck;
314
392
  }
@@ -516,4 +594,4 @@ type AgentKernel = {
516
594
  };
517
595
  declare function createAgentKernel(): AgentKernel;
518
596
 
519
- export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
597
+ export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
package/dist/index.js CHANGED
@@ -5,10 +5,11 @@ import {
5
5
  fileExists,
6
6
  findGitRoot,
7
7
  isAgentProvider,
8
+ normalizeLineEndings,
8
9
  readTextFile,
9
10
  resolveFileReference,
10
11
  resolveTargetDefinition
11
- } from "./chunk-L6RCDZ4Z.js";
12
+ } from "./chunk-SNTZFB24.js";
12
13
 
13
14
  // src/evaluation/types.ts
14
15
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
@@ -661,6 +662,67 @@ function ensureChatResponse(result) {
661
662
  }
662
663
  return result;
663
664
  }
665
+ function isRetryableError(error, retryableStatusCodes) {
666
+ if (!error || typeof error !== "object") {
667
+ return false;
668
+ }
669
+ if ("status" in error && typeof error.status === "number") {
670
+ return retryableStatusCodes.includes(error.status);
671
+ }
672
+ if ("message" in error && typeof error.message === "string") {
673
+ const match = error.message.match(/HTTP (\d{3})/);
674
+ if (match) {
675
+ const status = Number.parseInt(match[1], 10);
676
+ return retryableStatusCodes.includes(status);
677
+ }
678
+ }
679
+ if ("name" in error && error.name === "AxAIServiceNetworkError") {
680
+ return true;
681
+ }
682
+ return false;
683
+ }
684
+ function calculateRetryDelay(attempt, config) {
685
+ const delay = Math.min(
686
+ config.maxDelayMs,
687
+ config.initialDelayMs * config.backoffFactor ** attempt
688
+ );
689
+ return delay * (0.75 + Math.random() * 0.5);
690
+ }
691
+ async function sleep(ms) {
692
+ return new Promise((resolve) => setTimeout(resolve, ms));
693
+ }
694
+ async function withRetry(fn, retryConfig, signal) {
695
+ const config = {
696
+ maxRetries: retryConfig?.maxRetries ?? 3,
697
+ initialDelayMs: retryConfig?.initialDelayMs ?? 1e3,
698
+ maxDelayMs: retryConfig?.maxDelayMs ?? 6e4,
699
+ backoffFactor: retryConfig?.backoffFactor ?? 2,
700
+ retryableStatusCodes: retryConfig?.retryableStatusCodes ?? [500, 408, 429, 502, 503, 504]
701
+ };
702
+ let lastError;
703
+ for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
704
+ if (signal?.aborted) {
705
+ throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
706
+ }
707
+ try {
708
+ return await fn();
709
+ } catch (error) {
710
+ lastError = error;
711
+ if (attempt >= config.maxRetries) {
712
+ break;
713
+ }
714
+ if (!isRetryableError(error, config.retryableStatusCodes)) {
715
+ throw error;
716
+ }
717
+ const delay = calculateRetryDelay(attempt, config);
718
+ await sleep(delay);
719
+ if (signal?.aborted) {
720
+ throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
721
+ }
722
+ }
723
+ }
724
+ throw lastError;
725
+ }
664
726
  var AzureProvider = class {
665
727
  constructor(targetName, config) {
666
728
  this.config = config;
@@ -670,6 +732,7 @@ var AzureProvider = class {
670
732
  temperature: config.temperature,
671
733
  maxOutputTokens: config.maxOutputTokens
672
734
  };
735
+ this.retryConfig = config.retry;
673
736
  this.ai = AxAI.create({
674
737
  name: "azure-openai",
675
738
  apiKey: config.apiKey,
@@ -686,16 +749,21 @@ var AzureProvider = class {
686
749
  targetName;
687
750
  ai;
688
751
  defaults;
752
+ retryConfig;
689
753
  async invoke(request) {
690
754
  const chatPrompt = buildChatPrompt(request);
691
755
  const modelConfig = extractModelConfig(request, this.defaults);
692
- const response = await this.ai.chat(
693
- {
694
- chatPrompt,
695
- model: this.config.deploymentName,
696
- ...modelConfig ? { modelConfig } : {}
697
- },
698
- request.signal ? { abortSignal: request.signal } : void 0
756
+ const response = await withRetry(
757
+ async () => await this.ai.chat(
758
+ {
759
+ chatPrompt,
760
+ model: this.config.deploymentName,
761
+ ...modelConfig ? { modelConfig } : {}
762
+ },
763
+ request.signal ? { abortSignal: request.signal } : void 0
764
+ ),
765
+ this.retryConfig,
766
+ request.signal
699
767
  );
700
768
  return mapResponse(ensureChatResponse(response));
701
769
  }
@@ -713,6 +781,7 @@ var AnthropicProvider = class {
713
781
  maxOutputTokens: config.maxOutputTokens,
714
782
  thinkingBudget: config.thinkingBudget
715
783
  };
784
+ this.retryConfig = config.retry;
716
785
  this.ai = AxAI.create({
717
786
  name: "anthropic",
718
787
  apiKey: config.apiKey
@@ -723,16 +792,21 @@ var AnthropicProvider = class {
723
792
  targetName;
724
793
  ai;
725
794
  defaults;
795
+ retryConfig;
726
796
  async invoke(request) {
727
797
  const chatPrompt = buildChatPrompt(request);
728
798
  const modelConfig = extractModelConfig(request, this.defaults);
729
- const response = await this.ai.chat(
730
- {
731
- chatPrompt,
732
- model: this.config.model,
733
- ...modelConfig ? { modelConfig } : {}
734
- },
735
- request.signal ? { abortSignal: request.signal } : void 0
799
+ const response = await withRetry(
800
+ async () => await this.ai.chat(
801
+ {
802
+ chatPrompt,
803
+ model: this.config.model,
804
+ ...modelConfig ? { modelConfig } : {}
805
+ },
806
+ request.signal ? { abortSignal: request.signal } : void 0
807
+ ),
808
+ this.retryConfig,
809
+ request.signal
736
810
  );
737
811
  return mapResponse(ensureChatResponse(response));
738
812
  }
@@ -749,6 +823,7 @@ var GeminiProvider = class {
749
823
  temperature: config.temperature,
750
824
  maxOutputTokens: config.maxOutputTokens
751
825
  };
826
+ this.retryConfig = config.retry;
752
827
  this.ai = AxAI.create({
753
828
  name: "google-gemini",
754
829
  apiKey: config.apiKey
@@ -759,16 +834,21 @@ var GeminiProvider = class {
759
834
  targetName;
760
835
  ai;
761
836
  defaults;
837
+ retryConfig;
762
838
  async invoke(request) {
763
839
  const chatPrompt = buildChatPrompt(request);
764
840
  const modelConfig = extractModelConfig(request, this.defaults);
765
- const response = await this.ai.chat(
766
- {
767
- chatPrompt,
768
- model: this.config.model,
769
- ...modelConfig ? { modelConfig } : {}
770
- },
771
- request.signal ? { abortSignal: request.signal } : void 0
841
+ const response = await withRetry(
842
+ async () => await this.ai.chat(
843
+ {
844
+ chatPrompt,
845
+ model: this.config.model,
846
+ ...modelConfig ? { modelConfig } : {}
847
+ },
848
+ request.signal ? { abortSignal: request.signal } : void 0
849
+ ),
850
+ this.retryConfig,
851
+ request.signal
772
852
  );
773
853
  return mapResponse(ensureChatResponse(response));
774
854
  }
@@ -796,7 +876,6 @@ async function defaultCommandRunner(command, options) {
796
876
  };
797
877
  try {
798
878
  const { stdout, stderr } = await execAsync(command, execOptions);
799
- console.error(`[CLI DEBUG] SUCCESS - stdout: ${stdout.length} bytes, stderr: ${stderr.length} bytes`);
800
879
  return {
801
880
  stdout,
802
881
  stderr,
@@ -807,8 +886,6 @@ async function defaultCommandRunner(command, options) {
807
886
  };
808
887
  } catch (error) {
809
888
  const execError = error;
810
- console.error(`[CLI DEBUG] ERROR - code: ${execError.code}, message: ${execError.message}`);
811
- console.error(`[CLI DEBUG] stdout: ${execError.stdout?.length ?? 0} bytes, stderr: ${execError.stderr?.length ?? 0} bytes`);
812
889
  return {
813
890
  stdout: execError.stdout ?? "",
814
891
  stderr: execError.stderr ?? "",
@@ -841,10 +918,9 @@ var CliProvider = class {
841
918
  const outputFilePath = generateOutputFilePath(request.evalCaseId);
842
919
  const templateValues = buildTemplateValues(request, this.config, outputFilePath);
843
920
  const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
844
- const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
845
921
  const result = await this.runCommand(renderedCommand, {
846
922
  cwd: this.config.cwd,
847
- env,
923
+ env: process.env,
848
924
  timeoutMs: this.config.timeoutMs,
849
925
  signal: request.signal
850
926
  });
@@ -876,7 +952,7 @@ var CliProvider = class {
876
952
  }
877
953
  async readAndCleanupOutputFile(filePath) {
878
954
  try {
879
- const content = await fs.readFile(filePath, "utf-8");
955
+ const content = await readTextFile(filePath);
880
956
  return content;
881
957
  } catch (error) {
882
958
  const errorMsg = error instanceof Error ? error.message : String(error);
@@ -933,10 +1009,9 @@ var CliProvider = class {
933
1009
  generateOutputFilePath("healthcheck")
934
1010
  )
935
1011
  );
936
- const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
937
1012
  const result = await this.runCommand(renderedCommand, {
938
1013
  cwd: healthcheck.cwd ?? this.config.cwd,
939
- env,
1014
+ env: process.env,
940
1015
  timeoutMs,
941
1016
  signal
942
1017
  });
@@ -2169,20 +2244,13 @@ function assertTargetDefinition(value, index, filePath) {
2169
2244
  }
2170
2245
  const name = value.name;
2171
2246
  const provider = value.provider;
2172
- const settings = value.settings;
2173
- const judgeTarget = value.judge_target;
2174
2247
  if (typeof name !== "string" || name.trim().length === 0) {
2175
2248
  throw new Error(`targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`);
2176
2249
  }
2177
2250
  if (typeof provider !== "string" || provider.trim().length === 0) {
2178
2251
  throw new Error(`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider'`);
2179
2252
  }
2180
- return {
2181
- name,
2182
- provider,
2183
- settings: isRecord(settings) ? settings : void 0,
2184
- judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
2185
- };
2253
+ return value;
2186
2254
  }
2187
2255
  async function fileExists3(filePath) {
2188
2256
  try {
@@ -2823,10 +2891,11 @@ async function runEvaluation(options) {
2823
2891
  await onProgress({
2824
2892
  workerId,
2825
2893
  evalId: evalCase.id,
2826
- status: "completed",
2894
+ status: result.error ? "failed" : "completed",
2827
2895
  startedAt: 0,
2828
2896
  // Not used for completed status
2829
- completedAt: Date.now()
2897
+ completedAt: Date.now(),
2898
+ error: result.error
2830
2899
  });
2831
2900
  }
2832
2901
  if (onResult) {
@@ -3364,7 +3433,8 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
3364
3433
  target: targetName,
3365
3434
  timestamp: timestamp.toISOString(),
3366
3435
  raw_aspects: [],
3367
- raw_request: rawRequest
3436
+ raw_request: rawRequest,
3437
+ error: message
3368
3438
  };
3369
3439
  }
3370
3440
  function createCacheKey(provider, target, evalCase, promptInputs) {
@@ -3420,6 +3490,7 @@ export {
3420
3490
  isTestMessageRole,
3421
3491
  listTargetNames,
3422
3492
  loadEvalCases,
3493
+ normalizeLineEndings,
3423
3494
  readTargetDefinitions,
3424
3495
  readTextFile,
3425
3496
  resolveAndCreateProvider,