@agentv/core 0.7.3 → 0.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-7XM7HYRS.js +645 -0
- package/dist/chunk-7XM7HYRS.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +11 -1
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +2 -2
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +56 -26
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +6 -1
- package/dist/index.d.ts +6 -1
- package/dist/index.js +39 -492
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-UQLHF3T7.js +0 -158
- package/dist/chunk-UQLHF3T7.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -174,6 +174,11 @@ declare function buildPromptInputs(testCase: EvalCase): Promise<{
|
|
|
174
174
|
}>;
|
|
175
175
|
|
|
176
176
|
declare function fileExists(filePath: string): Promise<boolean>;
|
|
177
|
+
/**
|
|
178
|
+
* Normalize line endings to LF (\n).
|
|
179
|
+
* This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
|
|
180
|
+
*/
|
|
181
|
+
declare function normalizeLineEndings(content: string): string;
|
|
177
182
|
/**
|
|
178
183
|
* Read a text file and normalize line endings to LF (\n).
|
|
179
184
|
* This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
|
|
@@ -516,4 +521,4 @@ type AgentKernel = {
|
|
|
516
521
|
};
|
|
517
522
|
declare function createAgentKernel(): AgentKernel;
|
|
518
523
|
|
|
519
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|
|
524
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|
package/dist/index.d.ts
CHANGED
|
@@ -174,6 +174,11 @@ declare function buildPromptInputs(testCase: EvalCase): Promise<{
|
|
|
174
174
|
}>;
|
|
175
175
|
|
|
176
176
|
declare function fileExists(filePath: string): Promise<boolean>;
|
|
177
|
+
/**
|
|
178
|
+
* Normalize line endings to LF (\n).
|
|
179
|
+
* This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
|
|
180
|
+
*/
|
|
181
|
+
declare function normalizeLineEndings(content: string): string;
|
|
177
182
|
/**
|
|
178
183
|
* Read a text file and normalize line endings to LF (\n).
|
|
179
184
|
* This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
|
|
@@ -516,4 +521,4 @@ type AgentKernel = {
|
|
|
516
521
|
};
|
|
517
522
|
declare function createAgentKernel(): AgentKernel;
|
|
518
523
|
|
|
519
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|
|
524
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|
package/dist/index.js
CHANGED
|
@@ -5,9 +5,11 @@ import {
|
|
|
5
5
|
fileExists,
|
|
6
6
|
findGitRoot,
|
|
7
7
|
isAgentProvider,
|
|
8
|
+
normalizeLineEndings,
|
|
8
9
|
readTextFile,
|
|
9
|
-
resolveFileReference
|
|
10
|
-
|
|
10
|
+
resolveFileReference,
|
|
11
|
+
resolveTargetDefinition
|
|
12
|
+
} from "./chunk-7XM7HYRS.js";
|
|
11
13
|
|
|
12
14
|
// src/evaluation/types.ts
|
|
13
15
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
@@ -778,6 +780,8 @@ var GeminiProvider = class {
|
|
|
778
780
|
|
|
779
781
|
// src/evaluation/providers/cli.ts
|
|
780
782
|
import { exec as execWithCallback } from "node:child_process";
|
|
783
|
+
import fs from "node:fs/promises";
|
|
784
|
+
import os from "node:os";
|
|
781
785
|
import path2 from "node:path";
|
|
782
786
|
import { promisify } from "node:util";
|
|
783
787
|
var execAsync = promisify(execWithCallback);
|
|
@@ -832,7 +836,8 @@ var CliProvider = class {
|
|
|
832
836
|
throw new Error("CLI provider request was aborted before execution");
|
|
833
837
|
}
|
|
834
838
|
await this.ensureHealthy(request.signal);
|
|
835
|
-
const
|
|
839
|
+
const outputFilePath = generateOutputFilePath(request.evalCaseId);
|
|
840
|
+
const templateValues = buildTemplateValues(request, this.config, outputFilePath);
|
|
836
841
|
const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
|
|
837
842
|
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
838
843
|
const result = await this.runCommand(renderedCommand, {
|
|
@@ -855,16 +860,30 @@ var CliProvider = class {
|
|
|
855
860
|
const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
|
|
856
861
|
throw new Error(message);
|
|
857
862
|
}
|
|
863
|
+
const responseText = await this.readAndCleanupOutputFile(outputFilePath);
|
|
858
864
|
return {
|
|
859
|
-
text:
|
|
865
|
+
text: responseText,
|
|
860
866
|
raw: {
|
|
861
867
|
command: renderedCommand,
|
|
862
868
|
stderr: result.stderr,
|
|
863
869
|
exitCode: result.exitCode ?? 0,
|
|
864
|
-
cwd: this.config.cwd
|
|
870
|
+
cwd: this.config.cwd,
|
|
871
|
+
outputFile: outputFilePath
|
|
865
872
|
}
|
|
866
873
|
};
|
|
867
874
|
}
|
|
875
|
+
async readAndCleanupOutputFile(filePath) {
|
|
876
|
+
try {
|
|
877
|
+
const content = await readTextFile(filePath);
|
|
878
|
+
return content;
|
|
879
|
+
} catch (error) {
|
|
880
|
+
const errorMsg = error instanceof Error ? error.message : String(error);
|
|
881
|
+
throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
|
|
882
|
+
} finally {
|
|
883
|
+
await fs.unlink(filePath).catch(() => {
|
|
884
|
+
});
|
|
885
|
+
}
|
|
886
|
+
}
|
|
868
887
|
async ensureHealthy(signal) {
|
|
869
888
|
if (!this.config.healthcheck) {
|
|
870
889
|
return;
|
|
@@ -905,10 +924,11 @@ var CliProvider = class {
|
|
|
905
924
|
question: "",
|
|
906
925
|
guidelines: "",
|
|
907
926
|
inputFiles: [],
|
|
908
|
-
evalCaseId: "",
|
|
927
|
+
evalCaseId: "healthcheck",
|
|
909
928
|
attempt: 0
|
|
910
929
|
},
|
|
911
|
-
this.config
|
|
930
|
+
this.config,
|
|
931
|
+
generateOutputFilePath("healthcheck")
|
|
912
932
|
)
|
|
913
933
|
);
|
|
914
934
|
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
@@ -926,14 +946,15 @@ var CliProvider = class {
|
|
|
926
946
|
}
|
|
927
947
|
}
|
|
928
948
|
};
|
|
929
|
-
function buildTemplateValues(request, config) {
|
|
949
|
+
function buildTemplateValues(request, config, outputFilePath) {
|
|
930
950
|
const inputFiles = normalizeInputFiles(request.inputFiles);
|
|
931
951
|
return {
|
|
932
952
|
PROMPT: shellEscape(request.question ?? ""),
|
|
933
953
|
GUIDELINES: shellEscape(request.guidelines ?? ""),
|
|
934
954
|
EVAL_ID: shellEscape(request.evalCaseId ?? ""),
|
|
935
955
|
ATTEMPT: shellEscape(String(request.attempt ?? 0)),
|
|
936
|
-
FILES: formatFileList(inputFiles, config.filesFormat)
|
|
956
|
+
FILES: formatFileList(inputFiles, config.filesFormat),
|
|
957
|
+
OUTPUT_FILE: shellEscape(outputFilePath)
|
|
937
958
|
};
|
|
938
959
|
}
|
|
939
960
|
function normalizeInputFiles(inputFiles) {
|
|
@@ -971,11 +992,17 @@ function shellEscape(value) {
|
|
|
971
992
|
return "''";
|
|
972
993
|
}
|
|
973
994
|
if (process.platform === "win32") {
|
|
974
|
-
const escaped = value.replace(/
|
|
975
|
-
return `
|
|
995
|
+
const escaped = value.replace(/'/g, "''");
|
|
996
|
+
return `'${escaped}'`;
|
|
976
997
|
}
|
|
977
998
|
return `'${value.replace(/'/g, `'"'"'`)}'`;
|
|
978
999
|
}
|
|
1000
|
+
function generateOutputFilePath(evalCaseId) {
|
|
1001
|
+
const safeEvalId = evalCaseId || "unknown";
|
|
1002
|
+
const timestamp = Date.now();
|
|
1003
|
+
const random = Math.random().toString(36).substring(2, 9);
|
|
1004
|
+
return path2.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}.json`);
|
|
1005
|
+
}
|
|
979
1006
|
function formatTimeoutSuffix(timeoutMs) {
|
|
980
1007
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
981
1008
|
return "";
|
|
@@ -1852,487 +1879,6 @@ var MockProvider = class {
|
|
|
1852
1879
|
}
|
|
1853
1880
|
};
|
|
1854
1881
|
|
|
1855
|
-
// src/evaluation/providers/targets.ts
|
|
1856
|
-
import { z } from "zod";
|
|
1857
|
-
var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES"]);
|
|
1858
|
-
var BASE_TARGET_SCHEMA = z.object({
|
|
1859
|
-
name: z.string().min(1, "target name is required"),
|
|
1860
|
-
provider: z.string().min(1, "provider is required"),
|
|
1861
|
-
settings: z.record(z.unknown()).optional(),
|
|
1862
|
-
judge_target: z.string().optional(),
|
|
1863
|
-
workers: z.number().int().min(1).optional()
|
|
1864
|
-
});
|
|
1865
|
-
var DEFAULT_AZURE_API_VERSION = "2024-10-01-preview";
|
|
1866
|
-
function normalizeAzureApiVersion(value) {
|
|
1867
|
-
if (!value) {
|
|
1868
|
-
return DEFAULT_AZURE_API_VERSION;
|
|
1869
|
-
}
|
|
1870
|
-
const trimmed = value.trim();
|
|
1871
|
-
if (trimmed.length === 0) {
|
|
1872
|
-
return DEFAULT_AZURE_API_VERSION;
|
|
1873
|
-
}
|
|
1874
|
-
const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
|
|
1875
|
-
return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
|
|
1876
|
-
}
|
|
1877
|
-
function resolveTargetDefinition(definition, env = process.env) {
|
|
1878
|
-
const parsed = BASE_TARGET_SCHEMA.parse(definition);
|
|
1879
|
-
const provider = parsed.provider.toLowerCase();
|
|
1880
|
-
const providerBatching = resolveOptionalBoolean(
|
|
1881
|
-
parsed.settings?.provider_batching ?? parsed.settings?.providerBatching
|
|
1882
|
-
);
|
|
1883
|
-
switch (provider) {
|
|
1884
|
-
case "azure":
|
|
1885
|
-
case "azure-openai":
|
|
1886
|
-
return {
|
|
1887
|
-
kind: "azure",
|
|
1888
|
-
name: parsed.name,
|
|
1889
|
-
judgeTarget: parsed.judge_target,
|
|
1890
|
-
workers: parsed.workers,
|
|
1891
|
-
providerBatching,
|
|
1892
|
-
config: resolveAzureConfig(parsed, env)
|
|
1893
|
-
};
|
|
1894
|
-
case "anthropic":
|
|
1895
|
-
return {
|
|
1896
|
-
kind: "anthropic",
|
|
1897
|
-
name: parsed.name,
|
|
1898
|
-
judgeTarget: parsed.judge_target,
|
|
1899
|
-
workers: parsed.workers,
|
|
1900
|
-
providerBatching,
|
|
1901
|
-
config: resolveAnthropicConfig(parsed, env)
|
|
1902
|
-
};
|
|
1903
|
-
case "gemini":
|
|
1904
|
-
case "google":
|
|
1905
|
-
case "google-gemini":
|
|
1906
|
-
return {
|
|
1907
|
-
kind: "gemini",
|
|
1908
|
-
name: parsed.name,
|
|
1909
|
-
judgeTarget: parsed.judge_target,
|
|
1910
|
-
workers: parsed.workers,
|
|
1911
|
-
providerBatching,
|
|
1912
|
-
config: resolveGeminiConfig(parsed, env)
|
|
1913
|
-
};
|
|
1914
|
-
case "codex":
|
|
1915
|
-
case "codex-cli":
|
|
1916
|
-
return {
|
|
1917
|
-
kind: "codex",
|
|
1918
|
-
name: parsed.name,
|
|
1919
|
-
judgeTarget: parsed.judge_target,
|
|
1920
|
-
workers: parsed.workers,
|
|
1921
|
-
providerBatching,
|
|
1922
|
-
config: resolveCodexConfig(parsed, env)
|
|
1923
|
-
};
|
|
1924
|
-
case "mock":
|
|
1925
|
-
return {
|
|
1926
|
-
kind: "mock",
|
|
1927
|
-
name: parsed.name,
|
|
1928
|
-
judgeTarget: parsed.judge_target,
|
|
1929
|
-
workers: parsed.workers,
|
|
1930
|
-
providerBatching,
|
|
1931
|
-
config: resolveMockConfig(parsed)
|
|
1932
|
-
};
|
|
1933
|
-
case "vscode":
|
|
1934
|
-
case "vscode-insiders":
|
|
1935
|
-
return {
|
|
1936
|
-
kind: provider,
|
|
1937
|
-
name: parsed.name,
|
|
1938
|
-
judgeTarget: parsed.judge_target,
|
|
1939
|
-
workers: parsed.workers,
|
|
1940
|
-
providerBatching,
|
|
1941
|
-
config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders")
|
|
1942
|
-
};
|
|
1943
|
-
case "cli":
|
|
1944
|
-
return {
|
|
1945
|
-
kind: "cli",
|
|
1946
|
-
name: parsed.name,
|
|
1947
|
-
judgeTarget: parsed.judge_target,
|
|
1948
|
-
workers: parsed.workers,
|
|
1949
|
-
providerBatching,
|
|
1950
|
-
config: resolveCliConfig(parsed, env)
|
|
1951
|
-
};
|
|
1952
|
-
default:
|
|
1953
|
-
throw new Error(`Unsupported provider '${parsed.provider}' in target '${parsed.name}'`);
|
|
1954
|
-
}
|
|
1955
|
-
}
|
|
1956
|
-
function resolveAzureConfig(target, env) {
|
|
1957
|
-
const settings = target.settings ?? {};
|
|
1958
|
-
const endpointSource = settings.endpoint ?? settings.resource ?? settings.resourceName;
|
|
1959
|
-
const apiKeySource = settings.api_key ?? settings.apiKey;
|
|
1960
|
-
const deploymentSource = settings.deployment ?? settings.deploymentName ?? settings.model;
|
|
1961
|
-
const versionSource = settings.version ?? settings.api_version;
|
|
1962
|
-
const temperatureSource = settings.temperature;
|
|
1963
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
1964
|
-
const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
|
|
1965
|
-
const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
|
|
1966
|
-
const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
|
|
1967
|
-
const version = normalizeAzureApiVersion(
|
|
1968
|
-
resolveOptionalString(versionSource, env, `${target.name} api version`)
|
|
1969
|
-
);
|
|
1970
|
-
const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
|
|
1971
|
-
const maxOutputTokens = resolveOptionalNumber(
|
|
1972
|
-
maxTokensSource,
|
|
1973
|
-
`${target.name} max output tokens`
|
|
1974
|
-
);
|
|
1975
|
-
return {
|
|
1976
|
-
resourceName,
|
|
1977
|
-
deploymentName,
|
|
1978
|
-
apiKey,
|
|
1979
|
-
version,
|
|
1980
|
-
temperature,
|
|
1981
|
-
maxOutputTokens
|
|
1982
|
-
};
|
|
1983
|
-
}
|
|
1984
|
-
function resolveAnthropicConfig(target, env) {
|
|
1985
|
-
const settings = target.settings ?? {};
|
|
1986
|
-
const apiKeySource = settings.api_key ?? settings.apiKey;
|
|
1987
|
-
const modelSource = settings.model ?? settings.deployment ?? settings.variant;
|
|
1988
|
-
const temperatureSource = settings.temperature;
|
|
1989
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
1990
|
-
const thinkingBudgetSource = settings.thinking_budget ?? settings.thinkingBudget;
|
|
1991
|
-
const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
|
|
1992
|
-
const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
|
|
1993
|
-
return {
|
|
1994
|
-
apiKey,
|
|
1995
|
-
model,
|
|
1996
|
-
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
1997
|
-
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
1998
|
-
thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`)
|
|
1999
|
-
};
|
|
2000
|
-
}
|
|
2001
|
-
function resolveGeminiConfig(target, env) {
|
|
2002
|
-
const settings = target.settings ?? {};
|
|
2003
|
-
const apiKeySource = settings.api_key ?? settings.apiKey;
|
|
2004
|
-
const modelSource = settings.model ?? settings.deployment ?? settings.variant;
|
|
2005
|
-
const temperatureSource = settings.temperature;
|
|
2006
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
2007
|
-
const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
|
|
2008
|
-
const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
|
|
2009
|
-
allowLiteral: true,
|
|
2010
|
-
optionalEnv: true
|
|
2011
|
-
}) ?? "gemini-2.5-flash";
|
|
2012
|
-
return {
|
|
2013
|
-
apiKey,
|
|
2014
|
-
model,
|
|
2015
|
-
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
2016
|
-
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`)
|
|
2017
|
-
};
|
|
2018
|
-
}
|
|
2019
|
-
function resolveCodexConfig(target, env) {
|
|
2020
|
-
const settings = target.settings ?? {};
|
|
2021
|
-
const executableSource = settings.executable ?? settings.command ?? settings.binary;
|
|
2022
|
-
const argsSource = settings.args ?? settings.arguments;
|
|
2023
|
-
const cwdSource = settings.cwd;
|
|
2024
|
-
const timeoutSource = settings.timeout_seconds ?? settings.timeoutSeconds;
|
|
2025
|
-
const logDirSource = settings.log_dir ?? settings.logDir ?? settings.log_directory ?? settings.logDirectory;
|
|
2026
|
-
const logFormatSource = settings.log_format ?? settings.logFormat ?? settings.log_output_format ?? settings.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
2027
|
-
const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
|
|
2028
|
-
allowLiteral: true,
|
|
2029
|
-
optionalEnv: true
|
|
2030
|
-
}) ?? "codex";
|
|
2031
|
-
const args = resolveOptionalStringArray(argsSource, env, `${target.name} codex args`);
|
|
2032
|
-
const cwd = resolveOptionalString(cwdSource, env, `${target.name} codex cwd`, {
|
|
2033
|
-
allowLiteral: true,
|
|
2034
|
-
optionalEnv: true
|
|
2035
|
-
});
|
|
2036
|
-
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} codex timeout`);
|
|
2037
|
-
const logDir = resolveOptionalString(logDirSource, env, `${target.name} codex log directory`, {
|
|
2038
|
-
allowLiteral: true,
|
|
2039
|
-
optionalEnv: true
|
|
2040
|
-
});
|
|
2041
|
-
const logFormat = normalizeCodexLogFormat(logFormatSource);
|
|
2042
|
-
return {
|
|
2043
|
-
executable,
|
|
2044
|
-
args,
|
|
2045
|
-
cwd,
|
|
2046
|
-
timeoutMs,
|
|
2047
|
-
logDir,
|
|
2048
|
-
logFormat
|
|
2049
|
-
};
|
|
2050
|
-
}
|
|
2051
|
-
function normalizeCodexLogFormat(value) {
|
|
2052
|
-
if (value === void 0 || value === null) {
|
|
2053
|
-
return void 0;
|
|
2054
|
-
}
|
|
2055
|
-
if (typeof value !== "string") {
|
|
2056
|
-
throw new Error("codex log format must be 'summary' or 'json'");
|
|
2057
|
-
}
|
|
2058
|
-
const normalized = value.trim().toLowerCase();
|
|
2059
|
-
if (normalized === "json" || normalized === "summary") {
|
|
2060
|
-
return normalized;
|
|
2061
|
-
}
|
|
2062
|
-
throw new Error("codex log format must be 'summary' or 'json'");
|
|
2063
|
-
}
|
|
2064
|
-
function resolveMockConfig(target) {
|
|
2065
|
-
const settings = target.settings ?? {};
|
|
2066
|
-
const response = typeof settings.response === "string" ? settings.response : void 0;
|
|
2067
|
-
return { response };
|
|
2068
|
-
}
|
|
2069
|
-
function resolveVSCodeConfig(target, env, insiders) {
|
|
2070
|
-
const settings = target.settings ?? {};
|
|
2071
|
-
const workspaceTemplateEnvVar = resolveOptionalLiteralString(settings.workspace_template ?? settings.workspaceTemplate);
|
|
2072
|
-
const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(workspaceTemplateEnvVar, env, `${target.name} workspace template path`, {
|
|
2073
|
-
allowLiteral: false,
|
|
2074
|
-
optionalEnv: true
|
|
2075
|
-
}) : void 0;
|
|
2076
|
-
const commandSource = settings.vscode_cmd ?? settings.command;
|
|
2077
|
-
const waitSource = settings.wait;
|
|
2078
|
-
const dryRunSource = settings.dry_run ?? settings.dryRun;
|
|
2079
|
-
const subagentRootSource = settings.subagent_root ?? settings.subagentRoot;
|
|
2080
|
-
const defaultCommand = insiders ? "code-insiders" : "code";
|
|
2081
|
-
const command = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
|
|
2082
|
-
return {
|
|
2083
|
-
command,
|
|
2084
|
-
waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
|
|
2085
|
-
dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
|
|
2086
|
-
subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
|
|
2087
|
-
allowLiteral: true,
|
|
2088
|
-
optionalEnv: true
|
|
2089
|
-
}),
|
|
2090
|
-
workspaceTemplate
|
|
2091
|
-
};
|
|
2092
|
-
}
|
|
2093
|
-
function resolveCliConfig(target, env) {
|
|
2094
|
-
const settings = target.settings ?? {};
|
|
2095
|
-
const commandTemplateSource = settings.command_template ?? settings.commandTemplate;
|
|
2096
|
-
const filesFormat = resolveOptionalLiteralString(
|
|
2097
|
-
settings.files_format ?? settings.filesFormat ?? settings.attachments_format ?? settings.attachmentsFormat
|
|
2098
|
-
);
|
|
2099
|
-
const cwd = resolveOptionalString(settings.cwd, env, `${target.name} working directory`, {
|
|
2100
|
-
allowLiteral: true,
|
|
2101
|
-
optionalEnv: true
|
|
2102
|
-
});
|
|
2103
|
-
const envOverrides = resolveEnvOverrides(settings.env, env, target.name);
|
|
2104
|
-
const timeoutMs = resolveTimeoutMs(settings.timeout_seconds ?? settings.timeoutSeconds, `${target.name} timeout`);
|
|
2105
|
-
const healthcheck = resolveCliHealthcheck(settings.healthcheck, env, target.name);
|
|
2106
|
-
const commandTemplate = resolveString(
|
|
2107
|
-
commandTemplateSource,
|
|
2108
|
-
env,
|
|
2109
|
-
`${target.name} CLI command template`,
|
|
2110
|
-
true
|
|
2111
|
-
);
|
|
2112
|
-
assertSupportedCliPlaceholders(commandTemplate, `${target.name} CLI command template`);
|
|
2113
|
-
return {
|
|
2114
|
-
commandTemplate,
|
|
2115
|
-
filesFormat,
|
|
2116
|
-
cwd,
|
|
2117
|
-
env: envOverrides,
|
|
2118
|
-
timeoutMs,
|
|
2119
|
-
healthcheck
|
|
2120
|
-
};
|
|
2121
|
-
}
|
|
2122
|
-
function resolveEnvOverrides(source, env, targetName) {
|
|
2123
|
-
if (source === void 0 || source === null) {
|
|
2124
|
-
return void 0;
|
|
2125
|
-
}
|
|
2126
|
-
if (typeof source !== "object" || Array.isArray(source)) {
|
|
2127
|
-
throw new Error(`${targetName} env overrides must be an object map of strings`);
|
|
2128
|
-
}
|
|
2129
|
-
const entries = Object.entries(source);
|
|
2130
|
-
const resolved = {};
|
|
2131
|
-
for (const [key, value] of entries) {
|
|
2132
|
-
if (typeof value !== "string") {
|
|
2133
|
-
throw new Error(`${targetName} env override '${key}' must be a string`);
|
|
2134
|
-
}
|
|
2135
|
-
const resolvedValue = resolveString(value, env, `${targetName} env override '${key}'`);
|
|
2136
|
-
resolved[key] = resolvedValue;
|
|
2137
|
-
}
|
|
2138
|
-
return Object.keys(resolved).length > 0 ? resolved : void 0;
|
|
2139
|
-
}
|
|
2140
|
-
function resolveTimeoutMs(source, description) {
|
|
2141
|
-
const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
|
|
2142
|
-
if (seconds === void 0) {
|
|
2143
|
-
return void 0;
|
|
2144
|
-
}
|
|
2145
|
-
if (seconds <= 0) {
|
|
2146
|
-
throw new Error(`${description} must be greater than zero seconds`);
|
|
2147
|
-
}
|
|
2148
|
-
return Math.floor(seconds * 1e3);
|
|
2149
|
-
}
|
|
2150
|
-
function resolveCliHealthcheck(source, env, targetName) {
|
|
2151
|
-
if (source === void 0 || source === null) {
|
|
2152
|
-
return void 0;
|
|
2153
|
-
}
|
|
2154
|
-
if (typeof source !== "object" || Array.isArray(source)) {
|
|
2155
|
-
throw new Error(`${targetName} healthcheck must be an object`);
|
|
2156
|
-
}
|
|
2157
|
-
const candidate = source;
|
|
2158
|
-
const type = candidate.type;
|
|
2159
|
-
const timeoutMs = resolveTimeoutMs(
|
|
2160
|
-
candidate.timeout_seconds ?? candidate.timeoutSeconds,
|
|
2161
|
-
`${targetName} healthcheck timeout`
|
|
2162
|
-
);
|
|
2163
|
-
if (type === "http") {
|
|
2164
|
-
const url = resolveString(candidate.url, env, `${targetName} healthcheck URL`);
|
|
2165
|
-
return {
|
|
2166
|
-
type: "http",
|
|
2167
|
-
url,
|
|
2168
|
-
timeoutMs
|
|
2169
|
-
};
|
|
2170
|
-
}
|
|
2171
|
-
if (type === "command") {
|
|
2172
|
-
const commandTemplate = resolveString(
|
|
2173
|
-
candidate.command_template ?? candidate.commandTemplate,
|
|
2174
|
-
env,
|
|
2175
|
-
`${targetName} healthcheck command template`,
|
|
2176
|
-
true
|
|
2177
|
-
);
|
|
2178
|
-
assertSupportedCliPlaceholders(commandTemplate, `${targetName} healthcheck command template`);
|
|
2179
|
-
const cwd = resolveOptionalString(candidate.cwd, env, `${targetName} healthcheck cwd`, {
|
|
2180
|
-
allowLiteral: true,
|
|
2181
|
-
optionalEnv: true
|
|
2182
|
-
});
|
|
2183
|
-
return {
|
|
2184
|
-
type: "command",
|
|
2185
|
-
commandTemplate,
|
|
2186
|
-
timeoutMs,
|
|
2187
|
-
cwd
|
|
2188
|
-
};
|
|
2189
|
-
}
|
|
2190
|
-
throw new Error(`${targetName} healthcheck type must be 'http' or 'command'`);
|
|
2191
|
-
}
|
|
2192
|
-
function assertSupportedCliPlaceholders(template, description) {
|
|
2193
|
-
const placeholders = extractCliPlaceholders(template);
|
|
2194
|
-
for (const placeholder of placeholders) {
|
|
2195
|
-
if (!CLI_PLACEHOLDERS.has(placeholder)) {
|
|
2196
|
-
throw new Error(
|
|
2197
|
-
`${description} includes unsupported placeholder '{${placeholder}}'. Supported placeholders: ${Array.from(CLI_PLACEHOLDERS).join(", ")}`
|
|
2198
|
-
);
|
|
2199
|
-
}
|
|
2200
|
-
}
|
|
2201
|
-
}
|
|
2202
|
-
function extractCliPlaceholders(template) {
|
|
2203
|
-
const matches = template.matchAll(/\{([A-Z_]+)\}/g);
|
|
2204
|
-
const results = [];
|
|
2205
|
-
for (const match of matches) {
|
|
2206
|
-
if (match[1]) {
|
|
2207
|
-
results.push(match[1]);
|
|
2208
|
-
}
|
|
2209
|
-
}
|
|
2210
|
-
return results;
|
|
2211
|
-
}
|
|
2212
|
-
function resolveString(source, env, description, allowLiteral = false) {
|
|
2213
|
-
const value = resolveOptionalString(source, env, description, {
|
|
2214
|
-
allowLiteral,
|
|
2215
|
-
optionalEnv: false
|
|
2216
|
-
});
|
|
2217
|
-
if (value === void 0) {
|
|
2218
|
-
throw new Error(`${description} is required`);
|
|
2219
|
-
}
|
|
2220
|
-
return value;
|
|
2221
|
-
}
|
|
2222
|
-
/**
 * Resolve an optional string setting that is either a `${{ VAR }}` environment
 * reference or (when permitted) a literal value.
 *
 * @param source - Raw configured value; `undefined`, `null` and blank strings
 *                 resolve to `undefined`.
 * @param env - Object indexed by variable name to obtain environment values.
 * @param description - Human-readable setting name used in error messages.
 * @param options - Optional flags: `allowLiteral` lets a non-reference string
 *                  be returned (trimmed); `optionalEnv` makes an unset
 *                  variable resolve to `undefined` instead of throwing.
 * @returns The environment value (returned as-is, not trimmed), the trimmed
 *          literal, or `undefined`.
 * @throws Error when `source` is not a string, when the referenced variable is
 *         blank, when it is unset and not optional, or when a literal is given
 *         but literals are not allowed.
 */
function resolveOptionalString(source, env, description, options) {
  if (source == null) {
    return void 0;
  }
  if (typeof source !== "string") {
    throw new Error(`${description} must be a string`);
  }

  const text = source.trim();
  if (text === "") {
    return void 0;
  }

  const reference = /^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i.exec(text);
  if (reference === null) {
    // Not an environment reference: only acceptable when literals are allowed.
    if (options?.allowLiteral ?? false) {
      return text;
    }
    throw new Error(`${description} must use \${{ VARIABLE_NAME }} syntax for environment variables or be marked as allowing literals`);
  }

  const name = reference[1];
  const fromEnv = env[name];
  if (fromEnv === void 0) {
    if (options?.optionalEnv ?? false) {
      return void 0;
    }
    throw new Error(`Environment variable '${name}' required for ${description} is not set`);
  }
  if (fromEnv.trim().length === 0) {
    throw new Error(`Environment variable '${name}' for ${description} is empty`);
  }
  return fromEnv;
}
|
|
2255
|
-
/**
 * Normalise an optional literal string setting.
 *
 * @param source - Raw configured value.
 * @returns The trimmed string, or `undefined` when `source` is `undefined`,
 *          `null`, or blank after trimming.
 * @throws Error when `source` is present but not a string.
 */
function resolveOptionalLiteralString(source) {
  if (source == null) {
    return void 0;
  }
  if (typeof source !== "string") {
    throw new Error("expected string value");
  }
  const text = source.trim();
  if (text.length === 0) {
    return void 0;
  }
  return text;
}
|
|
2265
|
-
/**
 * Resolve an optional numeric setting.
 *
 * @param source - Raw configured value (number or numeric string).
 * @param description - Human-readable setting name used in the error message.
 * @returns The finite number, or `undefined` when `source` is `undefined`,
 *          `null`, a blank string, or a non-finite number.
 * @throws Error when `source` is neither a number nor a string that converts
 *         to a finite number.
 */
function resolveOptionalNumber(source, description) {
  if (source === void 0 || source === null) {
    return void 0;
  }
  if (typeof source === "number") {
    return Number.isFinite(source) ? source : void 0;
  }
  if (typeof source === "string") {
    const trimmed = source.trim();
    // Number("") and Number("   ") both evaluate to 0, so a blank string must
    // be treated as "not provided" before conversion rather than as zero.
    if (trimmed.length === 0) {
      return void 0;
    }
    const numeric = Number(trimmed);
    if (Number.isFinite(numeric)) {
      return numeric;
    }
  }
  throw new Error(`${description} must be a number`);
}
|
|
2280
|
-
/**
 * Resolve an optional boolean setting.
 *
 * Accepts real booleans and the strings `"true"`, `"false"`, `"1"`, `"0"`
 * (case-insensitive, surrounding whitespace ignored).
 *
 * @param source - Raw configured value.
 * @returns The boolean, or `undefined` when `source` is `undefined`, `null`,
 *          or a blank string.
 * @throws Error when `source` is present but not a recognised boolean form.
 */
function resolveOptionalBoolean(source) {
  if (source === void 0 || source === null) {
    return void 0;
  }
  if (typeof source === "boolean") {
    return source;
  }
  if (typeof source === "string") {
    const lowered = source.trim().toLowerCase();
    // A whitespace-only string is handled the same way as "": not provided.
    if (lowered.length === 0) {
      return void 0;
    }
    if (lowered === "true" || lowered === "1") {
      return true;
    }
    if (lowered === "false" || lowered === "0") {
      return false;
    }
  }
  throw new Error("expected boolean value");
}
|
|
2298
|
-
/**
 * Resolve an optional array of string settings, expanding any element written
 * as a `${{ VAR }}` environment reference.
 *
 * @param source - Raw configured value.
 * @param env - Object indexed by variable name to obtain environment values.
 * @param description - Human-readable setting name used in error messages;
 *                      element errors append `[index]`.
 * @returns The resolved strings (literals trimmed, environment values as-is),
 *          or `undefined` when `source` is `undefined`, `null`, or empty.
 * @throws Error when `source` is not an array, when an element is not a
 *         string or is blank, or when a referenced variable is unset or blank.
 */
function resolveOptionalStringArray(source, env, description) {
  if (source == null) {
    return void 0;
  }
  if (!Array.isArray(source)) {
    throw new Error(`${description} must be an array of strings`);
  }
  if (source.length === 0) {
    return void 0;
  }

  // Array.from visits every index (holes included), matching an index loop.
  const values = Array.from(source, (entry, index) => {
    if (typeof entry !== "string") {
      throw new Error(`${description}[${index}] must be a string`);
    }
    const text = entry.trim();
    if (text.length === 0) {
      throw new Error(`${description}[${index}] cannot be empty`);
    }
    const reference = /^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i.exec(text);
    if (reference === null) {
      return text;
    }
    const name = reference[1];
    const fromEnv = env[name];
    if (fromEnv === void 0) {
      throw new Error(`Environment variable '${name}' for ${description}[${index}] is not set`);
    }
    if (fromEnv.trim().length === 0) {
      throw new Error(`Environment variable '${name}' for ${description}[${index}] is empty`);
    }
    return fromEnv;
  });

  return values.length > 0 ? values : void 0;
}
|
|
2335
|
-
|
|
2336
1882
|
// src/evaluation/providers/vscode.ts
|
|
2337
1883
|
import path5 from "node:path";
|
|
2338
1884
|
import { dispatchAgentSession, dispatchBatchAgent, getSubagentRoot, provisionSubagents } from "subagent";
|
|
@@ -3872,6 +3418,7 @@ export {
|
|
|
3872
3418
|
isTestMessageRole,
|
|
3873
3419
|
listTargetNames,
|
|
3874
3420
|
loadEvalCases,
|
|
3421
|
+
normalizeLineEndings,
|
|
3875
3422
|
readTargetDefinitions,
|
|
3876
3423
|
readTextFile,
|
|
3877
3424
|
resolveAndCreateProvider,
|