agentv 4.6.1 → 4.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/{chunk-NSVFUL27.js → chunk-A6W3KOCS.js} +4428 -3605
- package/dist/chunk-A6W3KOCS.js.map +1 -0
- package/dist/{chunk-YXXD27OK.js → chunk-H4GQXK5M.js} +1314 -440
- package/dist/chunk-H4GQXK5M.js.map +1 -0
- package/dist/{chunk-MHWYA4CS.js → chunk-QBZJSQXV.js} +365 -349
- package/dist/chunk-QBZJSQXV.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-BN5NUVAB.js → dist-QXVR2ZRH.js} +16 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-DMSVE6CS.js → interactive-IRYNIFCY.js} +10 -47
- package/dist/interactive-IRYNIFCY.js.map +1 -0
- package/dist/studio/assets/index-DHxVz6M9.css +1 -0
- package/dist/studio/assets/{index-C7TnyYee.js → index-DcwjOyrk.js} +1 -1
- package/dist/studio/assets/index-Y5InSvcS.js +65 -0
- package/dist/studio/index.html +2 -2
- package/package.json +1 -1
- package/dist/chunk-MHWYA4CS.js.map +0 -1
- package/dist/chunk-NSVFUL27.js.map +0 -1
- package/dist/chunk-YXXD27OK.js.map +0 -1
- package/dist/interactive-DMSVE6CS.js.map +0 -1
- package/dist/studio/assets/index-jJVIJh8b.css +0 -1
- package/dist/studio/assets/index-vn54AYtS.js +0 -65
- /package/dist/{dist-BN5NUVAB.js.map → dist-QXVR2ZRH.js.map} +0 -0
|
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
|
|
|
301
301
|
}
|
|
302
302
|
});
|
|
303
303
|
|
|
304
|
-
// ../../packages/core/dist/chunk-
|
|
304
|
+
// ../../packages/core/dist/chunk-VCVVKCC4.js
|
|
305
305
|
import { constants } from "node:fs";
|
|
306
306
|
import { access, readFile } from "node:fs/promises";
|
|
307
307
|
import path from "node:path";
|
|
@@ -419,7 +419,7 @@ __export(external_exports2, {
|
|
|
419
419
|
void: () => voidType
|
|
420
420
|
});
|
|
421
421
|
|
|
422
|
-
// ../../packages/core/dist/chunk-
|
|
422
|
+
// ../../packages/core/dist/chunk-VCVVKCC4.js
|
|
423
423
|
import { readFile as readFile2 } from "node:fs/promises";
|
|
424
424
|
import path3 from "node:path";
|
|
425
425
|
import fg from "fast-glob";
|
|
@@ -633,15 +633,13 @@ async function resolveFileReference(rawValue, searchRoots) {
|
|
|
633
633
|
}
|
|
634
634
|
var CliHealthcheckHttpInputSchema = external_exports2.object({
|
|
635
635
|
url: external_exports2.string().min(1, "healthcheck URL is required"),
|
|
636
|
-
timeout_seconds: external_exports2.number().positive().optional()
|
|
637
|
-
|
|
638
|
-
});
|
|
636
|
+
timeout_seconds: external_exports2.number().positive().optional()
|
|
637
|
+
}).passthrough();
|
|
639
638
|
var CliHealthcheckCommandInputSchema = external_exports2.object({
|
|
640
639
|
command: external_exports2.string().min(1, "healthcheck command is required"),
|
|
641
640
|
cwd: external_exports2.string().optional(),
|
|
642
|
-
timeout_seconds: external_exports2.number().positive().optional()
|
|
643
|
-
|
|
644
|
-
});
|
|
641
|
+
timeout_seconds: external_exports2.number().positive().optional()
|
|
642
|
+
}).passthrough();
|
|
645
643
|
var CliHealthcheckInputSchema = external_exports2.union([
|
|
646
644
|
CliHealthcheckHttpInputSchema,
|
|
647
645
|
CliHealthcheckCommandInputSchema
|
|
@@ -653,36 +651,28 @@ var CliTargetInputSchema = external_exports2.object({
|
|
|
653
651
|
command: external_exports2.string(),
|
|
654
652
|
// Files format - optional
|
|
655
653
|
files_format: external_exports2.string().optional(),
|
|
656
|
-
filesFormat: external_exports2.string().optional(),
|
|
657
654
|
attachments_format: external_exports2.string().optional(),
|
|
658
|
-
attachmentsFormat: external_exports2.string().optional(),
|
|
659
655
|
// Working directory - optional
|
|
660
656
|
cwd: external_exports2.string().optional(),
|
|
661
657
|
// Workspace template directory - optional (mutually exclusive with cwd)
|
|
662
658
|
workspace_template: external_exports2.string().optional(),
|
|
663
|
-
workspaceTemplate: external_exports2.string().optional(),
|
|
664
659
|
// Timeout in seconds - optional
|
|
665
660
|
timeout_seconds: external_exports2.number().positive().optional(),
|
|
666
|
-
timeoutSeconds: external_exports2.number().positive().optional(),
|
|
667
661
|
// Healthcheck configuration - optional
|
|
668
662
|
healthcheck: CliHealthcheckInputSchema.optional(),
|
|
669
663
|
// Verbose mode - optional
|
|
670
664
|
verbose: external_exports2.boolean().optional(),
|
|
671
665
|
cli_verbose: external_exports2.boolean().optional(),
|
|
672
|
-
cliVerbose: external_exports2.boolean().optional(),
|
|
673
666
|
// Keep temp files - optional
|
|
674
667
|
keep_temp_files: external_exports2.boolean().optional(),
|
|
675
|
-
keepTempFiles: external_exports2.boolean().optional(),
|
|
676
668
|
keep_output_files: external_exports2.boolean().optional(),
|
|
677
|
-
keepOutputFiles: external_exports2.boolean().optional(),
|
|
678
669
|
// Common target fields
|
|
679
670
|
grader_target: external_exports2.string().optional(),
|
|
680
671
|
judge_target: external_exports2.string().optional(),
|
|
681
672
|
// backward compat
|
|
682
673
|
workers: external_exports2.number().int().min(1).optional(),
|
|
683
|
-
provider_batching: external_exports2.boolean().optional()
|
|
684
|
-
|
|
685
|
-
});
|
|
674
|
+
provider_batching: external_exports2.boolean().optional()
|
|
675
|
+
}).passthrough();
|
|
686
676
|
var CliHealthcheckHttpSchema = external_exports2.object({
|
|
687
677
|
url: external_exports2.string().min(1),
|
|
688
678
|
timeoutMs: external_exports2.number().positive().optional()
|
|
@@ -707,7 +697,7 @@ var CliTargetConfigSchema = external_exports2.object({
|
|
|
707
697
|
keepTempFiles: external_exports2.boolean().optional()
|
|
708
698
|
}).strict();
|
|
709
699
|
function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
710
|
-
const timeoutSeconds = input.timeout_seconds
|
|
700
|
+
const timeoutSeconds = input.timeout_seconds;
|
|
711
701
|
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
712
702
|
if ("url" in input && input.url) {
|
|
713
703
|
const url = resolveString(input.url, env, `${targetName} healthcheck URL`);
|
|
@@ -741,9 +731,9 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
|
741
731
|
function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
742
732
|
const targetName = input.name;
|
|
743
733
|
const command = resolveString(input.command, env, `${targetName} CLI command`, true);
|
|
744
|
-
const filesFormatSource = input.files_format ?? input.
|
|
734
|
+
const filesFormatSource = input.files_format ?? input.attachments_format;
|
|
745
735
|
const filesFormat = resolveOptionalLiteralString(filesFormatSource);
|
|
746
|
-
const workspaceTemplateSource = input.workspace_template
|
|
736
|
+
const workspaceTemplateSource = input.workspace_template;
|
|
747
737
|
let workspaceTemplate = resolveOptionalString(
|
|
748
738
|
workspaceTemplateSource,
|
|
749
739
|
env,
|
|
@@ -771,12 +761,10 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
771
761
|
if (!cwd && !workspaceTemplate && evalFilePath) {
|
|
772
762
|
cwd = path2.dirname(path2.resolve(evalFilePath));
|
|
773
763
|
}
|
|
774
|
-
const timeoutSeconds = input.timeout_seconds
|
|
764
|
+
const timeoutSeconds = input.timeout_seconds;
|
|
775
765
|
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
776
|
-
const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose
|
|
777
|
-
const keepTempFiles = resolveOptionalBoolean(
|
|
778
|
-
input.keep_temp_files ?? input.keepTempFiles ?? input.keep_output_files ?? input.keepOutputFiles
|
|
779
|
-
);
|
|
766
|
+
const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose);
|
|
767
|
+
const keepTempFiles = resolveOptionalBoolean(input.keep_temp_files ?? input.keep_output_files);
|
|
780
768
|
const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
|
|
781
769
|
return {
|
|
782
770
|
command,
|
|
@@ -797,14 +785,104 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
|
|
|
797
785
|
"FILES",
|
|
798
786
|
"OUTPUT_FILE"
|
|
799
787
|
]);
|
|
788
|
+
var DEPRECATED_TARGET_CAMEL_CASE_FIELDS = /* @__PURE__ */ new Map([
|
|
789
|
+
["providerBatching", "provider_batching"],
|
|
790
|
+
["subagentModeAllowed", "subagent_mode_allowed"],
|
|
791
|
+
["fallbackTargets", "fallback_targets"],
|
|
792
|
+
["resourceName", "endpoint"],
|
|
793
|
+
["baseUrl", "base_url"],
|
|
794
|
+
["apiKey", "api_key"],
|
|
795
|
+
["deploymentName", "model"],
|
|
796
|
+
["thinkingBudget", "thinking_budget"],
|
|
797
|
+
["maxTokens", "max_output_tokens"],
|
|
798
|
+
["apiFormat", "api_format"],
|
|
799
|
+
["timeoutSeconds", "timeout_seconds"],
|
|
800
|
+
["logDir", "log_dir"],
|
|
801
|
+
["logDirectory", "log_directory"],
|
|
802
|
+
["logFormat", "log_format"],
|
|
803
|
+
["logOutputFormat", "log_output_format"],
|
|
804
|
+
["systemPrompt", "system_prompt"],
|
|
805
|
+
["maxTurns", "max_turns"],
|
|
806
|
+
["maxBudgetUsd", "max_budget_usd"],
|
|
807
|
+
["dryRun", "dry_run"],
|
|
808
|
+
["subagentRoot", "subagent_root"],
|
|
809
|
+
["filesFormat", "files_format"],
|
|
810
|
+
["attachmentsFormat", "attachments_format"],
|
|
811
|
+
["cliUrl", "cli_url"],
|
|
812
|
+
["cliPath", "cli_path"],
|
|
813
|
+
["githubToken", "github_token"],
|
|
814
|
+
["sessionDir", "session_dir"],
|
|
815
|
+
["sessionId", "session_id"],
|
|
816
|
+
["sessionStateDir", "session_state_dir"],
|
|
817
|
+
["maxRetries", "max_retries"],
|
|
818
|
+
["retryInitialDelayMs", "retry_initial_delay_ms"],
|
|
819
|
+
["retryMaxDelayMs", "retry_max_delay_ms"],
|
|
820
|
+
["retryBackoffFactor", "retry_backoff_factor"],
|
|
821
|
+
["retryStatusCodes", "retry_status_codes"]
|
|
822
|
+
]);
|
|
823
|
+
var DEPRECATED_HEALTHCHECK_CAMEL_CASE_FIELDS = /* @__PURE__ */ new Map([
|
|
824
|
+
["timeoutSeconds", "timeout_seconds"]
|
|
825
|
+
]);
|
|
826
|
+
function collectDeprecatedCamelCaseWarnings(value, location, aliases) {
|
|
827
|
+
if (typeof value !== "object" || value === null || Array.isArray(value)) {
|
|
828
|
+
return [];
|
|
829
|
+
}
|
|
830
|
+
const warnings = [];
|
|
831
|
+
for (const [camelCaseField, snakeCaseField] of aliases) {
|
|
832
|
+
if (Object.prototype.hasOwnProperty.call(value, camelCaseField)) {
|
|
833
|
+
warnings.push({
|
|
834
|
+
location: `${location}.${camelCaseField}`,
|
|
835
|
+
message: `camelCase field '${camelCaseField}' is no longer supported in targets.yaml. Use '${snakeCaseField}' instead.`
|
|
836
|
+
});
|
|
837
|
+
}
|
|
838
|
+
}
|
|
839
|
+
return warnings;
|
|
840
|
+
}
|
|
841
|
+
function assertNoDeprecatedCamelCaseTargetFields(definition) {
|
|
842
|
+
if (Object.prototype.hasOwnProperty.call(definition, "workspaceTemplate")) {
|
|
843
|
+
throw new Error(
|
|
844
|
+
`${definition.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
|
|
845
|
+
);
|
|
846
|
+
}
|
|
847
|
+
const warning = findDeprecatedCamelCaseTargetWarnings(
|
|
848
|
+
definition,
|
|
849
|
+
`target "${definition.name}"`
|
|
850
|
+
)[0];
|
|
851
|
+
if (!warning) {
|
|
852
|
+
return;
|
|
853
|
+
}
|
|
854
|
+
const fieldMatch = warning.message.match(/field '([^']+)'/);
|
|
855
|
+
const replacementMatch = warning.message.match(/Use '([^']+)' instead/);
|
|
856
|
+
const field = fieldMatch?.[1] ?? "unknown";
|
|
857
|
+
const replacement = replacementMatch?.[1] ?? "snake_case";
|
|
858
|
+
throw new Error(
|
|
859
|
+
`${warning.location}: camelCase field '${field}' is no longer supported in targets.yaml. Use '${replacement}' instead.`
|
|
860
|
+
);
|
|
861
|
+
}
|
|
862
|
+
function findDeprecatedCamelCaseTargetWarnings(target, location) {
|
|
863
|
+
const warnings = collectDeprecatedCamelCaseWarnings(
|
|
864
|
+
target,
|
|
865
|
+
location,
|
|
866
|
+
DEPRECATED_TARGET_CAMEL_CASE_FIELDS
|
|
867
|
+
);
|
|
868
|
+
if (typeof target !== "object" || target === null || Array.isArray(target)) {
|
|
869
|
+
return warnings;
|
|
870
|
+
}
|
|
871
|
+
const healthcheck = target.healthcheck;
|
|
872
|
+
warnings.push(
|
|
873
|
+
...collectDeprecatedCamelCaseWarnings(
|
|
874
|
+
healthcheck,
|
|
875
|
+
`${location}.healthcheck`,
|
|
876
|
+
DEPRECATED_HEALTHCHECK_CAMEL_CASE_FIELDS
|
|
877
|
+
)
|
|
878
|
+
);
|
|
879
|
+
return warnings;
|
|
880
|
+
}
|
|
800
881
|
var COMMON_TARGET_SETTINGS = [
|
|
801
882
|
"use_target",
|
|
802
883
|
"provider_batching",
|
|
803
|
-
"providerBatching",
|
|
804
884
|
"subagent_mode_allowed",
|
|
805
|
-
"
|
|
806
|
-
"fallback_targets",
|
|
807
|
-
"fallbackTargets"
|
|
885
|
+
"fallback_targets"
|
|
808
886
|
];
|
|
809
887
|
var USE_TARGET_ENV_PATTERN = /^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i;
|
|
810
888
|
var BASE_TARGET_SCHEMA = external_exports2.object({
|
|
@@ -816,43 +894,40 @@ var BASE_TARGET_SCHEMA = external_exports2.object({
|
|
|
816
894
|
// backward compat
|
|
817
895
|
workers: external_exports2.number().int().min(1).optional(),
|
|
818
896
|
workspace_template: external_exports2.string().optional(),
|
|
819
|
-
workspaceTemplate: external_exports2.string().optional(),
|
|
820
897
|
subagent_mode_allowed: external_exports2.boolean().optional(),
|
|
821
|
-
fallback_targets: external_exports2.array(external_exports2.string().min(1)).optional()
|
|
822
|
-
fallbackTargets: external_exports2.array(external_exports2.string().min(1)).optional()
|
|
898
|
+
fallback_targets: external_exports2.array(external_exports2.string().min(1)).optional()
|
|
823
899
|
}).passthrough();
|
|
824
900
|
var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
|
|
901
|
+
var DEFAULT_AZURE_RESPONSES_API_VERSION = "v1";
|
|
825
902
|
var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
|
|
826
|
-
function normalizeAzureApiVersion(value) {
|
|
903
|
+
function normalizeAzureApiVersion(value, apiFormat) {
|
|
904
|
+
const defaultVersion = apiFormat === "responses" ? DEFAULT_AZURE_RESPONSES_API_VERSION : DEFAULT_AZURE_API_VERSION;
|
|
827
905
|
if (!value) {
|
|
828
|
-
return
|
|
906
|
+
return defaultVersion;
|
|
829
907
|
}
|
|
830
908
|
const trimmed = value.trim();
|
|
831
909
|
if (trimmed.length === 0) {
|
|
832
|
-
return
|
|
910
|
+
return defaultVersion;
|
|
833
911
|
}
|
|
834
912
|
const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
|
|
835
|
-
return withoutPrefix.length > 0 ? withoutPrefix :
|
|
913
|
+
return withoutPrefix.length > 0 ? withoutPrefix : defaultVersion;
|
|
836
914
|
}
|
|
837
915
|
function resolveRetryConfig(target) {
|
|
838
|
-
const maxRetries = resolveOptionalNumber(
|
|
839
|
-
target.max_retries ?? target.maxRetries,
|
|
840
|
-
`${target.name} max retries`
|
|
841
|
-
);
|
|
916
|
+
const maxRetries = resolveOptionalNumber(target.max_retries, `${target.name} max retries`);
|
|
842
917
|
const initialDelayMs = resolveOptionalNumber(
|
|
843
|
-
target.retry_initial_delay_ms
|
|
918
|
+
target.retry_initial_delay_ms,
|
|
844
919
|
`${target.name} retry initial delay`
|
|
845
920
|
);
|
|
846
921
|
const maxDelayMs = resolveOptionalNumber(
|
|
847
|
-
target.retry_max_delay_ms
|
|
922
|
+
target.retry_max_delay_ms,
|
|
848
923
|
`${target.name} retry max delay`
|
|
849
924
|
);
|
|
850
925
|
const backoffFactor = resolveOptionalNumber(
|
|
851
|
-
target.retry_backoff_factor
|
|
926
|
+
target.retry_backoff_factor,
|
|
852
927
|
`${target.name} retry backoff factor`
|
|
853
928
|
);
|
|
854
929
|
const retryableStatusCodes = resolveOptionalNumberArray(
|
|
855
|
-
target.retry_status_codes
|
|
930
|
+
target.retry_status_codes,
|
|
856
931
|
`${target.name} retry status codes`
|
|
857
932
|
);
|
|
858
933
|
if (maxRetries === void 0 && initialDelayMs === void 0 && maxDelayMs === void 0 && backoffFactor === void 0 && retryableStatusCodes === void 0) {
|
|
@@ -912,9 +987,10 @@ function resolveDelegatedTargetDefinition(name21, definitions, env = process.env
|
|
|
912
987
|
`Target "${name21}" exceeded the maximum use_target resolution depth (10). Check for a delegation loop or overly deep alias chain.`
|
|
913
988
|
);
|
|
914
989
|
}
|
|
915
|
-
function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
990
|
+
function resolveTargetDefinition(definition, env = process.env, evalFilePath, options) {
|
|
991
|
+
assertNoDeprecatedCamelCaseTargetFields(definition);
|
|
916
992
|
const parsed = BASE_TARGET_SCHEMA.parse(definition);
|
|
917
|
-
if (parsed.workspace_template !== void 0
|
|
993
|
+
if (parsed.workspace_template !== void 0) {
|
|
918
994
|
throw new Error(
|
|
919
995
|
`${parsed.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
|
|
920
996
|
);
|
|
@@ -930,13 +1006,9 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
930
1006
|
`${parsed.name} provider`,
|
|
931
1007
|
true
|
|
932
1008
|
).toLowerCase();
|
|
933
|
-
const providerBatching = resolveOptionalBoolean(
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
const subagentModeAllowed = resolveOptionalBoolean(
|
|
937
|
-
parsed.subagent_mode_allowed ?? parsed.subagentModeAllowed
|
|
938
|
-
);
|
|
939
|
-
const fallbackTargets = parsed.fallback_targets ?? parsed.fallbackTargets;
|
|
1009
|
+
const providerBatching = resolveOptionalBoolean(parsed.provider_batching);
|
|
1010
|
+
const subagentModeAllowed = resolveOptionalBoolean(parsed.subagent_mode_allowed);
|
|
1011
|
+
const fallbackTargets = parsed.fallback_targets;
|
|
940
1012
|
const base = {
|
|
941
1013
|
name: parsed.name,
|
|
942
1014
|
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
@@ -1086,20 +1158,22 @@ function normalizeOpenAIBaseUrl(value) {
|
|
|
1086
1158
|
return trimmed.endsWith("/v1") ? trimmed : `${trimmed}/v1`;
|
|
1087
1159
|
}
|
|
1088
1160
|
function resolveAzureConfig(target, env) {
|
|
1089
|
-
const endpointSource = target.endpoint ?? target.resource
|
|
1090
|
-
const apiKeySource = target.api_key
|
|
1091
|
-
const deploymentSource = target.deployment ?? target.
|
|
1161
|
+
const endpointSource = target.endpoint ?? target.resource;
|
|
1162
|
+
const apiKeySource = target.api_key;
|
|
1163
|
+
const deploymentSource = target.deployment ?? target.model;
|
|
1092
1164
|
const versionSource = target.version ?? target.api_version;
|
|
1093
1165
|
const temperatureSource = target.temperature;
|
|
1094
|
-
const maxTokensSource = target.max_output_tokens
|
|
1166
|
+
const maxTokensSource = target.max_output_tokens;
|
|
1095
1167
|
const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
|
|
1096
1168
|
const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
|
|
1097
1169
|
const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
|
|
1170
|
+
const apiFormat = resolveApiFormat(target, env, target.name);
|
|
1098
1171
|
const version = normalizeAzureApiVersion(
|
|
1099
1172
|
resolveOptionalString(versionSource, env, `${target.name} api version`, {
|
|
1100
1173
|
allowLiteral: true,
|
|
1101
1174
|
optionalEnv: true
|
|
1102
|
-
})
|
|
1175
|
+
}),
|
|
1176
|
+
apiFormat
|
|
1103
1177
|
);
|
|
1104
1178
|
const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
|
|
1105
1179
|
const maxOutputTokens = resolveOptionalNumber(
|
|
@@ -1112,13 +1186,17 @@ function resolveAzureConfig(target, env) {
|
|
|
1112
1186
|
deploymentName,
|
|
1113
1187
|
apiKey,
|
|
1114
1188
|
version,
|
|
1189
|
+
apiFormat,
|
|
1115
1190
|
temperature,
|
|
1116
1191
|
maxOutputTokens,
|
|
1117
1192
|
retry
|
|
1118
1193
|
};
|
|
1119
1194
|
}
|
|
1120
|
-
function resolveApiFormat(target, targetName) {
|
|
1121
|
-
const raw = target.api_format
|
|
1195
|
+
function resolveApiFormat(target, env, targetName) {
|
|
1196
|
+
const raw = resolveOptionalString(target.api_format, env, `${targetName} api format`, {
|
|
1197
|
+
allowLiteral: true,
|
|
1198
|
+
optionalEnv: true
|
|
1199
|
+
});
|
|
1122
1200
|
if (raw === void 0) return void 0;
|
|
1123
1201
|
if (raw === "chat" || raw === "responses") return raw;
|
|
1124
1202
|
throw new Error(
|
|
@@ -1126,11 +1204,11 @@ function resolveApiFormat(target, targetName) {
|
|
|
1126
1204
|
);
|
|
1127
1205
|
}
|
|
1128
1206
|
function resolveOpenAIConfig(target, env) {
|
|
1129
|
-
const endpointSource = target.endpoint ?? target.base_url
|
|
1130
|
-
const apiKeySource = target.api_key
|
|
1207
|
+
const endpointSource = target.endpoint ?? target.base_url;
|
|
1208
|
+
const apiKeySource = target.api_key;
|
|
1131
1209
|
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
1132
1210
|
const temperatureSource = target.temperature;
|
|
1133
|
-
const maxTokensSource = target.max_output_tokens
|
|
1211
|
+
const maxTokensSource = target.max_output_tokens;
|
|
1134
1212
|
const baseURL = normalizeOpenAIBaseUrl(
|
|
1135
1213
|
resolveOptionalString(endpointSource, env, `${target.name} endpoint`, {
|
|
1136
1214
|
allowLiteral: true,
|
|
@@ -1144,17 +1222,17 @@ function resolveOpenAIConfig(target, env) {
|
|
|
1144
1222
|
baseURL,
|
|
1145
1223
|
apiKey,
|
|
1146
1224
|
model,
|
|
1147
|
-
apiFormat: resolveApiFormat(target, target.name),
|
|
1225
|
+
apiFormat: resolveApiFormat(target, env, target.name),
|
|
1148
1226
|
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
1149
1227
|
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
1150
1228
|
retry
|
|
1151
1229
|
};
|
|
1152
1230
|
}
|
|
1153
1231
|
function resolveOpenRouterConfig(target, env) {
|
|
1154
|
-
const apiKeySource = target.api_key
|
|
1232
|
+
const apiKeySource = target.api_key;
|
|
1155
1233
|
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
1156
1234
|
const temperatureSource = target.temperature;
|
|
1157
|
-
const maxTokensSource = target.max_output_tokens
|
|
1235
|
+
const maxTokensSource = target.max_output_tokens;
|
|
1158
1236
|
const retry = resolveRetryConfig(target);
|
|
1159
1237
|
return {
|
|
1160
1238
|
apiKey: resolveString(apiKeySource, env, `${target.name} OpenRouter api key`),
|
|
@@ -1165,11 +1243,11 @@ function resolveOpenRouterConfig(target, env) {
|
|
|
1165
1243
|
};
|
|
1166
1244
|
}
|
|
1167
1245
|
function resolveAnthropicConfig(target, env) {
|
|
1168
|
-
const apiKeySource = target.api_key
|
|
1246
|
+
const apiKeySource = target.api_key;
|
|
1169
1247
|
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
1170
1248
|
const temperatureSource = target.temperature;
|
|
1171
|
-
const maxTokensSource = target.max_output_tokens
|
|
1172
|
-
const thinkingBudgetSource = target.thinking_budget
|
|
1249
|
+
const maxTokensSource = target.max_output_tokens;
|
|
1250
|
+
const thinkingBudgetSource = target.thinking_budget;
|
|
1173
1251
|
const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
|
|
1174
1252
|
const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
|
|
1175
1253
|
const retry = resolveRetryConfig(target);
|
|
@@ -1183,10 +1261,10 @@ function resolveAnthropicConfig(target, env) {
|
|
|
1183
1261
|
};
|
|
1184
1262
|
}
|
|
1185
1263
|
function resolveGeminiConfig(target, env) {
|
|
1186
|
-
const apiKeySource = target.api_key
|
|
1264
|
+
const apiKeySource = target.api_key;
|
|
1187
1265
|
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
1188
1266
|
const temperatureSource = target.temperature;
|
|
1189
|
-
const maxTokensSource = target.max_output_tokens
|
|
1267
|
+
const maxTokensSource = target.max_output_tokens;
|
|
1190
1268
|
const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
|
|
1191
1269
|
const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
|
|
1192
1270
|
allowLiteral: true,
|
|
@@ -1206,11 +1284,11 @@ function resolveCodexConfig(target, env, evalFilePath) {
|
|
|
1206
1284
|
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
1207
1285
|
const argsSource = target.args ?? target.arguments;
|
|
1208
1286
|
const cwdSource = target.cwd;
|
|
1209
|
-
const workspaceTemplateSource = target.workspace_template
|
|
1210
|
-
const timeoutSource = target.timeout_seconds
|
|
1211
|
-
const logDirSource = target.log_dir ?? target.
|
|
1212
|
-
const logFormatSource = target.log_format ?? target.
|
|
1213
|
-
const systemPromptSource = target.system_prompt
|
|
1287
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
1288
|
+
const timeoutSource = target.timeout_seconds;
|
|
1289
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
1290
|
+
const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
1291
|
+
const systemPromptSource = target.system_prompt;
|
|
1214
1292
|
const model = resolveOptionalString(modelSource, env, `${target.name} codex model`, {
|
|
1215
1293
|
allowLiteral: true,
|
|
1216
1294
|
optionalEnv: true
|
|
@@ -1274,16 +1352,16 @@ function normalizeCodexLogFormat(value) {
|
|
|
1274
1352
|
throw new Error("codex log format must be 'summary' or 'json'");
|
|
1275
1353
|
}
|
|
1276
1354
|
function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
1277
|
-
const cliUrlSource = target.cli_url
|
|
1278
|
-
const cliPathSource = target.cli_path
|
|
1279
|
-
const githubTokenSource = target.github_token
|
|
1355
|
+
const cliUrlSource = target.cli_url;
|
|
1356
|
+
const cliPathSource = target.cli_path;
|
|
1357
|
+
const githubTokenSource = target.github_token;
|
|
1280
1358
|
const modelSource = target.model;
|
|
1281
1359
|
const cwdSource = target.cwd;
|
|
1282
|
-
const workspaceTemplateSource = target.workspace_template
|
|
1283
|
-
const timeoutSource = target.timeout_seconds
|
|
1284
|
-
const logDirSource = target.log_dir ?? target.
|
|
1285
|
-
const logFormatSource = target.log_format
|
|
1286
|
-
const systemPromptSource = target.system_prompt
|
|
1360
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
1361
|
+
const timeoutSource = target.timeout_seconds;
|
|
1362
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
1363
|
+
const logFormatSource = target.log_format;
|
|
1364
|
+
const systemPromptSource = target.system_prompt;
|
|
1287
1365
|
const cliUrl = resolveOptionalString(cliUrlSource, env, `${target.name} copilot-sdk cli URL`, {
|
|
1288
1366
|
allowLiteral: true,
|
|
1289
1367
|
optionalEnv: true
|
|
@@ -1338,6 +1416,52 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
|
1338
1416
|
);
|
|
1339
1417
|
const logFormat = normalizeCopilotLogFormat(logFormatSource);
|
|
1340
1418
|
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
1419
|
+
const byok = target.byok;
|
|
1420
|
+
let byokType;
|
|
1421
|
+
let byokBaseUrl;
|
|
1422
|
+
let byokApiKey;
|
|
1423
|
+
let byokBearerToken;
|
|
1424
|
+
let byokApiVersion;
|
|
1425
|
+
let byokWireApi;
|
|
1426
|
+
if (byok && typeof byok === "object") {
|
|
1427
|
+
byokType = resolveOptionalString(byok.type, env, `${target.name} byok type`, {
|
|
1428
|
+
allowLiteral: true,
|
|
1429
|
+
optionalEnv: true
|
|
1430
|
+
});
|
|
1431
|
+
byokBaseUrl = resolveOptionalString(byok.base_url, env, `${target.name} byok base URL`, {
|
|
1432
|
+
allowLiteral: true,
|
|
1433
|
+
optionalEnv: true
|
|
1434
|
+
});
|
|
1435
|
+
byokApiKey = resolveOptionalString(byok.api_key, env, `${target.name} byok API key`, {
|
|
1436
|
+
allowLiteral: false,
|
|
1437
|
+
optionalEnv: true
|
|
1438
|
+
});
|
|
1439
|
+
byokBearerToken = resolveOptionalString(
|
|
1440
|
+
byok.bearer_token,
|
|
1441
|
+
env,
|
|
1442
|
+
`${target.name} byok bearer token`,
|
|
1443
|
+
{
|
|
1444
|
+
allowLiteral: false,
|
|
1445
|
+
optionalEnv: true
|
|
1446
|
+
}
|
|
1447
|
+
);
|
|
1448
|
+
byokApiVersion = resolveOptionalString(
|
|
1449
|
+
byok.api_version,
|
|
1450
|
+
env,
|
|
1451
|
+
`${target.name} byok API version`,
|
|
1452
|
+
{
|
|
1453
|
+
allowLiteral: true,
|
|
1454
|
+
optionalEnv: true
|
|
1455
|
+
}
|
|
1456
|
+
);
|
|
1457
|
+
byokWireApi = resolveOptionalString(byok.wire_api, env, `${target.name} byok wire API`, {
|
|
1458
|
+
allowLiteral: true,
|
|
1459
|
+
optionalEnv: true
|
|
1460
|
+
});
|
|
1461
|
+
if (!byokBaseUrl) {
|
|
1462
|
+
throw new Error(`${target.name}: 'byok.base_url' is required when 'byok' is specified`);
|
|
1463
|
+
}
|
|
1464
|
+
}
|
|
1341
1465
|
return {
|
|
1342
1466
|
cliUrl,
|
|
1343
1467
|
cliPath,
|
|
@@ -1348,7 +1472,13 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
|
1348
1472
|
timeoutMs,
|
|
1349
1473
|
logDir,
|
|
1350
1474
|
logFormat,
|
|
1351
|
-
systemPrompt
|
|
1475
|
+
systemPrompt,
|
|
1476
|
+
byokType,
|
|
1477
|
+
byokBaseUrl,
|
|
1478
|
+
byokApiKey,
|
|
1479
|
+
byokBearerToken,
|
|
1480
|
+
byokApiVersion,
|
|
1481
|
+
byokWireApi
|
|
1352
1482
|
};
|
|
1353
1483
|
}
|
|
1354
1484
|
function resolveCopilotCliConfig(target, env, evalFilePath) {
|
|
@@ -1356,11 +1486,11 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
|
|
|
1356
1486
|
const modelSource = target.model;
|
|
1357
1487
|
const argsSource = target.args ?? target.arguments;
|
|
1358
1488
|
const cwdSource = target.cwd;
|
|
1359
|
-
const workspaceTemplateSource = target.workspace_template
|
|
1360
|
-
const timeoutSource = target.timeout_seconds
|
|
1361
|
-
const logDirSource = target.log_dir ?? target.
|
|
1362
|
-
const logFormatSource = target.log_format
|
|
1363
|
-
const systemPromptSource = target.system_prompt
|
|
1489
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
1490
|
+
const timeoutSource = target.timeout_seconds;
|
|
1491
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
1492
|
+
const logFormatSource = target.log_format;
|
|
1493
|
+
const systemPromptSource = target.system_prompt;
|
|
1364
1494
|
const executable = resolveOptionalString(executableSource, env, `${target.name} copilot-cli executable`, {
|
|
1365
1495
|
allowLiteral: true,
|
|
1366
1496
|
optionalEnv: true
|
|
@@ -1424,16 +1554,16 @@ function normalizeCopilotLogFormat(value) {
|
|
|
1424
1554
|
}
|
|
1425
1555
|
function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
1426
1556
|
const subproviderSource = target.subprovider;
|
|
1427
|
-
const modelSource = target.model ?? target.pi_model
|
|
1428
|
-
const apiKeySource = target.api_key
|
|
1429
|
-
const toolsSource = target.tools ?? target.pi_tools
|
|
1430
|
-
const thinkingSource = target.thinking ?? target.pi_thinking
|
|
1557
|
+
const modelSource = target.model ?? target.pi_model;
|
|
1558
|
+
const apiKeySource = target.api_key;
|
|
1559
|
+
const toolsSource = target.tools ?? target.pi_tools;
|
|
1560
|
+
const thinkingSource = target.thinking ?? target.pi_thinking;
|
|
1431
1561
|
const cwdSource = target.cwd;
|
|
1432
|
-
const workspaceTemplateSource = target.workspace_template
|
|
1433
|
-
const timeoutSource = target.timeout_seconds
|
|
1434
|
-
const logDirSource = target.log_dir ?? target.
|
|
1435
|
-
const logFormatSource = target.log_format
|
|
1436
|
-
const systemPromptSource = target.system_prompt
|
|
1562
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
1563
|
+
const timeoutSource = target.timeout_seconds;
|
|
1564
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
1565
|
+
const logFormatSource = target.log_format;
|
|
1566
|
+
const systemPromptSource = target.system_prompt;
|
|
1437
1567
|
const subprovider = resolveOptionalString(
|
|
1438
1568
|
subproviderSource,
|
|
1439
1569
|
env,
|
|
@@ -1451,7 +1581,7 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
1451
1581
|
allowLiteral: false,
|
|
1452
1582
|
optionalEnv: true
|
|
1453
1583
|
});
|
|
1454
|
-
const baseUrlSource = target.base_url ?? target.
|
|
1584
|
+
const baseUrlSource = target.base_url ?? target.endpoint;
|
|
1455
1585
|
const baseUrl = resolveOptionalString(baseUrlSource, env, `${target.name} pi base url`, {
|
|
1456
1586
|
allowLiteral: true,
|
|
1457
1587
|
optionalEnv: true
|
|
@@ -1510,16 +1640,16 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
1510
1640
|
function resolvePiCliConfig(target, env, evalFilePath) {
|
|
1511
1641
|
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
1512
1642
|
const subproviderSource = target.subprovider;
|
|
1513
|
-
const modelSource = target.model ?? target.pi_model
|
|
1514
|
-
const apiKeySource = target.api_key
|
|
1515
|
-
const toolsSource = target.tools ?? target.pi_tools
|
|
1516
|
-
const thinkingSource = target.thinking ?? target.pi_thinking
|
|
1643
|
+
const modelSource = target.model ?? target.pi_model;
|
|
1644
|
+
const apiKeySource = target.api_key;
|
|
1645
|
+
const toolsSource = target.tools ?? target.pi_tools;
|
|
1646
|
+
const thinkingSource = target.thinking ?? target.pi_thinking;
|
|
1517
1647
|
const cwdSource = target.cwd;
|
|
1518
|
-
const workspaceTemplateSource = target.workspace_template
|
|
1519
|
-
const timeoutSource = target.timeout_seconds
|
|
1520
|
-
const logDirSource = target.log_dir ?? target.
|
|
1521
|
-
const logFormatSource = target.log_format
|
|
1522
|
-
const systemPromptSource = target.system_prompt
|
|
1648
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
1649
|
+
const timeoutSource = target.timeout_seconds;
|
|
1650
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
1651
|
+
const logFormatSource = target.log_format;
|
|
1652
|
+
const systemPromptSource = target.system_prompt;
|
|
1523
1653
|
const executable = resolveOptionalString(executableSource, env, `${target.name} pi-cli executable`, {
|
|
1524
1654
|
allowLiteral: true,
|
|
1525
1655
|
optionalEnv: true
|
|
@@ -1538,7 +1668,7 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
1538
1668
|
allowLiteral: false,
|
|
1539
1669
|
optionalEnv: true
|
|
1540
1670
|
});
|
|
1541
|
-
const baseUrlSource = target.base_url ?? target.
|
|
1671
|
+
const baseUrlSource = target.base_url ?? target.endpoint;
|
|
1542
1672
|
const baseUrl = resolveOptionalString(baseUrlSource, env, `${target.name} pi-cli base url`, {
|
|
1543
1673
|
allowLiteral: true,
|
|
1544
1674
|
optionalEnv: true
|
|
@@ -1596,11 +1726,11 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
1596
1726
|
function resolveClaudeConfig(target, env, evalFilePath) {
|
|
1597
1727
|
const modelSource = target.model;
|
|
1598
1728
|
const cwdSource = target.cwd;
|
|
1599
|
-
const workspaceTemplateSource = target.workspace_template
|
|
1600
|
-
const timeoutSource = target.timeout_seconds
|
|
1601
|
-
const logDirSource = target.log_dir ?? target.
|
|
1602
|
-
const logFormatSource = target.log_format ?? target.
|
|
1603
|
-
const systemPromptSource = target.system_prompt
|
|
1729
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
1730
|
+
const timeoutSource = target.timeout_seconds;
|
|
1731
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
1732
|
+
const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CLAUDE_LOG_FORMAT;
|
|
1733
|
+
const systemPromptSource = target.system_prompt;
|
|
1604
1734
|
const model = resolveOptionalString(modelSource, env, `${target.name} claude model`, {
|
|
1605
1735
|
allowLiteral: true,
|
|
1606
1736
|
optionalEnv: true
|
|
@@ -1633,8 +1763,8 @@ function resolveClaudeConfig(target, env, evalFilePath) {
|
|
|
1633
1763
|
});
|
|
1634
1764
|
const logFormat = normalizeClaudeLogFormat(logFormatSource);
|
|
1635
1765
|
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
1636
|
-
const maxTurns = typeof target.max_turns === "number" ? target.max_turns :
|
|
1637
|
-
const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd :
|
|
1766
|
+
const maxTurns = typeof target.max_turns === "number" ? target.max_turns : void 0;
|
|
1767
|
+
const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd : void 0;
|
|
1638
1768
|
return {
|
|
1639
1769
|
model,
|
|
1640
1770
|
systemPrompt,
|
|
@@ -1665,9 +1795,7 @@ function resolveMockConfig(target) {
|
|
|
1665
1795
|
return { response };
|
|
1666
1796
|
}
|
|
1667
1797
|
function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
|
|
1668
|
-
const workspaceTemplateEnvVar = resolveOptionalLiteralString(
|
|
1669
|
-
target.workspace_template ?? target.workspaceTemplate
|
|
1670
|
-
);
|
|
1798
|
+
const workspaceTemplateEnvVar = resolveOptionalLiteralString(target.workspace_template);
|
|
1671
1799
|
let workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(
|
|
1672
1800
|
workspaceTemplateEnvVar,
|
|
1673
1801
|
env,
|
|
@@ -1682,9 +1810,9 @@ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
|
|
|
1682
1810
|
}
|
|
1683
1811
|
const executableSource = target.executable;
|
|
1684
1812
|
const waitSource = target.wait;
|
|
1685
|
-
const dryRunSource = target.dry_run
|
|
1686
|
-
const subagentRootSource = target.subagent_root
|
|
1687
|
-
const timeoutSource = target.timeout_seconds
|
|
1813
|
+
const dryRunSource = target.dry_run;
|
|
1814
|
+
const subagentRootSource = target.subagent_root;
|
|
1815
|
+
const timeoutSource = target.timeout_seconds;
|
|
1688
1816
|
const defaultCommand = insiders ? "code-insiders" : "code";
|
|
1689
1817
|
const executable = resolveOptionalString(executableSource, env, `${target.name} vscode executable`, {
|
|
1690
1818
|
allowLiteral: true,
|
|
@@ -1719,8 +1847,8 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
1719
1847
|
const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
|
|
1720
1848
|
if (!parseResult.success) {
|
|
1721
1849
|
const firstError = parseResult.error.errors[0];
|
|
1722
|
-
const
|
|
1723
|
-
const prefix =
|
|
1850
|
+
const path410 = firstError?.path.join(".") || "";
|
|
1851
|
+
const prefix = path410 ? `${target.name} ${path410}: ` : `${target.name}: `;
|
|
1724
1852
|
throw new Error(`${prefix}${firstError?.message}`);
|
|
1725
1853
|
}
|
|
1726
1854
|
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
@@ -1735,7 +1863,7 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
1735
1863
|
}
|
|
1736
1864
|
function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath) {
|
|
1737
1865
|
const command = target.command ? resolveString(target.command, env, `${target.name} command`, true) : `bun run .agentv/providers/${providerKind}.ts {PROMPT}`;
|
|
1738
|
-
const timeoutSeconds = target.timeout_seconds
|
|
1866
|
+
const timeoutSeconds = target.timeout_seconds;
|
|
1739
1867
|
const timeoutMs = resolveTimeoutMs(timeoutSeconds, `${target.name} timeout`);
|
|
1740
1868
|
let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
|
|
1741
1869
|
allowLiteral: true,
|
|
@@ -1799,10 +1927,10 @@ function resolveDiscover(value, targetName) {
|
|
|
1799
1927
|
throw new Error(`Target "${targetName}": discover must be "latest" (got "${String(value)}")`);
|
|
1800
1928
|
}
|
|
1801
1929
|
function resolveCopilotLogConfig(target, env) {
|
|
1802
|
-
const sessionDirSource = target.session_dir
|
|
1803
|
-
const sessionIdSource = target.session_id
|
|
1930
|
+
const sessionDirSource = target.session_dir;
|
|
1931
|
+
const sessionIdSource = target.session_id;
|
|
1804
1932
|
const discoverSource = target.discover;
|
|
1805
|
-
const sessionStateDirSource = target.session_state_dir
|
|
1933
|
+
const sessionStateDirSource = target.session_state_dir;
|
|
1806
1934
|
const cwdSource = target.cwd;
|
|
1807
1935
|
return {
|
|
1808
1936
|
sessionDir: resolveOptionalString(
|
|
@@ -1975,6 +2103,15 @@ var AGENT_PROVIDER_KINDS = [
|
|
|
1975
2103
|
"vscode",
|
|
1976
2104
|
"vscode-insiders"
|
|
1977
2105
|
];
|
|
2106
|
+
var LLM_GRADER_CAPABLE_KINDS = [
|
|
2107
|
+
"openai",
|
|
2108
|
+
"openrouter",
|
|
2109
|
+
"azure",
|
|
2110
|
+
"anthropic",
|
|
2111
|
+
"gemini",
|
|
2112
|
+
"agentv",
|
|
2113
|
+
"mock"
|
|
2114
|
+
];
|
|
1978
2115
|
var KNOWN_PROVIDERS = [
|
|
1979
2116
|
"openai",
|
|
1980
2117
|
"openrouter",
|
|
@@ -1994,7 +2131,8 @@ var KNOWN_PROVIDERS = [
|
|
|
1994
2131
|
"mock",
|
|
1995
2132
|
"vscode",
|
|
1996
2133
|
"vscode-insiders",
|
|
1997
|
-
"agentv"
|
|
2134
|
+
"agentv",
|
|
2135
|
+
"transcript"
|
|
1998
2136
|
];
|
|
1999
2137
|
var PROVIDER_ALIASES = [
|
|
2000
2138
|
"azure-openai",
|
|
@@ -6803,7 +6941,7 @@ function createOpenRouter(options = {}) {
|
|
|
6803
6941
|
);
|
|
6804
6942
|
const createChatModel = (modelId, settings = {}) => new OpenRouterChatLanguageModel(modelId, settings, {
|
|
6805
6943
|
provider: "openrouter.chat",
|
|
6806
|
-
url: ({ path:
|
|
6944
|
+
url: ({ path: path50 }) => `${baseURL}${path50}`,
|
|
6807
6945
|
headers: getHeaders,
|
|
6808
6946
|
compatibility,
|
|
6809
6947
|
fetch: options.fetch,
|
|
@@ -6811,7 +6949,7 @@ function createOpenRouter(options = {}) {
|
|
|
6811
6949
|
});
|
|
6812
6950
|
const createCompletionModel = (modelId, settings = {}) => new OpenRouterCompletionLanguageModel(modelId, settings, {
|
|
6813
6951
|
provider: "openrouter.completion",
|
|
6814
|
-
url: ({ path:
|
|
6952
|
+
url: ({ path: path50 }) => `${baseURL}${path50}`,
|
|
6815
6953
|
headers: getHeaders,
|
|
6816
6954
|
compatibility,
|
|
6817
6955
|
fetch: options.fetch,
|
|
@@ -6819,14 +6957,14 @@ function createOpenRouter(options = {}) {
|
|
|
6819
6957
|
});
|
|
6820
6958
|
const createEmbeddingModel = (modelId, settings = {}) => new OpenRouterEmbeddingModel(modelId, settings, {
|
|
6821
6959
|
provider: "openrouter.embedding",
|
|
6822
|
-
url: ({ path:
|
|
6960
|
+
url: ({ path: path50 }) => `${baseURL}${path50}`,
|
|
6823
6961
|
headers: getHeaders,
|
|
6824
6962
|
fetch: options.fetch,
|
|
6825
6963
|
extraBody: options.extraBody
|
|
6826
6964
|
});
|
|
6827
6965
|
const createImageModel = (modelId, settings = {}) => new OpenRouterImageModel(modelId, settings, {
|
|
6828
6966
|
provider: "openrouter.image",
|
|
6829
|
-
url: ({ path:
|
|
6967
|
+
url: ({ path: path50 }) => `${baseURL}${path50}`,
|
|
6830
6968
|
headers: getHeaders,
|
|
6831
6969
|
fetch: options.fetch,
|
|
6832
6970
|
extraBody: options.extraBody
|
|
@@ -14345,11 +14483,13 @@ import { tmpdir } from "node:os";
|
|
|
14345
14483
|
import path19 from "node:path";
|
|
14346
14484
|
import { execSync as execSync2 } from "node:child_process";
|
|
14347
14485
|
import { randomUUID as randomUUID8 } from "node:crypto";
|
|
14348
|
-
import { accessSync as accessSync2, createWriteStream as createWriteStream6 } from "node:fs";
|
|
14486
|
+
import { accessSync as accessSync2, createWriteStream as createWriteStream6, mkdirSync } from "node:fs";
|
|
14349
14487
|
import { mkdir as mkdir7 } from "node:fs/promises";
|
|
14350
|
-
import
|
|
14488
|
+
import path21 from "node:path";
|
|
14351
14489
|
import { createInterface } from "node:readline";
|
|
14352
|
-
import { fileURLToPath as fileURLToPath3 } from "node:url";
|
|
14490
|
+
import { fileURLToPath as fileURLToPath3, pathToFileURL } from "node:url";
|
|
14491
|
+
import os2 from "node:os";
|
|
14492
|
+
import path20 from "node:path";
|
|
14353
14493
|
import { exec as exec2 } from "node:child_process";
|
|
14354
14494
|
import { constants as constants3, access as access3, stat as stat5 } from "node:fs/promises";
|
|
14355
14495
|
import path322 from "node:path";
|
|
@@ -14358,18 +14498,16 @@ import { stat as stat4, writeFile as writeFile4 } from "node:fs/promises";
|
|
|
14358
14498
|
import path30 from "node:path";
|
|
14359
14499
|
import { constants as constants22 } from "node:fs";
|
|
14360
14500
|
import { access as access22, mkdir as mkdir8, readdir as readdir2, rm as rm2, stat as stat2 } from "node:fs/promises";
|
|
14361
|
-
import path21 from "node:path";
|
|
14362
14501
|
import path222 from "node:path";
|
|
14363
14502
|
import path23 from "node:path";
|
|
14364
|
-
import { readFile as readFile9 } from "node:fs/promises";
|
|
14365
14503
|
import path24 from "node:path";
|
|
14504
|
+
import { readFile as readFile9 } from "node:fs/promises";
|
|
14505
|
+
import path25 from "node:path";
|
|
14366
14506
|
import { exec, spawn as spawn4 } from "node:child_process";
|
|
14367
14507
|
import { mkdir as mkdir9, writeFile as writeFile2 } from "node:fs/promises";
|
|
14368
14508
|
import path27 from "node:path";
|
|
14369
14509
|
import { promisify as promisify2 } from "node:util";
|
|
14370
14510
|
import path26 from "node:path";
|
|
14371
|
-
import os2 from "node:os";
|
|
14372
|
-
import path25 from "node:path";
|
|
14373
14511
|
import { copyFile, mkdir as mkdir10, readFile as readFile10, readdir as readdir3, stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
|
|
14374
14512
|
import path29 from "node:path";
|
|
14375
14513
|
import path28 from "node:path";
|
|
@@ -14420,12 +14558,15 @@ import { existsSync as existsSync5 } from "node:fs";
|
|
|
14420
14558
|
import path45 from "node:path";
|
|
14421
14559
|
import { mkdir as mkdir15, readFile as readFile13, writeFile as writeFile8 } from "node:fs/promises";
|
|
14422
14560
|
import path46 from "node:path";
|
|
14423
|
-
import { existsSync as existsSync6, mkdirSync, readFileSync as readFileSync3, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
|
|
14561
|
+
import { existsSync as existsSync6, mkdirSync as mkdirSync2, readFileSync as readFileSync3, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
|
|
14424
14562
|
import path47 from "node:path";
|
|
14425
14563
|
import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
|
|
14426
14564
|
import { readdir as readdir8, stat as stat9 } from "node:fs/promises";
|
|
14427
14565
|
import { homedir as homedir3 } from "node:os";
|
|
14428
14566
|
import path48 from "node:path";
|
|
14567
|
+
import { readdir as readdir9, stat as stat10 } from "node:fs/promises";
|
|
14568
|
+
import { homedir as homedir4 } from "node:os";
|
|
14569
|
+
import path49 from "node:path";
|
|
14429
14570
|
import { readFile as readFile14 } from "node:fs/promises";
|
|
14430
14571
|
function computeTraceSummary(messages) {
|
|
14431
14572
|
const toolCallCounts = {};
|
|
@@ -15213,8 +15354,13 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15213
15354
|
const negate = rawEvaluator.negate === true ? true : void 0;
|
|
15214
15355
|
if (isCustomType) {
|
|
15215
15356
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15216
|
-
const required2 =
|
|
15217
|
-
|
|
15357
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15358
|
+
rawEvaluator.required,
|
|
15359
|
+
rawEvaluator.min_score,
|
|
15360
|
+
name21,
|
|
15361
|
+
evalId
|
|
15362
|
+
);
|
|
15363
|
+
const knownProps2 = /* @__PURE__ */ new Set(["name", "type", "weight", "required", "min_score", "negate"]);
|
|
15218
15364
|
const config2 = {};
|
|
15219
15365
|
for (const [key, value] of Object.entries(rawEvaluator)) {
|
|
15220
15366
|
if (!knownProps2.has(key) && value !== void 0) {
|
|
@@ -15226,6 +15372,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15226
15372
|
type: customTypeName,
|
|
15227
15373
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15228
15374
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15375
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15229
15376
|
...negate !== void 0 ? { negate } : {},
|
|
15230
15377
|
...Object.keys(config2).length > 0 ? { config: config2 } : {}
|
|
15231
15378
|
});
|
|
@@ -15295,7 +15442,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15295
15442
|
);
|
|
15296
15443
|
}
|
|
15297
15444
|
}
|
|
15298
|
-
const required2 =
|
|
15445
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15446
|
+
rawEvaluator.required,
|
|
15447
|
+
rawEvaluator.min_score,
|
|
15448
|
+
name21,
|
|
15449
|
+
evalId
|
|
15450
|
+
);
|
|
15299
15451
|
const knownProps2 = /* @__PURE__ */ new Set([
|
|
15300
15452
|
"name",
|
|
15301
15453
|
"type",
|
|
@@ -15321,6 +15473,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15321
15473
|
resolvedCwd,
|
|
15322
15474
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15323
15475
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15476
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15324
15477
|
...negate !== void 0 ? { negate } : {},
|
|
15325
15478
|
...Object.keys(config2).length > 0 ? { config: config2 } : {},
|
|
15326
15479
|
...targetConfig !== void 0 ? { target: targetConfig } : {}
|
|
@@ -15449,7 +15602,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15449
15602
|
};
|
|
15450
15603
|
}
|
|
15451
15604
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15452
|
-
const required2 =
|
|
15605
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15606
|
+
rawEvaluator.required,
|
|
15607
|
+
rawEvaluator.min_score,
|
|
15608
|
+
name21,
|
|
15609
|
+
evalId
|
|
15610
|
+
);
|
|
15453
15611
|
evaluators.push({
|
|
15454
15612
|
name: name21,
|
|
15455
15613
|
type: "composite",
|
|
@@ -15457,6 +15615,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15457
15615
|
aggregator,
|
|
15458
15616
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15459
15617
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15618
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15460
15619
|
...negate !== void 0 ? { negate } : {}
|
|
15461
15620
|
});
|
|
15462
15621
|
continue;
|
|
@@ -15567,7 +15726,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15567
15726
|
continue;
|
|
15568
15727
|
}
|
|
15569
15728
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15570
|
-
const required2 =
|
|
15729
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15730
|
+
rawEvaluator.required,
|
|
15731
|
+
rawEvaluator.min_score,
|
|
15732
|
+
name21,
|
|
15733
|
+
evalId
|
|
15734
|
+
);
|
|
15571
15735
|
const config2 = {
|
|
15572
15736
|
name: name21,
|
|
15573
15737
|
type: "tool-trajectory",
|
|
@@ -15576,6 +15740,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15576
15740
|
...expected ? { expected } : {},
|
|
15577
15741
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15578
15742
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15743
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15579
15744
|
...negate !== void 0 ? { negate } : {},
|
|
15580
15745
|
...argsMatch2 !== void 0 ? { argsMatch: argsMatch2 } : {}
|
|
15581
15746
|
};
|
|
@@ -15638,7 +15803,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15638
15803
|
const aggregation = asString(rawEvaluator.aggregation);
|
|
15639
15804
|
const validAggregation = isValidFieldAggregationType(aggregation) ? aggregation : void 0;
|
|
15640
15805
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15641
|
-
const required2 =
|
|
15806
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15807
|
+
rawEvaluator.required,
|
|
15808
|
+
rawEvaluator.min_score,
|
|
15809
|
+
name21,
|
|
15810
|
+
evalId
|
|
15811
|
+
);
|
|
15642
15812
|
evaluators.push({
|
|
15643
15813
|
name: name21,
|
|
15644
15814
|
type: "field-accuracy",
|
|
@@ -15646,6 +15816,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15646
15816
|
...validAggregation ? { aggregation: validAggregation } : {},
|
|
15647
15817
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15648
15818
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15819
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15649
15820
|
...negate !== void 0 ? { negate } : {}
|
|
15650
15821
|
});
|
|
15651
15822
|
continue;
|
|
@@ -15659,13 +15830,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15659
15830
|
continue;
|
|
15660
15831
|
}
|
|
15661
15832
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15662
|
-
const required2 =
|
|
15833
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15834
|
+
rawEvaluator.required,
|
|
15835
|
+
rawEvaluator.min_score,
|
|
15836
|
+
name21,
|
|
15837
|
+
evalId
|
|
15838
|
+
);
|
|
15663
15839
|
evaluators.push({
|
|
15664
15840
|
name: name21,
|
|
15665
15841
|
type: "latency",
|
|
15666
15842
|
threshold,
|
|
15667
15843
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15668
15844
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15845
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15669
15846
|
...negate !== void 0 ? { negate } : {}
|
|
15670
15847
|
});
|
|
15671
15848
|
continue;
|
|
@@ -15679,13 +15856,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15679
15856
|
continue;
|
|
15680
15857
|
}
|
|
15681
15858
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15682
|
-
const required2 =
|
|
15859
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15860
|
+
rawEvaluator.required,
|
|
15861
|
+
rawEvaluator.min_score,
|
|
15862
|
+
name21,
|
|
15863
|
+
evalId
|
|
15864
|
+
);
|
|
15683
15865
|
evaluators.push({
|
|
15684
15866
|
name: name21,
|
|
15685
15867
|
type: "cost",
|
|
15686
15868
|
budget,
|
|
15687
15869
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15688
15870
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15871
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15689
15872
|
...negate !== void 0 ? { negate } : {}
|
|
15690
15873
|
});
|
|
15691
15874
|
continue;
|
|
@@ -15717,13 +15900,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15717
15900
|
continue;
|
|
15718
15901
|
}
|
|
15719
15902
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15720
|
-
const required2 =
|
|
15903
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15904
|
+
rawEvaluator.required,
|
|
15905
|
+
rawEvaluator.min_score,
|
|
15906
|
+
name21,
|
|
15907
|
+
evalId
|
|
15908
|
+
);
|
|
15721
15909
|
evaluators.push({
|
|
15722
15910
|
name: name21,
|
|
15723
15911
|
type: "token-usage",
|
|
15724
15912
|
...validLimits,
|
|
15725
15913
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15726
15914
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15915
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15727
15916
|
...negate !== void 0 ? { negate } : {}
|
|
15728
15917
|
});
|
|
15729
15918
|
continue;
|
|
@@ -15769,13 +15958,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15769
15958
|
continue;
|
|
15770
15959
|
}
|
|
15771
15960
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15772
|
-
const required2 =
|
|
15961
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15962
|
+
rawEvaluator.required,
|
|
15963
|
+
rawEvaluator.min_score,
|
|
15964
|
+
name21,
|
|
15965
|
+
evalId
|
|
15966
|
+
);
|
|
15773
15967
|
evaluators.push({
|
|
15774
15968
|
name: name21,
|
|
15775
15969
|
type: "execution-metrics",
|
|
15776
15970
|
...validThresholds,
|
|
15777
15971
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15778
15972
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15973
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15779
15974
|
...negate !== void 0 ? { negate } : {}
|
|
15780
15975
|
});
|
|
15781
15976
|
continue;
|
|
@@ -15789,7 +15984,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15789
15984
|
const rawShouldTrigger = rawEvaluator.should_trigger;
|
|
15790
15985
|
const shouldTrigger = typeof rawShouldTrigger === "boolean" ? rawShouldTrigger : void 0;
|
|
15791
15986
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15792
|
-
const required2 =
|
|
15987
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15988
|
+
rawEvaluator.required,
|
|
15989
|
+
rawEvaluator.min_score,
|
|
15990
|
+
name21,
|
|
15991
|
+
evalId
|
|
15992
|
+
);
|
|
15793
15993
|
evaluators.push({
|
|
15794
15994
|
name: name21,
|
|
15795
15995
|
type: "skill-trigger",
|
|
@@ -15797,6 +15997,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15797
15997
|
...shouldTrigger !== void 0 ? { should_trigger: shouldTrigger } : {},
|
|
15798
15998
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15799
15999
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16000
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15800
16001
|
...negate !== void 0 ? { negate } : {}
|
|
15801
16002
|
});
|
|
15802
16003
|
continue;
|
|
@@ -15808,13 +16009,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15808
16009
|
continue;
|
|
15809
16010
|
}
|
|
15810
16011
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15811
|
-
const required2 =
|
|
16012
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16013
|
+
rawEvaluator.required,
|
|
16014
|
+
rawEvaluator.min_score,
|
|
16015
|
+
name21,
|
|
16016
|
+
evalId
|
|
16017
|
+
);
|
|
15812
16018
|
evaluators.push({
|
|
15813
16019
|
name: name21,
|
|
15814
16020
|
type: "contains",
|
|
15815
16021
|
value,
|
|
15816
16022
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15817
16023
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16024
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15818
16025
|
...negate !== void 0 ? { negate } : {}
|
|
15819
16026
|
});
|
|
15820
16027
|
continue;
|
|
@@ -15828,13 +16035,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15828
16035
|
continue;
|
|
15829
16036
|
}
|
|
15830
16037
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15831
|
-
const required2 =
|
|
16038
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16039
|
+
rawEvaluator.required,
|
|
16040
|
+
rawEvaluator.min_score,
|
|
16041
|
+
name21,
|
|
16042
|
+
evalId
|
|
16043
|
+
);
|
|
15832
16044
|
evaluators.push({
|
|
15833
16045
|
name: name21,
|
|
15834
16046
|
type: typeValue,
|
|
15835
16047
|
value,
|
|
15836
16048
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15837
16049
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16050
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15838
16051
|
...negate !== void 0 ? { negate } : {}
|
|
15839
16052
|
});
|
|
15840
16053
|
continue;
|
|
@@ -15846,13 +16059,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15846
16059
|
continue;
|
|
15847
16060
|
}
|
|
15848
16061
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15849
|
-
const required2 =
|
|
16062
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16063
|
+
rawEvaluator.required,
|
|
16064
|
+
rawEvaluator.min_score,
|
|
16065
|
+
name21,
|
|
16066
|
+
evalId
|
|
16067
|
+
);
|
|
15850
16068
|
evaluators.push({
|
|
15851
16069
|
name: name21,
|
|
15852
16070
|
type: "icontains",
|
|
15853
16071
|
value,
|
|
15854
16072
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15855
16073
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16074
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15856
16075
|
...negate !== void 0 ? { negate } : {}
|
|
15857
16076
|
});
|
|
15858
16077
|
continue;
|
|
@@ -15866,13 +16085,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15866
16085
|
continue;
|
|
15867
16086
|
}
|
|
15868
16087
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15869
|
-
const required2 =
|
|
16088
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16089
|
+
rawEvaluator.required,
|
|
16090
|
+
rawEvaluator.min_score,
|
|
16091
|
+
name21,
|
|
16092
|
+
evalId
|
|
16093
|
+
);
|
|
15870
16094
|
evaluators.push({
|
|
15871
16095
|
name: name21,
|
|
15872
16096
|
type: typeValue,
|
|
15873
16097
|
value,
|
|
15874
16098
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15875
16099
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16100
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15876
16101
|
...negate !== void 0 ? { negate } : {}
|
|
15877
16102
|
});
|
|
15878
16103
|
continue;
|
|
@@ -15884,13 +16109,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15884
16109
|
continue;
|
|
15885
16110
|
}
|
|
15886
16111
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15887
|
-
const required2 =
|
|
16112
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16113
|
+
rawEvaluator.required,
|
|
16114
|
+
rawEvaluator.min_score,
|
|
16115
|
+
name21,
|
|
16116
|
+
evalId
|
|
16117
|
+
);
|
|
15888
16118
|
evaluators.push({
|
|
15889
16119
|
name: name21,
|
|
15890
16120
|
type: typeValue,
|
|
15891
16121
|
value,
|
|
15892
16122
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15893
16123
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16124
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15894
16125
|
...negate !== void 0 ? { negate } : {}
|
|
15895
16126
|
});
|
|
15896
16127
|
continue;
|
|
@@ -15903,7 +16134,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15903
16134
|
}
|
|
15904
16135
|
const flags = asString(rawEvaluator.flags);
|
|
15905
16136
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15906
|
-
const required2 =
|
|
16137
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16138
|
+
rawEvaluator.required,
|
|
16139
|
+
rawEvaluator.min_score,
|
|
16140
|
+
name21,
|
|
16141
|
+
evalId
|
|
16142
|
+
);
|
|
15907
16143
|
evaluators.push({
|
|
15908
16144
|
name: name21,
|
|
15909
16145
|
type: "regex",
|
|
@@ -15911,18 +16147,25 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15911
16147
|
...flags !== void 0 ? { flags } : {},
|
|
15912
16148
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15913
16149
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16150
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15914
16151
|
...negate !== void 0 ? { negate } : {}
|
|
15915
16152
|
});
|
|
15916
16153
|
continue;
|
|
15917
16154
|
}
|
|
15918
16155
|
if (typeValue === "is-json") {
|
|
15919
16156
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15920
|
-
const required2 =
|
|
16157
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16158
|
+
rawEvaluator.required,
|
|
16159
|
+
rawEvaluator.min_score,
|
|
16160
|
+
name21,
|
|
16161
|
+
evalId
|
|
16162
|
+
);
|
|
15921
16163
|
evaluators.push({
|
|
15922
16164
|
name: name21,
|
|
15923
16165
|
type: "is-json",
|
|
15924
16166
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15925
16167
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16168
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15926
16169
|
...negate !== void 0 ? { negate } : {}
|
|
15927
16170
|
});
|
|
15928
16171
|
continue;
|
|
@@ -15934,13 +16177,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15934
16177
|
continue;
|
|
15935
16178
|
}
|
|
15936
16179
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15937
|
-
const required2 =
|
|
16180
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16181
|
+
rawEvaluator.required,
|
|
16182
|
+
rawEvaluator.min_score,
|
|
16183
|
+
name21,
|
|
16184
|
+
evalId
|
|
16185
|
+
);
|
|
15938
16186
|
evaluators.push({
|
|
15939
16187
|
name: name21,
|
|
15940
16188
|
type: "equals",
|
|
15941
16189
|
value,
|
|
15942
16190
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15943
16191
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16192
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15944
16193
|
...negate !== void 0 ? { negate } : {}
|
|
15945
16194
|
});
|
|
15946
16195
|
continue;
|
|
@@ -15976,7 +16225,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15976
16225
|
continue;
|
|
15977
16226
|
}
|
|
15978
16227
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15979
|
-
const required2 =
|
|
16228
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16229
|
+
rawEvaluator.required,
|
|
16230
|
+
rawEvaluator.min_score,
|
|
16231
|
+
name21,
|
|
16232
|
+
evalId
|
|
16233
|
+
);
|
|
15980
16234
|
evaluators.push({
|
|
15981
16235
|
name: name21,
|
|
15982
16236
|
type: "llm-grader",
|
|
@@ -15984,6 +16238,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15984
16238
|
...graderTargetName ? { target: graderTargetName } : {},
|
|
15985
16239
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15986
16240
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16241
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15987
16242
|
...negate !== void 0 ? { negate } : {}
|
|
15988
16243
|
});
|
|
15989
16244
|
continue;
|
|
@@ -16053,7 +16308,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
16053
16308
|
continue;
|
|
16054
16309
|
}
|
|
16055
16310
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
16056
|
-
const required2 =
|
|
16311
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16312
|
+
rawEvaluator.required,
|
|
16313
|
+
rawEvaluator.min_score,
|
|
16314
|
+
name21,
|
|
16315
|
+
evalId
|
|
16316
|
+
);
|
|
16057
16317
|
evaluators.push({
|
|
16058
16318
|
name: name21,
|
|
16059
16319
|
type: "llm-grader",
|
|
@@ -16061,12 +16321,18 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
16061
16321
|
...graderTargetName ? { target: graderTargetName } : {},
|
|
16062
16322
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
16063
16323
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16324
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
16064
16325
|
...negate !== void 0 ? { negate } : {}
|
|
16065
16326
|
});
|
|
16066
16327
|
continue;
|
|
16067
16328
|
}
|
|
16068
16329
|
const weight = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
16069
|
-
const required =
|
|
16330
|
+
const { required, min_score } = parseRequiredAndMinScore(
|
|
16331
|
+
rawEvaluator.required,
|
|
16332
|
+
rawEvaluator.min_score,
|
|
16333
|
+
name21,
|
|
16334
|
+
evalId
|
|
16335
|
+
);
|
|
16070
16336
|
const knownProps = /* @__PURE__ */ new Set([
|
|
16071
16337
|
"name",
|
|
16072
16338
|
"type",
|
|
@@ -16077,6 +16343,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
16077
16343
|
"weight",
|
|
16078
16344
|
"config",
|
|
16079
16345
|
"required",
|
|
16346
|
+
"min_score",
|
|
16080
16347
|
"negate",
|
|
16081
16348
|
"max_steps",
|
|
16082
16349
|
"maxSteps",
|
|
@@ -16106,6 +16373,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
16106
16373
|
...graderTargetName ? { target: graderTargetName } : {},
|
|
16107
16374
|
...weight !== void 0 ? { weight } : {},
|
|
16108
16375
|
...required !== void 0 ? { required } : {},
|
|
16376
|
+
...min_score !== void 0 ? { min_score } : {},
|
|
16109
16377
|
...negate !== void 0 ? { negate } : {},
|
|
16110
16378
|
...finalConfig ? { config: finalConfig } : {},
|
|
16111
16379
|
...llmMaxSteps !== void 0 ? { max_steps: llmMaxSteps } : {},
|
|
@@ -16237,10 +16505,23 @@ ${detailBlock}${ANSI_RESET4}`);
|
|
|
16237
16505
|
console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET4}`);
|
|
16238
16506
|
}
|
|
16239
16507
|
}
|
|
16240
|
-
function
|
|
16241
|
-
|
|
16242
|
-
if (typeof
|
|
16243
|
-
|
|
16508
|
+
function parseRequiredAndMinScore(rawRequired, rawMinScore, evaluatorName, evalId) {
|
|
16509
|
+
const result = {};
|
|
16510
|
+
if (typeof rawMinScore === "number" && rawMinScore > 0 && rawMinScore <= 1) {
|
|
16511
|
+
result.min_score = rawMinScore;
|
|
16512
|
+
}
|
|
16513
|
+
if (rawRequired === true) {
|
|
16514
|
+
result.required = true;
|
|
16515
|
+
} else if (typeof rawRequired === "number" && rawRequired > 0 && rawRequired <= 1) {
|
|
16516
|
+
if (result.min_score === void 0) {
|
|
16517
|
+
result.min_score = rawRequired;
|
|
16518
|
+
}
|
|
16519
|
+
result.required = rawRequired;
|
|
16520
|
+
logWarning2(
|
|
16521
|
+
`Evaluator '${evaluatorName}' in '${evalId}': 'required: ${rawRequired}' is deprecated. Use 'required: true' + 'min_score: ${rawRequired}' instead.`
|
|
16522
|
+
);
|
|
16523
|
+
}
|
|
16524
|
+
return result;
|
|
16244
16525
|
}
|
|
16245
16526
|
function validateWeight(rawWeight, evaluatorName, evalId) {
|
|
16246
16527
|
if (rawWeight === void 0) {
|
|
@@ -16283,16 +16564,30 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
|
|
|
16283
16564
|
const id = asString(rawRubric.id) ?? `rubric-${index + 1}`;
|
|
16284
16565
|
const expectedOutcome = asString(rawRubric.outcome) ?? "";
|
|
16285
16566
|
const weight = typeof rawRubric.weight === "number" ? rawRubric.weight : 1;
|
|
16567
|
+
let minScore;
|
|
16286
16568
|
let requiredMinScore;
|
|
16287
16569
|
let required;
|
|
16288
|
-
if (typeof rawRubric.
|
|
16289
|
-
const
|
|
16290
|
-
if (
|
|
16570
|
+
if (typeof rawRubric.min_score === "number") {
|
|
16571
|
+
const ms = rawRubric.min_score;
|
|
16572
|
+
if (ms <= 0 || ms > 1) {
|
|
16573
|
+
throw new Error(
|
|
16574
|
+
`Invalid min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be in (0, 1] (got ${ms})`
|
|
16575
|
+
);
|
|
16576
|
+
}
|
|
16577
|
+
minScore = ms;
|
|
16578
|
+
requiredMinScore = Math.round(ms * 10);
|
|
16579
|
+
} else if (typeof rawRubric.required_min_score === "number") {
|
|
16580
|
+
const rms = rawRubric.required_min_score;
|
|
16581
|
+
if (!Number.isInteger(rms) || rms < 0 || rms > 10) {
|
|
16291
16582
|
throw new Error(
|
|
16292
|
-
`Invalid required_min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be an integer 0-10 (got ${
|
|
16583
|
+
`Invalid required_min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be an integer 0-10 (got ${rms})`
|
|
16293
16584
|
);
|
|
16294
16585
|
}
|
|
16295
|
-
requiredMinScore =
|
|
16586
|
+
requiredMinScore = rms;
|
|
16587
|
+
minScore = rms / 10;
|
|
16588
|
+
logWarning2(
|
|
16589
|
+
`Rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': 'required_min_score: ${rms}' is deprecated. Use 'min_score: ${rms / 10}' (0-1 scale) instead.`
|
|
16590
|
+
);
|
|
16296
16591
|
}
|
|
16297
16592
|
if (typeof rawRubric.required === "boolean") {
|
|
16298
16593
|
required = rawRubric.required;
|
|
@@ -16312,6 +16607,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
|
|
|
16312
16607
|
weight,
|
|
16313
16608
|
...expectedOutcome.length > 0 ? { outcome: expectedOutcome } : {},
|
|
16314
16609
|
...required !== void 0 ? { required } : {},
|
|
16610
|
+
...minScore !== void 0 ? { min_score: minScore } : {},
|
|
16315
16611
|
...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {},
|
|
16316
16612
|
score_ranges: scoreRanges
|
|
16317
16613
|
});
|
|
@@ -16328,6 +16624,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
|
|
|
16328
16624
|
weight,
|
|
16329
16625
|
// Default to required: true if not specified (backward compatibility)
|
|
16330
16626
|
required: required ?? true,
|
|
16627
|
+
...minScore !== void 0 ? { min_score: minScore } : {},
|
|
16331
16628
|
...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {}
|
|
16332
16629
|
});
|
|
16333
16630
|
}
|
|
@@ -16456,12 +16753,22 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
16456
16753
|
id: asString(rubric.id) ?? `rubric-${index + 1}`,
|
|
16457
16754
|
weight: typeof rubric.weight === "number" ? rubric.weight : 1
|
|
16458
16755
|
};
|
|
16756
|
+
let inlineMinScore;
|
|
16757
|
+
let inlineRequiredMinScore;
|
|
16758
|
+
if (typeof rubric.min_score === "number") {
|
|
16759
|
+
inlineMinScore = rubric.min_score;
|
|
16760
|
+
inlineRequiredMinScore = Math.round(inlineMinScore * 10);
|
|
16761
|
+
} else if (typeof rubric.required_min_score === "number") {
|
|
16762
|
+
inlineRequiredMinScore = rubric.required_min_score;
|
|
16763
|
+
inlineMinScore = inlineRequiredMinScore / 10;
|
|
16764
|
+
}
|
|
16459
16765
|
if (scoreRanges && scoreRanges.length > 0) {
|
|
16460
16766
|
return {
|
|
16461
16767
|
...baseRubric,
|
|
16462
16768
|
...expectedOutcome.length > 0 ? { outcome: expectedOutcome } : {},
|
|
16463
16769
|
...typeof rubric.required === "boolean" ? { required: rubric.required } : {},
|
|
16464
|
-
...
|
|
16770
|
+
...inlineMinScore !== void 0 ? { min_score: inlineMinScore } : {},
|
|
16771
|
+
...inlineRequiredMinScore !== void 0 ? { required_min_score: inlineRequiredMinScore } : {},
|
|
16465
16772
|
score_ranges: scoreRanges
|
|
16466
16773
|
};
|
|
16467
16774
|
}
|
|
@@ -16469,7 +16776,8 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
16469
16776
|
...baseRubric,
|
|
16470
16777
|
outcome: expectedOutcome,
|
|
16471
16778
|
required: typeof rubric.required === "boolean" ? rubric.required : true,
|
|
16472
|
-
...
|
|
16779
|
+
...inlineMinScore !== void 0 ? { min_score: inlineMinScore } : {},
|
|
16780
|
+
...inlineRequiredMinScore !== void 0 ? { required_min_score: inlineRequiredMinScore } : {}
|
|
16473
16781
|
};
|
|
16474
16782
|
}).filter((r) => r.outcome && r.outcome.length > 0 || "score_ranges" in r && r.score_ranges);
|
|
16475
16783
|
if (rubricItems.length === 0) {
|
|
@@ -16851,6 +17159,9 @@ function resolveExpectedMessages(raw) {
|
|
|
16851
17159
|
var ANSI_YELLOW5 = "\x1B[33m";
|
|
16852
17160
|
var ANSI_RED2 = "\x1B[31m";
|
|
16853
17161
|
var ANSI_RESET6 = "\x1B[0m";
|
|
17162
|
+
function matchesFilter(id, filter2) {
|
|
17163
|
+
return typeof filter2 === "string" ? micromatch.isMatch(id, filter2) : filter2.some((pattern) => micromatch.isMatch(id, pattern));
|
|
17164
|
+
}
|
|
16854
17165
|
function detectFormat(filePath) {
|
|
16855
17166
|
const ext = path6.extname(filePath).toLowerCase();
|
|
16856
17167
|
if (ext === ".jsonl") return "jsonl";
|
|
@@ -16918,40 +17229,40 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
16918
17229
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
16919
17230
|
const rawFile = await readFile5(absoluteTestPath, "utf8");
|
|
16920
17231
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
16921
|
-
const
|
|
16922
|
-
const
|
|
17232
|
+
const fallbackSuiteName = path6.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
17233
|
+
const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
|
|
16923
17234
|
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
|
|
16924
17235
|
const globalExecution = sidecar.execution;
|
|
16925
17236
|
if (verbose) {
|
|
16926
17237
|
console.log(`
|
|
16927
|
-
[JSONL
|
|
17238
|
+
[JSONL Suite: ${evalFilePath}]`);
|
|
16928
17239
|
console.log(` Cases: ${rawCases.length}`);
|
|
16929
|
-
console.log(`
|
|
17240
|
+
console.log(` Suite: ${suiteName}`);
|
|
16930
17241
|
if (sidecar.description) {
|
|
16931
17242
|
console.log(` Description: ${sidecar.description}`);
|
|
16932
17243
|
}
|
|
16933
17244
|
}
|
|
16934
17245
|
const results = [];
|
|
16935
17246
|
for (let lineIndex = 0; lineIndex < rawCases.length; lineIndex++) {
|
|
16936
|
-
const
|
|
17247
|
+
const testCaseConfig = rawCases[lineIndex];
|
|
16937
17248
|
const lineNumber = lineIndex + 1;
|
|
16938
|
-
const id = asString4(
|
|
16939
|
-
if (filterPattern && (!id || !
|
|
17249
|
+
const id = asString4(testCaseConfig.id);
|
|
17250
|
+
if (filterPattern && (!id || !matchesFilter(id, filterPattern))) {
|
|
16940
17251
|
continue;
|
|
16941
17252
|
}
|
|
16942
|
-
const conversationId = asString4(
|
|
16943
|
-
let outcome = asString4(
|
|
16944
|
-
if (!outcome &&
|
|
16945
|
-
outcome = asString4(
|
|
17253
|
+
const conversationId = asString4(testCaseConfig.conversation_id);
|
|
17254
|
+
let outcome = asString4(testCaseConfig.criteria);
|
|
17255
|
+
if (!outcome && testCaseConfig.expected_outcome !== void 0) {
|
|
17256
|
+
outcome = asString4(testCaseConfig.expected_outcome);
|
|
16946
17257
|
if (outcome) {
|
|
16947
17258
|
logWarning4(
|
|
16948
|
-
`Test '${asString4(
|
|
17259
|
+
`Test '${asString4(testCaseConfig.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
|
|
16949
17260
|
);
|
|
16950
17261
|
}
|
|
16951
17262
|
}
|
|
16952
|
-
const rawInputMessages = resolveInputMessages(
|
|
16953
|
-
const expectedMessages = resolveExpectedMessages(
|
|
16954
|
-
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 ||
|
|
17263
|
+
const rawInputMessages = resolveInputMessages(testCaseConfig);
|
|
17264
|
+
const expectedMessages = resolveExpectedMessages(testCaseConfig) ?? [];
|
|
17265
|
+
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || testCaseConfig.assert !== void 0;
|
|
16955
17266
|
if (!id || !hasEvaluationSpec || !rawInputMessages || rawInputMessages.length === 0) {
|
|
16956
17267
|
logError2(
|
|
16957
17268
|
`Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
|
|
@@ -16988,18 +17299,23 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
16988
17299
|
}
|
|
16989
17300
|
}
|
|
16990
17301
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
16991
|
-
const caseExecution = isJsonObject(
|
|
17302
|
+
const caseExecution = isJsonObject(testCaseConfig.execution) ? testCaseConfig.execution : void 0;
|
|
16992
17303
|
const mergedExecution = caseExecution ?? globalExecution;
|
|
16993
|
-
const
|
|
17304
|
+
const testCaseEvaluatorKind = coerceEvaluator(testCaseConfig.evaluator, id) ?? globalEvaluator;
|
|
16994
17305
|
let evaluators;
|
|
16995
17306
|
try {
|
|
16996
|
-
evaluators = await parseEvaluators(
|
|
17307
|
+
evaluators = await parseEvaluators(
|
|
17308
|
+
testCaseConfig,
|
|
17309
|
+
mergedExecution,
|
|
17310
|
+
searchRoots,
|
|
17311
|
+
id ?? "unknown"
|
|
17312
|
+
);
|
|
16997
17313
|
} catch (error) {
|
|
16998
17314
|
const message = error instanceof Error ? error.message : String(error);
|
|
16999
17315
|
logError2(`Skipping test '${id}' at line ${lineNumber}: ${message}`);
|
|
17000
17316
|
continue;
|
|
17001
17317
|
}
|
|
17002
|
-
const inlineRubrics =
|
|
17318
|
+
const inlineRubrics = testCaseConfig.rubrics;
|
|
17003
17319
|
if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
|
|
17004
17320
|
const rubricEvaluator = parseInlineRubrics(inlineRubrics);
|
|
17005
17321
|
if (rubricEvaluator) {
|
|
@@ -17010,7 +17326,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
17010
17326
|
const userFilePaths = collectResolvedInputFilePaths(inputMessages);
|
|
17011
17327
|
const testCase = {
|
|
17012
17328
|
id,
|
|
17013
|
-
|
|
17329
|
+
suite: suiteName,
|
|
17014
17330
|
conversation_id: conversationId,
|
|
17015
17331
|
question,
|
|
17016
17332
|
input: inputMessages,
|
|
@@ -17018,7 +17334,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
17018
17334
|
reference_answer: referenceAnswer,
|
|
17019
17335
|
file_paths: userFilePaths,
|
|
17020
17336
|
criteria: outcome ?? "",
|
|
17021
|
-
evaluator:
|
|
17337
|
+
evaluator: testCaseEvaluatorKind,
|
|
17022
17338
|
assertions: evaluators
|
|
17023
17339
|
};
|
|
17024
17340
|
results.push(testCase);
|
|
@@ -17194,6 +17510,9 @@ function buildChatPromptFromSegments(options) {
|
|
|
17194
17510
|
var ANSI_YELLOW6 = "\x1B[33m";
|
|
17195
17511
|
var ANSI_RED3 = "\x1B[31m";
|
|
17196
17512
|
var ANSI_RESET7 = "\x1B[0m";
|
|
17513
|
+
function matchesFilter2(id, filter2) {
|
|
17514
|
+
return typeof filter2 === "string" ? micromatch2.isMatch(id, filter2) : filter2.some((pattern) => micromatch2.isMatch(id, pattern));
|
|
17515
|
+
}
|
|
17197
17516
|
function resolveTests(suite) {
|
|
17198
17517
|
if (suite.tests !== void 0) return suite.tests;
|
|
17199
17518
|
if (suite.eval_cases !== void 0) {
|
|
@@ -17273,18 +17592,18 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
17273
17592
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
17274
17593
|
}
|
|
17275
17594
|
const suite = interpolated;
|
|
17276
|
-
const
|
|
17277
|
-
const
|
|
17278
|
-
const
|
|
17279
|
-
const
|
|
17595
|
+
const suiteNameFromFile = asString5(suite.name)?.trim();
|
|
17596
|
+
const fallbackSuiteName = path7.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
|
|
17597
|
+
const suiteName = suiteNameFromFile && suiteNameFromFile.length > 0 ? suiteNameFromFile : fallbackSuiteName;
|
|
17598
|
+
const rawTestCases = resolveTests(suite);
|
|
17280
17599
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
17281
17600
|
const evalFileDir = path7.dirname(absoluteTestPath);
|
|
17282
|
-
let
|
|
17283
|
-
if (typeof
|
|
17284
|
-
const externalPath = path7.resolve(evalFileDir,
|
|
17285
|
-
|
|
17286
|
-
} else if (Array.isArray(
|
|
17287
|
-
|
|
17601
|
+
let expandedTestCases;
|
|
17602
|
+
if (typeof rawTestCases === "string") {
|
|
17603
|
+
const externalPath = path7.resolve(evalFileDir, rawTestCases);
|
|
17604
|
+
expandedTestCases = await loadCasesFromFile(externalPath);
|
|
17605
|
+
} else if (Array.isArray(rawTestCases)) {
|
|
17606
|
+
expandedTestCases = await expandFileReferences(rawTestCases, evalFileDir);
|
|
17288
17607
|
} else {
|
|
17289
17608
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
|
|
17290
17609
|
}
|
|
@@ -17299,32 +17618,33 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
17299
17618
|
}
|
|
17300
17619
|
const globalExecution = suiteAssertions !== void 0 ? { ...rawGlobalExecution ?? {}, assertions: suiteAssertions } : rawGlobalExecution;
|
|
17301
17620
|
const results = [];
|
|
17302
|
-
for (const
|
|
17303
|
-
if (!isJsonObject(
|
|
17621
|
+
for (const rawTestCase of expandedTestCases) {
|
|
17622
|
+
if (!isJsonObject(rawTestCase)) {
|
|
17304
17623
|
logWarning5("Skipping invalid test entry (expected object)");
|
|
17305
17624
|
continue;
|
|
17306
17625
|
}
|
|
17307
|
-
const
|
|
17308
|
-
const id = asString5(
|
|
17309
|
-
if (filterPattern && (!id || !
|
|
17626
|
+
const testCaseConfig = rawTestCase;
|
|
17627
|
+
const id = asString5(testCaseConfig.id);
|
|
17628
|
+
if (filterPattern && (!id || !matchesFilter2(id, filterPattern))) {
|
|
17310
17629
|
continue;
|
|
17311
17630
|
}
|
|
17312
|
-
const conversationId = asString5(
|
|
17313
|
-
let outcome = asString5(
|
|
17314
|
-
if (!outcome &&
|
|
17315
|
-
outcome = asString5(
|
|
17631
|
+
const conversationId = asString5(testCaseConfig.conversation_id);
|
|
17632
|
+
let outcome = asString5(testCaseConfig.criteria);
|
|
17633
|
+
if (!outcome && testCaseConfig.expected_outcome !== void 0) {
|
|
17634
|
+
outcome = asString5(testCaseConfig.expected_outcome);
|
|
17316
17635
|
if (outcome) {
|
|
17317
17636
|
logWarning5(
|
|
17318
|
-
`Test '${asString5(
|
|
17637
|
+
`Test '${asString5(testCaseConfig.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
|
|
17319
17638
|
);
|
|
17320
17639
|
}
|
|
17321
17640
|
}
|
|
17322
|
-
const caseExecution = isJsonObject(
|
|
17641
|
+
const caseExecution = isJsonObject(testCaseConfig.execution) ? testCaseConfig.execution : void 0;
|
|
17323
17642
|
const skipDefaults = caseExecution?.skip_defaults === true;
|
|
17643
|
+
const caseThreshold = typeof caseExecution?.threshold === "number" && caseExecution.threshold >= 0 && caseExecution.threshold <= 1 ? caseExecution.threshold : void 0;
|
|
17324
17644
|
const effectiveSuiteInputFiles = suiteInputFiles && !skipDefaults ? suiteInputFiles : void 0;
|
|
17325
|
-
const testInputMessages = resolveInputMessages(
|
|
17326
|
-
const expectedMessages = resolveExpectedMessages(
|
|
17327
|
-
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 ||
|
|
17645
|
+
const testInputMessages = resolveInputMessages(testCaseConfig, effectiveSuiteInputFiles);
|
|
17646
|
+
const expectedMessages = resolveExpectedMessages(testCaseConfig) ?? [];
|
|
17647
|
+
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || testCaseConfig.assertions !== void 0 || testCaseConfig.assert !== void 0;
|
|
17328
17648
|
if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
|
|
17329
17649
|
logError3(
|
|
17330
17650
|
`Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assertions`
|
|
@@ -17371,16 +17691,21 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
17371
17691
|
}
|
|
17372
17692
|
}
|
|
17373
17693
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
17374
|
-
const
|
|
17694
|
+
const testCaseEvaluatorKind = coerceEvaluator(testCaseConfig.evaluator, id) ?? globalEvaluator;
|
|
17375
17695
|
let evaluators;
|
|
17376
17696
|
try {
|
|
17377
|
-
evaluators = await parseEvaluators(
|
|
17697
|
+
evaluators = await parseEvaluators(
|
|
17698
|
+
testCaseConfig,
|
|
17699
|
+
globalExecution,
|
|
17700
|
+
searchRoots,
|
|
17701
|
+
id ?? "unknown"
|
|
17702
|
+
);
|
|
17378
17703
|
} catch (error) {
|
|
17379
17704
|
const message = error instanceof Error ? error.message : String(error);
|
|
17380
17705
|
logError3(`Skipping test '${id}': ${message}`);
|
|
17381
17706
|
continue;
|
|
17382
17707
|
}
|
|
17383
|
-
const inlineRubrics =
|
|
17708
|
+
const inlineRubrics = testCaseConfig.rubrics;
|
|
17384
17709
|
if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
|
|
17385
17710
|
const rubricEvaluator = parseInlineRubrics(inlineRubrics);
|
|
17386
17711
|
if (rubricEvaluator) {
|
|
@@ -17389,13 +17714,13 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
17389
17714
|
}
|
|
17390
17715
|
warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
|
|
17391
17716
|
const userFilePaths = collectResolvedInputFilePaths(inputMessages);
|
|
17392
|
-
const caseWorkspace = await resolveWorkspaceConfig(
|
|
17717
|
+
const caseWorkspace = await resolveWorkspaceConfig(testCaseConfig.workspace, evalFileDir);
|
|
17393
17718
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
17394
|
-
const metadata = isJsonObject(
|
|
17395
|
-
const caseTargets = extractTargetsFromTestCase(
|
|
17719
|
+
const metadata = isJsonObject(testCaseConfig.metadata) ? testCaseConfig.metadata : void 0;
|
|
17720
|
+
const caseTargets = extractTargetsFromTestCase(testCaseConfig);
|
|
17396
17721
|
const testCase = {
|
|
17397
17722
|
id,
|
|
17398
|
-
|
|
17723
|
+
suite: suiteName,
|
|
17399
17724
|
category: options?.category,
|
|
17400
17725
|
conversation_id: conversationId,
|
|
17401
17726
|
question,
|
|
@@ -17404,11 +17729,12 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
17404
17729
|
reference_answer: referenceAnswer,
|
|
17405
17730
|
file_paths: userFilePaths,
|
|
17406
17731
|
criteria: outcome ?? "",
|
|
17407
|
-
evaluator:
|
|
17732
|
+
evaluator: testCaseEvaluatorKind,
|
|
17408
17733
|
assertions: evaluators,
|
|
17409
17734
|
workspace: mergedWorkspace,
|
|
17410
17735
|
metadata,
|
|
17411
|
-
targets: caseTargets
|
|
17736
|
+
targets: caseTargets,
|
|
17737
|
+
...caseThreshold !== void 0 ? { threshold: caseThreshold } : {}
|
|
17412
17738
|
};
|
|
17413
17739
|
results.push(testCase);
|
|
17414
17740
|
}
|
|
@@ -17939,7 +18265,7 @@ var AzureProvider = class {
|
|
|
17939
18265
|
};
|
|
17940
18266
|
this.retryConfig = config.retry;
|
|
17941
18267
|
const azure = createAzure(buildAzureOptions(config));
|
|
17942
|
-
this.model = azure.chat(config.deploymentName);
|
|
18268
|
+
this.model = config.apiFormat === "responses" ? azure(config.deploymentName) : azure.chat(config.deploymentName);
|
|
17943
18269
|
}
|
|
17944
18270
|
id;
|
|
17945
18271
|
kind = "azure";
|
|
@@ -18065,7 +18391,9 @@ function buildAzureOptions(config) {
|
|
|
18065
18391
|
const options = {
|
|
18066
18392
|
apiKey: config.apiKey,
|
|
18067
18393
|
apiVersion: config.version,
|
|
18068
|
-
|
|
18394
|
+
// Chat completions still use deployment-scoped Azure URLs for compatibility
|
|
18395
|
+
// with existing deployments. Responses API should use the SDK's v1 path.
|
|
18396
|
+
useDeploymentBasedUrls: config.apiFormat !== "responses"
|
|
18069
18397
|
};
|
|
18070
18398
|
const baseURL = normalizeAzureBaseUrl(config.resourceName);
|
|
18071
18399
|
if (baseURL) {
|
|
@@ -21181,6 +21509,25 @@ var CopilotSdkProvider = class {
|
|
|
21181
21509
|
content: systemPrompt
|
|
21182
21510
|
};
|
|
21183
21511
|
}
|
|
21512
|
+
if (this.config.byokBaseUrl) {
|
|
21513
|
+
const byokType = this.config.byokType ?? "openai";
|
|
21514
|
+
const provider = {
|
|
21515
|
+
type: byokType,
|
|
21516
|
+
baseUrl: normalizeByokBaseUrl(this.config.byokBaseUrl, byokType)
|
|
21517
|
+
};
|
|
21518
|
+
if (this.config.byokBearerToken) {
|
|
21519
|
+
provider.bearerToken = this.config.byokBearerToken;
|
|
21520
|
+
} else if (this.config.byokApiKey) {
|
|
21521
|
+
provider.apiKey = this.config.byokApiKey;
|
|
21522
|
+
}
|
|
21523
|
+
if (this.config.byokWireApi) {
|
|
21524
|
+
provider.wireApi = this.config.byokWireApi;
|
|
21525
|
+
}
|
|
21526
|
+
if (this.config.byokType === "azure" && this.config.byokApiVersion) {
|
|
21527
|
+
provider.azure = { apiVersion: this.config.byokApiVersion };
|
|
21528
|
+
}
|
|
21529
|
+
sessionOptions.provider = provider;
|
|
21530
|
+
}
|
|
21184
21531
|
let session;
|
|
21185
21532
|
try {
|
|
21186
21533
|
session = await client.createSession(sessionOptions);
|
|
@@ -21412,6 +21759,16 @@ function resolveSkillDirectories(cwd) {
|
|
|
21412
21759
|
];
|
|
21413
21760
|
return candidates.filter((dir) => existsSync2(dir));
|
|
21414
21761
|
}
|
|
21762
|
+
function normalizeByokBaseUrl(baseUrl, type) {
|
|
21763
|
+
const trimmed = baseUrl.trim().replace(/\/+$/, "");
|
|
21764
|
+
if (/^https?:\/\//i.test(trimmed)) {
|
|
21765
|
+
return trimmed;
|
|
21766
|
+
}
|
|
21767
|
+
if (type === "azure") {
|
|
21768
|
+
return `https://${trimmed}.openai.azure.com`;
|
|
21769
|
+
}
|
|
21770
|
+
return trimmed;
|
|
21771
|
+
}
|
|
21415
21772
|
function summarizeSdkEvent(eventType, data) {
|
|
21416
21773
|
if (!data || typeof data !== "object") {
|
|
21417
21774
|
return eventType;
|
|
@@ -21575,6 +21932,22 @@ function extractAzureResourceName(baseUrl) {
|
|
|
21575
21932
|
if (urlMatch) return urlMatch[1];
|
|
21576
21933
|
return baseUrl;
|
|
21577
21934
|
}
|
|
21935
|
+
function normalizeAzureSdkBaseUrl(baseUrl) {
|
|
21936
|
+
const trimmed = baseUrl.trim().replace(/\/+$/, "");
|
|
21937
|
+
if (!trimmed) {
|
|
21938
|
+
return trimmed;
|
|
21939
|
+
}
|
|
21940
|
+
if (!/^https?:\/\//i.test(trimmed)) {
|
|
21941
|
+
return `https://${trimmed}.openai.azure.com/openai/v1`;
|
|
21942
|
+
}
|
|
21943
|
+
if (/\/openai\/v1$/i.test(trimmed)) {
|
|
21944
|
+
return trimmed;
|
|
21945
|
+
}
|
|
21946
|
+
if (/\/openai$/i.test(trimmed)) {
|
|
21947
|
+
return `${trimmed}/v1`;
|
|
21948
|
+
}
|
|
21949
|
+
return `${trimmed}/openai/v1`;
|
|
21950
|
+
}
|
|
21578
21951
|
function extractPiTextContent(content) {
|
|
21579
21952
|
if (typeof content === "string") {
|
|
21580
21953
|
return content;
|
|
@@ -22397,6 +22770,30 @@ async function defaultPiRunner(options) {
|
|
|
22397
22770
|
});
|
|
22398
22771
|
});
|
|
22399
22772
|
}
|
|
22773
|
+
var logged = false;
|
|
22774
|
+
function getAgentvHome() {
|
|
22775
|
+
const envHome = process.env.AGENTV_HOME;
|
|
22776
|
+
if (envHome && envHome !== "undefined") {
|
|
22777
|
+
if (!logged) {
|
|
22778
|
+
logged = true;
|
|
22779
|
+
console.warn(`Using AGENTV_HOME: ${envHome}`);
|
|
22780
|
+
}
|
|
22781
|
+
return envHome;
|
|
22782
|
+
}
|
|
22783
|
+
return path20.join(os2.homedir(), ".agentv");
|
|
22784
|
+
}
|
|
22785
|
+
function getWorkspacesRoot() {
|
|
22786
|
+
return path20.join(getAgentvHome(), "workspaces");
|
|
22787
|
+
}
|
|
22788
|
+
function getSubagentsRoot() {
|
|
22789
|
+
return path20.join(getAgentvHome(), "subagents");
|
|
22790
|
+
}
|
|
22791
|
+
function getTraceStateRoot() {
|
|
22792
|
+
return path20.join(getAgentvHome(), "trace-state");
|
|
22793
|
+
}
|
|
22794
|
+
function getWorkspacePoolRoot() {
|
|
22795
|
+
return path20.join(getAgentvHome(), "workspace-pool");
|
|
22796
|
+
}
|
|
22400
22797
|
var piCodingAgentModule = null;
|
|
22401
22798
|
var piAiModule = null;
|
|
22402
22799
|
var loadingPromise = null;
|
|
@@ -22414,46 +22811,126 @@ async function promptInstall() {
|
|
|
22414
22811
|
rl.close();
|
|
22415
22812
|
}
|
|
22416
22813
|
}
|
|
22417
|
-
function
|
|
22418
|
-
|
|
22419
|
-
|
|
22420
|
-
|
|
22814
|
+
function findManagedSdkInstallRoot() {
|
|
22815
|
+
return path21.join(getAgentvHome(), "deps", "pi-sdk");
|
|
22816
|
+
}
|
|
22817
|
+
function resolveGlobalNpmRoot() {
|
|
22818
|
+
try {
|
|
22819
|
+
const root = execSync2("npm root -g", {
|
|
22820
|
+
encoding: "utf-8",
|
|
22821
|
+
stdio: ["ignore", "pipe", "ignore"]
|
|
22822
|
+
}).trim();
|
|
22823
|
+
return root.length > 0 ? root : void 0;
|
|
22824
|
+
} catch {
|
|
22825
|
+
return void 0;
|
|
22826
|
+
}
|
|
22827
|
+
}
|
|
22828
|
+
function buildGlobalModuleEntry(moduleName, globalNpmRoot) {
|
|
22829
|
+
return path21.join(globalNpmRoot, ...moduleName.split("/"), "dist", "index.js");
|
|
22830
|
+
}
|
|
22831
|
+
function findAccessiblePath(paths) {
|
|
22832
|
+
for (const candidate of paths) {
|
|
22421
22833
|
try {
|
|
22422
|
-
|
|
22423
|
-
|
|
22424
|
-
return dir;
|
|
22834
|
+
accessSync2(candidate);
|
|
22835
|
+
return candidate;
|
|
22425
22836
|
} catch {
|
|
22426
|
-
const parent = path20.dirname(dir);
|
|
22427
|
-
if (parent === dir) break;
|
|
22428
|
-
dir = parent;
|
|
22429
22837
|
}
|
|
22430
22838
|
}
|
|
22431
|
-
return
|
|
22839
|
+
return void 0;
|
|
22432
22840
|
}
|
|
22433
|
-
async function
|
|
22841
|
+
async function tryImportLocalSdkModules() {
|
|
22434
22842
|
try {
|
|
22435
22843
|
[piCodingAgentModule, piAiModule] = await Promise.all([
|
|
22436
22844
|
import("@mariozechner/pi-coding-agent"),
|
|
22437
22845
|
import("@mariozechner/pi-ai")
|
|
22438
22846
|
]);
|
|
22847
|
+
return true;
|
|
22439
22848
|
} catch {
|
|
22440
|
-
|
|
22441
|
-
|
|
22442
|
-
|
|
22443
|
-
|
|
22444
|
-
|
|
22445
|
-
|
|
22446
|
-
|
|
22447
|
-
|
|
22448
|
-
|
|
22449
|
-
|
|
22450
|
-
|
|
22451
|
-
|
|
22452
|
-
|
|
22453
|
-
|
|
22454
|
-
|
|
22849
|
+
return false;
|
|
22850
|
+
}
|
|
22851
|
+
}
|
|
22852
|
+
async function tryImportManagedSdkModules() {
|
|
22853
|
+
const managedRoot = findManagedSdkInstallRoot();
|
|
22854
|
+
const piCodingAgentEntry = findAccessiblePath([
|
|
22855
|
+
path21.join(managedRoot, "node_modules", "@mariozechner", "pi-coding-agent", "dist", "index.js")
|
|
22856
|
+
]);
|
|
22857
|
+
const piAiEntry = findAccessiblePath([
|
|
22858
|
+
path21.join(managedRoot, "node_modules", "@mariozechner", "pi-ai", "dist", "index.js"),
|
|
22859
|
+
path21.join(
|
|
22860
|
+
managedRoot,
|
|
22861
|
+
"node_modules",
|
|
22862
|
+
"@mariozechner",
|
|
22863
|
+
"pi-coding-agent",
|
|
22864
|
+
"node_modules",
|
|
22865
|
+
"@mariozechner",
|
|
22866
|
+
"pi-ai",
|
|
22867
|
+
"dist",
|
|
22868
|
+
"index.js"
|
|
22869
|
+
)
|
|
22870
|
+
]);
|
|
22871
|
+
if (!piCodingAgentEntry || !piAiEntry) return false;
|
|
22872
|
+
try {
|
|
22873
|
+
[piCodingAgentModule, piAiModule] = await Promise.all([
|
|
22874
|
+
import(pathToFileURL(piCodingAgentEntry).href),
|
|
22875
|
+
import(pathToFileURL(piAiEntry).href)
|
|
22876
|
+
]);
|
|
22877
|
+
return true;
|
|
22878
|
+
} catch {
|
|
22879
|
+
return false;
|
|
22880
|
+
}
|
|
22881
|
+
}
|
|
22882
|
+
async function tryImportGlobalSdkModules() {
|
|
22883
|
+
const globalNpmRoot = resolveGlobalNpmRoot();
|
|
22884
|
+
if (!globalNpmRoot) return false;
|
|
22885
|
+
const piCodingAgentEntry = findAccessiblePath([
|
|
22886
|
+
buildGlobalModuleEntry("@mariozechner/pi-coding-agent", globalNpmRoot)
|
|
22887
|
+
]);
|
|
22888
|
+
const piAiEntry = findAccessiblePath([
|
|
22889
|
+
buildGlobalModuleEntry("@mariozechner/pi-ai", globalNpmRoot),
|
|
22890
|
+
path21.join(
|
|
22891
|
+
globalNpmRoot,
|
|
22892
|
+
"@mariozechner",
|
|
22893
|
+
"pi-coding-agent",
|
|
22894
|
+
"node_modules",
|
|
22895
|
+
"@mariozechner",
|
|
22896
|
+
"pi-ai",
|
|
22897
|
+
"dist",
|
|
22898
|
+
"index.js"
|
|
22899
|
+
)
|
|
22900
|
+
]);
|
|
22901
|
+
if (!piCodingAgentEntry || !piAiEntry) return false;
|
|
22902
|
+
try {
|
|
22903
|
+
[piCodingAgentModule, piAiModule] = await Promise.all([
|
|
22904
|
+
import(pathToFileURL(piCodingAgentEntry).href),
|
|
22905
|
+
import(pathToFileURL(piAiEntry).href)
|
|
22906
|
+
]);
|
|
22907
|
+
return true;
|
|
22908
|
+
} catch {
|
|
22909
|
+
return false;
|
|
22910
|
+
}
|
|
22911
|
+
}
|
|
22912
|
+
function installSdkModules(installDir) {
|
|
22913
|
+
console.error(`Installing @mariozechner/pi-coding-agent into ${installDir} via npm...`);
|
|
22914
|
+
mkdirSync(installDir, { recursive: true });
|
|
22915
|
+
execSync2("npm install --no-save --no-package-lock @mariozechner/pi-coding-agent", {
|
|
22916
|
+
cwd: installDir,
|
|
22917
|
+
stdio: "inherit"
|
|
22918
|
+
});
|
|
22919
|
+
}
|
|
22920
|
+
async function doLoadSdkModules() {
|
|
22921
|
+
if (await tryImportLocalSdkModules() || await tryImportManagedSdkModules() || await tryImportGlobalSdkModules()) {
|
|
22922
|
+
return;
|
|
22923
|
+
}
|
|
22924
|
+
if (await promptInstall()) {
|
|
22925
|
+
const installDir = findManagedSdkInstallRoot();
|
|
22926
|
+
installSdkModules(installDir);
|
|
22927
|
+
if (await tryImportManagedSdkModules()) {
|
|
22928
|
+
return;
|
|
22455
22929
|
}
|
|
22456
22930
|
}
|
|
22931
|
+
throw new Error(
|
|
22932
|
+
"pi-coding-agent SDK is not installed. Install it with:\n npm install @mariozechner/pi-coding-agent"
|
|
22933
|
+
);
|
|
22457
22934
|
}
|
|
22458
22935
|
async function loadSdkModules() {
|
|
22459
22936
|
if (!piCodingAgentModule || !piAiModule) {
|
|
@@ -22510,12 +22987,16 @@ var PiCodingAgentProvider = class {
|
|
|
22510
22987
|
try {
|
|
22511
22988
|
const cwd = this.resolveCwd(request.cwd);
|
|
22512
22989
|
const rawProvider = this.config.subprovider ?? "google";
|
|
22513
|
-
const
|
|
22990
|
+
const normalizedBaseUrl = this.normalizeSdkBaseUrl(rawProvider, this.config.baseUrl);
|
|
22991
|
+
const hasBaseUrl = !!normalizedBaseUrl;
|
|
22514
22992
|
const providerName = resolveSubprovider(rawProvider, hasBaseUrl);
|
|
22515
22993
|
const modelId = this.config.model ?? "gemini-2.5-flash";
|
|
22516
22994
|
this.setApiKeyEnv(rawProvider, hasBaseUrl);
|
|
22517
|
-
this.setBaseUrlEnv(rawProvider, hasBaseUrl);
|
|
22995
|
+
this.setBaseUrlEnv(rawProvider, normalizedBaseUrl, hasBaseUrl);
|
|
22518
22996
|
let model = sdk.getModel(providerName, modelId);
|
|
22997
|
+
if (model && normalizedBaseUrl) {
|
|
22998
|
+
model = { ...model, baseUrl: normalizedBaseUrl };
|
|
22999
|
+
}
|
|
22519
23000
|
if (!model) {
|
|
22520
23001
|
const envProvider = providerName.replace(/-responses$/, "");
|
|
22521
23002
|
model = {
|
|
@@ -22523,7 +23004,7 @@ var PiCodingAgentProvider = class {
|
|
|
22523
23004
|
name: modelId,
|
|
22524
23005
|
api: providerName,
|
|
22525
23006
|
provider: envProvider,
|
|
22526
|
-
baseUrl:
|
|
23007
|
+
baseUrl: normalizedBaseUrl ?? "",
|
|
22527
23008
|
reasoning: false,
|
|
22528
23009
|
input: ["text"],
|
|
22529
23010
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
@@ -22690,19 +23171,27 @@ ${fileList}`;
|
|
|
22690
23171
|
}
|
|
22691
23172
|
}
|
|
22692
23173
|
/** Maps config baseUrl to the provider-specific env var the SDK reads. */
|
|
22693
|
-
setBaseUrlEnv(providerName, hasBaseUrl = false) {
|
|
22694
|
-
|
|
23174
|
+
setBaseUrlEnv(providerName, baseUrl = this.config.baseUrl, hasBaseUrl = false) {
|
|
23175
|
+
const normalizedBaseUrl = this.normalizeSdkBaseUrl(providerName, baseUrl);
|
|
23176
|
+
if (!normalizedBaseUrl) return;
|
|
22695
23177
|
const envKey = resolveEnvBaseUrlName(providerName, hasBaseUrl);
|
|
22696
23178
|
if (envKey) {
|
|
22697
|
-
process.env[envKey] =
|
|
23179
|
+
process.env[envKey] = normalizedBaseUrl;
|
|
22698
23180
|
}
|
|
22699
23181
|
}
|
|
23182
|
+
normalizeSdkBaseUrl(providerName, baseUrl) {
|
|
23183
|
+
if (!baseUrl) return void 0;
|
|
23184
|
+
if (providerName.toLowerCase() === "azure") {
|
|
23185
|
+
return normalizeAzureSdkBaseUrl(baseUrl);
|
|
23186
|
+
}
|
|
23187
|
+
return baseUrl;
|
|
23188
|
+
}
|
|
22700
23189
|
resolveCwd(cwdOverride) {
|
|
22701
23190
|
if (cwdOverride) {
|
|
22702
|
-
return
|
|
23191
|
+
return path21.resolve(cwdOverride);
|
|
22703
23192
|
}
|
|
22704
23193
|
if (this.config.cwd) {
|
|
22705
|
-
return
|
|
23194
|
+
return path21.resolve(this.config.cwd);
|
|
22706
23195
|
}
|
|
22707
23196
|
return process.cwd();
|
|
22708
23197
|
}
|
|
@@ -22721,9 +23210,9 @@ ${fileList}`;
|
|
|
22721
23210
|
}
|
|
22722
23211
|
resolveLogDirectory() {
|
|
22723
23212
|
if (this.config.logDir) {
|
|
22724
|
-
return
|
|
23213
|
+
return path21.resolve(this.config.logDir);
|
|
22725
23214
|
}
|
|
22726
|
-
return
|
|
23215
|
+
return path21.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
22727
23216
|
}
|
|
22728
23217
|
async createStreamLogger(request) {
|
|
22729
23218
|
const logDir = this.resolveLogDirectory();
|
|
@@ -22737,7 +23226,7 @@ ${fileList}`;
|
|
|
22737
23226
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
22738
23227
|
return void 0;
|
|
22739
23228
|
}
|
|
22740
|
-
const filePath =
|
|
23229
|
+
const filePath = path21.join(logDir, buildLogFilename6(request, this.targetName));
|
|
22741
23230
|
try {
|
|
22742
23231
|
const logger = await PiStreamLogger2.create({
|
|
22743
23232
|
filePath,
|
|
@@ -22961,7 +23450,7 @@ async function readDirEntries(target) {
|
|
|
22961
23450
|
const entries = await readdir2(target, { withFileTypes: true });
|
|
22962
23451
|
return entries.map((entry) => ({
|
|
22963
23452
|
name: entry.name,
|
|
22964
|
-
absolutePath:
|
|
23453
|
+
absolutePath: path222.join(target, entry.name),
|
|
22965
23454
|
isDirectory: entry.isDirectory()
|
|
22966
23455
|
}));
|
|
22967
23456
|
}
|
|
@@ -22975,7 +23464,7 @@ async function removeIfExists(target) {
|
|
|
22975
23464
|
}
|
|
22976
23465
|
}
|
|
22977
23466
|
function pathToFileUri2(filePath) {
|
|
22978
|
-
const absolutePath =
|
|
23467
|
+
const absolutePath = path23.isAbsolute(filePath) ? filePath : path23.resolve(filePath);
|
|
22979
23468
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
22980
23469
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
22981
23470
|
return `file:///${normalizedPath}`;
|
|
@@ -23067,8 +23556,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
23067
23556
|
});
|
|
23068
23557
|
}
|
|
23069
23558
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
23070
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
23071
|
-
const responseList = responseFiles.map((file) => `"${
|
|
23559
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path24.basename(file)}`).join("\n");
|
|
23560
|
+
const responseList = responseFiles.map((file) => `"${path24.basename(file)}"`).join(", ");
|
|
23072
23561
|
return renderTemplate2(templateContent, {
|
|
23073
23562
|
requestFiles: requestLines,
|
|
23074
23563
|
responseList
|
|
@@ -23128,7 +23617,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
23128
23617
|
}
|
|
23129
23618
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
23130
23619
|
if (!silent) {
|
|
23131
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
23620
|
+
const fileList = responseFilesFinal.map((file) => path25.basename(file)).join(", ");
|
|
23132
23621
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
23133
23622
|
}
|
|
23134
23623
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -23137,7 +23626,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
23137
23626
|
while (pending.size > 0) {
|
|
23138
23627
|
if (Date.now() >= deadline) {
|
|
23139
23628
|
if (!silent) {
|
|
23140
|
-
const remaining = [...pending].map((f) =>
|
|
23629
|
+
const remaining = [...pending].map((f) => path25.basename(f)).join(", ");
|
|
23141
23630
|
console.error(
|
|
23142
23631
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
23143
23632
|
);
|
|
@@ -23184,30 +23673,6 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
23184
23673
|
}
|
|
23185
23674
|
return true;
|
|
23186
23675
|
}
|
|
23187
|
-
var logged = false;
|
|
23188
|
-
function getAgentvHome() {
|
|
23189
|
-
const envHome = process.env.AGENTV_HOME;
|
|
23190
|
-
if (envHome && envHome !== "undefined") {
|
|
23191
|
-
if (!logged) {
|
|
23192
|
-
logged = true;
|
|
23193
|
-
console.warn(`Using AGENTV_HOME: ${envHome}`);
|
|
23194
|
-
}
|
|
23195
|
-
return envHome;
|
|
23196
|
-
}
|
|
23197
|
-
return path25.join(os2.homedir(), ".agentv");
|
|
23198
|
-
}
|
|
23199
|
-
function getWorkspacesRoot() {
|
|
23200
|
-
return path25.join(getAgentvHome(), "workspaces");
|
|
23201
|
-
}
|
|
23202
|
-
function getSubagentsRoot() {
|
|
23203
|
-
return path25.join(getAgentvHome(), "subagents");
|
|
23204
|
-
}
|
|
23205
|
-
function getTraceStateRoot() {
|
|
23206
|
-
return path25.join(getAgentvHome(), "trace-state");
|
|
23207
|
-
}
|
|
23208
|
-
function getWorkspacePoolRoot() {
|
|
23209
|
-
return path25.join(getAgentvHome(), "workspace-pool");
|
|
23210
|
-
}
|
|
23211
23676
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
23212
23677
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
23213
23678
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
@@ -24428,9 +24893,10 @@ function resolveAndCreateProvider(definition, env = process.env) {
|
|
|
24428
24893
|
const resolved = resolveTargetDefinition(definition, env);
|
|
24429
24894
|
return createProvider(resolved);
|
|
24430
24895
|
}
|
|
24431
|
-
var
|
|
24432
|
-
|
|
24433
|
-
|
|
24896
|
+
var DEFAULT_THRESHOLD = 0.8;
|
|
24897
|
+
var PASS_THRESHOLD = DEFAULT_THRESHOLD;
|
|
24898
|
+
function scoreToVerdict(score, threshold = DEFAULT_THRESHOLD) {
|
|
24899
|
+
return score >= threshold ? "pass" : "fail";
|
|
24434
24900
|
}
|
|
24435
24901
|
function clampScore(value) {
|
|
24436
24902
|
if (Number.isNaN(value) || !Number.isFinite(value)) {
|
|
@@ -24612,13 +25078,13 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
24612
25078
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
24613
25079
|
const { mkdir: mkdir16, readFile: readFile15, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
24614
25080
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
24615
|
-
const
|
|
25081
|
+
const path50 = await import("node:path");
|
|
24616
25082
|
const { randomUUID: randomUUID10 } = await import("node:crypto");
|
|
24617
|
-
const dir =
|
|
25083
|
+
const dir = path50.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
24618
25084
|
await mkdir16(dir, { recursive: true });
|
|
24619
|
-
const stdinPath =
|
|
24620
|
-
const stdoutPath =
|
|
24621
|
-
const stderrPath =
|
|
25085
|
+
const stdinPath = path50.join(dir, "stdin.txt");
|
|
25086
|
+
const stdoutPath = path50.join(dir, "stdout.txt");
|
|
25087
|
+
const stderrPath = path50.join(dir, "stderr.txt");
|
|
24622
25088
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
24623
25089
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
24624
25090
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -25799,7 +26265,7 @@ ${outputSchema2}`;
|
|
|
25799
26265
|
parts.push("[[ ## scoring_criteria ## ]]");
|
|
25800
26266
|
for (const rubric of rubrics) {
|
|
25801
26267
|
const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
|
|
25802
|
-
const minScoreLabel = rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
|
|
26268
|
+
const minScoreLabel = rubric.min_score !== void 0 ? ` [REQUIRED: min score ${rubric.min_score}]` : rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
|
|
25803
26269
|
parts.push("", `### Criterion: ${rubric.id}${weightLabel}${minScoreLabel}`);
|
|
25804
26270
|
if (rubric.outcome) {
|
|
25805
26271
|
parts.push(`Description: ${rubric.outcome}`);
|
|
@@ -25853,54 +26319,106 @@ ${outputSchema2}`;
|
|
|
25853
26319
|
async runWithRetry(options) {
|
|
25854
26320
|
const { context: context2, graderProvider, systemPrompt, userPrompt, schema, images } = options;
|
|
25855
26321
|
let lastError;
|
|
26322
|
+
let lastInvalidResponse;
|
|
26323
|
+
let shouldAttemptStructureFix = false;
|
|
25856
26324
|
for (let attempt = 1; attempt <= 3; attempt++) {
|
|
25857
26325
|
try {
|
|
25858
|
-
const
|
|
25859
|
-
|
|
25860
|
-
|
|
25861
|
-
|
|
25862
|
-
|
|
25863
|
-
|
|
25864
|
-
|
|
25865
|
-
|
|
25866
|
-
|
|
25867
|
-
|
|
25868
|
-
|
|
25869
|
-
|
|
25870
|
-
|
|
25871
|
-
|
|
25872
|
-
|
|
25873
|
-
|
|
25874
|
-
]
|
|
25875
|
-
}
|
|
25876
|
-
],
|
|
25877
|
-
...modelOptions
|
|
25878
|
-
}) : await generateText({
|
|
25879
|
-
model,
|
|
25880
|
-
system: systemPrompt,
|
|
25881
|
-
prompt: userPrompt,
|
|
25882
|
-
...modelOptions
|
|
25883
|
-
});
|
|
25884
|
-
const data2 = schema.parse(parseJsonFromText(result.text));
|
|
25885
|
-
const rawUsage = result.usage;
|
|
25886
|
-
const tokenUsage = rawUsage?.inputTokens != null && rawUsage?.outputTokens != null ? { input: rawUsage.inputTokens, output: rawUsage.outputTokens } : void 0;
|
|
25887
|
-
return { data: data2, tokenUsage };
|
|
26326
|
+
const result = await this.generateStructuredResponse({
|
|
26327
|
+
context: context2,
|
|
26328
|
+
graderProvider,
|
|
26329
|
+
systemPrompt,
|
|
26330
|
+
userPrompt,
|
|
26331
|
+
images
|
|
26332
|
+
});
|
|
26333
|
+
const canRepairResponse = result.text.trim().length > 0;
|
|
26334
|
+
lastInvalidResponse = canRepairResponse ? result : void 0;
|
|
26335
|
+
let data;
|
|
26336
|
+
try {
|
|
26337
|
+
data = schema.parse(parseJsonFromText(result.text));
|
|
26338
|
+
} catch (e) {
|
|
26339
|
+
lastError = e instanceof Error ? e : new Error(String(e));
|
|
26340
|
+
shouldAttemptStructureFix = canRepairResponse;
|
|
26341
|
+
continue;
|
|
25888
26342
|
}
|
|
25889
|
-
|
|
25890
|
-
|
|
26343
|
+
return {
|
|
26344
|
+
data,
|
|
26345
|
+
providerResponse: result.providerResponse,
|
|
26346
|
+
tokenUsage: result.tokenUsage
|
|
26347
|
+
};
|
|
26348
|
+
} catch (e) {
|
|
26349
|
+
lastError = e instanceof Error ? e : new Error(String(e));
|
|
26350
|
+
}
|
|
26351
|
+
}
|
|
26352
|
+
if (shouldAttemptStructureFix && lastInvalidResponse) {
|
|
26353
|
+
try {
|
|
26354
|
+
const repaired = await this.generateStructuredResponse({
|
|
26355
|
+
context: context2,
|
|
26356
|
+
graderProvider,
|
|
25891
26357
|
systemPrompt,
|
|
25892
|
-
|
|
25893
|
-
|
|
25894
|
-
|
|
25895
|
-
|
|
26358
|
+
userPrompt: buildStructureRepairPrompt({
|
|
26359
|
+
validationError: lastError?.message ?? "Schema validation failed",
|
|
26360
|
+
invalidResponse: lastInvalidResponse.text
|
|
26361
|
+
})
|
|
25896
26362
|
});
|
|
25897
|
-
const data = schema.parse(parseJsonFromText(
|
|
25898
|
-
return {
|
|
26363
|
+
const data = schema.parse(parseJsonFromText(repaired.text));
|
|
26364
|
+
return {
|
|
26365
|
+
data,
|
|
26366
|
+
providerResponse: repaired.providerResponse,
|
|
26367
|
+
tokenUsage: sumTokenUsage(lastInvalidResponse.tokenUsage, repaired.tokenUsage)
|
|
26368
|
+
};
|
|
25899
26369
|
} catch (e) {
|
|
25900
26370
|
lastError = e instanceof Error ? e : new Error(String(e));
|
|
25901
26371
|
}
|
|
25902
26372
|
}
|
|
25903
|
-
throw new Error(
|
|
26373
|
+
throw new Error(
|
|
26374
|
+
`Failed to parse evaluator response after 3 attempts and 1 structure-fix attempt: ${lastError?.message}`
|
|
26375
|
+
);
|
|
26376
|
+
}
|
|
26377
|
+
async generateStructuredResponse(options) {
|
|
26378
|
+
const { context: context2, graderProvider, systemPrompt, userPrompt, images } = options;
|
|
26379
|
+
const model = graderProvider.asLanguageModel?.();
|
|
26380
|
+
if (model) {
|
|
26381
|
+
const modelOptions = {
|
|
26382
|
+
...this.maxOutputTokens ? { maxTokens: this.maxOutputTokens } : {},
|
|
26383
|
+
...typeof this.temperature === "number" ? { temperature: this.temperature } : {}
|
|
26384
|
+
};
|
|
26385
|
+
const hasImages = images && images.length > 0;
|
|
26386
|
+
const result = hasImages ? await generateText({
|
|
26387
|
+
model,
|
|
26388
|
+
system: systemPrompt,
|
|
26389
|
+
messages: [
|
|
26390
|
+
{
|
|
26391
|
+
role: "user",
|
|
26392
|
+
content: [
|
|
26393
|
+
{ type: "text", text: userPrompt },
|
|
26394
|
+
...toAiSdkImageParts(images)
|
|
26395
|
+
]
|
|
26396
|
+
}
|
|
26397
|
+
],
|
|
26398
|
+
...modelOptions
|
|
26399
|
+
}) : await generateText({
|
|
26400
|
+
model,
|
|
26401
|
+
system: systemPrompt,
|
|
26402
|
+
prompt: userPrompt,
|
|
26403
|
+
...modelOptions
|
|
26404
|
+
});
|
|
26405
|
+
const rawUsage = result.usage;
|
|
26406
|
+
const tokenUsage = rawUsage?.inputTokens != null && rawUsage?.outputTokens != null ? { input: rawUsage.inputTokens, output: rawUsage.outputTokens } : void 0;
|
|
26407
|
+
return { text: result.text, tokenUsage };
|
|
26408
|
+
}
|
|
26409
|
+
const response = await graderProvider.invoke({
|
|
26410
|
+
question: userPrompt,
|
|
26411
|
+
systemPrompt,
|
|
26412
|
+
evalCaseId: context2.evalCase.id,
|
|
26413
|
+
attempt: context2.attempt,
|
|
26414
|
+
maxOutputTokens: this.maxOutputTokens,
|
|
26415
|
+
temperature: this.temperature
|
|
26416
|
+
});
|
|
26417
|
+
return {
|
|
26418
|
+
text: extractLastAssistantContent(response.output),
|
|
26419
|
+
providerResponse: response,
|
|
26420
|
+
tokenUsage: response.tokenUsage
|
|
26421
|
+
};
|
|
25904
26422
|
}
|
|
25905
26423
|
};
|
|
25906
26424
|
function buildOutputSchema() {
|
|
@@ -25920,6 +26438,29 @@ function buildOutputSchema() {
|
|
|
25920
26438
|
"}"
|
|
25921
26439
|
].join("\n");
|
|
25922
26440
|
}
|
|
26441
|
+
function buildStructureRepairPrompt(options) {
|
|
26442
|
+
const { validationError, invalidResponse } = options;
|
|
26443
|
+
return [
|
|
26444
|
+
"The following evaluation response has useful grading content but invalid JSON structure.",
|
|
26445
|
+
"Repair it to satisfy the schema in the system prompt.",
|
|
26446
|
+
"Preserve the evaluation meaning, do not re-grade the answer, and return only a single JSON object.",
|
|
26447
|
+
"",
|
|
26448
|
+
"Validation error:",
|
|
26449
|
+
validationError,
|
|
26450
|
+
"",
|
|
26451
|
+
"Invalid response:",
|
|
26452
|
+
invalidResponse
|
|
26453
|
+
].join("\n");
|
|
26454
|
+
}
|
|
26455
|
+
function sumTokenUsage(first, second) {
|
|
26456
|
+
if (!first && !second) {
|
|
26457
|
+
return void 0;
|
|
26458
|
+
}
|
|
26459
|
+
return {
|
|
26460
|
+
input: (first?.input ?? 0) + (second?.input ?? 0),
|
|
26461
|
+
output: (first?.output ?? 0) + (second?.output ?? 0)
|
|
26462
|
+
};
|
|
26463
|
+
}
|
|
25923
26464
|
function buildRubricOutputSchema() {
|
|
25924
26465
|
return `You are an expert evaluator. Evaluate the candidate answer against each rubric item.
|
|
25925
26466
|
You must return a valid JSON object matching this schema:
|
|
@@ -26019,19 +26560,21 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
26019
26560
|
rawScores[rubric.id] = rawScore;
|
|
26020
26561
|
totalWeight += rubric.weight;
|
|
26021
26562
|
weightedScoreSum += normalizedScore * rubric.weight;
|
|
26022
|
-
let
|
|
26023
|
-
if (rubric.
|
|
26024
|
-
|
|
26563
|
+
let minScoreThreshold;
|
|
26564
|
+
if (rubric.min_score !== void 0) {
|
|
26565
|
+
minScoreThreshold = rubric.min_score;
|
|
26566
|
+
} else if (rubric.required_min_score !== void 0) {
|
|
26567
|
+
minScoreThreshold = rubric.required_min_score / 10;
|
|
26025
26568
|
} else if (rubric.required === true) {
|
|
26026
|
-
|
|
26569
|
+
minScoreThreshold = 1;
|
|
26027
26570
|
}
|
|
26028
26571
|
const matchingRange = rubric.score_ranges?.find(
|
|
26029
26572
|
(r) => rawScore >= r.score_range[0] && rawScore <= r.score_range[1]
|
|
26030
26573
|
);
|
|
26031
26574
|
const rangeDescription = matchingRange?.outcome ?? "";
|
|
26032
26575
|
const criterionLabel = rubric.outcome ?? rubric.id;
|
|
26033
|
-
const passed = !(
|
|
26034
|
-
if (
|
|
26576
|
+
const passed = !(minScoreThreshold !== void 0 && normalizedScore < minScoreThreshold) && rawScore >= 7;
|
|
26577
|
+
if (minScoreThreshold !== void 0 && normalizedScore < minScoreThreshold) {
|
|
26035
26578
|
failedRequired = true;
|
|
26036
26579
|
}
|
|
26037
26580
|
assertions.push({
|
|
@@ -26108,11 +26651,11 @@ function createFilesystemTools(workspacePath) {
|
|
|
26108
26651
|
execute: async (input) => {
|
|
26109
26652
|
try {
|
|
26110
26653
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
26111
|
-
const
|
|
26112
|
-
if (
|
|
26654
|
+
const stat11 = await fs2.stat(resolved);
|
|
26655
|
+
if (stat11.isDirectory()) {
|
|
26113
26656
|
return { error: `'${input.path}' is a directory, not a file` };
|
|
26114
26657
|
}
|
|
26115
|
-
const buffer = Buffer.alloc(Math.min(
|
|
26658
|
+
const buffer = Buffer.alloc(Math.min(stat11.size, MAX_FILE_SIZE));
|
|
26116
26659
|
const fd = await fs2.open(resolved, "r");
|
|
26117
26660
|
try {
|
|
26118
26661
|
await fd.read(buffer, 0, buffer.length, 0);
|
|
@@ -26120,8 +26663,8 @@ function createFilesystemTools(workspacePath) {
|
|
|
26120
26663
|
await fd.close();
|
|
26121
26664
|
}
|
|
26122
26665
|
const content = buffer.toString("utf-8");
|
|
26123
|
-
const truncated =
|
|
26124
|
-
return { content, truncated, size:
|
|
26666
|
+
const truncated = stat11.size > MAX_FILE_SIZE;
|
|
26667
|
+
return { content, truncated, size: stat11.size };
|
|
26125
26668
|
} catch (error) {
|
|
26126
26669
|
return { error: error instanceof Error ? error.message : String(error) };
|
|
26127
26670
|
}
|
|
@@ -26172,8 +26715,8 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
26172
26715
|
const ext = path35.extname(entry.name).toLowerCase();
|
|
26173
26716
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
26174
26717
|
try {
|
|
26175
|
-
const
|
|
26176
|
-
if (
|
|
26718
|
+
const stat11 = await fs2.stat(fullPath);
|
|
26719
|
+
if (stat11.size > MAX_FILE_SIZE) continue;
|
|
26177
26720
|
const content = await fs2.readFile(fullPath, "utf-8");
|
|
26178
26721
|
const lines = content.split("\n");
|
|
26179
26722
|
for (let i = 0; i < lines.length; i++) {
|
|
@@ -26806,115 +27349,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
26806
27349
|
* Evaluate a single field against the expected value.
|
|
26807
27350
|
*/
|
|
26808
27351
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
26809
|
-
const { path:
|
|
26810
|
-
const candidateValue = resolvePath(candidateData,
|
|
26811
|
-
const expectedValue = resolvePath(expectedData,
|
|
27352
|
+
const { path: path50, match, required = true, weight = 1 } = fieldConfig;
|
|
27353
|
+
const candidateValue = resolvePath(candidateData, path50);
|
|
27354
|
+
const expectedValue = resolvePath(expectedData, path50);
|
|
26812
27355
|
if (expectedValue === void 0) {
|
|
26813
27356
|
return {
|
|
26814
|
-
path:
|
|
27357
|
+
path: path50,
|
|
26815
27358
|
score: 1,
|
|
26816
27359
|
// No expected value means no comparison needed
|
|
26817
27360
|
weight,
|
|
26818
27361
|
hit: true,
|
|
26819
|
-
message: `${
|
|
27362
|
+
message: `${path50}: no expected value`
|
|
26820
27363
|
};
|
|
26821
27364
|
}
|
|
26822
27365
|
if (candidateValue === void 0) {
|
|
26823
27366
|
if (required) {
|
|
26824
27367
|
return {
|
|
26825
|
-
path:
|
|
27368
|
+
path: path50,
|
|
26826
27369
|
score: 0,
|
|
26827
27370
|
weight,
|
|
26828
27371
|
hit: false,
|
|
26829
|
-
message: `${
|
|
27372
|
+
message: `${path50} (required, missing)`
|
|
26830
27373
|
};
|
|
26831
27374
|
}
|
|
26832
27375
|
return {
|
|
26833
|
-
path:
|
|
27376
|
+
path: path50,
|
|
26834
27377
|
score: 1,
|
|
26835
27378
|
// Don't penalize missing optional fields
|
|
26836
27379
|
weight: 0,
|
|
26837
27380
|
// Zero weight means it won't affect the score
|
|
26838
27381
|
hit: true,
|
|
26839
|
-
message: `${
|
|
27382
|
+
message: `${path50}: optional field missing`
|
|
26840
27383
|
};
|
|
26841
27384
|
}
|
|
26842
27385
|
switch (match) {
|
|
26843
27386
|
case "exact":
|
|
26844
|
-
return this.compareExact(
|
|
27387
|
+
return this.compareExact(path50, candidateValue, expectedValue, weight);
|
|
26845
27388
|
case "numeric_tolerance":
|
|
26846
27389
|
return this.compareNumericTolerance(
|
|
26847
|
-
|
|
27390
|
+
path50,
|
|
26848
27391
|
candidateValue,
|
|
26849
27392
|
expectedValue,
|
|
26850
27393
|
fieldConfig,
|
|
26851
27394
|
weight
|
|
26852
27395
|
);
|
|
26853
27396
|
case "date":
|
|
26854
|
-
return this.compareDate(
|
|
27397
|
+
return this.compareDate(path50, candidateValue, expectedValue, fieldConfig, weight);
|
|
26855
27398
|
default:
|
|
26856
27399
|
return {
|
|
26857
|
-
path:
|
|
27400
|
+
path: path50,
|
|
26858
27401
|
score: 0,
|
|
26859
27402
|
weight,
|
|
26860
27403
|
hit: false,
|
|
26861
|
-
message: `${
|
|
27404
|
+
message: `${path50}: unknown match type "${match}"`
|
|
26862
27405
|
};
|
|
26863
27406
|
}
|
|
26864
27407
|
}
|
|
26865
27408
|
/**
|
|
26866
27409
|
* Exact equality comparison.
|
|
26867
27410
|
*/
|
|
26868
|
-
compareExact(
|
|
27411
|
+
compareExact(path50, candidateValue, expectedValue, weight) {
|
|
26869
27412
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
26870
27413
|
return {
|
|
26871
|
-
path:
|
|
27414
|
+
path: path50,
|
|
26872
27415
|
score: 1,
|
|
26873
27416
|
weight,
|
|
26874
27417
|
hit: true,
|
|
26875
|
-
message:
|
|
27418
|
+
message: path50
|
|
26876
27419
|
};
|
|
26877
27420
|
}
|
|
26878
27421
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
26879
27422
|
return {
|
|
26880
|
-
path:
|
|
27423
|
+
path: path50,
|
|
26881
27424
|
score: 0,
|
|
26882
27425
|
weight,
|
|
26883
27426
|
hit: false,
|
|
26884
|
-
message: `${
|
|
27427
|
+
message: `${path50} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
26885
27428
|
};
|
|
26886
27429
|
}
|
|
26887
27430
|
return {
|
|
26888
|
-
path:
|
|
27431
|
+
path: path50,
|
|
26889
27432
|
score: 0,
|
|
26890
27433
|
weight,
|
|
26891
27434
|
hit: false,
|
|
26892
|
-
message: `${
|
|
27435
|
+
message: `${path50} (value mismatch)`
|
|
26893
27436
|
};
|
|
26894
27437
|
}
|
|
26895
27438
|
/**
|
|
26896
27439
|
* Numeric comparison with absolute or relative tolerance.
|
|
26897
27440
|
*/
|
|
26898
|
-
compareNumericTolerance(
|
|
27441
|
+
compareNumericTolerance(path50, candidateValue, expectedValue, fieldConfig, weight) {
|
|
26899
27442
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
26900
27443
|
const candidateNum = toNumber(candidateValue);
|
|
26901
27444
|
const expectedNum = toNumber(expectedValue);
|
|
26902
27445
|
if (candidateNum === null || expectedNum === null) {
|
|
26903
27446
|
return {
|
|
26904
|
-
path:
|
|
27447
|
+
path: path50,
|
|
26905
27448
|
score: 0,
|
|
26906
27449
|
weight,
|
|
26907
27450
|
hit: false,
|
|
26908
|
-
message: `${
|
|
27451
|
+
message: `${path50} (non-numeric value)`
|
|
26909
27452
|
};
|
|
26910
27453
|
}
|
|
26911
27454
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
26912
27455
|
return {
|
|
26913
|
-
path:
|
|
27456
|
+
path: path50,
|
|
26914
27457
|
score: 0,
|
|
26915
27458
|
weight,
|
|
26916
27459
|
hit: false,
|
|
26917
|
-
message: `${
|
|
27460
|
+
message: `${path50} (invalid numeric value)`
|
|
26918
27461
|
};
|
|
26919
27462
|
}
|
|
26920
27463
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -26927,61 +27470,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
26927
27470
|
}
|
|
26928
27471
|
if (withinTolerance) {
|
|
26929
27472
|
return {
|
|
26930
|
-
path:
|
|
27473
|
+
path: path50,
|
|
26931
27474
|
score: 1,
|
|
26932
27475
|
weight,
|
|
26933
27476
|
hit: true,
|
|
26934
|
-
message: `${
|
|
27477
|
+
message: `${path50} (within tolerance: diff=${diff.toFixed(2)})`
|
|
26935
27478
|
};
|
|
26936
27479
|
}
|
|
26937
27480
|
return {
|
|
26938
|
-
path:
|
|
27481
|
+
path: path50,
|
|
26939
27482
|
score: 0,
|
|
26940
27483
|
weight,
|
|
26941
27484
|
hit: false,
|
|
26942
|
-
message: `${
|
|
27485
|
+
message: `${path50} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
26943
27486
|
};
|
|
26944
27487
|
}
|
|
26945
27488
|
/**
|
|
26946
27489
|
* Date comparison with format normalization.
|
|
26947
27490
|
*/
|
|
26948
|
-
compareDate(
|
|
27491
|
+
compareDate(path50, candidateValue, expectedValue, fieldConfig, weight) {
|
|
26949
27492
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
26950
27493
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
26951
27494
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
26952
27495
|
if (candidateDate === null) {
|
|
26953
27496
|
return {
|
|
26954
|
-
path:
|
|
27497
|
+
path: path50,
|
|
26955
27498
|
score: 0,
|
|
26956
27499
|
weight,
|
|
26957
27500
|
hit: false,
|
|
26958
|
-
message: `${
|
|
27501
|
+
message: `${path50} (unparseable candidate date)`
|
|
26959
27502
|
};
|
|
26960
27503
|
}
|
|
26961
27504
|
if (expectedDate === null) {
|
|
26962
27505
|
return {
|
|
26963
|
-
path:
|
|
27506
|
+
path: path50,
|
|
26964
27507
|
score: 0,
|
|
26965
27508
|
weight,
|
|
26966
27509
|
hit: false,
|
|
26967
|
-
message: `${
|
|
27510
|
+
message: `${path50} (unparseable expected date)`
|
|
26968
27511
|
};
|
|
26969
27512
|
}
|
|
26970
27513
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
26971
27514
|
return {
|
|
26972
|
-
path:
|
|
27515
|
+
path: path50,
|
|
26973
27516
|
score: 1,
|
|
26974
27517
|
weight,
|
|
26975
27518
|
hit: true,
|
|
26976
|
-
message:
|
|
27519
|
+
message: path50
|
|
26977
27520
|
};
|
|
26978
27521
|
}
|
|
26979
27522
|
return {
|
|
26980
|
-
path:
|
|
27523
|
+
path: path50,
|
|
26981
27524
|
score: 0,
|
|
26982
27525
|
weight,
|
|
26983
27526
|
hit: false,
|
|
26984
|
-
message: `${
|
|
27527
|
+
message: `${path50} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
26985
27528
|
};
|
|
26986
27529
|
}
|
|
26987
27530
|
/**
|
|
@@ -27014,11 +27557,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
27014
27557
|
};
|
|
27015
27558
|
}
|
|
27016
27559
|
};
|
|
27017
|
-
function resolvePath(obj,
|
|
27018
|
-
if (!
|
|
27560
|
+
function resolvePath(obj, path50) {
|
|
27561
|
+
if (!path50 || !obj) {
|
|
27019
27562
|
return void 0;
|
|
27020
27563
|
}
|
|
27021
|
-
const parts =
|
|
27564
|
+
const parts = path50.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
27022
27565
|
let current = obj;
|
|
27023
27566
|
for (const part of parts) {
|
|
27024
27567
|
if (current === null || current === void 0) {
|
|
@@ -27500,8 +28043,8 @@ var TokenUsageEvaluator = class {
|
|
|
27500
28043
|
};
|
|
27501
28044
|
}
|
|
27502
28045
|
};
|
|
27503
|
-
function getNestedValue(obj,
|
|
27504
|
-
const parts =
|
|
28046
|
+
function getNestedValue(obj, path50) {
|
|
28047
|
+
const parts = path50.split(".");
|
|
27505
28048
|
let current = obj;
|
|
27506
28049
|
for (const part of parts) {
|
|
27507
28050
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -29224,7 +29767,7 @@ var WorkspacePoolManager = class {
|
|
|
29224
29767
|
}
|
|
29225
29768
|
/**
|
|
29226
29769
|
* Reset an existing slot for reuse:
|
|
29227
|
-
* 1. Reset repos (git reset --hard
|
|
29770
|
+
* 1. Reset repos (fetch from origin when resolve=remote, then git reset --hard && git clean per repo)
|
|
29228
29771
|
* 2. Re-copy template files (skip repo directories)
|
|
29229
29772
|
*/
|
|
29230
29773
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
@@ -29237,7 +29780,17 @@ var WorkspacePoolManager = class {
|
|
|
29237
29780
|
continue;
|
|
29238
29781
|
}
|
|
29239
29782
|
const ref = repo.checkout?.ref ?? "HEAD";
|
|
29240
|
-
|
|
29783
|
+
const resolve2 = repo.checkout?.resolve ?? "remote";
|
|
29784
|
+
if (resolve2 === "remote") {
|
|
29785
|
+
const fetchArgs = ["fetch", "origin", ref];
|
|
29786
|
+
if (repo.clone?.depth) {
|
|
29787
|
+
fetchArgs.splice(1, 0, "--depth", String(repo.clone.depth));
|
|
29788
|
+
}
|
|
29789
|
+
await git(fetchArgs, { cwd: repoDir });
|
|
29790
|
+
await git(["reset", "--hard", "FETCH_HEAD"], { cwd: repoDir });
|
|
29791
|
+
} else {
|
|
29792
|
+
await git(["reset", "--hard", ref], { cwd: repoDir });
|
|
29793
|
+
}
|
|
29241
29794
|
const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
|
|
29242
29795
|
await git(["clean", cleanFlag], { cwd: repoDir });
|
|
29243
29796
|
}
|
|
@@ -29520,7 +30073,7 @@ async function executeWorkspaceScript(config, context2, failureMode = "fatal") {
|
|
|
29520
30073
|
}
|
|
29521
30074
|
return result.stdout;
|
|
29522
30075
|
}
|
|
29523
|
-
function classifyQualityStatus(score, threshold =
|
|
30076
|
+
function classifyQualityStatus(score, threshold = DEFAULT_THRESHOLD) {
|
|
29524
30077
|
return score >= threshold ? "ok" : "quality_failure";
|
|
29525
30078
|
}
|
|
29526
30079
|
function buildSkippedEvaluatorError(scores) {
|
|
@@ -29612,7 +30165,7 @@ async function runEvaluation(options) {
|
|
|
29612
30165
|
const filteredEvalCases = filterEvalCases(evalCases, filter2);
|
|
29613
30166
|
if (filteredEvalCases.length === 0) {
|
|
29614
30167
|
if (filter2) {
|
|
29615
|
-
throw new Error(`No tests matched filter '${filter2}' in ${evalFilePath}`);
|
|
30168
|
+
throw new Error(`No tests matched filter '${formatFilter(filter2)}' in ${evalFilePath}`);
|
|
29616
30169
|
}
|
|
29617
30170
|
return [];
|
|
29618
30171
|
}
|
|
@@ -29664,6 +30217,9 @@ async function runEvaluation(options) {
|
|
|
29664
30217
|
const graderName = targetContext.graderTarget ?? targetContext.name;
|
|
29665
30218
|
const resolvedGrader = resolveTargetByName(graderName);
|
|
29666
30219
|
if (!resolvedGrader) {
|
|
30220
|
+
if (!LLM_GRADER_CAPABLE_KINDS.includes(targetContext.kind)) {
|
|
30221
|
+
return void 0;
|
|
30222
|
+
}
|
|
29667
30223
|
return getOrCreateProvider(targetContext);
|
|
29668
30224
|
}
|
|
29669
30225
|
return getOrCreateProvider(resolvedGrader);
|
|
@@ -29994,7 +30550,7 @@ async function runEvaluation(options) {
|
|
|
29994
30550
|
const budgetResult = {
|
|
29995
30551
|
timestamp: (now2 ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
29996
30552
|
testId: evalCase.id,
|
|
29997
|
-
|
|
30553
|
+
suite: evalCase.suite,
|
|
29998
30554
|
category: evalCase.category,
|
|
29999
30555
|
score: 0,
|
|
30000
30556
|
assertions: [],
|
|
@@ -30031,7 +30587,7 @@ async function runEvaluation(options) {
|
|
|
30031
30587
|
const haltResult = {
|
|
30032
30588
|
timestamp: (now2 ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
30033
30589
|
testId: evalCase.id,
|
|
30034
|
-
|
|
30590
|
+
suite: evalCase.suite,
|
|
30035
30591
|
category: evalCase.category,
|
|
30036
30592
|
score: 0,
|
|
30037
30593
|
assertions: [],
|
|
@@ -30343,7 +30899,7 @@ async function runBatchEvaluation(options) {
|
|
|
30343
30899
|
targetResolver,
|
|
30344
30900
|
availableTargets,
|
|
30345
30901
|
verbose,
|
|
30346
|
-
threshold: batchThreshold
|
|
30902
|
+
threshold: evalCase.threshold ?? batchThreshold
|
|
30347
30903
|
});
|
|
30348
30904
|
if (providerError) {
|
|
30349
30905
|
result = {
|
|
@@ -30805,8 +31361,9 @@ async function runEvalCase(options) {
|
|
|
30805
31361
|
fileChanges,
|
|
30806
31362
|
workspacePath,
|
|
30807
31363
|
verbose,
|
|
30808
|
-
threshold: caseThreshold
|
|
31364
|
+
threshold: evalCase.threshold ?? caseThreshold
|
|
30809
31365
|
});
|
|
31366
|
+
const effectiveThreshold = evalCase.threshold ?? caseThreshold;
|
|
30810
31367
|
const totalDurationMs = Date.now() - caseStartMs;
|
|
30811
31368
|
const graderTokens = aggregateEvaluatorTokenUsage(result.scores);
|
|
30812
31369
|
const evalRunTokenUsage = tokenUsage || graderTokens ? {
|
|
@@ -30820,7 +31377,7 @@ async function runEvalCase(options) {
|
|
|
30820
31377
|
...evalRunTokenUsage ? { tokenUsage: evalRunTokenUsage } : {}
|
|
30821
31378
|
};
|
|
30822
31379
|
const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
|
|
30823
|
-
const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score,
|
|
31380
|
+
const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, effectiveThreshold);
|
|
30824
31381
|
const targetUsedField = targetUsed ? { targetUsed } : {};
|
|
30825
31382
|
const finalResult = providerError ? {
|
|
30826
31383
|
...result,
|
|
@@ -31021,7 +31578,8 @@ async function evaluateCandidate(options) {
|
|
|
31021
31578
|
targetResolver,
|
|
31022
31579
|
availableTargets,
|
|
31023
31580
|
fileChanges,
|
|
31024
|
-
workspacePath
|
|
31581
|
+
workspacePath,
|
|
31582
|
+
threshold: evalThreshold
|
|
31025
31583
|
});
|
|
31026
31584
|
const completedAt = nowFn();
|
|
31027
31585
|
let agentRequest;
|
|
@@ -31052,7 +31610,7 @@ async function evaluateCandidate(options) {
|
|
|
31052
31610
|
return {
|
|
31053
31611
|
timestamp: completedAt.toISOString(),
|
|
31054
31612
|
testId: evalCase.id,
|
|
31055
|
-
|
|
31613
|
+
suite: evalCase.suite,
|
|
31056
31614
|
category: evalCase.category,
|
|
31057
31615
|
conversationId: evalCase.conversation_id,
|
|
31058
31616
|
score: score.score,
|
|
@@ -31095,7 +31653,8 @@ async function runEvaluatorsForCase(options) {
|
|
|
31095
31653
|
targetResolver,
|
|
31096
31654
|
availableTargets,
|
|
31097
31655
|
fileChanges,
|
|
31098
|
-
workspacePath
|
|
31656
|
+
workspacePath,
|
|
31657
|
+
threshold
|
|
31099
31658
|
} = options;
|
|
31100
31659
|
if (evalCase.assertions && evalCase.assertions.length > 0) {
|
|
31101
31660
|
return runEvaluatorList({
|
|
@@ -31121,7 +31680,8 @@ async function runEvaluatorsForCase(options) {
|
|
|
31121
31680
|
targetResolver,
|
|
31122
31681
|
availableTargets,
|
|
31123
31682
|
fileChanges,
|
|
31124
|
-
workspacePath
|
|
31683
|
+
workspacePath,
|
|
31684
|
+
threshold
|
|
31125
31685
|
});
|
|
31126
31686
|
}
|
|
31127
31687
|
const evaluatorKind = evalCase.evaluator ?? "llm-grader";
|
|
@@ -31223,7 +31783,8 @@ async function runEvaluatorList(options) {
|
|
|
31223
31783
|
name: evaluatorConfig.name,
|
|
31224
31784
|
type: evaluatorConfig.type,
|
|
31225
31785
|
weight,
|
|
31226
|
-
...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {}
|
|
31786
|
+
...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {},
|
|
31787
|
+
...evaluatorConfig.min_score !== void 0 ? { min_score: evaluatorConfig.min_score } : {}
|
|
31227
31788
|
});
|
|
31228
31789
|
scores.push({
|
|
31229
31790
|
name: evaluatorConfig.name,
|
|
@@ -31258,7 +31819,8 @@ async function runEvaluatorList(options) {
|
|
|
31258
31819
|
name: evaluatorConfig.name ?? "unknown",
|
|
31259
31820
|
type: evaluatorConfig.type ?? "llm-grader",
|
|
31260
31821
|
weight,
|
|
31261
|
-
...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {}
|
|
31822
|
+
...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {},
|
|
31823
|
+
...evaluatorConfig.min_score !== void 0 ? { min_score: evaluatorConfig.min_score } : {}
|
|
31262
31824
|
});
|
|
31263
31825
|
scores.push({
|
|
31264
31826
|
name: evaluatorConfig.name ?? "unknown",
|
|
@@ -31292,9 +31854,10 @@ async function runEvaluatorList(options) {
|
|
|
31292
31854
|
}
|
|
31293
31855
|
}
|
|
31294
31856
|
}
|
|
31857
|
+
const effectiveThreshold = options.threshold ?? DEFAULT_THRESHOLD;
|
|
31295
31858
|
const hasRequiredFailure = scored.some((entry) => {
|
|
31296
31859
|
if (!entry.required) return false;
|
|
31297
|
-
const minScore = typeof entry.required === "number" ? entry.required :
|
|
31860
|
+
const minScore = entry.min_score ?? (typeof entry.required === "number" ? entry.required : effectiveThreshold);
|
|
31298
31861
|
return entry.score.score < minScore;
|
|
31299
31862
|
});
|
|
31300
31863
|
const scorable = scored.filter((entry) => entry.score.verdict !== "skip");
|
|
@@ -31305,17 +31868,23 @@ async function runEvaluatorList(options) {
|
|
|
31305
31868
|
const expectedAspectCount = assertions.length || 1;
|
|
31306
31869
|
const score = {
|
|
31307
31870
|
score: aggregateScore,
|
|
31308
|
-
verdict: scoreToVerdict(aggregateScore),
|
|
31871
|
+
verdict: scoreToVerdict(aggregateScore, effectiveThreshold),
|
|
31309
31872
|
assertions,
|
|
31310
31873
|
expectedAspectCount
|
|
31311
31874
|
};
|
|
31312
31875
|
return { score, scores };
|
|
31313
31876
|
}
|
|
31877
|
+
function formatFilter(filter2) {
|
|
31878
|
+
return typeof filter2 === "string" ? filter2 : filter2.join(", ");
|
|
31879
|
+
}
|
|
31880
|
+
function matchesFilter3(id, filter2) {
|
|
31881
|
+
return typeof filter2 === "string" ? micromatch3.isMatch(id, filter2) : filter2.some((pattern) => micromatch3.isMatch(id, pattern));
|
|
31882
|
+
}
|
|
31314
31883
|
function filterEvalCases(evalCases, filter2) {
|
|
31315
31884
|
if (!filter2) {
|
|
31316
31885
|
return evalCases;
|
|
31317
31886
|
}
|
|
31318
|
-
return evalCases.filter((evalCase) =>
|
|
31887
|
+
return evalCases.filter((evalCase) => matchesFilter3(evalCase.id, filter2));
|
|
31319
31888
|
}
|
|
31320
31889
|
function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
|
|
31321
31890
|
const llmGrader = overrides?.["llm-grader"] ?? new LlmGraderEvaluator({
|
|
@@ -31402,7 +31971,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
31402
31971
|
return {
|
|
31403
31972
|
timestamp: timestamp.toISOString(),
|
|
31404
31973
|
testId: evalCase.id,
|
|
31405
|
-
|
|
31974
|
+
suite: evalCase.suite,
|
|
31406
31975
|
category: evalCase.category,
|
|
31407
31976
|
conversationId: evalCase.conversation_id,
|
|
31408
31977
|
score: 0,
|
|
@@ -31666,6 +32235,7 @@ async function evaluate(config) {
|
|
|
31666
32235
|
verbose: config.verbose,
|
|
31667
32236
|
maxConcurrency: config.workers ?? 3,
|
|
31668
32237
|
filter: config.filter,
|
|
32238
|
+
threshold: config.threshold,
|
|
31669
32239
|
evalCases,
|
|
31670
32240
|
onResult: async (result) => {
|
|
31671
32241
|
collectedResults.push(result);
|
|
@@ -31676,19 +32246,19 @@ async function evaluate(config) {
|
|
|
31676
32246
|
const durationMs = Date.now() - startTime;
|
|
31677
32247
|
return {
|
|
31678
32248
|
results: allResults,
|
|
31679
|
-
summary: computeSummary(allResults, durationMs)
|
|
32249
|
+
summary: computeSummary(allResults, durationMs, config.threshold)
|
|
31680
32250
|
};
|
|
31681
32251
|
}
|
|
31682
32252
|
function mapAssertionType(type) {
|
|
31683
32253
|
return type.replace(/_/g, "-");
|
|
31684
32254
|
}
|
|
31685
|
-
function computeSummary(results, durationMs) {
|
|
32255
|
+
function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
|
|
31686
32256
|
const total = results.length;
|
|
31687
32257
|
let passed = 0;
|
|
31688
32258
|
let scoreSum = 0;
|
|
31689
32259
|
for (const r of results) {
|
|
31690
32260
|
scoreSum += r.score;
|
|
31691
|
-
if (r.score >=
|
|
32261
|
+
if (r.score >= threshold) {
|
|
31692
32262
|
passed++;
|
|
31693
32263
|
}
|
|
31694
32264
|
}
|
|
@@ -31798,7 +32368,7 @@ var CONFIG_FILE_NAMES = [
|
|
|
31798
32368
|
];
|
|
31799
32369
|
async function loadTsConfig(projectRoot) {
|
|
31800
32370
|
const { existsSync: existsSync7 } = await import("node:fs");
|
|
31801
|
-
const { pathToFileURL } = await import("node:url");
|
|
32371
|
+
const { pathToFileURL: pathToFileURL2 } = await import("node:url");
|
|
31802
32372
|
const { join: join2 } = await import("node:path");
|
|
31803
32373
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
31804
32374
|
const filePath = join2(projectRoot, fileName);
|
|
@@ -31806,7 +32376,7 @@ async function loadTsConfig(projectRoot) {
|
|
|
31806
32376
|
continue;
|
|
31807
32377
|
}
|
|
31808
32378
|
try {
|
|
31809
|
-
const fileUrl =
|
|
32379
|
+
const fileUrl = pathToFileURL2(filePath).href;
|
|
31810
32380
|
const mod = await import(fileUrl);
|
|
31811
32381
|
const config = mod.default ?? mod;
|
|
31812
32382
|
return AgentVConfigSchema.parse(config);
|
|
@@ -31953,7 +32523,7 @@ function saveProjectRegistry(registry) {
|
|
|
31953
32523
|
const registryPath = getProjectsRegistryPath();
|
|
31954
32524
|
const dir = path47.dirname(registryPath);
|
|
31955
32525
|
if (!existsSync6(dir)) {
|
|
31956
|
-
|
|
32526
|
+
mkdirSync2(dir, { recursive: true });
|
|
31957
32527
|
}
|
|
31958
32528
|
writeFileSync(registryPath, stringifyYaml(registry), "utf-8");
|
|
31959
32529
|
}
|
|
@@ -32213,7 +32783,7 @@ var OtelTraceExporter = class {
|
|
|
32213
32783
|
rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
32214
32784
|
rootSpan.setAttribute("agentv.test_id", result.testId);
|
|
32215
32785
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
32216
|
-
if (result.
|
|
32786
|
+
if (result.suite) rootSpan.setAttribute("agentv.suite", result.suite);
|
|
32217
32787
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
32218
32788
|
if (captureContent && result.output.length > 0) {
|
|
32219
32789
|
const lastMsg = result.output[result.output.length - 1];
|
|
@@ -32422,7 +32992,7 @@ var OtelStreamingObserver = class {
|
|
|
32422
32992
|
this.rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
32423
32993
|
this.rootSpan.setAttribute("agentv.test_id", testId);
|
|
32424
32994
|
this.rootSpan.setAttribute("agentv.target", target);
|
|
32425
|
-
if (evalSet) this.rootSpan.setAttribute("agentv.
|
|
32995
|
+
if (evalSet) this.rootSpan.setAttribute("agentv.suite", evalSet);
|
|
32426
32996
|
this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
|
|
32427
32997
|
}
|
|
32428
32998
|
/** Create and immediately export a tool span */
|
|
@@ -32768,7 +33338,230 @@ function extractToolResultContent(content) {
|
|
|
32768
33338
|
}
|
|
32769
33339
|
return parts.length > 0 ? parts.join("") : void 0;
|
|
32770
33340
|
}
|
|
32771
|
-
|
|
33341
|
+
function parseCodexSession(jsonl) {
|
|
33342
|
+
const messages = [];
|
|
33343
|
+
let sessionId = "";
|
|
33344
|
+
let cwd;
|
|
33345
|
+
let model;
|
|
33346
|
+
let version;
|
|
33347
|
+
let startTimestamp;
|
|
33348
|
+
let endTimestamp;
|
|
33349
|
+
const pendingCalls = /* @__PURE__ */ new Map();
|
|
33350
|
+
const lines = jsonl.split("\n").filter((l) => l.trim().length > 0);
|
|
33351
|
+
for (const line of lines) {
|
|
33352
|
+
let entry;
|
|
33353
|
+
try {
|
|
33354
|
+
entry = JSON.parse(line);
|
|
33355
|
+
} catch {
|
|
33356
|
+
continue;
|
|
33357
|
+
}
|
|
33358
|
+
if (!entry.type) continue;
|
|
33359
|
+
if (entry.timestamp) {
|
|
33360
|
+
if (!startTimestamp) startTimestamp = entry.timestamp;
|
|
33361
|
+
endTimestamp = entry.timestamp;
|
|
33362
|
+
}
|
|
33363
|
+
const payload = entry.payload ?? {};
|
|
33364
|
+
switch (entry.type) {
|
|
33365
|
+
case "session_meta": {
|
|
33366
|
+
sessionId = String(payload.id ?? "");
|
|
33367
|
+
cwd = payload.cwd ? String(payload.cwd) : void 0;
|
|
33368
|
+
version = payload.cli_version ? String(payload.cli_version) : void 0;
|
|
33369
|
+
if (payload.model && !model) {
|
|
33370
|
+
model = String(payload.model);
|
|
33371
|
+
}
|
|
33372
|
+
break;
|
|
33373
|
+
}
|
|
33374
|
+
case "turn_context": {
|
|
33375
|
+
if (payload.model && !model) {
|
|
33376
|
+
model = String(payload.model);
|
|
33377
|
+
}
|
|
33378
|
+
if (payload.cwd && !cwd) {
|
|
33379
|
+
cwd = String(payload.cwd);
|
|
33380
|
+
}
|
|
33381
|
+
break;
|
|
33382
|
+
}
|
|
33383
|
+
case "response_item": {
|
|
33384
|
+
const itemType = String(payload.type ?? "");
|
|
33385
|
+
const role = String(payload.role ?? "");
|
|
33386
|
+
switch (itemType) {
|
|
33387
|
+
case "message": {
|
|
33388
|
+
if (role === "developer") break;
|
|
33389
|
+
const content = extractResponseItemContent(payload.content);
|
|
33390
|
+
if (role === "user" && content) {
|
|
33391
|
+
messages.push({ role: "user", content });
|
|
33392
|
+
} else if (role === "assistant" && content) {
|
|
33393
|
+
messages.push({ role: "assistant", content });
|
|
33394
|
+
}
|
|
33395
|
+
break;
|
|
33396
|
+
}
|
|
33397
|
+
case "function_call": {
|
|
33398
|
+
const toolName = String(payload.name ?? "");
|
|
33399
|
+
const callId = String(payload.call_id ?? "");
|
|
33400
|
+
let input;
|
|
33401
|
+
if (typeof payload.arguments === "string") {
|
|
33402
|
+
try {
|
|
33403
|
+
input = JSON.parse(payload.arguments);
|
|
33404
|
+
} catch {
|
|
33405
|
+
input = payload.arguments;
|
|
33406
|
+
}
|
|
33407
|
+
} else {
|
|
33408
|
+
input = payload.arguments;
|
|
33409
|
+
}
|
|
33410
|
+
const toolCall = { tool: toolName, input, id: callId };
|
|
33411
|
+
const msgIdx = messages.length;
|
|
33412
|
+
messages.push({
|
|
33413
|
+
role: "assistant",
|
|
33414
|
+
toolCalls: [toolCall]
|
|
33415
|
+
});
|
|
33416
|
+
if (callId) {
|
|
33417
|
+
pendingCalls.set(callId, { msgIdx, toolIdx: 0 });
|
|
33418
|
+
}
|
|
33419
|
+
break;
|
|
33420
|
+
}
|
|
33421
|
+
case "custom_tool_call": {
|
|
33422
|
+
const toolName = String(payload.name ?? "");
|
|
33423
|
+
const callId = String(payload.call_id ?? "");
|
|
33424
|
+
let input;
|
|
33425
|
+
if (typeof payload.arguments === "string") {
|
|
33426
|
+
try {
|
|
33427
|
+
input = JSON.parse(payload.arguments);
|
|
33428
|
+
} catch {
|
|
33429
|
+
input = payload.arguments;
|
|
33430
|
+
}
|
|
33431
|
+
} else {
|
|
33432
|
+
input = payload.arguments;
|
|
33433
|
+
}
|
|
33434
|
+
const toolCall = { tool: toolName, input, id: callId };
|
|
33435
|
+
const msgIdx = messages.length;
|
|
33436
|
+
messages.push({
|
|
33437
|
+
role: "assistant",
|
|
33438
|
+
toolCalls: [toolCall]
|
|
33439
|
+
});
|
|
33440
|
+
if (callId) {
|
|
33441
|
+
pendingCalls.set(callId, { msgIdx, toolIdx: 0 });
|
|
33442
|
+
}
|
|
33443
|
+
break;
|
|
33444
|
+
}
|
|
33445
|
+
case "function_call_output":
|
|
33446
|
+
case "custom_tool_call_output": {
|
|
33447
|
+
const callId = String(payload.call_id ?? "");
|
|
33448
|
+
const pending = pendingCalls.get(callId);
|
|
33449
|
+
if (pending) {
|
|
33450
|
+
const existingMsg = messages[pending.msgIdx];
|
|
33451
|
+
const existingCalls = [...existingMsg.toolCalls ?? []];
|
|
33452
|
+
existingCalls[pending.toolIdx] = {
|
|
33453
|
+
...existingCalls[pending.toolIdx],
|
|
33454
|
+
output: payload.output
|
|
33455
|
+
};
|
|
33456
|
+
messages[pending.msgIdx] = { ...existingMsg, toolCalls: existingCalls };
|
|
33457
|
+
pendingCalls.delete(callId);
|
|
33458
|
+
}
|
|
33459
|
+
break;
|
|
33460
|
+
}
|
|
33461
|
+
// Skip reasoning blocks (thinking tokens)
|
|
33462
|
+
case "reasoning":
|
|
33463
|
+
break;
|
|
33464
|
+
}
|
|
33465
|
+
break;
|
|
33466
|
+
}
|
|
33467
|
+
}
|
|
33468
|
+
}
|
|
33469
|
+
let durationMs;
|
|
33470
|
+
if (startTimestamp && endTimestamp) {
|
|
33471
|
+
durationMs = new Date(endTimestamp).getTime() - new Date(startTimestamp).getTime();
|
|
33472
|
+
}
|
|
33473
|
+
const source = {
|
|
33474
|
+
provider: "codex",
|
|
33475
|
+
sessionId,
|
|
33476
|
+
cwd,
|
|
33477
|
+
startedAt: startTimestamp,
|
|
33478
|
+
model,
|
|
33479
|
+
version
|
|
33480
|
+
};
|
|
33481
|
+
return {
|
|
33482
|
+
messages,
|
|
33483
|
+
source,
|
|
33484
|
+
// Codex rollout files don't include token counts (only rate limit info)
|
|
33485
|
+
tokenUsage: void 0,
|
|
33486
|
+
durationMs,
|
|
33487
|
+
costUsd: null
|
|
33488
|
+
};
|
|
33489
|
+
}
|
|
33490
|
+
function extractResponseItemContent(content) {
|
|
33491
|
+
if (typeof content === "string") return content;
|
|
33492
|
+
if (!Array.isArray(content)) return void 0;
|
|
33493
|
+
const parts = [];
|
|
33494
|
+
for (const block of content) {
|
|
33495
|
+
if (typeof block === "object" && block !== null) {
|
|
33496
|
+
const b = block;
|
|
33497
|
+
if (typeof b.text === "string") {
|
|
33498
|
+
parts.push(b.text);
|
|
33499
|
+
}
|
|
33500
|
+
}
|
|
33501
|
+
}
|
|
33502
|
+
return parts.length > 0 ? parts.join("") : void 0;
|
|
33503
|
+
}
|
|
33504
|
+
var DEFAULT_SESSIONS_DIR = () => path48.join(homedir3(), ".codex", "sessions");
|
|
33505
|
+
async function discoverCodexSessions(opts) {
|
|
33506
|
+
const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
|
|
33507
|
+
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
33508
|
+
const sessions = [];
|
|
33509
|
+
let yearDirs;
|
|
33510
|
+
try {
|
|
33511
|
+
yearDirs = await readdir8(sessionsDir);
|
|
33512
|
+
} catch {
|
|
33513
|
+
return [];
|
|
33514
|
+
}
|
|
33515
|
+
for (const year of yearDirs) {
|
|
33516
|
+
const yearPath = path48.join(sessionsDir, year);
|
|
33517
|
+
let monthDirs;
|
|
33518
|
+
try {
|
|
33519
|
+
monthDirs = await readdir8(yearPath);
|
|
33520
|
+
} catch {
|
|
33521
|
+
continue;
|
|
33522
|
+
}
|
|
33523
|
+
for (const month of monthDirs) {
|
|
33524
|
+
const monthPath = path48.join(yearPath, month);
|
|
33525
|
+
let dayDirs;
|
|
33526
|
+
try {
|
|
33527
|
+
dayDirs = await readdir8(monthPath);
|
|
33528
|
+
} catch {
|
|
33529
|
+
continue;
|
|
33530
|
+
}
|
|
33531
|
+
for (const day of dayDirs) {
|
|
33532
|
+
if (opts?.date) {
|
|
33533
|
+
const dirDate = `${year}-${month}-${day}`;
|
|
33534
|
+
if (dirDate !== opts.date) continue;
|
|
33535
|
+
}
|
|
33536
|
+
const dayPath = path48.join(monthPath, day);
|
|
33537
|
+
let files;
|
|
33538
|
+
try {
|
|
33539
|
+
files = await readdir8(dayPath);
|
|
33540
|
+
} catch {
|
|
33541
|
+
continue;
|
|
33542
|
+
}
|
|
33543
|
+
for (const file of files) {
|
|
33544
|
+
if (!file.startsWith("rollout-") || !file.endsWith(".jsonl")) continue;
|
|
33545
|
+
const filePath = path48.join(dayPath, file);
|
|
33546
|
+
const nameWithoutExt = file.replace(/\.jsonl$/, "");
|
|
33547
|
+
const parts = nameWithoutExt.split("-");
|
|
33548
|
+
const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
|
|
33549
|
+
let updatedAt;
|
|
33550
|
+
try {
|
|
33551
|
+
const fileStat = await stat9(filePath);
|
|
33552
|
+
updatedAt = fileStat.mtime;
|
|
33553
|
+
} catch {
|
|
33554
|
+
updatedAt = /* @__PURE__ */ new Date(0);
|
|
33555
|
+
}
|
|
33556
|
+
sessions.push({ sessionId, filePath, filename: file, updatedAt });
|
|
33557
|
+
}
|
|
33558
|
+
}
|
|
33559
|
+
}
|
|
33560
|
+
}
|
|
33561
|
+
sessions.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
|
|
33562
|
+
return sessions.slice(0, limit);
|
|
33563
|
+
}
|
|
33564
|
+
var DEFAULT_PROJECTS_DIR = () => path49.join(homedir4(), ".claude", "projects");
|
|
32772
33565
|
function encodeProjectPath(projectPath) {
|
|
32773
33566
|
return projectPath.replace(/\//g, "-");
|
|
32774
33567
|
}
|
|
@@ -32777,7 +33570,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
32777
33570
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
32778
33571
|
let projectDirs;
|
|
32779
33572
|
try {
|
|
32780
|
-
projectDirs = await
|
|
33573
|
+
projectDirs = await readdir9(projectsDir);
|
|
32781
33574
|
} catch {
|
|
32782
33575
|
return [];
|
|
32783
33576
|
}
|
|
@@ -32787,10 +33580,10 @@ async function discoverClaudeSessions(opts) {
|
|
|
32787
33580
|
}
|
|
32788
33581
|
const sessions = [];
|
|
32789
33582
|
for (const projectDir of projectDirs) {
|
|
32790
|
-
const dirPath =
|
|
33583
|
+
const dirPath = path49.join(projectsDir, projectDir);
|
|
32791
33584
|
let entries;
|
|
32792
33585
|
try {
|
|
32793
|
-
entries = await
|
|
33586
|
+
entries = await readdir9(dirPath);
|
|
32794
33587
|
} catch {
|
|
32795
33588
|
continue;
|
|
32796
33589
|
}
|
|
@@ -32798,10 +33591,10 @@ async function discoverClaudeSessions(opts) {
|
|
|
32798
33591
|
if (!entry.endsWith(".jsonl")) continue;
|
|
32799
33592
|
const sessionId = entry.replace(/\.jsonl$/, "");
|
|
32800
33593
|
if (opts?.sessionId && sessionId !== opts.sessionId) continue;
|
|
32801
|
-
const filePath =
|
|
33594
|
+
const filePath = path49.join(dirPath, entry);
|
|
32802
33595
|
let updatedAt;
|
|
32803
33596
|
try {
|
|
32804
|
-
const fileStat = await
|
|
33597
|
+
const fileStat = await stat10(filePath);
|
|
32805
33598
|
updatedAt = fileStat.mtime;
|
|
32806
33599
|
} catch {
|
|
32807
33600
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -32817,9 +33610,82 @@ async function discoverClaudeSessions(opts) {
|
|
|
32817
33610
|
sessions.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
|
|
32818
33611
|
return sessions.slice(0, limit);
|
|
32819
33612
|
}
|
|
33613
|
+
function toTranscriptJsonLine(entry) {
|
|
33614
|
+
const firstUserMessage = entry.messages.find((m) => m.role === "user");
|
|
33615
|
+
const input = typeof firstUserMessage?.content === "string" ? firstUserMessage.content : "";
|
|
33616
|
+
return {
|
|
33617
|
+
input,
|
|
33618
|
+
output: entry.messages,
|
|
33619
|
+
token_usage: entry.tokenUsage ? {
|
|
33620
|
+
input: entry.tokenUsage.input,
|
|
33621
|
+
output: entry.tokenUsage.output,
|
|
33622
|
+
cached: entry.tokenUsage.cached
|
|
33623
|
+
} : void 0,
|
|
33624
|
+
duration_ms: entry.durationMs,
|
|
33625
|
+
cost_usd: entry.costUsd,
|
|
33626
|
+
source: {
|
|
33627
|
+
provider: entry.source.provider,
|
|
33628
|
+
session_id: entry.source.sessionId,
|
|
33629
|
+
model: entry.source.model,
|
|
33630
|
+
timestamp: entry.source.startedAt,
|
|
33631
|
+
git_branch: entry.source.gitBranch,
|
|
33632
|
+
cwd: entry.source.cwd ?? entry.source.projectPath,
|
|
33633
|
+
version: entry.source.version
|
|
33634
|
+
}
|
|
33635
|
+
};
|
|
33636
|
+
}
|
|
33637
|
+
async function readTranscriptJsonl(filePath) {
|
|
33638
|
+
const text2 = await readFile14(filePath, "utf8");
|
|
33639
|
+
return text2.split("\n").filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
|
|
33640
|
+
}
|
|
32820
33641
|
async function readTranscriptFile(filePath) {
|
|
32821
33642
|
return readFile14(filePath, "utf8");
|
|
32822
33643
|
}
|
|
33644
|
+
var TranscriptProvider = class _TranscriptProvider {
|
|
33645
|
+
id;
|
|
33646
|
+
kind = "transcript";
|
|
33647
|
+
targetName;
|
|
33648
|
+
lines;
|
|
33649
|
+
cursor = 0;
|
|
33650
|
+
constructor(targetName, lines) {
|
|
33651
|
+
this.targetName = targetName;
|
|
33652
|
+
this.id = `transcript:${targetName}`;
|
|
33653
|
+
this.lines = lines;
|
|
33654
|
+
}
|
|
33655
|
+
/**
|
|
33656
|
+
* Create a TranscriptProvider from a JSONL file path.
|
|
33657
|
+
*/
|
|
33658
|
+
static async fromFile(filePath) {
|
|
33659
|
+
const lines = await readTranscriptJsonl(filePath);
|
|
33660
|
+
if (lines.length === 0) {
|
|
33661
|
+
throw new Error(`Transcript file is empty: ${filePath}`);
|
|
33662
|
+
}
|
|
33663
|
+
const providerName = lines[0].source.provider ?? "transcript";
|
|
33664
|
+
return new _TranscriptProvider(providerName, lines);
|
|
33665
|
+
}
|
|
33666
|
+
get lineCount() {
|
|
33667
|
+
return this.lines.length;
|
|
33668
|
+
}
|
|
33669
|
+
async invoke(_request) {
|
|
33670
|
+
if (this.cursor >= this.lines.length) {
|
|
33671
|
+
throw new Error(
|
|
33672
|
+
`Transcript exhausted: ${this.lines.length} line(s) available but ${this.cursor + 1} invocations attempted. Each transcript line maps to one test case.`
|
|
33673
|
+
);
|
|
33674
|
+
}
|
|
33675
|
+
const line = this.lines[this.cursor++];
|
|
33676
|
+
return {
|
|
33677
|
+
output: line.output,
|
|
33678
|
+
tokenUsage: line.token_usage ? {
|
|
33679
|
+
input: line.token_usage.input,
|
|
33680
|
+
output: line.token_usage.output,
|
|
33681
|
+
cached: line.token_usage.cached
|
|
33682
|
+
} : void 0,
|
|
33683
|
+
durationMs: line.duration_ms,
|
|
33684
|
+
costUsd: line.cost_usd ?? void 0,
|
|
33685
|
+
startTime: line.source.timestamp
|
|
33686
|
+
};
|
|
33687
|
+
}
|
|
33688
|
+
};
|
|
32823
33689
|
function createAgentKernel() {
|
|
32824
33690
|
return { status: "stub" };
|
|
32825
33691
|
}
|
|
@@ -32843,6 +33709,7 @@ export {
|
|
|
32843
33709
|
buildSearchRoots,
|
|
32844
33710
|
resolveFileReference,
|
|
32845
33711
|
CLI_PLACEHOLDERS,
|
|
33712
|
+
findDeprecatedCamelCaseTargetWarnings,
|
|
32846
33713
|
COMMON_TARGET_SETTINGS,
|
|
32847
33714
|
resolveDelegatedTargetDefinition,
|
|
32848
33715
|
resolveTargetDefinition,
|
|
@@ -32887,17 +33754,18 @@ export {
|
|
|
32887
33754
|
subscribeToCodexLogEntries,
|
|
32888
33755
|
consumeCopilotCliLogEntries,
|
|
32889
33756
|
subscribeToCopilotCliLogEntries,
|
|
33757
|
+
parseCopilotEvents,
|
|
32890
33758
|
discoverCopilotSessions,
|
|
32891
33759
|
consumeCopilotSdkLogEntries,
|
|
32892
33760
|
subscribeToCopilotSdkLogEntries,
|
|
32893
33761
|
consumePiLogEntries,
|
|
32894
33762
|
subscribeToPiLogEntries,
|
|
32895
|
-
ProviderRegistry,
|
|
32896
33763
|
getAgentvHome,
|
|
32897
33764
|
getWorkspacesRoot,
|
|
32898
33765
|
getSubagentsRoot,
|
|
32899
33766
|
getTraceStateRoot,
|
|
32900
33767
|
getWorkspacePoolRoot,
|
|
33768
|
+
ProviderRegistry,
|
|
32901
33769
|
ensureVSCodeSubagents,
|
|
32902
33770
|
readTargetDefinitions,
|
|
32903
33771
|
listTargetNames,
|
|
@@ -32905,6 +33773,7 @@ export {
|
|
|
32905
33773
|
createBuiltinProviderRegistry,
|
|
32906
33774
|
createProvider,
|
|
32907
33775
|
resolveAndCreateProvider,
|
|
33776
|
+
DEFAULT_THRESHOLD,
|
|
32908
33777
|
PASS_THRESHOLD,
|
|
32909
33778
|
scoreToVerdict,
|
|
32910
33779
|
clampScore,
|
|
@@ -32992,8 +33861,13 @@ export {
|
|
|
32992
33861
|
OtelTraceExporter,
|
|
32993
33862
|
OtelStreamingObserver,
|
|
32994
33863
|
parseClaudeSession,
|
|
33864
|
+
parseCodexSession,
|
|
33865
|
+
discoverCodexSessions,
|
|
32995
33866
|
discoverClaudeSessions,
|
|
33867
|
+
toTranscriptJsonLine,
|
|
33868
|
+
readTranscriptJsonl,
|
|
32996
33869
|
readTranscriptFile,
|
|
33870
|
+
TranscriptProvider,
|
|
32997
33871
|
createAgentKernel
|
|
32998
33872
|
};
|
|
32999
|
-
//# sourceMappingURL=chunk-
|
|
33873
|
+
//# sourceMappingURL=chunk-H4GQXK5M.js.map
|