agentv 4.6.0 → 4.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/{chunk-5GZJIXTY.js → chunk-AX4CQS45.js} +300 -283
- package/dist/chunk-AX4CQS45.js.map +1 -0
- package/dist/{chunk-KQQTEWZF.js → chunk-I6UE4LHZ.js} +1449 -495
- package/dist/chunk-I6UE4LHZ.js.map +1 -0
- package/dist/{chunk-U2LSJ6Y4.js → chunk-VEAOMKNS.js} +4325 -3470
- package/dist/chunk-VEAOMKNS.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-FBPCDLOY.js → dist-XRVHRBJF.js} +18 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-6D3ULOMN.js → interactive-UBEMNJZG.js} +10 -47
- package/dist/interactive-UBEMNJZG.js.map +1 -0
- package/dist/studio/assets/index-DHxVz6M9.css +1 -0
- package/dist/studio/assets/{index-zWHsVvgi.js → index-DcwjOyrk.js} +1 -1
- package/dist/studio/assets/index-Y5InSvcS.js +65 -0
- package/dist/studio/index.html +2 -2
- package/package.json +1 -1
- package/dist/chunk-5GZJIXTY.js.map +0 -1
- package/dist/chunk-KQQTEWZF.js.map +0 -1
- package/dist/chunk-U2LSJ6Y4.js.map +0 -1
- package/dist/interactive-6D3ULOMN.js.map +0 -1
- package/dist/studio/assets/index-D-gfAa3s.js +0 -65
- package/dist/studio/assets/index-jJVIJh8b.css +0 -1
- /package/dist/{dist-FBPCDLOY.js.map → dist-XRVHRBJF.js.map} +0 -0
|
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
|
|
|
301
301
|
}
|
|
302
302
|
});
|
|
303
303
|
|
|
304
|
-
// ../../packages/core/dist/chunk-
|
|
304
|
+
// ../../packages/core/dist/chunk-75RFVESM.js
|
|
305
305
|
import { constants } from "node:fs";
|
|
306
306
|
import { access, readFile } from "node:fs/promises";
|
|
307
307
|
import path from "node:path";
|
|
@@ -419,7 +419,7 @@ __export(external_exports2, {
|
|
|
419
419
|
void: () => voidType
|
|
420
420
|
});
|
|
421
421
|
|
|
422
|
-
// ../../packages/core/dist/chunk-
|
|
422
|
+
// ../../packages/core/dist/chunk-75RFVESM.js
|
|
423
423
|
import { readFile as readFile2 } from "node:fs/promises";
|
|
424
424
|
import path3 from "node:path";
|
|
425
425
|
import fg from "fast-glob";
|
|
@@ -633,15 +633,13 @@ async function resolveFileReference(rawValue, searchRoots) {
|
|
|
633
633
|
}
|
|
634
634
|
var CliHealthcheckHttpInputSchema = external_exports2.object({
|
|
635
635
|
url: external_exports2.string().min(1, "healthcheck URL is required"),
|
|
636
|
-
timeout_seconds: external_exports2.number().positive().optional()
|
|
637
|
-
|
|
638
|
-
});
|
|
636
|
+
timeout_seconds: external_exports2.number().positive().optional()
|
|
637
|
+
}).passthrough();
|
|
639
638
|
var CliHealthcheckCommandInputSchema = external_exports2.object({
|
|
640
639
|
command: external_exports2.string().min(1, "healthcheck command is required"),
|
|
641
640
|
cwd: external_exports2.string().optional(),
|
|
642
|
-
timeout_seconds: external_exports2.number().positive().optional()
|
|
643
|
-
|
|
644
|
-
});
|
|
641
|
+
timeout_seconds: external_exports2.number().positive().optional()
|
|
642
|
+
}).passthrough();
|
|
645
643
|
var CliHealthcheckInputSchema = external_exports2.union([
|
|
646
644
|
CliHealthcheckHttpInputSchema,
|
|
647
645
|
CliHealthcheckCommandInputSchema
|
|
@@ -653,36 +651,28 @@ var CliTargetInputSchema = external_exports2.object({
|
|
|
653
651
|
command: external_exports2.string(),
|
|
654
652
|
// Files format - optional
|
|
655
653
|
files_format: external_exports2.string().optional(),
|
|
656
|
-
filesFormat: external_exports2.string().optional(),
|
|
657
654
|
attachments_format: external_exports2.string().optional(),
|
|
658
|
-
attachmentsFormat: external_exports2.string().optional(),
|
|
659
655
|
// Working directory - optional
|
|
660
656
|
cwd: external_exports2.string().optional(),
|
|
661
657
|
// Workspace template directory - optional (mutually exclusive with cwd)
|
|
662
658
|
workspace_template: external_exports2.string().optional(),
|
|
663
|
-
workspaceTemplate: external_exports2.string().optional(),
|
|
664
659
|
// Timeout in seconds - optional
|
|
665
660
|
timeout_seconds: external_exports2.number().positive().optional(),
|
|
666
|
-
timeoutSeconds: external_exports2.number().positive().optional(),
|
|
667
661
|
// Healthcheck configuration - optional
|
|
668
662
|
healthcheck: CliHealthcheckInputSchema.optional(),
|
|
669
663
|
// Verbose mode - optional
|
|
670
664
|
verbose: external_exports2.boolean().optional(),
|
|
671
665
|
cli_verbose: external_exports2.boolean().optional(),
|
|
672
|
-
cliVerbose: external_exports2.boolean().optional(),
|
|
673
666
|
// Keep temp files - optional
|
|
674
667
|
keep_temp_files: external_exports2.boolean().optional(),
|
|
675
|
-
keepTempFiles: external_exports2.boolean().optional(),
|
|
676
668
|
keep_output_files: external_exports2.boolean().optional(),
|
|
677
|
-
keepOutputFiles: external_exports2.boolean().optional(),
|
|
678
669
|
// Common target fields
|
|
679
670
|
grader_target: external_exports2.string().optional(),
|
|
680
671
|
judge_target: external_exports2.string().optional(),
|
|
681
672
|
// backward compat
|
|
682
673
|
workers: external_exports2.number().int().min(1).optional(),
|
|
683
|
-
provider_batching: external_exports2.boolean().optional()
|
|
684
|
-
|
|
685
|
-
});
|
|
674
|
+
provider_batching: external_exports2.boolean().optional()
|
|
675
|
+
}).passthrough();
|
|
686
676
|
var CliHealthcheckHttpSchema = external_exports2.object({
|
|
687
677
|
url: external_exports2.string().min(1),
|
|
688
678
|
timeoutMs: external_exports2.number().positive().optional()
|
|
@@ -707,7 +697,7 @@ var CliTargetConfigSchema = external_exports2.object({
|
|
|
707
697
|
keepTempFiles: external_exports2.boolean().optional()
|
|
708
698
|
}).strict();
|
|
709
699
|
function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
710
|
-
const timeoutSeconds = input.timeout_seconds
|
|
700
|
+
const timeoutSeconds = input.timeout_seconds;
|
|
711
701
|
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
712
702
|
if ("url" in input && input.url) {
|
|
713
703
|
const url = resolveString(input.url, env, `${targetName} healthcheck URL`);
|
|
@@ -741,9 +731,9 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
|
741
731
|
function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
742
732
|
const targetName = input.name;
|
|
743
733
|
const command = resolveString(input.command, env, `${targetName} CLI command`, true);
|
|
744
|
-
const filesFormatSource = input.files_format ?? input.
|
|
734
|
+
const filesFormatSource = input.files_format ?? input.attachments_format;
|
|
745
735
|
const filesFormat = resolveOptionalLiteralString(filesFormatSource);
|
|
746
|
-
const workspaceTemplateSource = input.workspace_template
|
|
736
|
+
const workspaceTemplateSource = input.workspace_template;
|
|
747
737
|
let workspaceTemplate = resolveOptionalString(
|
|
748
738
|
workspaceTemplateSource,
|
|
749
739
|
env,
|
|
@@ -771,12 +761,10 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
771
761
|
if (!cwd && !workspaceTemplate && evalFilePath) {
|
|
772
762
|
cwd = path2.dirname(path2.resolve(evalFilePath));
|
|
773
763
|
}
|
|
774
|
-
const timeoutSeconds = input.timeout_seconds
|
|
764
|
+
const timeoutSeconds = input.timeout_seconds;
|
|
775
765
|
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
776
|
-
const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose
|
|
777
|
-
const keepTempFiles = resolveOptionalBoolean(
|
|
778
|
-
input.keep_temp_files ?? input.keepTempFiles ?? input.keep_output_files ?? input.keepOutputFiles
|
|
779
|
-
);
|
|
766
|
+
const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose);
|
|
767
|
+
const keepTempFiles = resolveOptionalBoolean(input.keep_temp_files ?? input.keep_output_files);
|
|
780
768
|
const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
|
|
781
769
|
return {
|
|
782
770
|
command,
|
|
@@ -797,15 +785,106 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
|
|
|
797
785
|
"FILES",
|
|
798
786
|
"OUTPUT_FILE"
|
|
799
787
|
]);
|
|
788
|
+
var DEPRECATED_TARGET_CAMEL_CASE_FIELDS = /* @__PURE__ */ new Map([
|
|
789
|
+
["providerBatching", "provider_batching"],
|
|
790
|
+
["subagentModeAllowed", "subagent_mode_allowed"],
|
|
791
|
+
["fallbackTargets", "fallback_targets"],
|
|
792
|
+
["resourceName", "endpoint"],
|
|
793
|
+
["baseUrl", "base_url"],
|
|
794
|
+
["apiKey", "api_key"],
|
|
795
|
+
["deploymentName", "model"],
|
|
796
|
+
["thinkingBudget", "thinking_budget"],
|
|
797
|
+
["maxTokens", "max_output_tokens"],
|
|
798
|
+
["apiFormat", "api_format"],
|
|
799
|
+
["timeoutSeconds", "timeout_seconds"],
|
|
800
|
+
["logDir", "log_dir"],
|
|
801
|
+
["logDirectory", "log_directory"],
|
|
802
|
+
["logFormat", "log_format"],
|
|
803
|
+
["logOutputFormat", "log_output_format"],
|
|
804
|
+
["systemPrompt", "system_prompt"],
|
|
805
|
+
["maxTurns", "max_turns"],
|
|
806
|
+
["maxBudgetUsd", "max_budget_usd"],
|
|
807
|
+
["dryRun", "dry_run"],
|
|
808
|
+
["subagentRoot", "subagent_root"],
|
|
809
|
+
["filesFormat", "files_format"],
|
|
810
|
+
["attachmentsFormat", "attachments_format"],
|
|
811
|
+
["cliUrl", "cli_url"],
|
|
812
|
+
["cliPath", "cli_path"],
|
|
813
|
+
["githubToken", "github_token"],
|
|
814
|
+
["sessionDir", "session_dir"],
|
|
815
|
+
["sessionId", "session_id"],
|
|
816
|
+
["sessionStateDir", "session_state_dir"],
|
|
817
|
+
["maxRetries", "max_retries"],
|
|
818
|
+
["retryInitialDelayMs", "retry_initial_delay_ms"],
|
|
819
|
+
["retryMaxDelayMs", "retry_max_delay_ms"],
|
|
820
|
+
["retryBackoffFactor", "retry_backoff_factor"],
|
|
821
|
+
["retryStatusCodes", "retry_status_codes"]
|
|
822
|
+
]);
|
|
823
|
+
var DEPRECATED_HEALTHCHECK_CAMEL_CASE_FIELDS = /* @__PURE__ */ new Map([
|
|
824
|
+
["timeoutSeconds", "timeout_seconds"]
|
|
825
|
+
]);
|
|
826
|
+
function collectDeprecatedCamelCaseWarnings(value, location, aliases) {
|
|
827
|
+
if (typeof value !== "object" || value === null || Array.isArray(value)) {
|
|
828
|
+
return [];
|
|
829
|
+
}
|
|
830
|
+
const warnings = [];
|
|
831
|
+
for (const [camelCaseField, snakeCaseField] of aliases) {
|
|
832
|
+
if (Object.prototype.hasOwnProperty.call(value, camelCaseField)) {
|
|
833
|
+
warnings.push({
|
|
834
|
+
location: `${location}.${camelCaseField}`,
|
|
835
|
+
message: `camelCase field '${camelCaseField}' is no longer supported in targets.yaml. Use '${snakeCaseField}' instead.`
|
|
836
|
+
});
|
|
837
|
+
}
|
|
838
|
+
}
|
|
839
|
+
return warnings;
|
|
840
|
+
}
|
|
841
|
+
function assertNoDeprecatedCamelCaseTargetFields(definition) {
|
|
842
|
+
if (Object.prototype.hasOwnProperty.call(definition, "workspaceTemplate")) {
|
|
843
|
+
throw new Error(
|
|
844
|
+
`${definition.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
|
|
845
|
+
);
|
|
846
|
+
}
|
|
847
|
+
const warning = findDeprecatedCamelCaseTargetWarnings(
|
|
848
|
+
definition,
|
|
849
|
+
`target "${definition.name}"`
|
|
850
|
+
)[0];
|
|
851
|
+
if (!warning) {
|
|
852
|
+
return;
|
|
853
|
+
}
|
|
854
|
+
const fieldMatch = warning.message.match(/field '([^']+)'/);
|
|
855
|
+
const replacementMatch = warning.message.match(/Use '([^']+)' instead/);
|
|
856
|
+
const field = fieldMatch?.[1] ?? "unknown";
|
|
857
|
+
const replacement = replacementMatch?.[1] ?? "snake_case";
|
|
858
|
+
throw new Error(
|
|
859
|
+
`${warning.location}: camelCase field '${field}' is no longer supported in targets.yaml. Use '${replacement}' instead.`
|
|
860
|
+
);
|
|
861
|
+
}
|
|
862
|
+
function findDeprecatedCamelCaseTargetWarnings(target, location) {
|
|
863
|
+
const warnings = collectDeprecatedCamelCaseWarnings(
|
|
864
|
+
target,
|
|
865
|
+
location,
|
|
866
|
+
DEPRECATED_TARGET_CAMEL_CASE_FIELDS
|
|
867
|
+
);
|
|
868
|
+
if (typeof target !== "object" || target === null || Array.isArray(target)) {
|
|
869
|
+
return warnings;
|
|
870
|
+
}
|
|
871
|
+
const healthcheck = target.healthcheck;
|
|
872
|
+
warnings.push(
|
|
873
|
+
...collectDeprecatedCamelCaseWarnings(
|
|
874
|
+
healthcheck,
|
|
875
|
+
`${location}.healthcheck`,
|
|
876
|
+
DEPRECATED_HEALTHCHECK_CAMEL_CASE_FIELDS
|
|
877
|
+
)
|
|
878
|
+
);
|
|
879
|
+
return warnings;
|
|
880
|
+
}
|
|
800
881
|
var COMMON_TARGET_SETTINGS = [
|
|
801
882
|
"use_target",
|
|
802
883
|
"provider_batching",
|
|
803
|
-
"providerBatching",
|
|
804
884
|
"subagent_mode_allowed",
|
|
805
|
-
"
|
|
806
|
-
"fallback_targets",
|
|
807
|
-
"fallbackTargets"
|
|
885
|
+
"fallback_targets"
|
|
808
886
|
];
|
|
887
|
+
var USE_TARGET_ENV_PATTERN = /^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i;
|
|
809
888
|
var BASE_TARGET_SCHEMA = external_exports2.object({
|
|
810
889
|
name: external_exports2.string().min(1, "target name is required"),
|
|
811
890
|
provider: external_exports2.string().optional(),
|
|
@@ -815,43 +894,40 @@ var BASE_TARGET_SCHEMA = external_exports2.object({
|
|
|
815
894
|
// backward compat
|
|
816
895
|
workers: external_exports2.number().int().min(1).optional(),
|
|
817
896
|
workspace_template: external_exports2.string().optional(),
|
|
818
|
-
workspaceTemplate: external_exports2.string().optional(),
|
|
819
897
|
subagent_mode_allowed: external_exports2.boolean().optional(),
|
|
820
|
-
fallback_targets: external_exports2.array(external_exports2.string().min(1)).optional()
|
|
821
|
-
fallbackTargets: external_exports2.array(external_exports2.string().min(1)).optional()
|
|
898
|
+
fallback_targets: external_exports2.array(external_exports2.string().min(1)).optional()
|
|
822
899
|
}).passthrough();
|
|
823
900
|
var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
|
|
901
|
+
var DEFAULT_AZURE_RESPONSES_API_VERSION = "v1";
|
|
824
902
|
var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
|
|
825
|
-
function normalizeAzureApiVersion(value) {
|
|
903
|
+
function normalizeAzureApiVersion(value, apiFormat) {
|
|
904
|
+
const defaultVersion = apiFormat === "responses" ? DEFAULT_AZURE_RESPONSES_API_VERSION : DEFAULT_AZURE_API_VERSION;
|
|
826
905
|
if (!value) {
|
|
827
|
-
return
|
|
906
|
+
return defaultVersion;
|
|
828
907
|
}
|
|
829
908
|
const trimmed = value.trim();
|
|
830
909
|
if (trimmed.length === 0) {
|
|
831
|
-
return
|
|
910
|
+
return defaultVersion;
|
|
832
911
|
}
|
|
833
912
|
const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
|
|
834
|
-
return withoutPrefix.length > 0 ? withoutPrefix :
|
|
913
|
+
return withoutPrefix.length > 0 ? withoutPrefix : defaultVersion;
|
|
835
914
|
}
|
|
836
915
|
function resolveRetryConfig(target) {
|
|
837
|
-
const maxRetries = resolveOptionalNumber(
|
|
838
|
-
target.max_retries ?? target.maxRetries,
|
|
839
|
-
`${target.name} max retries`
|
|
840
|
-
);
|
|
916
|
+
const maxRetries = resolveOptionalNumber(target.max_retries, `${target.name} max retries`);
|
|
841
917
|
const initialDelayMs = resolveOptionalNumber(
|
|
842
|
-
target.retry_initial_delay_ms
|
|
918
|
+
target.retry_initial_delay_ms,
|
|
843
919
|
`${target.name} retry initial delay`
|
|
844
920
|
);
|
|
845
921
|
const maxDelayMs = resolveOptionalNumber(
|
|
846
|
-
target.retry_max_delay_ms
|
|
922
|
+
target.retry_max_delay_ms,
|
|
847
923
|
`${target.name} retry max delay`
|
|
848
924
|
);
|
|
849
925
|
const backoffFactor = resolveOptionalNumber(
|
|
850
|
-
target.retry_backoff_factor
|
|
926
|
+
target.retry_backoff_factor,
|
|
851
927
|
`${target.name} retry backoff factor`
|
|
852
928
|
);
|
|
853
929
|
const retryableStatusCodes = resolveOptionalNumberArray(
|
|
854
|
-
target.retry_status_codes
|
|
930
|
+
target.retry_status_codes,
|
|
855
931
|
`${target.name} retry status codes`
|
|
856
932
|
);
|
|
857
933
|
if (maxRetries === void 0 && initialDelayMs === void 0 && maxDelayMs === void 0 && backoffFactor === void 0 && retryableStatusCodes === void 0) {
|
|
@@ -865,9 +941,56 @@ function resolveRetryConfig(target) {
|
|
|
865
941
|
retryableStatusCodes
|
|
866
942
|
};
|
|
867
943
|
}
|
|
868
|
-
function
|
|
944
|
+
function resolveDelegatedTargetDefinition(name21, definitions, env = process.env) {
|
|
945
|
+
let definition = definitions.get(name21);
|
|
946
|
+
if (!definition) {
|
|
947
|
+
return void 0;
|
|
948
|
+
}
|
|
949
|
+
const visited = [definition.name];
|
|
950
|
+
for (let depth = 0; depth < 10; depth++) {
|
|
951
|
+
const rawUseTarget = typeof definition.use_target === "string" ? definition.use_target.trim() : void 0;
|
|
952
|
+
if (!rawUseTarget) {
|
|
953
|
+
return definition;
|
|
954
|
+
}
|
|
955
|
+
const envMatch = rawUseTarget.match(USE_TARGET_ENV_PATTERN);
|
|
956
|
+
const envVarName = envMatch?.[1];
|
|
957
|
+
const resolvedName = envVarName ? env[envVarName]?.trim() ?? "" : rawUseTarget;
|
|
958
|
+
if (resolvedName.length === 0) {
|
|
959
|
+
if (envVarName) {
|
|
960
|
+
throw new Error(
|
|
961
|
+
`Target "${definition.name}" uses use_target: \${{ ${envVarName} }}, but ${envVarName} is not set. Set ${envVarName} to the name of a concrete target (for example, "azure") before running the eval.`
|
|
962
|
+
);
|
|
963
|
+
}
|
|
964
|
+
throw new Error(
|
|
965
|
+
`Target "${definition.name}" has an empty use_target value. Point it at a concrete target name before running the eval.`
|
|
966
|
+
);
|
|
967
|
+
}
|
|
968
|
+
const next = definitions.get(resolvedName);
|
|
969
|
+
if (!next) {
|
|
970
|
+
if (envVarName) {
|
|
971
|
+
throw new Error(
|
|
972
|
+
`Target "${definition.name}" uses use_target: \${{ ${envVarName} }}, which resolved to "${resolvedName}", but no target named "${resolvedName}" exists.`
|
|
973
|
+
);
|
|
974
|
+
}
|
|
975
|
+
throw new Error(
|
|
976
|
+
`Target "${definition.name}" uses use_target: "${resolvedName}", but no target named "${resolvedName}" exists.`
|
|
977
|
+
);
|
|
978
|
+
}
|
|
979
|
+
if (visited.includes(next.name)) {
|
|
980
|
+
const chain = [...visited, next.name].join(" -> ");
|
|
981
|
+
throw new Error(`Circular use_target reference detected: ${chain}`);
|
|
982
|
+
}
|
|
983
|
+
definition = next;
|
|
984
|
+
visited.push(definition.name);
|
|
985
|
+
}
|
|
986
|
+
throw new Error(
|
|
987
|
+
`Target "${name21}" exceeded the maximum use_target resolution depth (10). Check for a delegation loop or overly deep alias chain.`
|
|
988
|
+
);
|
|
989
|
+
}
|
|
990
|
+
function resolveTargetDefinition(definition, env = process.env, evalFilePath, options) {
|
|
991
|
+
assertNoDeprecatedCamelCaseTargetFields(definition);
|
|
869
992
|
const parsed = BASE_TARGET_SCHEMA.parse(definition);
|
|
870
|
-
if (parsed.workspace_template !== void 0
|
|
993
|
+
if (parsed.workspace_template !== void 0) {
|
|
871
994
|
throw new Error(
|
|
872
995
|
`${parsed.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
|
|
873
996
|
);
|
|
@@ -883,13 +1006,9 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
883
1006
|
`${parsed.name} provider`,
|
|
884
1007
|
true
|
|
885
1008
|
).toLowerCase();
|
|
886
|
-
const providerBatching = resolveOptionalBoolean(
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
const subagentModeAllowed = resolveOptionalBoolean(
|
|
890
|
-
parsed.subagent_mode_allowed ?? parsed.subagentModeAllowed
|
|
891
|
-
);
|
|
892
|
-
const fallbackTargets = parsed.fallback_targets ?? parsed.fallbackTargets;
|
|
1009
|
+
const providerBatching = resolveOptionalBoolean(parsed.provider_batching);
|
|
1010
|
+
const subagentModeAllowed = resolveOptionalBoolean(parsed.subagent_mode_allowed);
|
|
1011
|
+
const fallbackTargets = parsed.fallback_targets;
|
|
893
1012
|
const base = {
|
|
894
1013
|
name: parsed.name,
|
|
895
1014
|
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
@@ -1039,20 +1158,22 @@ function normalizeOpenAIBaseUrl(value) {
|
|
|
1039
1158
|
return trimmed.endsWith("/v1") ? trimmed : `${trimmed}/v1`;
|
|
1040
1159
|
}
|
|
1041
1160
|
function resolveAzureConfig(target, env) {
|
|
1042
|
-
const endpointSource = target.endpoint ?? target.resource
|
|
1043
|
-
const apiKeySource = target.api_key
|
|
1044
|
-
const deploymentSource = target.deployment ?? target.
|
|
1161
|
+
const endpointSource = target.endpoint ?? target.resource;
|
|
1162
|
+
const apiKeySource = target.api_key;
|
|
1163
|
+
const deploymentSource = target.deployment ?? target.model;
|
|
1045
1164
|
const versionSource = target.version ?? target.api_version;
|
|
1046
1165
|
const temperatureSource = target.temperature;
|
|
1047
|
-
const maxTokensSource = target.max_output_tokens
|
|
1166
|
+
const maxTokensSource = target.max_output_tokens;
|
|
1048
1167
|
const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
|
|
1049
1168
|
const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
|
|
1050
1169
|
const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
|
|
1170
|
+
const apiFormat = resolveApiFormat(target, env, target.name);
|
|
1051
1171
|
const version = normalizeAzureApiVersion(
|
|
1052
1172
|
resolveOptionalString(versionSource, env, `${target.name} api version`, {
|
|
1053
1173
|
allowLiteral: true,
|
|
1054
1174
|
optionalEnv: true
|
|
1055
|
-
})
|
|
1175
|
+
}),
|
|
1176
|
+
apiFormat
|
|
1056
1177
|
);
|
|
1057
1178
|
const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
|
|
1058
1179
|
const maxOutputTokens = resolveOptionalNumber(
|
|
@@ -1065,13 +1186,17 @@ function resolveAzureConfig(target, env) {
|
|
|
1065
1186
|
deploymentName,
|
|
1066
1187
|
apiKey,
|
|
1067
1188
|
version,
|
|
1189
|
+
apiFormat,
|
|
1068
1190
|
temperature,
|
|
1069
1191
|
maxOutputTokens,
|
|
1070
1192
|
retry
|
|
1071
1193
|
};
|
|
1072
1194
|
}
|
|
1073
|
-
function resolveApiFormat(target, targetName) {
|
|
1074
|
-
const raw = target.api_format
|
|
1195
|
+
function resolveApiFormat(target, env, targetName) {
|
|
1196
|
+
const raw = resolveOptionalString(target.api_format, env, `${targetName} api format`, {
|
|
1197
|
+
allowLiteral: true,
|
|
1198
|
+
optionalEnv: true
|
|
1199
|
+
});
|
|
1075
1200
|
if (raw === void 0) return void 0;
|
|
1076
1201
|
if (raw === "chat" || raw === "responses") return raw;
|
|
1077
1202
|
throw new Error(
|
|
@@ -1079,11 +1204,11 @@ function resolveApiFormat(target, targetName) {
|
|
|
1079
1204
|
);
|
|
1080
1205
|
}
|
|
1081
1206
|
function resolveOpenAIConfig(target, env) {
|
|
1082
|
-
const endpointSource = target.endpoint ?? target.base_url
|
|
1083
|
-
const apiKeySource = target.api_key
|
|
1207
|
+
const endpointSource = target.endpoint ?? target.base_url;
|
|
1208
|
+
const apiKeySource = target.api_key;
|
|
1084
1209
|
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
1085
1210
|
const temperatureSource = target.temperature;
|
|
1086
|
-
const maxTokensSource = target.max_output_tokens
|
|
1211
|
+
const maxTokensSource = target.max_output_tokens;
|
|
1087
1212
|
const baseURL = normalizeOpenAIBaseUrl(
|
|
1088
1213
|
resolveOptionalString(endpointSource, env, `${target.name} endpoint`, {
|
|
1089
1214
|
allowLiteral: true,
|
|
@@ -1097,17 +1222,17 @@ function resolveOpenAIConfig(target, env) {
|
|
|
1097
1222
|
baseURL,
|
|
1098
1223
|
apiKey,
|
|
1099
1224
|
model,
|
|
1100
|
-
apiFormat: resolveApiFormat(target, target.name),
|
|
1225
|
+
apiFormat: resolveApiFormat(target, env, target.name),
|
|
1101
1226
|
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
1102
1227
|
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
1103
1228
|
retry
|
|
1104
1229
|
};
|
|
1105
1230
|
}
|
|
1106
1231
|
function resolveOpenRouterConfig(target, env) {
|
|
1107
|
-
const apiKeySource = target.api_key
|
|
1232
|
+
const apiKeySource = target.api_key;
|
|
1108
1233
|
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
1109
1234
|
const temperatureSource = target.temperature;
|
|
1110
|
-
const maxTokensSource = target.max_output_tokens
|
|
1235
|
+
const maxTokensSource = target.max_output_tokens;
|
|
1111
1236
|
const retry = resolveRetryConfig(target);
|
|
1112
1237
|
return {
|
|
1113
1238
|
apiKey: resolveString(apiKeySource, env, `${target.name} OpenRouter api key`),
|
|
@@ -1118,11 +1243,11 @@ function resolveOpenRouterConfig(target, env) {
|
|
|
1118
1243
|
};
|
|
1119
1244
|
}
|
|
1120
1245
|
function resolveAnthropicConfig(target, env) {
|
|
1121
|
-
const apiKeySource = target.api_key
|
|
1246
|
+
const apiKeySource = target.api_key;
|
|
1122
1247
|
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
1123
1248
|
const temperatureSource = target.temperature;
|
|
1124
|
-
const maxTokensSource = target.max_output_tokens
|
|
1125
|
-
const thinkingBudgetSource = target.thinking_budget
|
|
1249
|
+
const maxTokensSource = target.max_output_tokens;
|
|
1250
|
+
const thinkingBudgetSource = target.thinking_budget;
|
|
1126
1251
|
const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
|
|
1127
1252
|
const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
|
|
1128
1253
|
const retry = resolveRetryConfig(target);
|
|
@@ -1136,10 +1261,10 @@ function resolveAnthropicConfig(target, env) {
|
|
|
1136
1261
|
};
|
|
1137
1262
|
}
|
|
1138
1263
|
function resolveGeminiConfig(target, env) {
|
|
1139
|
-
const apiKeySource = target.api_key
|
|
1264
|
+
const apiKeySource = target.api_key;
|
|
1140
1265
|
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
1141
1266
|
const temperatureSource = target.temperature;
|
|
1142
|
-
const maxTokensSource = target.max_output_tokens
|
|
1267
|
+
const maxTokensSource = target.max_output_tokens;
|
|
1143
1268
|
const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
|
|
1144
1269
|
const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
|
|
1145
1270
|
allowLiteral: true,
|
|
@@ -1159,11 +1284,11 @@ function resolveCodexConfig(target, env, evalFilePath) {
|
|
|
1159
1284
|
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
1160
1285
|
const argsSource = target.args ?? target.arguments;
|
|
1161
1286
|
const cwdSource = target.cwd;
|
|
1162
|
-
const workspaceTemplateSource = target.workspace_template
|
|
1163
|
-
const timeoutSource = target.timeout_seconds
|
|
1164
|
-
const logDirSource = target.log_dir ?? target.
|
|
1165
|
-
const logFormatSource = target.log_format ?? target.
|
|
1166
|
-
const systemPromptSource = target.system_prompt
|
|
1287
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
1288
|
+
const timeoutSource = target.timeout_seconds;
|
|
1289
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
1290
|
+
const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
1291
|
+
const systemPromptSource = target.system_prompt;
|
|
1167
1292
|
const model = resolveOptionalString(modelSource, env, `${target.name} codex model`, {
|
|
1168
1293
|
allowLiteral: true,
|
|
1169
1294
|
optionalEnv: true
|
|
@@ -1227,16 +1352,16 @@ function normalizeCodexLogFormat(value) {
|
|
|
1227
1352
|
throw new Error("codex log format must be 'summary' or 'json'");
|
|
1228
1353
|
}
|
|
1229
1354
|
function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
1230
|
-
const cliUrlSource = target.cli_url
|
|
1231
|
-
const cliPathSource = target.cli_path
|
|
1232
|
-
const githubTokenSource = target.github_token
|
|
1355
|
+
const cliUrlSource = target.cli_url;
|
|
1356
|
+
const cliPathSource = target.cli_path;
|
|
1357
|
+
const githubTokenSource = target.github_token;
|
|
1233
1358
|
const modelSource = target.model;
|
|
1234
1359
|
const cwdSource = target.cwd;
|
|
1235
|
-
const workspaceTemplateSource = target.workspace_template
|
|
1236
|
-
const timeoutSource = target.timeout_seconds
|
|
1237
|
-
const logDirSource = target.log_dir ?? target.
|
|
1238
|
-
const logFormatSource = target.log_format
|
|
1239
|
-
const systemPromptSource = target.system_prompt
|
|
1360
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
1361
|
+
const timeoutSource = target.timeout_seconds;
|
|
1362
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
1363
|
+
const logFormatSource = target.log_format;
|
|
1364
|
+
const systemPromptSource = target.system_prompt;
|
|
1240
1365
|
const cliUrl = resolveOptionalString(cliUrlSource, env, `${target.name} copilot-sdk cli URL`, {
|
|
1241
1366
|
allowLiteral: true,
|
|
1242
1367
|
optionalEnv: true
|
|
@@ -1309,11 +1434,11 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
|
|
|
1309
1434
|
const modelSource = target.model;
|
|
1310
1435
|
const argsSource = target.args ?? target.arguments;
|
|
1311
1436
|
const cwdSource = target.cwd;
|
|
1312
|
-
const workspaceTemplateSource = target.workspace_template
|
|
1313
|
-
const timeoutSource = target.timeout_seconds
|
|
1314
|
-
const logDirSource = target.log_dir ?? target.
|
|
1315
|
-
const logFormatSource = target.log_format
|
|
1316
|
-
const systemPromptSource = target.system_prompt
|
|
1437
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
1438
|
+
const timeoutSource = target.timeout_seconds;
|
|
1439
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
1440
|
+
const logFormatSource = target.log_format;
|
|
1441
|
+
const systemPromptSource = target.system_prompt;
|
|
1317
1442
|
const executable = resolveOptionalString(executableSource, env, `${target.name} copilot-cli executable`, {
|
|
1318
1443
|
allowLiteral: true,
|
|
1319
1444
|
optionalEnv: true
|
|
@@ -1377,16 +1502,16 @@ function normalizeCopilotLogFormat(value) {
|
|
|
1377
1502
|
}
|
|
1378
1503
|
function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
1379
1504
|
const subproviderSource = target.subprovider;
|
|
1380
|
-
const modelSource = target.model ?? target.pi_model
|
|
1381
|
-
const apiKeySource = target.api_key
|
|
1382
|
-
const toolsSource = target.tools ?? target.pi_tools
|
|
1383
|
-
const thinkingSource = target.thinking ?? target.pi_thinking
|
|
1505
|
+
const modelSource = target.model ?? target.pi_model;
|
|
1506
|
+
const apiKeySource = target.api_key;
|
|
1507
|
+
const toolsSource = target.tools ?? target.pi_tools;
|
|
1508
|
+
const thinkingSource = target.thinking ?? target.pi_thinking;
|
|
1384
1509
|
const cwdSource = target.cwd;
|
|
1385
|
-
const workspaceTemplateSource = target.workspace_template
|
|
1386
|
-
const timeoutSource = target.timeout_seconds
|
|
1387
|
-
const logDirSource = target.log_dir ?? target.
|
|
1388
|
-
const logFormatSource = target.log_format
|
|
1389
|
-
const systemPromptSource = target.system_prompt
|
|
1510
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
1511
|
+
const timeoutSource = target.timeout_seconds;
|
|
1512
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
1513
|
+
const logFormatSource = target.log_format;
|
|
1514
|
+
const systemPromptSource = target.system_prompt;
|
|
1390
1515
|
const subprovider = resolveOptionalString(
|
|
1391
1516
|
subproviderSource,
|
|
1392
1517
|
env,
|
|
@@ -1404,6 +1529,11 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
1404
1529
|
allowLiteral: false,
|
|
1405
1530
|
optionalEnv: true
|
|
1406
1531
|
});
|
|
1532
|
+
const baseUrlSource = target.base_url ?? target.endpoint;
|
|
1533
|
+
const baseUrl = resolveOptionalString(baseUrlSource, env, `${target.name} pi base url`, {
|
|
1534
|
+
allowLiteral: true,
|
|
1535
|
+
optionalEnv: true
|
|
1536
|
+
});
|
|
1407
1537
|
const tools = resolveOptionalString(toolsSource, env, `${target.name} pi tools`, {
|
|
1408
1538
|
allowLiteral: true,
|
|
1409
1539
|
optionalEnv: true
|
|
@@ -1444,6 +1574,7 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
1444
1574
|
subprovider,
|
|
1445
1575
|
model,
|
|
1446
1576
|
apiKey,
|
|
1577
|
+
baseUrl,
|
|
1447
1578
|
tools,
|
|
1448
1579
|
thinking,
|
|
1449
1580
|
cwd,
|
|
@@ -1457,16 +1588,16 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
1457
1588
|
function resolvePiCliConfig(target, env, evalFilePath) {
|
|
1458
1589
|
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
1459
1590
|
const subproviderSource = target.subprovider;
|
|
1460
|
-
const modelSource = target.model ?? target.pi_model
|
|
1461
|
-
const apiKeySource = target.api_key
|
|
1462
|
-
const toolsSource = target.tools ?? target.pi_tools
|
|
1463
|
-
const thinkingSource = target.thinking ?? target.pi_thinking
|
|
1591
|
+
const modelSource = target.model ?? target.pi_model;
|
|
1592
|
+
const apiKeySource = target.api_key;
|
|
1593
|
+
const toolsSource = target.tools ?? target.pi_tools;
|
|
1594
|
+
const thinkingSource = target.thinking ?? target.pi_thinking;
|
|
1464
1595
|
const cwdSource = target.cwd;
|
|
1465
|
-
const workspaceTemplateSource = target.workspace_template
|
|
1466
|
-
const timeoutSource = target.timeout_seconds
|
|
1467
|
-
const logDirSource = target.log_dir ?? target.
|
|
1468
|
-
const logFormatSource = target.log_format
|
|
1469
|
-
const systemPromptSource = target.system_prompt
|
|
1596
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
1597
|
+
const timeoutSource = target.timeout_seconds;
|
|
1598
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
1599
|
+
const logFormatSource = target.log_format;
|
|
1600
|
+
const systemPromptSource = target.system_prompt;
|
|
1470
1601
|
const executable = resolveOptionalString(executableSource, env, `${target.name} pi-cli executable`, {
|
|
1471
1602
|
allowLiteral: true,
|
|
1472
1603
|
optionalEnv: true
|
|
@@ -1485,6 +1616,11 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
1485
1616
|
allowLiteral: false,
|
|
1486
1617
|
optionalEnv: true
|
|
1487
1618
|
});
|
|
1619
|
+
const baseUrlSource = target.base_url ?? target.endpoint;
|
|
1620
|
+
const baseUrl = resolveOptionalString(baseUrlSource, env, `${target.name} pi-cli base url`, {
|
|
1621
|
+
allowLiteral: true,
|
|
1622
|
+
optionalEnv: true
|
|
1623
|
+
});
|
|
1488
1624
|
const tools = resolveOptionalString(toolsSource, env, `${target.name} pi-cli tools`, {
|
|
1489
1625
|
allowLiteral: true,
|
|
1490
1626
|
optionalEnv: true
|
|
@@ -1523,6 +1659,7 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
1523
1659
|
subprovider,
|
|
1524
1660
|
model,
|
|
1525
1661
|
apiKey,
|
|
1662
|
+
baseUrl,
|
|
1526
1663
|
tools,
|
|
1527
1664
|
thinking,
|
|
1528
1665
|
args,
|
|
@@ -1537,11 +1674,11 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
1537
1674
|
function resolveClaudeConfig(target, env, evalFilePath) {
|
|
1538
1675
|
const modelSource = target.model;
|
|
1539
1676
|
const cwdSource = target.cwd;
|
|
1540
|
-
const workspaceTemplateSource = target.workspace_template
|
|
1541
|
-
const timeoutSource = target.timeout_seconds
|
|
1542
|
-
const logDirSource = target.log_dir ?? target.
|
|
1543
|
-
const logFormatSource = target.log_format ?? target.
|
|
1544
|
-
const systemPromptSource = target.system_prompt
|
|
1677
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
1678
|
+
const timeoutSource = target.timeout_seconds;
|
|
1679
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
1680
|
+
const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CLAUDE_LOG_FORMAT;
|
|
1681
|
+
const systemPromptSource = target.system_prompt;
|
|
1545
1682
|
const model = resolveOptionalString(modelSource, env, `${target.name} claude model`, {
|
|
1546
1683
|
allowLiteral: true,
|
|
1547
1684
|
optionalEnv: true
|
|
@@ -1574,8 +1711,8 @@ function resolveClaudeConfig(target, env, evalFilePath) {
|
|
|
1574
1711
|
});
|
|
1575
1712
|
const logFormat = normalizeClaudeLogFormat(logFormatSource);
|
|
1576
1713
|
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
1577
|
-
const maxTurns = typeof target.max_turns === "number" ? target.max_turns :
|
|
1578
|
-
const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd :
|
|
1714
|
+
const maxTurns = typeof target.max_turns === "number" ? target.max_turns : void 0;
|
|
1715
|
+
const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd : void 0;
|
|
1579
1716
|
return {
|
|
1580
1717
|
model,
|
|
1581
1718
|
systemPrompt,
|
|
@@ -1606,9 +1743,7 @@ function resolveMockConfig(target) {
|
|
|
1606
1743
|
return { response };
|
|
1607
1744
|
}
|
|
1608
1745
|
function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
|
|
1609
|
-
const workspaceTemplateEnvVar = resolveOptionalLiteralString(
|
|
1610
|
-
target.workspace_template ?? target.workspaceTemplate
|
|
1611
|
-
);
|
|
1746
|
+
const workspaceTemplateEnvVar = resolveOptionalLiteralString(target.workspace_template);
|
|
1612
1747
|
let workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(
|
|
1613
1748
|
workspaceTemplateEnvVar,
|
|
1614
1749
|
env,
|
|
@@ -1623,9 +1758,9 @@ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
|
|
|
1623
1758
|
}
|
|
1624
1759
|
const executableSource = target.executable;
|
|
1625
1760
|
const waitSource = target.wait;
|
|
1626
|
-
const dryRunSource = target.dry_run
|
|
1627
|
-
const subagentRootSource = target.subagent_root
|
|
1628
|
-
const timeoutSource = target.timeout_seconds
|
|
1761
|
+
const dryRunSource = target.dry_run;
|
|
1762
|
+
const subagentRootSource = target.subagent_root;
|
|
1763
|
+
const timeoutSource = target.timeout_seconds;
|
|
1629
1764
|
const defaultCommand = insiders ? "code-insiders" : "code";
|
|
1630
1765
|
const executable = resolveOptionalString(executableSource, env, `${target.name} vscode executable`, {
|
|
1631
1766
|
allowLiteral: true,
|
|
@@ -1660,8 +1795,8 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
1660
1795
|
const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
|
|
1661
1796
|
if (!parseResult.success) {
|
|
1662
1797
|
const firstError = parseResult.error.errors[0];
|
|
1663
|
-
const
|
|
1664
|
-
const prefix =
|
|
1798
|
+
const path410 = firstError?.path.join(".") || "";
|
|
1799
|
+
const prefix = path410 ? `${target.name} ${path410}: ` : `${target.name}: `;
|
|
1665
1800
|
throw new Error(`${prefix}${firstError?.message}`);
|
|
1666
1801
|
}
|
|
1667
1802
|
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
@@ -1676,7 +1811,7 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
1676
1811
|
}
|
|
1677
1812
|
function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath) {
|
|
1678
1813
|
const command = target.command ? resolveString(target.command, env, `${target.name} command`, true) : `bun run .agentv/providers/${providerKind}.ts {PROMPT}`;
|
|
1679
|
-
const timeoutSeconds = target.timeout_seconds
|
|
1814
|
+
const timeoutSeconds = target.timeout_seconds;
|
|
1680
1815
|
const timeoutMs = resolveTimeoutMs(timeoutSeconds, `${target.name} timeout`);
|
|
1681
1816
|
let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
|
|
1682
1817
|
allowLiteral: true,
|
|
@@ -1740,10 +1875,10 @@ function resolveDiscover(value, targetName) {
|
|
|
1740
1875
|
throw new Error(`Target "${targetName}": discover must be "latest" (got "${String(value)}")`);
|
|
1741
1876
|
}
|
|
1742
1877
|
function resolveCopilotLogConfig(target, env) {
|
|
1743
|
-
const sessionDirSource = target.session_dir
|
|
1744
|
-
const sessionIdSource = target.session_id
|
|
1878
|
+
const sessionDirSource = target.session_dir;
|
|
1879
|
+
const sessionIdSource = target.session_id;
|
|
1745
1880
|
const discoverSource = target.discover;
|
|
1746
|
-
const sessionStateDirSource = target.session_state_dir
|
|
1881
|
+
const sessionStateDirSource = target.session_state_dir;
|
|
1747
1882
|
const cwdSource = target.cwd;
|
|
1748
1883
|
return {
|
|
1749
1884
|
sessionDir: resolveOptionalString(
|
|
@@ -1916,6 +2051,15 @@ var AGENT_PROVIDER_KINDS = [
|
|
|
1916
2051
|
"vscode",
|
|
1917
2052
|
"vscode-insiders"
|
|
1918
2053
|
];
|
|
2054
|
+
var LLM_GRADER_CAPABLE_KINDS = [
|
|
2055
|
+
"openai",
|
|
2056
|
+
"openrouter",
|
|
2057
|
+
"azure",
|
|
2058
|
+
"anthropic",
|
|
2059
|
+
"gemini",
|
|
2060
|
+
"agentv",
|
|
2061
|
+
"mock"
|
|
2062
|
+
];
|
|
1919
2063
|
var KNOWN_PROVIDERS = [
|
|
1920
2064
|
"openai",
|
|
1921
2065
|
"openrouter",
|
|
@@ -1935,7 +2079,8 @@ var KNOWN_PROVIDERS = [
|
|
|
1935
2079
|
"mock",
|
|
1936
2080
|
"vscode",
|
|
1937
2081
|
"vscode-insiders",
|
|
1938
|
-
"agentv"
|
|
2082
|
+
"agentv",
|
|
2083
|
+
"transcript"
|
|
1939
2084
|
];
|
|
1940
2085
|
var PROVIDER_ALIASES = [
|
|
1941
2086
|
"azure-openai",
|
|
@@ -6744,7 +6889,7 @@ function createOpenRouter(options = {}) {
|
|
|
6744
6889
|
);
|
|
6745
6890
|
const createChatModel = (modelId, settings = {}) => new OpenRouterChatLanguageModel(modelId, settings, {
|
|
6746
6891
|
provider: "openrouter.chat",
|
|
6747
|
-
url: ({ path:
|
|
6892
|
+
url: ({ path: path50 }) => `${baseURL}${path50}`,
|
|
6748
6893
|
headers: getHeaders,
|
|
6749
6894
|
compatibility,
|
|
6750
6895
|
fetch: options.fetch,
|
|
@@ -6752,7 +6897,7 @@ function createOpenRouter(options = {}) {
|
|
|
6752
6897
|
});
|
|
6753
6898
|
const createCompletionModel = (modelId, settings = {}) => new OpenRouterCompletionLanguageModel(modelId, settings, {
|
|
6754
6899
|
provider: "openrouter.completion",
|
|
6755
|
-
url: ({ path:
|
|
6900
|
+
url: ({ path: path50 }) => `${baseURL}${path50}`,
|
|
6756
6901
|
headers: getHeaders,
|
|
6757
6902
|
compatibility,
|
|
6758
6903
|
fetch: options.fetch,
|
|
@@ -6760,14 +6905,14 @@ function createOpenRouter(options = {}) {
|
|
|
6760
6905
|
});
|
|
6761
6906
|
const createEmbeddingModel = (modelId, settings = {}) => new OpenRouterEmbeddingModel(modelId, settings, {
|
|
6762
6907
|
provider: "openrouter.embedding",
|
|
6763
|
-
url: ({ path:
|
|
6908
|
+
url: ({ path: path50 }) => `${baseURL}${path50}`,
|
|
6764
6909
|
headers: getHeaders,
|
|
6765
6910
|
fetch: options.fetch,
|
|
6766
6911
|
extraBody: options.extraBody
|
|
6767
6912
|
});
|
|
6768
6913
|
const createImageModel = (modelId, settings = {}) => new OpenRouterImageModel(modelId, settings, {
|
|
6769
6914
|
provider: "openrouter.image",
|
|
6770
|
-
url: ({ path:
|
|
6915
|
+
url: ({ path: path50 }) => `${baseURL}${path50}`,
|
|
6771
6916
|
headers: getHeaders,
|
|
6772
6917
|
fetch: options.fetch,
|
|
6773
6918
|
extraBody: options.extraBody
|
|
@@ -14278,19 +14423,21 @@ import { randomUUID as randomUUID6 } from "node:crypto";
|
|
|
14278
14423
|
import { existsSync as existsSync2 } from "node:fs";
|
|
14279
14424
|
import { mkdir as mkdir5 } from "node:fs/promises";
|
|
14280
14425
|
import path18 from "node:path";
|
|
14281
|
-
import { spawn as spawn3 } from "node:child_process";
|
|
14426
|
+
import { execSync, spawn as spawn3 } from "node:child_process";
|
|
14282
14427
|
import { randomUUID as randomUUID7 } from "node:crypto";
|
|
14283
|
-
import { createWriteStream as createWriteStream5 } from "node:fs";
|
|
14428
|
+
import { accessSync, createWriteStream as createWriteStream5, readFileSync as readFileSync2 } from "node:fs";
|
|
14284
14429
|
import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
14285
14430
|
import { tmpdir } from "node:os";
|
|
14286
14431
|
import path19 from "node:path";
|
|
14287
|
-
import { execSync } from "node:child_process";
|
|
14432
|
+
import { execSync as execSync2 } from "node:child_process";
|
|
14288
14433
|
import { randomUUID as randomUUID8 } from "node:crypto";
|
|
14289
|
-
import { accessSync, createWriteStream as createWriteStream6 } from "node:fs";
|
|
14434
|
+
import { accessSync as accessSync2, createWriteStream as createWriteStream6, mkdirSync } from "node:fs";
|
|
14290
14435
|
import { mkdir as mkdir7 } from "node:fs/promises";
|
|
14291
|
-
import
|
|
14436
|
+
import path21 from "node:path";
|
|
14292
14437
|
import { createInterface } from "node:readline";
|
|
14293
|
-
import { fileURLToPath as fileURLToPath3 } from "node:url";
|
|
14438
|
+
import { fileURLToPath as fileURLToPath3, pathToFileURL } from "node:url";
|
|
14439
|
+
import os2 from "node:os";
|
|
14440
|
+
import path20 from "node:path";
|
|
14294
14441
|
import { exec as exec2 } from "node:child_process";
|
|
14295
14442
|
import { constants as constants3, access as access3, stat as stat5 } from "node:fs/promises";
|
|
14296
14443
|
import path322 from "node:path";
|
|
@@ -14299,18 +14446,16 @@ import { stat as stat4, writeFile as writeFile4 } from "node:fs/promises";
|
|
|
14299
14446
|
import path30 from "node:path";
|
|
14300
14447
|
import { constants as constants22 } from "node:fs";
|
|
14301
14448
|
import { access as access22, mkdir as mkdir8, readdir as readdir2, rm as rm2, stat as stat2 } from "node:fs/promises";
|
|
14302
|
-
import path21 from "node:path";
|
|
14303
14449
|
import path222 from "node:path";
|
|
14304
14450
|
import path23 from "node:path";
|
|
14305
|
-
import { readFile as readFile9 } from "node:fs/promises";
|
|
14306
14451
|
import path24 from "node:path";
|
|
14452
|
+
import { readFile as readFile9 } from "node:fs/promises";
|
|
14453
|
+
import path25 from "node:path";
|
|
14307
14454
|
import { exec, spawn as spawn4 } from "node:child_process";
|
|
14308
14455
|
import { mkdir as mkdir9, writeFile as writeFile2 } from "node:fs/promises";
|
|
14309
14456
|
import path27 from "node:path";
|
|
14310
14457
|
import { promisify as promisify2 } from "node:util";
|
|
14311
14458
|
import path26 from "node:path";
|
|
14312
|
-
import os2 from "node:os";
|
|
14313
|
-
import path25 from "node:path";
|
|
14314
14459
|
import { copyFile, mkdir as mkdir10, readFile as readFile10, readdir as readdir3, stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
|
|
14315
14460
|
import path29 from "node:path";
|
|
14316
14461
|
import path28 from "node:path";
|
|
@@ -14361,12 +14506,15 @@ import { existsSync as existsSync5 } from "node:fs";
|
|
|
14361
14506
|
import path45 from "node:path";
|
|
14362
14507
|
import { mkdir as mkdir15, readFile as readFile13, writeFile as writeFile8 } from "node:fs/promises";
|
|
14363
14508
|
import path46 from "node:path";
|
|
14364
|
-
import { existsSync as existsSync6, mkdirSync, readFileSync as
|
|
14509
|
+
import { existsSync as existsSync6, mkdirSync as mkdirSync2, readFileSync as readFileSync3, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
|
|
14365
14510
|
import path47 from "node:path";
|
|
14366
14511
|
import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
|
|
14367
14512
|
import { readdir as readdir8, stat as stat9 } from "node:fs/promises";
|
|
14368
14513
|
import { homedir as homedir3 } from "node:os";
|
|
14369
14514
|
import path48 from "node:path";
|
|
14515
|
+
import { readdir as readdir9, stat as stat10 } from "node:fs/promises";
|
|
14516
|
+
import { homedir as homedir4 } from "node:os";
|
|
14517
|
+
import path49 from "node:path";
|
|
14370
14518
|
import { readFile as readFile14 } from "node:fs/promises";
|
|
14371
14519
|
function computeTraceSummary(messages) {
|
|
14372
14520
|
const toolCallCounts = {};
|
|
@@ -15154,8 +15302,13 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15154
15302
|
const negate = rawEvaluator.negate === true ? true : void 0;
|
|
15155
15303
|
if (isCustomType) {
|
|
15156
15304
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15157
|
-
const required2 =
|
|
15158
|
-
|
|
15305
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15306
|
+
rawEvaluator.required,
|
|
15307
|
+
rawEvaluator.min_score,
|
|
15308
|
+
name21,
|
|
15309
|
+
evalId
|
|
15310
|
+
);
|
|
15311
|
+
const knownProps2 = /* @__PURE__ */ new Set(["name", "type", "weight", "required", "min_score", "negate"]);
|
|
15159
15312
|
const config2 = {};
|
|
15160
15313
|
for (const [key, value] of Object.entries(rawEvaluator)) {
|
|
15161
15314
|
if (!knownProps2.has(key) && value !== void 0) {
|
|
@@ -15167,6 +15320,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15167
15320
|
type: customTypeName,
|
|
15168
15321
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15169
15322
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15323
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15170
15324
|
...negate !== void 0 ? { negate } : {},
|
|
15171
15325
|
...Object.keys(config2).length > 0 ? { config: config2 } : {}
|
|
15172
15326
|
});
|
|
@@ -15236,7 +15390,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15236
15390
|
);
|
|
15237
15391
|
}
|
|
15238
15392
|
}
|
|
15239
|
-
const required2 =
|
|
15393
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15394
|
+
rawEvaluator.required,
|
|
15395
|
+
rawEvaluator.min_score,
|
|
15396
|
+
name21,
|
|
15397
|
+
evalId
|
|
15398
|
+
);
|
|
15240
15399
|
const knownProps2 = /* @__PURE__ */ new Set([
|
|
15241
15400
|
"name",
|
|
15242
15401
|
"type",
|
|
@@ -15262,6 +15421,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15262
15421
|
resolvedCwd,
|
|
15263
15422
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15264
15423
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15424
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15265
15425
|
...negate !== void 0 ? { negate } : {},
|
|
15266
15426
|
...Object.keys(config2).length > 0 ? { config: config2 } : {},
|
|
15267
15427
|
...targetConfig !== void 0 ? { target: targetConfig } : {}
|
|
@@ -15390,7 +15550,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15390
15550
|
};
|
|
15391
15551
|
}
|
|
15392
15552
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15393
|
-
const required2 =
|
|
15553
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15554
|
+
rawEvaluator.required,
|
|
15555
|
+
rawEvaluator.min_score,
|
|
15556
|
+
name21,
|
|
15557
|
+
evalId
|
|
15558
|
+
);
|
|
15394
15559
|
evaluators.push({
|
|
15395
15560
|
name: name21,
|
|
15396
15561
|
type: "composite",
|
|
@@ -15398,6 +15563,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15398
15563
|
aggregator,
|
|
15399
15564
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15400
15565
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15566
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15401
15567
|
...negate !== void 0 ? { negate } : {}
|
|
15402
15568
|
});
|
|
15403
15569
|
continue;
|
|
@@ -15508,7 +15674,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15508
15674
|
continue;
|
|
15509
15675
|
}
|
|
15510
15676
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15511
|
-
const required2 =
|
|
15677
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15678
|
+
rawEvaluator.required,
|
|
15679
|
+
rawEvaluator.min_score,
|
|
15680
|
+
name21,
|
|
15681
|
+
evalId
|
|
15682
|
+
);
|
|
15512
15683
|
const config2 = {
|
|
15513
15684
|
name: name21,
|
|
15514
15685
|
type: "tool-trajectory",
|
|
@@ -15517,6 +15688,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15517
15688
|
...expected ? { expected } : {},
|
|
15518
15689
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15519
15690
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15691
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15520
15692
|
...negate !== void 0 ? { negate } : {},
|
|
15521
15693
|
...argsMatch2 !== void 0 ? { argsMatch: argsMatch2 } : {}
|
|
15522
15694
|
};
|
|
@@ -15579,7 +15751,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15579
15751
|
const aggregation = asString(rawEvaluator.aggregation);
|
|
15580
15752
|
const validAggregation = isValidFieldAggregationType(aggregation) ? aggregation : void 0;
|
|
15581
15753
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15582
|
-
const required2 =
|
|
15754
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15755
|
+
rawEvaluator.required,
|
|
15756
|
+
rawEvaluator.min_score,
|
|
15757
|
+
name21,
|
|
15758
|
+
evalId
|
|
15759
|
+
);
|
|
15583
15760
|
evaluators.push({
|
|
15584
15761
|
name: name21,
|
|
15585
15762
|
type: "field-accuracy",
|
|
@@ -15587,6 +15764,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15587
15764
|
...validAggregation ? { aggregation: validAggregation } : {},
|
|
15588
15765
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15589
15766
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15767
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15590
15768
|
...negate !== void 0 ? { negate } : {}
|
|
15591
15769
|
});
|
|
15592
15770
|
continue;
|
|
@@ -15600,13 +15778,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15600
15778
|
continue;
|
|
15601
15779
|
}
|
|
15602
15780
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15603
|
-
const required2 =
|
|
15781
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15782
|
+
rawEvaluator.required,
|
|
15783
|
+
rawEvaluator.min_score,
|
|
15784
|
+
name21,
|
|
15785
|
+
evalId
|
|
15786
|
+
);
|
|
15604
15787
|
evaluators.push({
|
|
15605
15788
|
name: name21,
|
|
15606
15789
|
type: "latency",
|
|
15607
15790
|
threshold,
|
|
15608
15791
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15609
15792
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15793
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15610
15794
|
...negate !== void 0 ? { negate } : {}
|
|
15611
15795
|
});
|
|
15612
15796
|
continue;
|
|
@@ -15620,13 +15804,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15620
15804
|
continue;
|
|
15621
15805
|
}
|
|
15622
15806
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15623
|
-
const required2 =
|
|
15807
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15808
|
+
rawEvaluator.required,
|
|
15809
|
+
rawEvaluator.min_score,
|
|
15810
|
+
name21,
|
|
15811
|
+
evalId
|
|
15812
|
+
);
|
|
15624
15813
|
evaluators.push({
|
|
15625
15814
|
name: name21,
|
|
15626
15815
|
type: "cost",
|
|
15627
15816
|
budget,
|
|
15628
15817
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15629
15818
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15819
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15630
15820
|
...negate !== void 0 ? { negate } : {}
|
|
15631
15821
|
});
|
|
15632
15822
|
continue;
|
|
@@ -15658,13 +15848,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15658
15848
|
continue;
|
|
15659
15849
|
}
|
|
15660
15850
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15661
|
-
const required2 =
|
|
15851
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15852
|
+
rawEvaluator.required,
|
|
15853
|
+
rawEvaluator.min_score,
|
|
15854
|
+
name21,
|
|
15855
|
+
evalId
|
|
15856
|
+
);
|
|
15662
15857
|
evaluators.push({
|
|
15663
15858
|
name: name21,
|
|
15664
15859
|
type: "token-usage",
|
|
15665
15860
|
...validLimits,
|
|
15666
15861
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15667
15862
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15863
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15668
15864
|
...negate !== void 0 ? { negate } : {}
|
|
15669
15865
|
});
|
|
15670
15866
|
continue;
|
|
@@ -15710,13 +15906,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15710
15906
|
continue;
|
|
15711
15907
|
}
|
|
15712
15908
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15713
|
-
const required2 =
|
|
15909
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15910
|
+
rawEvaluator.required,
|
|
15911
|
+
rawEvaluator.min_score,
|
|
15912
|
+
name21,
|
|
15913
|
+
evalId
|
|
15914
|
+
);
|
|
15714
15915
|
evaluators.push({
|
|
15715
15916
|
name: name21,
|
|
15716
15917
|
type: "execution-metrics",
|
|
15717
15918
|
...validThresholds,
|
|
15718
15919
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15719
15920
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15921
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15720
15922
|
...negate !== void 0 ? { negate } : {}
|
|
15721
15923
|
});
|
|
15722
15924
|
continue;
|
|
@@ -15730,7 +15932,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15730
15932
|
const rawShouldTrigger = rawEvaluator.should_trigger;
|
|
15731
15933
|
const shouldTrigger = typeof rawShouldTrigger === "boolean" ? rawShouldTrigger : void 0;
|
|
15732
15934
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15733
|
-
const required2 =
|
|
15935
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15936
|
+
rawEvaluator.required,
|
|
15937
|
+
rawEvaluator.min_score,
|
|
15938
|
+
name21,
|
|
15939
|
+
evalId
|
|
15940
|
+
);
|
|
15734
15941
|
evaluators.push({
|
|
15735
15942
|
name: name21,
|
|
15736
15943
|
type: "skill-trigger",
|
|
@@ -15738,6 +15945,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15738
15945
|
...shouldTrigger !== void 0 ? { should_trigger: shouldTrigger } : {},
|
|
15739
15946
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15740
15947
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15948
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15741
15949
|
...negate !== void 0 ? { negate } : {}
|
|
15742
15950
|
});
|
|
15743
15951
|
continue;
|
|
@@ -15749,13 +15957,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15749
15957
|
continue;
|
|
15750
15958
|
}
|
|
15751
15959
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15752
|
-
const required2 =
|
|
15960
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15961
|
+
rawEvaluator.required,
|
|
15962
|
+
rawEvaluator.min_score,
|
|
15963
|
+
name21,
|
|
15964
|
+
evalId
|
|
15965
|
+
);
|
|
15753
15966
|
evaluators.push({
|
|
15754
15967
|
name: name21,
|
|
15755
15968
|
type: "contains",
|
|
15756
15969
|
value,
|
|
15757
15970
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15758
15971
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15972
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15759
15973
|
...negate !== void 0 ? { negate } : {}
|
|
15760
15974
|
});
|
|
15761
15975
|
continue;
|
|
@@ -15769,13 +15983,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15769
15983
|
continue;
|
|
15770
15984
|
}
|
|
15771
15985
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15772
|
-
const required2 =
|
|
15986
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
15987
|
+
rawEvaluator.required,
|
|
15988
|
+
rawEvaluator.min_score,
|
|
15989
|
+
name21,
|
|
15990
|
+
evalId
|
|
15991
|
+
);
|
|
15773
15992
|
evaluators.push({
|
|
15774
15993
|
name: name21,
|
|
15775
15994
|
type: typeValue,
|
|
15776
15995
|
value,
|
|
15777
15996
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15778
15997
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
15998
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15779
15999
|
...negate !== void 0 ? { negate } : {}
|
|
15780
16000
|
});
|
|
15781
16001
|
continue;
|
|
@@ -15787,13 +16007,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15787
16007
|
continue;
|
|
15788
16008
|
}
|
|
15789
16009
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15790
|
-
const required2 =
|
|
16010
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16011
|
+
rawEvaluator.required,
|
|
16012
|
+
rawEvaluator.min_score,
|
|
16013
|
+
name21,
|
|
16014
|
+
evalId
|
|
16015
|
+
);
|
|
15791
16016
|
evaluators.push({
|
|
15792
16017
|
name: name21,
|
|
15793
16018
|
type: "icontains",
|
|
15794
16019
|
value,
|
|
15795
16020
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15796
16021
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16022
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15797
16023
|
...negate !== void 0 ? { negate } : {}
|
|
15798
16024
|
});
|
|
15799
16025
|
continue;
|
|
@@ -15807,13 +16033,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15807
16033
|
continue;
|
|
15808
16034
|
}
|
|
15809
16035
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15810
|
-
const required2 =
|
|
16036
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16037
|
+
rawEvaluator.required,
|
|
16038
|
+
rawEvaluator.min_score,
|
|
16039
|
+
name21,
|
|
16040
|
+
evalId
|
|
16041
|
+
);
|
|
15811
16042
|
evaluators.push({
|
|
15812
16043
|
name: name21,
|
|
15813
16044
|
type: typeValue,
|
|
15814
16045
|
value,
|
|
15815
16046
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15816
16047
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16048
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15817
16049
|
...negate !== void 0 ? { negate } : {}
|
|
15818
16050
|
});
|
|
15819
16051
|
continue;
|
|
@@ -15825,13 +16057,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15825
16057
|
continue;
|
|
15826
16058
|
}
|
|
15827
16059
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15828
|
-
const required2 =
|
|
16060
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16061
|
+
rawEvaluator.required,
|
|
16062
|
+
rawEvaluator.min_score,
|
|
16063
|
+
name21,
|
|
16064
|
+
evalId
|
|
16065
|
+
);
|
|
15829
16066
|
evaluators.push({
|
|
15830
16067
|
name: name21,
|
|
15831
16068
|
type: typeValue,
|
|
15832
16069
|
value,
|
|
15833
16070
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15834
16071
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16072
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15835
16073
|
...negate !== void 0 ? { negate } : {}
|
|
15836
16074
|
});
|
|
15837
16075
|
continue;
|
|
@@ -15844,7 +16082,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15844
16082
|
}
|
|
15845
16083
|
const flags = asString(rawEvaluator.flags);
|
|
15846
16084
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15847
|
-
const required2 =
|
|
16085
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16086
|
+
rawEvaluator.required,
|
|
16087
|
+
rawEvaluator.min_score,
|
|
16088
|
+
name21,
|
|
16089
|
+
evalId
|
|
16090
|
+
);
|
|
15848
16091
|
evaluators.push({
|
|
15849
16092
|
name: name21,
|
|
15850
16093
|
type: "regex",
|
|
@@ -15852,18 +16095,25 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15852
16095
|
...flags !== void 0 ? { flags } : {},
|
|
15853
16096
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15854
16097
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16098
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15855
16099
|
...negate !== void 0 ? { negate } : {}
|
|
15856
16100
|
});
|
|
15857
16101
|
continue;
|
|
15858
16102
|
}
|
|
15859
16103
|
if (typeValue === "is-json") {
|
|
15860
16104
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15861
|
-
const required2 =
|
|
16105
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16106
|
+
rawEvaluator.required,
|
|
16107
|
+
rawEvaluator.min_score,
|
|
16108
|
+
name21,
|
|
16109
|
+
evalId
|
|
16110
|
+
);
|
|
15862
16111
|
evaluators.push({
|
|
15863
16112
|
name: name21,
|
|
15864
16113
|
type: "is-json",
|
|
15865
16114
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15866
16115
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16116
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15867
16117
|
...negate !== void 0 ? { negate } : {}
|
|
15868
16118
|
});
|
|
15869
16119
|
continue;
|
|
@@ -15875,13 +16125,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15875
16125
|
continue;
|
|
15876
16126
|
}
|
|
15877
16127
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15878
|
-
const required2 =
|
|
16128
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16129
|
+
rawEvaluator.required,
|
|
16130
|
+
rawEvaluator.min_score,
|
|
16131
|
+
name21,
|
|
16132
|
+
evalId
|
|
16133
|
+
);
|
|
15879
16134
|
evaluators.push({
|
|
15880
16135
|
name: name21,
|
|
15881
16136
|
type: "equals",
|
|
15882
16137
|
value,
|
|
15883
16138
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15884
16139
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16140
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15885
16141
|
...negate !== void 0 ? { negate } : {}
|
|
15886
16142
|
});
|
|
15887
16143
|
continue;
|
|
@@ -15917,7 +16173,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15917
16173
|
continue;
|
|
15918
16174
|
}
|
|
15919
16175
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15920
|
-
const required2 =
|
|
16176
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16177
|
+
rawEvaluator.required,
|
|
16178
|
+
rawEvaluator.min_score,
|
|
16179
|
+
name21,
|
|
16180
|
+
evalId
|
|
16181
|
+
);
|
|
15921
16182
|
evaluators.push({
|
|
15922
16183
|
name: name21,
|
|
15923
16184
|
type: "llm-grader",
|
|
@@ -15925,6 +16186,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15925
16186
|
...graderTargetName ? { target: graderTargetName } : {},
|
|
15926
16187
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
15927
16188
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16189
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
15928
16190
|
...negate !== void 0 ? { negate } : {}
|
|
15929
16191
|
});
|
|
15930
16192
|
continue;
|
|
@@ -15994,7 +16256,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
15994
16256
|
continue;
|
|
15995
16257
|
}
|
|
15996
16258
|
const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
15997
|
-
const required2 =
|
|
16259
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
16260
|
+
rawEvaluator.required,
|
|
16261
|
+
rawEvaluator.min_score,
|
|
16262
|
+
name21,
|
|
16263
|
+
evalId
|
|
16264
|
+
);
|
|
15998
16265
|
evaluators.push({
|
|
15999
16266
|
name: name21,
|
|
16000
16267
|
type: "llm-grader",
|
|
@@ -16002,12 +16269,18 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
16002
16269
|
...graderTargetName ? { target: graderTargetName } : {},
|
|
16003
16270
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
16004
16271
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
16272
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
16005
16273
|
...negate !== void 0 ? { negate } : {}
|
|
16006
16274
|
});
|
|
16007
16275
|
continue;
|
|
16008
16276
|
}
|
|
16009
16277
|
const weight = validateWeight(rawEvaluator.weight, name21, evalId);
|
|
16010
|
-
const required =
|
|
16278
|
+
const { required, min_score } = parseRequiredAndMinScore(
|
|
16279
|
+
rawEvaluator.required,
|
|
16280
|
+
rawEvaluator.min_score,
|
|
16281
|
+
name21,
|
|
16282
|
+
evalId
|
|
16283
|
+
);
|
|
16011
16284
|
const knownProps = /* @__PURE__ */ new Set([
|
|
16012
16285
|
"name",
|
|
16013
16286
|
"type",
|
|
@@ -16018,6 +16291,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
16018
16291
|
"weight",
|
|
16019
16292
|
"config",
|
|
16020
16293
|
"required",
|
|
16294
|
+
"min_score",
|
|
16021
16295
|
"negate",
|
|
16022
16296
|
"max_steps",
|
|
16023
16297
|
"maxSteps",
|
|
@@ -16047,6 +16321,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
16047
16321
|
...graderTargetName ? { target: graderTargetName } : {},
|
|
16048
16322
|
...weight !== void 0 ? { weight } : {},
|
|
16049
16323
|
...required !== void 0 ? { required } : {},
|
|
16324
|
+
...min_score !== void 0 ? { min_score } : {},
|
|
16050
16325
|
...negate !== void 0 ? { negate } : {},
|
|
16051
16326
|
...finalConfig ? { config: finalConfig } : {},
|
|
16052
16327
|
...llmMaxSteps !== void 0 ? { max_steps: llmMaxSteps } : {},
|
|
@@ -16178,10 +16453,23 @@ ${detailBlock}${ANSI_RESET4}`);
|
|
|
16178
16453
|
console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET4}`);
|
|
16179
16454
|
}
|
|
16180
16455
|
}
|
|
16181
|
-
function
|
|
16182
|
-
|
|
16183
|
-
if (typeof
|
|
16184
|
-
|
|
16456
|
+
function parseRequiredAndMinScore(rawRequired, rawMinScore, evaluatorName, evalId) {
|
|
16457
|
+
const result = {};
|
|
16458
|
+
if (typeof rawMinScore === "number" && rawMinScore > 0 && rawMinScore <= 1) {
|
|
16459
|
+
result.min_score = rawMinScore;
|
|
16460
|
+
}
|
|
16461
|
+
if (rawRequired === true) {
|
|
16462
|
+
result.required = true;
|
|
16463
|
+
} else if (typeof rawRequired === "number" && rawRequired > 0 && rawRequired <= 1) {
|
|
16464
|
+
if (result.min_score === void 0) {
|
|
16465
|
+
result.min_score = rawRequired;
|
|
16466
|
+
}
|
|
16467
|
+
result.required = rawRequired;
|
|
16468
|
+
logWarning2(
|
|
16469
|
+
`Evaluator '${evaluatorName}' in '${evalId}': 'required: ${rawRequired}' is deprecated. Use 'required: true' + 'min_score: ${rawRequired}' instead.`
|
|
16470
|
+
);
|
|
16471
|
+
}
|
|
16472
|
+
return result;
|
|
16185
16473
|
}
|
|
16186
16474
|
function validateWeight(rawWeight, evaluatorName, evalId) {
|
|
16187
16475
|
if (rawWeight === void 0) {
|
|
@@ -16224,16 +16512,30 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
|
|
|
16224
16512
|
const id = asString(rawRubric.id) ?? `rubric-${index + 1}`;
|
|
16225
16513
|
const expectedOutcome = asString(rawRubric.outcome) ?? "";
|
|
16226
16514
|
const weight = typeof rawRubric.weight === "number" ? rawRubric.weight : 1;
|
|
16515
|
+
let minScore;
|
|
16227
16516
|
let requiredMinScore;
|
|
16228
16517
|
let required;
|
|
16229
|
-
if (typeof rawRubric.
|
|
16230
|
-
const
|
|
16231
|
-
if (
|
|
16518
|
+
if (typeof rawRubric.min_score === "number") {
|
|
16519
|
+
const ms = rawRubric.min_score;
|
|
16520
|
+
if (ms <= 0 || ms > 1) {
|
|
16521
|
+
throw new Error(
|
|
16522
|
+
`Invalid min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be in (0, 1] (got ${ms})`
|
|
16523
|
+
);
|
|
16524
|
+
}
|
|
16525
|
+
minScore = ms;
|
|
16526
|
+
requiredMinScore = Math.round(ms * 10);
|
|
16527
|
+
} else if (typeof rawRubric.required_min_score === "number") {
|
|
16528
|
+
const rms = rawRubric.required_min_score;
|
|
16529
|
+
if (!Number.isInteger(rms) || rms < 0 || rms > 10) {
|
|
16232
16530
|
throw new Error(
|
|
16233
|
-
`Invalid required_min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be an integer 0-10 (got ${
|
|
16531
|
+
`Invalid required_min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be an integer 0-10 (got ${rms})`
|
|
16234
16532
|
);
|
|
16235
16533
|
}
|
|
16236
|
-
requiredMinScore =
|
|
16534
|
+
requiredMinScore = rms;
|
|
16535
|
+
minScore = rms / 10;
|
|
16536
|
+
logWarning2(
|
|
16537
|
+
`Rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': 'required_min_score: ${rms}' is deprecated. Use 'min_score: ${rms / 10}' (0-1 scale) instead.`
|
|
16538
|
+
);
|
|
16237
16539
|
}
|
|
16238
16540
|
if (typeof rawRubric.required === "boolean") {
|
|
16239
16541
|
required = rawRubric.required;
|
|
@@ -16253,6 +16555,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
|
|
|
16253
16555
|
weight,
|
|
16254
16556
|
...expectedOutcome.length > 0 ? { outcome: expectedOutcome } : {},
|
|
16255
16557
|
...required !== void 0 ? { required } : {},
|
|
16558
|
+
...minScore !== void 0 ? { min_score: minScore } : {},
|
|
16256
16559
|
...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {},
|
|
16257
16560
|
score_ranges: scoreRanges
|
|
16258
16561
|
});
|
|
@@ -16269,6 +16572,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
|
|
|
16269
16572
|
weight,
|
|
16270
16573
|
// Default to required: true if not specified (backward compatibility)
|
|
16271
16574
|
required: required ?? true,
|
|
16575
|
+
...minScore !== void 0 ? { min_score: minScore } : {},
|
|
16272
16576
|
...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {}
|
|
16273
16577
|
});
|
|
16274
16578
|
}
|
|
@@ -16397,12 +16701,22 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
16397
16701
|
id: asString(rubric.id) ?? `rubric-${index + 1}`,
|
|
16398
16702
|
weight: typeof rubric.weight === "number" ? rubric.weight : 1
|
|
16399
16703
|
};
|
|
16704
|
+
let inlineMinScore;
|
|
16705
|
+
let inlineRequiredMinScore;
|
|
16706
|
+
if (typeof rubric.min_score === "number") {
|
|
16707
|
+
inlineMinScore = rubric.min_score;
|
|
16708
|
+
inlineRequiredMinScore = Math.round(inlineMinScore * 10);
|
|
16709
|
+
} else if (typeof rubric.required_min_score === "number") {
|
|
16710
|
+
inlineRequiredMinScore = rubric.required_min_score;
|
|
16711
|
+
inlineMinScore = inlineRequiredMinScore / 10;
|
|
16712
|
+
}
|
|
16400
16713
|
if (scoreRanges && scoreRanges.length > 0) {
|
|
16401
16714
|
return {
|
|
16402
16715
|
...baseRubric,
|
|
16403
16716
|
...expectedOutcome.length > 0 ? { outcome: expectedOutcome } : {},
|
|
16404
16717
|
...typeof rubric.required === "boolean" ? { required: rubric.required } : {},
|
|
16405
|
-
...
|
|
16718
|
+
...inlineMinScore !== void 0 ? { min_score: inlineMinScore } : {},
|
|
16719
|
+
...inlineRequiredMinScore !== void 0 ? { required_min_score: inlineRequiredMinScore } : {},
|
|
16406
16720
|
score_ranges: scoreRanges
|
|
16407
16721
|
};
|
|
16408
16722
|
}
|
|
@@ -16410,7 +16724,8 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
16410
16724
|
...baseRubric,
|
|
16411
16725
|
outcome: expectedOutcome,
|
|
16412
16726
|
required: typeof rubric.required === "boolean" ? rubric.required : true,
|
|
16413
|
-
...
|
|
16727
|
+
...inlineMinScore !== void 0 ? { min_score: inlineMinScore } : {},
|
|
16728
|
+
...inlineRequiredMinScore !== void 0 ? { required_min_score: inlineRequiredMinScore } : {}
|
|
16414
16729
|
};
|
|
16415
16730
|
}).filter((r) => r.outcome && r.outcome.length > 0 || "score_ranges" in r && r.score_ranges);
|
|
16416
16731
|
if (rubricItems.length === 0) {
|
|
@@ -16792,6 +17107,9 @@ function resolveExpectedMessages(raw) {
|
|
|
16792
17107
|
var ANSI_YELLOW5 = "\x1B[33m";
|
|
16793
17108
|
var ANSI_RED2 = "\x1B[31m";
|
|
16794
17109
|
var ANSI_RESET6 = "\x1B[0m";
|
|
17110
|
+
function matchesFilter(id, filter2) {
|
|
17111
|
+
return typeof filter2 === "string" ? micromatch.isMatch(id, filter2) : filter2.some((pattern) => micromatch.isMatch(id, pattern));
|
|
17112
|
+
}
|
|
16795
17113
|
function detectFormat(filePath) {
|
|
16796
17114
|
const ext = path6.extname(filePath).toLowerCase();
|
|
16797
17115
|
if (ext === ".jsonl") return "jsonl";
|
|
@@ -16859,40 +17177,40 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
16859
17177
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
16860
17178
|
const rawFile = await readFile5(absoluteTestPath, "utf8");
|
|
16861
17179
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
16862
|
-
const
|
|
16863
|
-
const
|
|
17180
|
+
const fallbackSuiteName = path6.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
17181
|
+
const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
|
|
16864
17182
|
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
|
|
16865
17183
|
const globalExecution = sidecar.execution;
|
|
16866
17184
|
if (verbose) {
|
|
16867
17185
|
console.log(`
|
|
16868
|
-
[JSONL
|
|
17186
|
+
[JSONL Suite: ${evalFilePath}]`);
|
|
16869
17187
|
console.log(` Cases: ${rawCases.length}`);
|
|
16870
|
-
console.log(`
|
|
17188
|
+
console.log(` Suite: ${suiteName}`);
|
|
16871
17189
|
if (sidecar.description) {
|
|
16872
17190
|
console.log(` Description: ${sidecar.description}`);
|
|
16873
17191
|
}
|
|
16874
17192
|
}
|
|
16875
17193
|
const results = [];
|
|
16876
17194
|
for (let lineIndex = 0; lineIndex < rawCases.length; lineIndex++) {
|
|
16877
|
-
const
|
|
17195
|
+
const testCaseConfig = rawCases[lineIndex];
|
|
16878
17196
|
const lineNumber = lineIndex + 1;
|
|
16879
|
-
const id = asString4(
|
|
16880
|
-
if (filterPattern && (!id || !
|
|
17197
|
+
const id = asString4(testCaseConfig.id);
|
|
17198
|
+
if (filterPattern && (!id || !matchesFilter(id, filterPattern))) {
|
|
16881
17199
|
continue;
|
|
16882
17200
|
}
|
|
16883
|
-
const conversationId = asString4(
|
|
16884
|
-
let outcome = asString4(
|
|
16885
|
-
if (!outcome &&
|
|
16886
|
-
outcome = asString4(
|
|
17201
|
+
const conversationId = asString4(testCaseConfig.conversation_id);
|
|
17202
|
+
let outcome = asString4(testCaseConfig.criteria);
|
|
17203
|
+
if (!outcome && testCaseConfig.expected_outcome !== void 0) {
|
|
17204
|
+
outcome = asString4(testCaseConfig.expected_outcome);
|
|
16887
17205
|
if (outcome) {
|
|
16888
17206
|
logWarning4(
|
|
16889
|
-
`Test '${asString4(
|
|
17207
|
+
`Test '${asString4(testCaseConfig.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
|
|
16890
17208
|
);
|
|
16891
17209
|
}
|
|
16892
17210
|
}
|
|
16893
|
-
const rawInputMessages = resolveInputMessages(
|
|
16894
|
-
const expectedMessages = resolveExpectedMessages(
|
|
16895
|
-
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 ||
|
|
17211
|
+
const rawInputMessages = resolveInputMessages(testCaseConfig);
|
|
17212
|
+
const expectedMessages = resolveExpectedMessages(testCaseConfig) ?? [];
|
|
17213
|
+
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || testCaseConfig.assert !== void 0;
|
|
16896
17214
|
if (!id || !hasEvaluationSpec || !rawInputMessages || rawInputMessages.length === 0) {
|
|
16897
17215
|
logError2(
|
|
16898
17216
|
`Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
|
|
@@ -16929,18 +17247,23 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
16929
17247
|
}
|
|
16930
17248
|
}
|
|
16931
17249
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
16932
|
-
const caseExecution = isJsonObject(
|
|
17250
|
+
const caseExecution = isJsonObject(testCaseConfig.execution) ? testCaseConfig.execution : void 0;
|
|
16933
17251
|
const mergedExecution = caseExecution ?? globalExecution;
|
|
16934
|
-
const
|
|
17252
|
+
const testCaseEvaluatorKind = coerceEvaluator(testCaseConfig.evaluator, id) ?? globalEvaluator;
|
|
16935
17253
|
let evaluators;
|
|
16936
17254
|
try {
|
|
16937
|
-
evaluators = await parseEvaluators(
|
|
17255
|
+
evaluators = await parseEvaluators(
|
|
17256
|
+
testCaseConfig,
|
|
17257
|
+
mergedExecution,
|
|
17258
|
+
searchRoots,
|
|
17259
|
+
id ?? "unknown"
|
|
17260
|
+
);
|
|
16938
17261
|
} catch (error) {
|
|
16939
17262
|
const message = error instanceof Error ? error.message : String(error);
|
|
16940
17263
|
logError2(`Skipping test '${id}' at line ${lineNumber}: ${message}`);
|
|
16941
17264
|
continue;
|
|
16942
17265
|
}
|
|
16943
|
-
const inlineRubrics =
|
|
17266
|
+
const inlineRubrics = testCaseConfig.rubrics;
|
|
16944
17267
|
if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
|
|
16945
17268
|
const rubricEvaluator = parseInlineRubrics(inlineRubrics);
|
|
16946
17269
|
if (rubricEvaluator) {
|
|
@@ -16951,7 +17274,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
16951
17274
|
const userFilePaths = collectResolvedInputFilePaths(inputMessages);
|
|
16952
17275
|
const testCase = {
|
|
16953
17276
|
id,
|
|
16954
|
-
|
|
17277
|
+
suite: suiteName,
|
|
16955
17278
|
conversation_id: conversationId,
|
|
16956
17279
|
question,
|
|
16957
17280
|
input: inputMessages,
|
|
@@ -16959,7 +17282,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
16959
17282
|
reference_answer: referenceAnswer,
|
|
16960
17283
|
file_paths: userFilePaths,
|
|
16961
17284
|
criteria: outcome ?? "",
|
|
16962
|
-
evaluator:
|
|
17285
|
+
evaluator: testCaseEvaluatorKind,
|
|
16963
17286
|
assertions: evaluators
|
|
16964
17287
|
};
|
|
16965
17288
|
results.push(testCase);
|
|
@@ -17135,6 +17458,9 @@ function buildChatPromptFromSegments(options) {
|
|
|
17135
17458
|
var ANSI_YELLOW6 = "\x1B[33m";
|
|
17136
17459
|
var ANSI_RED3 = "\x1B[31m";
|
|
17137
17460
|
var ANSI_RESET7 = "\x1B[0m";
|
|
17461
|
+
function matchesFilter2(id, filter2) {
|
|
17462
|
+
return typeof filter2 === "string" ? micromatch2.isMatch(id, filter2) : filter2.some((pattern) => micromatch2.isMatch(id, pattern));
|
|
17463
|
+
}
|
|
17138
17464
|
function resolveTests(suite) {
|
|
17139
17465
|
if (suite.tests !== void 0) return suite.tests;
|
|
17140
17466
|
if (suite.eval_cases !== void 0) {
|
|
@@ -17214,18 +17540,18 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
17214
17540
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
17215
17541
|
}
|
|
17216
17542
|
const suite = interpolated;
|
|
17217
|
-
const
|
|
17218
|
-
const
|
|
17219
|
-
const
|
|
17220
|
-
const
|
|
17543
|
+
const suiteNameFromFile = asString5(suite.name)?.trim();
|
|
17544
|
+
const fallbackSuiteName = path7.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
|
|
17545
|
+
const suiteName = suiteNameFromFile && suiteNameFromFile.length > 0 ? suiteNameFromFile : fallbackSuiteName;
|
|
17546
|
+
const rawTestCases = resolveTests(suite);
|
|
17221
17547
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
17222
17548
|
const evalFileDir = path7.dirname(absoluteTestPath);
|
|
17223
|
-
let
|
|
17224
|
-
if (typeof
|
|
17225
|
-
const externalPath = path7.resolve(evalFileDir,
|
|
17226
|
-
|
|
17227
|
-
} else if (Array.isArray(
|
|
17228
|
-
|
|
17549
|
+
let expandedTestCases;
|
|
17550
|
+
if (typeof rawTestCases === "string") {
|
|
17551
|
+
const externalPath = path7.resolve(evalFileDir, rawTestCases);
|
|
17552
|
+
expandedTestCases = await loadCasesFromFile(externalPath);
|
|
17553
|
+
} else if (Array.isArray(rawTestCases)) {
|
|
17554
|
+
expandedTestCases = await expandFileReferences(rawTestCases, evalFileDir);
|
|
17229
17555
|
} else {
|
|
17230
17556
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
|
|
17231
17557
|
}
|
|
@@ -17240,32 +17566,33 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
17240
17566
|
}
|
|
17241
17567
|
const globalExecution = suiteAssertions !== void 0 ? { ...rawGlobalExecution ?? {}, assertions: suiteAssertions } : rawGlobalExecution;
|
|
17242
17568
|
const results = [];
|
|
17243
|
-
for (const
|
|
17244
|
-
if (!isJsonObject(
|
|
17569
|
+
for (const rawTestCase of expandedTestCases) {
|
|
17570
|
+
if (!isJsonObject(rawTestCase)) {
|
|
17245
17571
|
logWarning5("Skipping invalid test entry (expected object)");
|
|
17246
17572
|
continue;
|
|
17247
17573
|
}
|
|
17248
|
-
const
|
|
17249
|
-
const id = asString5(
|
|
17250
|
-
if (filterPattern && (!id || !
|
|
17574
|
+
const testCaseConfig = rawTestCase;
|
|
17575
|
+
const id = asString5(testCaseConfig.id);
|
|
17576
|
+
if (filterPattern && (!id || !matchesFilter2(id, filterPattern))) {
|
|
17251
17577
|
continue;
|
|
17252
17578
|
}
|
|
17253
|
-
const conversationId = asString5(
|
|
17254
|
-
let outcome = asString5(
|
|
17255
|
-
if (!outcome &&
|
|
17256
|
-
outcome = asString5(
|
|
17579
|
+
const conversationId = asString5(testCaseConfig.conversation_id);
|
|
17580
|
+
let outcome = asString5(testCaseConfig.criteria);
|
|
17581
|
+
if (!outcome && testCaseConfig.expected_outcome !== void 0) {
|
|
17582
|
+
outcome = asString5(testCaseConfig.expected_outcome);
|
|
17257
17583
|
if (outcome) {
|
|
17258
17584
|
logWarning5(
|
|
17259
|
-
`Test '${asString5(
|
|
17585
|
+
`Test '${asString5(testCaseConfig.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
|
|
17260
17586
|
);
|
|
17261
17587
|
}
|
|
17262
17588
|
}
|
|
17263
|
-
const caseExecution = isJsonObject(
|
|
17589
|
+
const caseExecution = isJsonObject(testCaseConfig.execution) ? testCaseConfig.execution : void 0;
|
|
17264
17590
|
const skipDefaults = caseExecution?.skip_defaults === true;
|
|
17591
|
+
const caseThreshold = typeof caseExecution?.threshold === "number" && caseExecution.threshold >= 0 && caseExecution.threshold <= 1 ? caseExecution.threshold : void 0;
|
|
17265
17592
|
const effectiveSuiteInputFiles = suiteInputFiles && !skipDefaults ? suiteInputFiles : void 0;
|
|
17266
|
-
const testInputMessages = resolveInputMessages(
|
|
17267
|
-
const expectedMessages = resolveExpectedMessages(
|
|
17268
|
-
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 ||
|
|
17593
|
+
const testInputMessages = resolveInputMessages(testCaseConfig, effectiveSuiteInputFiles);
|
|
17594
|
+
const expectedMessages = resolveExpectedMessages(testCaseConfig) ?? [];
|
|
17595
|
+
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || testCaseConfig.assertions !== void 0 || testCaseConfig.assert !== void 0;
|
|
17269
17596
|
if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
|
|
17270
17597
|
logError3(
|
|
17271
17598
|
`Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assertions`
|
|
@@ -17312,16 +17639,21 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
17312
17639
|
}
|
|
17313
17640
|
}
|
|
17314
17641
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
17315
|
-
const
|
|
17642
|
+
const testCaseEvaluatorKind = coerceEvaluator(testCaseConfig.evaluator, id) ?? globalEvaluator;
|
|
17316
17643
|
let evaluators;
|
|
17317
17644
|
try {
|
|
17318
|
-
evaluators = await parseEvaluators(
|
|
17645
|
+
evaluators = await parseEvaluators(
|
|
17646
|
+
testCaseConfig,
|
|
17647
|
+
globalExecution,
|
|
17648
|
+
searchRoots,
|
|
17649
|
+
id ?? "unknown"
|
|
17650
|
+
);
|
|
17319
17651
|
} catch (error) {
|
|
17320
17652
|
const message = error instanceof Error ? error.message : String(error);
|
|
17321
17653
|
logError3(`Skipping test '${id}': ${message}`);
|
|
17322
17654
|
continue;
|
|
17323
17655
|
}
|
|
17324
|
-
const inlineRubrics =
|
|
17656
|
+
const inlineRubrics = testCaseConfig.rubrics;
|
|
17325
17657
|
if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
|
|
17326
17658
|
const rubricEvaluator = parseInlineRubrics(inlineRubrics);
|
|
17327
17659
|
if (rubricEvaluator) {
|
|
@@ -17330,13 +17662,13 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
17330
17662
|
}
|
|
17331
17663
|
warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
|
|
17332
17664
|
const userFilePaths = collectResolvedInputFilePaths(inputMessages);
|
|
17333
|
-
const caseWorkspace = await resolveWorkspaceConfig(
|
|
17665
|
+
const caseWorkspace = await resolveWorkspaceConfig(testCaseConfig.workspace, evalFileDir);
|
|
17334
17666
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
17335
|
-
const metadata = isJsonObject(
|
|
17336
|
-
const caseTargets = extractTargetsFromTestCase(
|
|
17667
|
+
const metadata = isJsonObject(testCaseConfig.metadata) ? testCaseConfig.metadata : void 0;
|
|
17668
|
+
const caseTargets = extractTargetsFromTestCase(testCaseConfig);
|
|
17337
17669
|
const testCase = {
|
|
17338
17670
|
id,
|
|
17339
|
-
|
|
17671
|
+
suite: suiteName,
|
|
17340
17672
|
category: options?.category,
|
|
17341
17673
|
conversation_id: conversationId,
|
|
17342
17674
|
question,
|
|
@@ -17345,11 +17677,12 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
17345
17677
|
reference_answer: referenceAnswer,
|
|
17346
17678
|
file_paths: userFilePaths,
|
|
17347
17679
|
criteria: outcome ?? "",
|
|
17348
|
-
evaluator:
|
|
17680
|
+
evaluator: testCaseEvaluatorKind,
|
|
17349
17681
|
assertions: evaluators,
|
|
17350
17682
|
workspace: mergedWorkspace,
|
|
17351
17683
|
metadata,
|
|
17352
|
-
targets: caseTargets
|
|
17684
|
+
targets: caseTargets,
|
|
17685
|
+
...caseThreshold !== void 0 ? { threshold: caseThreshold } : {}
|
|
17353
17686
|
};
|
|
17354
17687
|
results.push(testCase);
|
|
17355
17688
|
}
|
|
@@ -17880,7 +18213,7 @@ var AzureProvider = class {
|
|
|
17880
18213
|
};
|
|
17881
18214
|
this.retryConfig = config.retry;
|
|
17882
18215
|
const azure = createAzure(buildAzureOptions(config));
|
|
17883
|
-
this.model = azure.chat(config.deploymentName);
|
|
18216
|
+
this.model = config.apiFormat === "responses" ? azure(config.deploymentName) : azure.chat(config.deploymentName);
|
|
17884
18217
|
}
|
|
17885
18218
|
id;
|
|
17886
18219
|
kind = "azure";
|
|
@@ -18006,7 +18339,9 @@ function buildAzureOptions(config) {
|
|
|
18006
18339
|
const options = {
|
|
18007
18340
|
apiKey: config.apiKey,
|
|
18008
18341
|
apiVersion: config.version,
|
|
18009
|
-
|
|
18342
|
+
// Chat completions still use deployment-scoped Azure URLs for compatibility
|
|
18343
|
+
// with existing deployments. Responses API should use the SDK's v1 path.
|
|
18344
|
+
useDeploymentBasedUrls: config.apiFormat !== "responses"
|
|
18010
18345
|
};
|
|
18011
18346
|
const baseURL = normalizeAzureBaseUrl(config.resourceName);
|
|
18012
18347
|
if (baseURL) {
|
|
@@ -19447,15 +19782,16 @@ var CliProvider = class {
|
|
|
19447
19782
|
outputFilePath
|
|
19448
19783
|
);
|
|
19449
19784
|
const renderedCommand = renderTemplate(this.config.command, templateValues);
|
|
19785
|
+
const effectiveCwd = requests[0]?.cwd ?? this.config.cwd;
|
|
19450
19786
|
if (this.verbose) {
|
|
19451
19787
|
console.log(
|
|
19452
|
-
`[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${
|
|
19788
|
+
`[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${effectiveCwd ?? ""} command=${renderedCommand}`
|
|
19453
19789
|
);
|
|
19454
19790
|
}
|
|
19455
19791
|
try {
|
|
19456
19792
|
const startTime = Date.now();
|
|
19457
19793
|
const result = await this.runCommand(renderedCommand, {
|
|
19458
|
-
cwd:
|
|
19794
|
+
cwd: effectiveCwd,
|
|
19459
19795
|
env: process.env,
|
|
19460
19796
|
timeoutMs: this.config.timeoutMs,
|
|
19461
19797
|
signal: controller.signal
|
|
@@ -19488,7 +19824,7 @@ var CliProvider = class {
|
|
|
19488
19824
|
command: renderedCommand,
|
|
19489
19825
|
stderr: result.stderr,
|
|
19490
19826
|
exitCode: result.exitCode ?? 0,
|
|
19491
|
-
cwd:
|
|
19827
|
+
cwd: effectiveCwd,
|
|
19492
19828
|
outputFile: outputFilePath
|
|
19493
19829
|
}
|
|
19494
19830
|
};
|
|
@@ -19506,7 +19842,7 @@ var CliProvider = class {
|
|
|
19506
19842
|
command: renderedCommand,
|
|
19507
19843
|
stderr: result.stderr,
|
|
19508
19844
|
exitCode: result.exitCode ?? 0,
|
|
19509
|
-
cwd:
|
|
19845
|
+
cwd: effectiveCwd,
|
|
19510
19846
|
outputFile: outputFilePath,
|
|
19511
19847
|
error: errorMessage
|
|
19512
19848
|
}
|
|
@@ -19521,7 +19857,7 @@ var CliProvider = class {
|
|
|
19521
19857
|
command: renderedCommand,
|
|
19522
19858
|
stderr: result.stderr,
|
|
19523
19859
|
exitCode: result.exitCode ?? 0,
|
|
19524
|
-
cwd:
|
|
19860
|
+
cwd: effectiveCwd,
|
|
19525
19861
|
outputFile: outputFilePath,
|
|
19526
19862
|
recordId: evalCaseId
|
|
19527
19863
|
}
|
|
@@ -21464,6 +21800,73 @@ function subscribeToPiLogEntries(listener) {
|
|
|
21464
21800
|
store.delete(listener);
|
|
21465
21801
|
};
|
|
21466
21802
|
}
|
|
21803
|
+
var SUBPROVIDER_ALIASES = {
|
|
21804
|
+
azure: "azure-openai-responses"
|
|
21805
|
+
};
|
|
21806
|
+
var SUBPROVIDER_ALIASES_WITH_BASE_URL = {
|
|
21807
|
+
// Azure v1 endpoints are OpenAI-compatible; use the standard client
|
|
21808
|
+
// to avoid AzureOpenAI adding api-version query params.
|
|
21809
|
+
azure: "openai-responses"
|
|
21810
|
+
};
|
|
21811
|
+
var ENV_KEY_MAP = {
|
|
21812
|
+
google: "GEMINI_API_KEY",
|
|
21813
|
+
gemini: "GEMINI_API_KEY",
|
|
21814
|
+
anthropic: "ANTHROPIC_API_KEY",
|
|
21815
|
+
openai: "OPENAI_API_KEY",
|
|
21816
|
+
groq: "GROQ_API_KEY",
|
|
21817
|
+
xai: "XAI_API_KEY",
|
|
21818
|
+
openrouter: "OPENROUTER_API_KEY",
|
|
21819
|
+
azure: "AZURE_OPENAI_API_KEY"
|
|
21820
|
+
};
|
|
21821
|
+
var ENV_BASE_URL_MAP = {
|
|
21822
|
+
openai: "OPENAI_BASE_URL",
|
|
21823
|
+
azure: "AZURE_OPENAI_BASE_URL",
|
|
21824
|
+
openrouter: "OPENROUTER_BASE_URL"
|
|
21825
|
+
};
|
|
21826
|
+
function resolveSubprovider(name21, hasBaseUrl = false) {
|
|
21827
|
+
const lower = name21.toLowerCase();
|
|
21828
|
+
if (hasBaseUrl) {
|
|
21829
|
+
const alias = SUBPROVIDER_ALIASES_WITH_BASE_URL[lower];
|
|
21830
|
+
if (alias) return alias;
|
|
21831
|
+
}
|
|
21832
|
+
return SUBPROVIDER_ALIASES[lower] ?? name21;
|
|
21833
|
+
}
|
|
21834
|
+
function resolveCliProvider(name21) {
|
|
21835
|
+
const lower = name21.toLowerCase();
|
|
21836
|
+
if (lower === "azure") return "azure-openai-responses";
|
|
21837
|
+
return name21;
|
|
21838
|
+
}
|
|
21839
|
+
function resolveEnvKeyName(provider, hasBaseUrl = false) {
|
|
21840
|
+
const lower = provider.toLowerCase();
|
|
21841
|
+
if (hasBaseUrl && lower === "azure") return "OPENAI_API_KEY";
|
|
21842
|
+
return ENV_KEY_MAP[lower];
|
|
21843
|
+
}
|
|
21844
|
+
function resolveEnvBaseUrlName(provider, hasBaseUrl = false) {
|
|
21845
|
+
const lower = provider.toLowerCase();
|
|
21846
|
+
if (hasBaseUrl && lower === "azure") return "OPENAI_BASE_URL";
|
|
21847
|
+
return ENV_BASE_URL_MAP[lower];
|
|
21848
|
+
}
|
|
21849
|
+
/**
 * Derive an Azure OpenAI resource name from a configured base URL.
 *
 * For an http(s) URL the first host label is returned (the text between the
 * scheme and the first "." or "/"). Any other input is assumed to already be
 * a resource name and is returned as-is, minus surrounding whitespace.
 *
 * The scheme match is case-insensitive and the input is trimmed, matching the
 * way normalizeAzureSdkBaseUrl treats the same setting.
 *
 * @param {string} baseUrl - Base URL or bare resource name.
 * @returns {string} The resource name.
 */
function extractAzureResourceName(baseUrl) {
  const trimmed = baseUrl.trim();
  const urlMatch = trimmed.match(/^https?:\/\/([^./]+)/i);
  return urlMatch ? urlMatch[1] : trimmed;
}
|
|
21854
|
+
/**
 * Normalise an Azure base-URL setting into a URL ending in `/openai/v1`.
 *
 * - Surrounding whitespace and trailing slashes are dropped first.
 * - An empty result is returned unchanged.
 * - A value without an http(s) scheme is treated as a resource name and
 *   expanded to `https://<name>.openai.azure.com/openai/v1`.
 * - A URL already ending in `/openai/v1` is kept; one ending in `/openai`
 *   gets `/v1` appended; anything else gets `/openai/v1` appended.
 *
 * @param {string} baseUrl - Configured base URL or resource name.
 * @returns {string} Normalised URL (or "" for blank input).
 */
function normalizeAzureSdkBaseUrl(baseUrl) {
  const cleaned = baseUrl.trim().replace(/\/+$/, "");
  if (cleaned === "") return cleaned;

  const hasScheme = /^https?:\/\//i.test(cleaned);
  if (!hasScheme) return `https://${cleaned}.openai.azure.com/openai/v1`;

  if (/\/openai\/v1$/i.test(cleaned)) return cleaned;

  const suffix = /\/openai$/i.test(cleaned) ? "/v1" : "/openai/v1";
  return `${cleaned}${suffix}`;
}
|
|
21467
21870
|
function extractPiTextContent(content) {
|
|
21468
21871
|
if (typeof content === "string") {
|
|
21469
21872
|
return content;
|
|
@@ -21619,12 +22022,12 @@ var PiCliProvider = class {
|
|
|
21619
22022
|
buildPiArgs(prompt, inputFiles) {
|
|
21620
22023
|
const args = [];
|
|
21621
22024
|
if (this.config.subprovider) {
|
|
21622
|
-
args.push("--provider", this.config.subprovider);
|
|
22025
|
+
args.push("--provider", resolveCliProvider(this.config.subprovider));
|
|
21623
22026
|
}
|
|
21624
22027
|
if (this.config.model) {
|
|
21625
22028
|
args.push("--model", this.config.model);
|
|
21626
22029
|
}
|
|
21627
|
-
if (this.config.apiKey) {
|
|
22030
|
+
if (this.config.apiKey && this.config.subprovider?.toLowerCase() !== "azure") {
|
|
21628
22031
|
args.push("--api-key", this.config.apiKey);
|
|
21629
22032
|
}
|
|
21630
22033
|
args.push("--mode", "json");
|
|
@@ -21676,35 +22079,35 @@ ${prompt}` : prompt;
|
|
|
21676
22079
|
}
|
|
21677
22080
|
buildEnv() {
|
|
21678
22081
|
const env = { ...process.env };
|
|
21679
|
-
|
|
21680
|
-
|
|
21681
|
-
|
|
21682
|
-
|
|
21683
|
-
|
|
21684
|
-
|
|
21685
|
-
|
|
21686
|
-
|
|
21687
|
-
|
|
21688
|
-
|
|
21689
|
-
|
|
21690
|
-
|
|
21691
|
-
|
|
21692
|
-
|
|
22082
|
+
const provider = this.config.subprovider?.toLowerCase() ?? "google";
|
|
22083
|
+
if (provider === "azure") {
|
|
22084
|
+
if (this.config.apiKey) {
|
|
22085
|
+
env.AZURE_OPENAI_API_KEY = this.config.apiKey;
|
|
22086
|
+
}
|
|
22087
|
+
if (this.config.baseUrl) {
|
|
22088
|
+
env.AZURE_OPENAI_RESOURCE_NAME = extractAzureResourceName(this.config.baseUrl);
|
|
22089
|
+
}
|
|
22090
|
+
} else {
|
|
22091
|
+
if (this.config.apiKey) {
|
|
22092
|
+
const envKey = resolveEnvKeyName(provider);
|
|
22093
|
+
if (envKey) {
|
|
22094
|
+
env[envKey] = this.config.apiKey;
|
|
22095
|
+
}
|
|
21693
22096
|
}
|
|
21694
22097
|
}
|
|
21695
22098
|
if (this.config.subprovider) {
|
|
21696
|
-
const
|
|
22099
|
+
const resolvedProvider = resolveCliProvider(this.config.subprovider);
|
|
21697
22100
|
const PROVIDER_OWN_PREFIXES = {
|
|
21698
22101
|
openrouter: ["OPENROUTER_"],
|
|
21699
22102
|
anthropic: ["ANTHROPIC_"],
|
|
21700
22103
|
openai: ["OPENAI_"],
|
|
21701
|
-
azure: ["AZURE_OPENAI_"],
|
|
22104
|
+
"azure-openai-responses": ["AZURE_OPENAI_"],
|
|
21702
22105
|
google: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
|
|
21703
22106
|
gemini: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
|
|
21704
22107
|
groq: ["GROQ_"],
|
|
21705
22108
|
xai: ["XAI_"]
|
|
21706
22109
|
};
|
|
21707
|
-
const ownPrefixes = PROVIDER_OWN_PREFIXES[
|
|
22110
|
+
const ownPrefixes = PROVIDER_OWN_PREFIXES[resolvedProvider] ?? [];
|
|
21708
22111
|
const allOtherPrefixes = Object.entries(PROVIDER_OWN_PREFIXES).filter(([key]) => key !== provider).flatMap(([, prefixes]) => prefixes);
|
|
21709
22112
|
for (const key of Object.keys(env)) {
|
|
21710
22113
|
if (allOtherPrefixes.some((prefix) => key.startsWith(prefix)) && !ownPrefixes.some((prefix) => key.startsWith(prefix))) {
|
|
@@ -21995,6 +22398,24 @@ function extractMessages(events) {
|
|
|
21995
22398
|
}
|
|
21996
22399
|
}
|
|
21997
22400
|
}
|
|
22401
|
+
if (messages) {
|
|
22402
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
22403
|
+
if (messages[i].role === "assistant" && !messages[i].content) {
|
|
22404
|
+
for (let j = events.length - 1; j >= 0; j--) {
|
|
22405
|
+
const evt = events[j];
|
|
22406
|
+
if (!evt || evt.type !== "message_end") continue;
|
|
22407
|
+
const msg = evt.message;
|
|
22408
|
+
if (msg?.role !== "assistant") continue;
|
|
22409
|
+
const text2 = extractPiTextContent(msg.content);
|
|
22410
|
+
if (text2) {
|
|
22411
|
+
messages[i] = { ...messages[i], content: text2 };
|
|
22412
|
+
break;
|
|
22413
|
+
}
|
|
22414
|
+
}
|
|
22415
|
+
break;
|
|
22416
|
+
}
|
|
22417
|
+
}
|
|
22418
|
+
}
|
|
21998
22419
|
const eventToolCalls = extractToolCallsFromEvents(events);
|
|
21999
22420
|
if (eventToolCalls.length > 0) {
|
|
22000
22421
|
injectEventToolCalls(messages, eventToolCalls);
|
|
@@ -22179,17 +22600,43 @@ function formatTimeoutSuffix3(timeoutMs) {
|
|
|
22179
22600
|
if (!timeoutMs || timeoutMs <= 0) return "";
|
|
22180
22601
|
return ` after ${Math.ceil(timeoutMs / 1e3)}s`;
|
|
22181
22602
|
}
|
|
22603
|
+
/**
 * On Windows, try to replace a `.cmd` launcher shim with a direct
 * `node <script.js>` invocation so the process can be spawned without a shell.
 *
 * Returns a pair `[command, prefixArgs]`. The input is returned unchanged
 * (`[executable, []]`) when not on win32, when the name already ends in
 * `.js`/`.exe`, when `where` cannot locate it, or when the shim cannot be
 * read/parsed or the referenced script is not accessible.
 *
 * @param {string} executable - Command name or path to resolve.
 * @returns {[string, string[]]} Command to spawn and arguments to prepend.
 */
function resolveWindowsCmd(executable) {
  if (process.platform !== "win32") return [executable, []];
  const lower = executable.toLowerCase();
  if (lower.endsWith(".js") || lower.endsWith(".exe")) return [executable, []];
  let fullPath;
  try {
    // NOTE(review): `executable` is interpolated into a shell command line;
    // confirm callers only pass trusted configuration values here.
    fullPath = execSync(`where ${executable}`, { encoding: "utf-8" }).trim().split(/\r?\n/)[0].trim();
  } catch {
    // `where` failed (not found / not runnable): leave the command untouched.
    return [executable, []];
  }
  // Compare the extension case-insensitively, like the .js/.exe checks above;
  // otherwise a path reported as "PI.CMD" would become "PI.CMD.cmd".
  const cmdPath = fullPath.toLowerCase().endsWith(".cmd") ? fullPath : `${fullPath}.cmd`;
  try {
    const content = readFileSync2(cmdPath, "utf-8");
    // Looks for the shim line of the form: "%_prog%" "<...>.js"
    const match = content.match(/"?%_prog%"?\s+"([^"]+\.js)"/);
    if (match) {
      const dp0 = path19.dirname(path19.resolve(cmdPath));
      // A replacer function keeps the directory literal; a replacement string
      // would interpret "$&", "$$", ... sequences occurring in the path.
      const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, () => `${dp0}${path19.sep}`);
      try {
        accessSync(scriptPath);
        return ["node", [scriptPath]];
      } catch {
        // Script referenced by the shim is not accessible: fall through.
      }
    }
  } catch {
    // Shim missing or unreadable: fall through to the unchanged command.
  }
  return [executable, []];
}
|
|
22182
22630
|
async function defaultPiRunner(options) {
|
|
22183
22631
|
return await new Promise((resolve2, reject) => {
|
|
22184
22632
|
const parts = options.executable.split(/\s+/);
|
|
22185
|
-
const
|
|
22186
|
-
const executableArgs = parts.slice(1);
|
|
22633
|
+
const [resolvedExe, prefixArgs] = resolveWindowsCmd(parts[0]);
|
|
22634
|
+
const executableArgs = [...prefixArgs, ...parts.slice(1)];
|
|
22187
22635
|
const allArgs = [...executableArgs, ...options.args];
|
|
22188
|
-
const child = spawn3(
|
|
22636
|
+
const child = spawn3(resolvedExe, allArgs, {
|
|
22189
22637
|
cwd: options.cwd,
|
|
22190
22638
|
env: options.env,
|
|
22191
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
22192
|
-
shell: false
|
|
22639
|
+
stdio: ["pipe", "pipe", "pipe"]
|
|
22193
22640
|
});
|
|
22194
22641
|
let stdout = "";
|
|
22195
22642
|
let stderr = "";
|
|
@@ -22242,6 +22689,30 @@ async function defaultPiRunner(options) {
|
|
|
22242
22689
|
});
|
|
22243
22690
|
});
|
|
22244
22691
|
}
|
|
22692
|
+
// Set once the AGENTV_HOME notice has been printed, so it appears at most once.
var logged = false;

/**
 * Directory that holds agentv's per-user state.
 *
 * Uses the AGENTV_HOME environment variable when it is set to a non-empty
 * value other than the literal string "undefined" (a one-time warning reports
 * the override); otherwise falls back to `.agentv` in the user's home directory.
 *
 * @returns {string} Path of the agentv home directory.
 */
function getAgentvHome() {
  const override = process.env.AGENTV_HOME;
  const hasOverride = Boolean(override) && override !== "undefined";
  if (!hasOverride) {
    return path20.join(os2.homedir(), ".agentv");
  }
  if (!logged) {
    logged = true;
    console.warn(`Using AGENTV_HOME: ${override}`);
  }
  return override;
}
|
|
22704
|
+
/**
 * @returns {string} The `workspaces` directory inside the agentv home.
 */
function getWorkspacesRoot() {
  const home = getAgentvHome();
  return path20.join(home, "workspaces");
}
|
|
22707
|
+
/**
 * @returns {string} The `subagents` directory inside the agentv home.
 */
function getSubagentsRoot() {
  const home = getAgentvHome();
  return path20.join(home, "subagents");
}
|
|
22710
|
+
/**
 * @returns {string} The `trace-state` directory inside the agentv home.
 */
function getTraceStateRoot() {
  const home = getAgentvHome();
  return path20.join(home, "trace-state");
}
|
|
22713
|
+
/**
 * @returns {string} The `workspace-pool` directory inside the agentv home.
 */
function getWorkspacePoolRoot() {
  const home = getAgentvHome();
  return path20.join(home, "workspace-pool");
}
|
|
22245
22716
|
var piCodingAgentModule = null;
|
|
22246
22717
|
var piAiModule = null;
|
|
22247
22718
|
var loadingPromise = null;
|
|
@@ -22259,46 +22730,126 @@ async function promptInstall() {
|
|
|
22259
22730
|
rl.close();
|
|
22260
22731
|
}
|
|
22261
22732
|
}
|
|
22262
|
-
function
|
|
22263
|
-
|
|
22264
|
-
|
|
22265
|
-
|
|
22733
|
+
/**
 * Directory used for the agentv-managed copy of the Pi SDK:
 * `<agentv home>/deps/pi-sdk`. The path is only computed, not created.
 *
 * @returns {string} Managed install directory.
 */
function findManagedSdkInstallRoot() {
  const home = getAgentvHome();
  return path21.join(home, "deps", "pi-sdk");
}
|
|
22736
|
+
/**
 * Ask npm for its global `node_modules` directory by running `npm root -g`.
 * stdin and stderr of the child are ignored; only stdout is captured.
 *
 * @returns {string | undefined} The trimmed path, or undefined when the
 *   command fails or prints nothing.
 */
function resolveGlobalNpmRoot() {
  const execOptions = {
    encoding: "utf-8",
    stdio: ["ignore", "pipe", "ignore"]
  };
  try {
    const root = execSync2("npm root -g", execOptions).trim();
    if (root.length > 0) {
      return root;
    }
    return void 0;
  } catch {
    // Best effort: npm missing or failing simply means "no global root".
    return void 0;
  }
}
|
|
22747
|
+
/**
 * Build the path of a package's `dist/index.js` inside a global npm root.
 * The module name is split on "/" so a scoped name maps to nested directories.
 *
 * @param {string} moduleName - Package name, e.g. "@scope/pkg".
 * @param {string} globalNpmRoot - Global `node_modules` directory.
 * @returns {string} Path to `<root>/<name segments>/dist/index.js`.
 */
function buildGlobalModuleEntry(moduleName, globalNpmRoot) {
  const nameSegments = moduleName.split("/");
  return path21.join(globalNpmRoot, ...nameSegments, "dist", "index.js");
}
|
|
22750
|
+
/**
 * Return the first candidate for which `accessSync2` does not throw.
 * Candidates are probed in order; probing stops at the first success.
 *
 * @param {Iterable<string>} paths - Candidate paths, most preferred first.
 * @returns {string | undefined} First accessible path, or undefined if none.
 */
function findAccessiblePath(paths) {
  const isAccessible = (target) => {
    try {
      accessSync2(target);
      return true;
    } catch {
      // An inaccessible candidate is expected; move on to the next one.
      return false;
    }
  };
  for (const candidate of paths) {
    if (isAccessible(candidate)) {
      return candidate;
    }
  }
  return void 0;
}
|
|
22278
|
-
async function
|
|
22760
|
+
/**
 * Try to load both Pi SDK packages through normal module resolution.
 * On success the module-level `piCodingAgentModule` and `piAiModule` are
 * populated together; on any failure neither is assigned.
 *
 * @returns {Promise<boolean>} true when both imports succeeded.
 */
async function tryImportLocalSdkModules() {
  try {
    // Start both imports before awaiting so they load concurrently.
    const pending = [
      import("@mariozechner/pi-coding-agent"),
      import("@mariozechner/pi-ai")
    ];
    const [codingAgent, ai] = await Promise.all(pending);
    piCodingAgentModule = codingAgent;
    piAiModule = ai;
    return true;
  } catch {
    // Not resolvable from here; the caller falls back to other locations.
    return false;
  }
}
|
|
22771
|
+
/**
 * Try to load both Pi SDK packages from the agentv-managed install directory.
 *
 * pi-coding-agent must sit directly under the managed `node_modules`; pi-ai is
 * accepted either next to it or nested inside pi-coding-agent's own
 * `node_modules`. Both entry files must be accessible before any import is
 * attempted. On success the module-level `piCodingAgentModule` and
 * `piAiModule` are populated.
 *
 * @returns {Promise<boolean>} true when both modules were imported.
 */
async function tryImportManagedSdkModules() {
  const scopeDir = path21.join(findManagedSdkInstallRoot(), "node_modules", "@mariozechner");
  const codingAgentDir = path21.join(scopeDir, "pi-coding-agent");

  const piCodingAgentEntry = findAccessiblePath([
    path21.join(codingAgentDir, "dist", "index.js")
  ]);
  const piAiEntry = findAccessiblePath([
    path21.join(scopeDir, "pi-ai", "dist", "index.js"),
    path21.join(codingAgentDir, "node_modules", "@mariozechner", "pi-ai", "dist", "index.js")
  ]);
  if (!(piCodingAgentEntry && piAiEntry)) return false;

  try {
    // Absolute paths must be converted to file:// URLs for dynamic import().
    const pending = [
      import(pathToFileURL(piCodingAgentEntry).href),
      import(pathToFileURL(piAiEntry).href)
    ];
    const [codingAgent, ai] = await Promise.all(pending);
    piCodingAgentModule = codingAgent;
    piAiModule = ai;
    return true;
  } catch {
    return false;
  }
}
|
|
22801
|
+
/**
 * Try to load both Pi SDK packages from npm's global `node_modules`.
 *
 * Gives up immediately when the global root cannot be determined. pi-ai is
 * accepted either as its own global package or nested inside the global
 * pi-coding-agent package. On success the module-level `piCodingAgentModule`
 * and `piAiModule` are populated.
 *
 * @returns {Promise<boolean>} true when both modules were imported.
 */
async function tryImportGlobalSdkModules() {
  const globalNpmRoot = resolveGlobalNpmRoot();
  if (!globalNpmRoot) return false;

  const codingAgentCandidates = [
    buildGlobalModuleEntry("@mariozechner/pi-coding-agent", globalNpmRoot)
  ];
  const piAiCandidates = [
    buildGlobalModuleEntry("@mariozechner/pi-ai", globalNpmRoot),
    path21.join(
      globalNpmRoot,
      "@mariozechner",
      "pi-coding-agent",
      "node_modules",
      "@mariozechner",
      "pi-ai",
      "dist",
      "index.js"
    )
  ];
  const piCodingAgentEntry = findAccessiblePath(codingAgentCandidates);
  const piAiEntry = findAccessiblePath(piAiCandidates);
  if (!(piCodingAgentEntry && piAiEntry)) return false;

  try {
    // Absolute paths must be converted to file:// URLs for dynamic import().
    const pending = [
      import(pathToFileURL(piCodingAgentEntry).href),
      import(pathToFileURL(piAiEntry).href)
    ];
    const [codingAgent, ai] = await Promise.all(pending);
    piCodingAgentModule = codingAgent;
    piAiModule = ai;
    return true;
  } catch {
    return false;
  }
}
|
|
22831
|
+
/**
 * Install @mariozechner/pi-coding-agent into `installDir` with npm.
 * The directory is created if needed, npm runs synchronously with its output
 * attached to this process, and a failing npm command throws.
 *
 * @param {string} installDir - Directory to run `npm install` in.
 */
function installSdkModules(installDir) {
  const installCommand = "npm install --no-save --no-package-lock @mariozechner/pi-coding-agent";
  console.error(`Installing @mariozechner/pi-coding-agent into ${installDir} via npm...`);
  mkdirSync(installDir, { recursive: true });
  execSync2(installCommand, { cwd: installDir, stdio: "inherit" });
}
|
|
22839
|
+
/**
 * Load the Pi SDK modules, trying each known location in order:
 * normal resolution, the agentv-managed install, then npm's global root.
 * If none works and the user accepts the install prompt, the SDK is installed
 * into the managed directory and loaded from there.
 *
 * @returns {Promise<void>} Resolves once the modules are loaded.
 * @throws {Error} When the SDK could not be loaded from any location.
 */
async function doLoadSdkModules() {
  const loaders = [
    tryImportLocalSdkModules,
    tryImportManagedSdkModules,
    tryImportGlobalSdkModules
  ];
  // Sequential on purpose: later locations are only probed if earlier ones fail.
  for (const load of loaders) {
    if (await load()) return;
  }
  const installAccepted = await promptInstall();
  if (installAccepted) {
    installSdkModules(findManagedSdkInstallRoot());
    const loadedAfterInstall = await tryImportManagedSdkModules();
    if (loadedAfterInstall) return;
  }
  throw new Error(
    "pi-coding-agent SDK is not installed. Install it with:\n npm install @mariozechner/pi-coding-agent"
  );
}
|
|
22303
22854
|
async function loadSdkModules() {
|
|
22304
22855
|
if (!piCodingAgentModule || !piAiModule) {
|
|
@@ -22326,7 +22877,9 @@ async function loadSdkModules() {
|
|
|
22326
22877
|
codingTools: piSdk.codingTools,
|
|
22327
22878
|
toolMap,
|
|
22328
22879
|
SessionManager: piSdk.SessionManager,
|
|
22329
|
-
getModel: piAi.getModel
|
|
22880
|
+
getModel: piAi.getModel,
|
|
22881
|
+
// biome-ignore lint/suspicious/noExplicitAny: registerBuiltInApiProviders exists at runtime but not in type defs
|
|
22882
|
+
registerBuiltInApiProviders: piAi.registerBuiltInApiProviders
|
|
22330
22883
|
};
|
|
22331
22884
|
}
|
|
22332
22885
|
var PiCodingAgentProvider = class {
|
|
@@ -22348,17 +22901,35 @@ var PiCodingAgentProvider = class {
|
|
|
22348
22901
|
const startTime = (/* @__PURE__ */ new Date()).toISOString();
|
|
22349
22902
|
const startMs = Date.now();
|
|
22350
22903
|
const sdk = await loadSdkModules();
|
|
22904
|
+
sdk.registerBuiltInApiProviders();
|
|
22351
22905
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
22352
22906
|
try {
|
|
22353
22907
|
const cwd = this.resolveCwd(request.cwd);
|
|
22354
|
-
const
|
|
22908
|
+
const rawProvider = this.config.subprovider ?? "google";
|
|
22909
|
+
const normalizedBaseUrl = this.normalizeSdkBaseUrl(rawProvider, this.config.baseUrl);
|
|
22910
|
+
const hasBaseUrl = !!normalizedBaseUrl;
|
|
22911
|
+
const providerName = resolveSubprovider(rawProvider, hasBaseUrl);
|
|
22355
22912
|
const modelId = this.config.model ?? "gemini-2.5-flash";
|
|
22356
|
-
this.setApiKeyEnv(
|
|
22357
|
-
|
|
22913
|
+
this.setApiKeyEnv(rawProvider, hasBaseUrl);
|
|
22914
|
+
this.setBaseUrlEnv(rawProvider, normalizedBaseUrl, hasBaseUrl);
|
|
22915
|
+
let model = sdk.getModel(providerName, modelId);
|
|
22916
|
+
if (model && normalizedBaseUrl) {
|
|
22917
|
+
model = { ...model, baseUrl: normalizedBaseUrl };
|
|
22918
|
+
}
|
|
22358
22919
|
if (!model) {
|
|
22359
|
-
|
|
22360
|
-
|
|
22361
|
-
|
|
22920
|
+
const envProvider = providerName.replace(/-responses$/, "");
|
|
22921
|
+
model = {
|
|
22922
|
+
id: modelId,
|
|
22923
|
+
name: modelId,
|
|
22924
|
+
api: providerName,
|
|
22925
|
+
provider: envProvider,
|
|
22926
|
+
baseUrl: normalizedBaseUrl ?? "",
|
|
22927
|
+
reasoning: false,
|
|
22928
|
+
input: ["text"],
|
|
22929
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
22930
|
+
contextWindow: 128e3,
|
|
22931
|
+
maxTokens: 16384
|
|
22932
|
+
};
|
|
22362
22933
|
}
|
|
22363
22934
|
const tools = this.resolveTools(sdk);
|
|
22364
22935
|
const { session } = await sdk.createAgentSession({
|
|
@@ -22511,28 +23082,35 @@ ${fileList}`;
|
|
|
22511
23082
|
}
|
|
22512
23083
|
}
|
|
22513
23084
|
/** Maps config apiKey to the provider-specific env var the SDK reads. */
|
|
22514
|
-
setApiKeyEnv(providerName) {
|
|
23085
|
+
setApiKeyEnv(providerName, hasBaseUrl = false) {
|
|
22515
23086
|
if (!this.config.apiKey) return;
|
|
22516
|
-
const
|
|
22517
|
-
google: "GEMINI_API_KEY",
|
|
22518
|
-
gemini: "GEMINI_API_KEY",
|
|
22519
|
-
anthropic: "ANTHROPIC_API_KEY",
|
|
22520
|
-
openai: "OPENAI_API_KEY",
|
|
22521
|
-
groq: "GROQ_API_KEY",
|
|
22522
|
-
xai: "XAI_API_KEY",
|
|
22523
|
-
openrouter: "OPENROUTER_API_KEY"
|
|
22524
|
-
};
|
|
22525
|
-
const envKey = ENV_KEY_MAP[providerName.toLowerCase()];
|
|
23087
|
+
const envKey = resolveEnvKeyName(providerName, hasBaseUrl);
|
|
22526
23088
|
if (envKey) {
|
|
22527
23089
|
process.env[envKey] = this.config.apiKey;
|
|
22528
23090
|
}
|
|
22529
23091
|
}
|
|
23092
|
+
/** Maps config baseUrl to the provider-specific env var the SDK reads. */
|
|
23093
|
+
setBaseUrlEnv(providerName, baseUrl = this.config.baseUrl, hasBaseUrl = false) {
|
|
23094
|
+
const normalizedBaseUrl = this.normalizeSdkBaseUrl(providerName, baseUrl);
|
|
23095
|
+
if (!normalizedBaseUrl) return;
|
|
23096
|
+
const envKey = resolveEnvBaseUrlName(providerName, hasBaseUrl);
|
|
23097
|
+
if (envKey) {
|
|
23098
|
+
process.env[envKey] = normalizedBaseUrl;
|
|
23099
|
+
}
|
|
23100
|
+
}
|
|
23101
|
+
normalizeSdkBaseUrl(providerName, baseUrl) {
|
|
23102
|
+
if (!baseUrl) return void 0;
|
|
23103
|
+
if (providerName.toLowerCase() === "azure") {
|
|
23104
|
+
return normalizeAzureSdkBaseUrl(baseUrl);
|
|
23105
|
+
}
|
|
23106
|
+
return baseUrl;
|
|
23107
|
+
}
|
|
22530
23108
|
resolveCwd(cwdOverride) {
|
|
22531
23109
|
if (cwdOverride) {
|
|
22532
|
-
return
|
|
23110
|
+
return path21.resolve(cwdOverride);
|
|
22533
23111
|
}
|
|
22534
23112
|
if (this.config.cwd) {
|
|
22535
|
-
return
|
|
23113
|
+
return path21.resolve(this.config.cwd);
|
|
22536
23114
|
}
|
|
22537
23115
|
return process.cwd();
|
|
22538
23116
|
}
|
|
@@ -22551,9 +23129,9 @@ ${fileList}`;
|
|
|
22551
23129
|
}
|
|
22552
23130
|
resolveLogDirectory() {
|
|
22553
23131
|
if (this.config.logDir) {
|
|
22554
|
-
return
|
|
23132
|
+
return path21.resolve(this.config.logDir);
|
|
22555
23133
|
}
|
|
22556
|
-
return
|
|
23134
|
+
return path21.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
22557
23135
|
}
|
|
22558
23136
|
async createStreamLogger(request) {
|
|
22559
23137
|
const logDir = this.resolveLogDirectory();
|
|
@@ -22567,7 +23145,7 @@ ${fileList}`;
|
|
|
22567
23145
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
22568
23146
|
return void 0;
|
|
22569
23147
|
}
|
|
22570
|
-
const filePath =
|
|
23148
|
+
const filePath = path21.join(logDir, buildLogFilename6(request, this.targetName));
|
|
22571
23149
|
try {
|
|
22572
23150
|
const logger = await PiStreamLogger2.create({
|
|
22573
23151
|
filePath,
|
|
@@ -22791,7 +23369,7 @@ async function readDirEntries(target) {
|
|
|
22791
23369
|
const entries = await readdir2(target, { withFileTypes: true });
|
|
22792
23370
|
return entries.map((entry) => ({
|
|
22793
23371
|
name: entry.name,
|
|
22794
|
-
absolutePath:
|
|
23372
|
+
absolutePath: path222.join(target, entry.name),
|
|
22795
23373
|
isDirectory: entry.isDirectory()
|
|
22796
23374
|
}));
|
|
22797
23375
|
}
|
|
@@ -22805,7 +23383,7 @@ async function removeIfExists(target) {
|
|
|
22805
23383
|
}
|
|
22806
23384
|
}
|
|
22807
23385
|
function pathToFileUri2(filePath) {
|
|
22808
|
-
const absolutePath =
|
|
23386
|
+
const absolutePath = path23.isAbsolute(filePath) ? filePath : path23.resolve(filePath);
|
|
22809
23387
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
22810
23388
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
22811
23389
|
return `file:///${normalizedPath}`;
|
|
@@ -22897,8 +23475,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
22897
23475
|
});
|
|
22898
23476
|
}
|
|
22899
23477
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
22900
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
22901
|
-
const responseList = responseFiles.map((file) => `"${
|
|
23478
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path24.basename(file)}`).join("\n");
|
|
23479
|
+
const responseList = responseFiles.map((file) => `"${path24.basename(file)}"`).join(", ");
|
|
22902
23480
|
return renderTemplate2(templateContent, {
|
|
22903
23481
|
requestFiles: requestLines,
|
|
22904
23482
|
responseList
|
|
@@ -22958,7 +23536,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
22958
23536
|
}
|
|
22959
23537
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
22960
23538
|
if (!silent) {
|
|
22961
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
23539
|
+
const fileList = responseFilesFinal.map((file) => path25.basename(file)).join(", ");
|
|
22962
23540
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
22963
23541
|
}
|
|
22964
23542
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -22967,7 +23545,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
22967
23545
|
while (pending.size > 0) {
|
|
22968
23546
|
if (Date.now() >= deadline) {
|
|
22969
23547
|
if (!silent) {
|
|
22970
|
-
const remaining = [...pending].map((f) =>
|
|
23548
|
+
const remaining = [...pending].map((f) => path25.basename(f)).join(", ");
|
|
22971
23549
|
console.error(
|
|
22972
23550
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
22973
23551
|
);
|
|
@@ -23014,30 +23592,6 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
23014
23592
|
}
|
|
23015
23593
|
return true;
|
|
23016
23594
|
}
|
|
23017
|
-
var logged = false;
|
|
23018
|
-
function getAgentvHome() {
|
|
23019
|
-
const envHome = process.env.AGENTV_HOME;
|
|
23020
|
-
if (envHome && envHome !== "undefined") {
|
|
23021
|
-
if (!logged) {
|
|
23022
|
-
logged = true;
|
|
23023
|
-
console.warn(`Using AGENTV_HOME: ${envHome}`);
|
|
23024
|
-
}
|
|
23025
|
-
return envHome;
|
|
23026
|
-
}
|
|
23027
|
-
return path25.join(os2.homedir(), ".agentv");
|
|
23028
|
-
}
|
|
23029
|
-
function getWorkspacesRoot() {
|
|
23030
|
-
return path25.join(getAgentvHome(), "workspaces");
|
|
23031
|
-
}
|
|
23032
|
-
function getSubagentsRoot() {
|
|
23033
|
-
return path25.join(getAgentvHome(), "subagents");
|
|
23034
|
-
}
|
|
23035
|
-
function getTraceStateRoot() {
|
|
23036
|
-
return path25.join(getAgentvHome(), "trace-state");
|
|
23037
|
-
}
|
|
23038
|
-
function getWorkspacePoolRoot() {
|
|
23039
|
-
return path25.join(getAgentvHome(), "workspace-pool");
|
|
23040
|
-
}
|
|
23041
23595
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
23042
23596
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
23043
23597
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
@@ -24258,9 +24812,10 @@ function resolveAndCreateProvider(definition, env = process.env) {
|
|
|
24258
24812
|
const resolved = resolveTargetDefinition(definition, env);
|
|
24259
24813
|
return createProvider(resolved);
|
|
24260
24814
|
}
|
|
24261
|
-
var
|
|
24262
|
-
|
|
24263
|
-
|
|
24815
|
+
// Minimum score that counts as a pass when no explicit threshold is supplied.
var DEFAULT_THRESHOLD = 0.8;
// Second name bound to the same value as DEFAULT_THRESHOLD.
var PASS_THRESHOLD = DEFAULT_THRESHOLD;

/**
 * Convert a numeric score into a verdict.
 *
 * @param {number} score - Score to judge.
 * @param {number} [threshold=DEFAULT_THRESHOLD] - Inclusive pass mark.
 * @returns {"pass" | "fail"} "pass" when `score >= threshold`, otherwise "fail".
 */
function scoreToVerdict(score, threshold = DEFAULT_THRESHOLD) {
  if (score >= threshold) {
    return "pass";
  }
  return "fail";
}
|
|
24265
24820
|
function clampScore(value) {
|
|
24266
24821
|
if (Number.isNaN(value) || !Number.isFinite(value)) {
|
|
@@ -24442,13 +24997,13 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
24442
24997
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
24443
24998
|
const { mkdir: mkdir16, readFile: readFile15, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
24444
24999
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
24445
|
-
const
|
|
25000
|
+
const path50 = await import("node:path");
|
|
24446
25001
|
const { randomUUID: randomUUID10 } = await import("node:crypto");
|
|
24447
|
-
const dir =
|
|
25002
|
+
const dir = path50.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
24448
25003
|
await mkdir16(dir, { recursive: true });
|
|
24449
|
-
const stdinPath =
|
|
24450
|
-
const stdoutPath =
|
|
24451
|
-
const stderrPath =
|
|
25004
|
+
const stdinPath = path50.join(dir, "stdin.txt");
|
|
25005
|
+
const stdoutPath = path50.join(dir, "stdout.txt");
|
|
25006
|
+
const stderrPath = path50.join(dir, "stderr.txt");
|
|
24452
25007
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
24453
25008
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
24454
25009
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -25629,7 +26184,7 @@ ${outputSchema2}`;
|
|
|
25629
26184
|
parts.push("[[ ## scoring_criteria ## ]]");
|
|
25630
26185
|
for (const rubric of rubrics) {
|
|
25631
26186
|
const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
|
|
25632
|
-
const minScoreLabel = rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
|
|
26187
|
+
const minScoreLabel = rubric.min_score !== void 0 ? ` [REQUIRED: min score ${rubric.min_score}]` : rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
|
|
25633
26188
|
parts.push("", `### Criterion: ${rubric.id}${weightLabel}${minScoreLabel}`);
|
|
25634
26189
|
if (rubric.outcome) {
|
|
25635
26190
|
parts.push(`Description: ${rubric.outcome}`);
|
|
@@ -25683,54 +26238,106 @@ ${outputSchema2}`;
|
|
|
25683
26238
|
async runWithRetry(options) {
|
|
25684
26239
|
const { context: context2, graderProvider, systemPrompt, userPrompt, schema, images } = options;
|
|
25685
26240
|
let lastError;
|
|
26241
|
+
let lastInvalidResponse;
|
|
26242
|
+
let shouldAttemptStructureFix = false;
|
|
25686
26243
|
for (let attempt = 1; attempt <= 3; attempt++) {
|
|
25687
26244
|
try {
|
|
25688
|
-
const
|
|
25689
|
-
|
|
25690
|
-
|
|
25691
|
-
|
|
25692
|
-
|
|
25693
|
-
|
|
25694
|
-
|
|
25695
|
-
|
|
25696
|
-
|
|
25697
|
-
|
|
25698
|
-
|
|
25699
|
-
|
|
25700
|
-
|
|
25701
|
-
|
|
25702
|
-
|
|
25703
|
-
|
|
25704
|
-
]
|
|
25705
|
-
}
|
|
25706
|
-
],
|
|
25707
|
-
...modelOptions
|
|
25708
|
-
}) : await generateText({
|
|
25709
|
-
model,
|
|
25710
|
-
system: systemPrompt,
|
|
25711
|
-
prompt: userPrompt,
|
|
25712
|
-
...modelOptions
|
|
25713
|
-
});
|
|
25714
|
-
const data2 = schema.parse(parseJsonFromText(result.text));
|
|
25715
|
-
const rawUsage = result.usage;
|
|
25716
|
-
const tokenUsage = rawUsage?.inputTokens != null && rawUsage?.outputTokens != null ? { input: rawUsage.inputTokens, output: rawUsage.outputTokens } : void 0;
|
|
25717
|
-
return { data: data2, tokenUsage };
|
|
26245
|
+
const result = await this.generateStructuredResponse({
|
|
26246
|
+
context: context2,
|
|
26247
|
+
graderProvider,
|
|
26248
|
+
systemPrompt,
|
|
26249
|
+
userPrompt,
|
|
26250
|
+
images
|
|
26251
|
+
});
|
|
26252
|
+
const canRepairResponse = result.text.trim().length > 0;
|
|
26253
|
+
lastInvalidResponse = canRepairResponse ? result : void 0;
|
|
26254
|
+
let data;
|
|
26255
|
+
try {
|
|
26256
|
+
data = schema.parse(parseJsonFromText(result.text));
|
|
26257
|
+
} catch (e) {
|
|
26258
|
+
lastError = e instanceof Error ? e : new Error(String(e));
|
|
26259
|
+
shouldAttemptStructureFix = canRepairResponse;
|
|
26260
|
+
continue;
|
|
25718
26261
|
}
|
|
25719
|
-
|
|
25720
|
-
|
|
26262
|
+
return {
|
|
26263
|
+
data,
|
|
26264
|
+
providerResponse: result.providerResponse,
|
|
26265
|
+
tokenUsage: result.tokenUsage
|
|
26266
|
+
};
|
|
26267
|
+
} catch (e) {
|
|
26268
|
+
lastError = e instanceof Error ? e : new Error(String(e));
|
|
26269
|
+
}
|
|
26270
|
+
}
|
|
26271
|
+
if (shouldAttemptStructureFix && lastInvalidResponse) {
|
|
26272
|
+
try {
|
|
26273
|
+
const repaired = await this.generateStructuredResponse({
|
|
26274
|
+
context: context2,
|
|
26275
|
+
graderProvider,
|
|
25721
26276
|
systemPrompt,
|
|
25722
|
-
|
|
25723
|
-
|
|
25724
|
-
|
|
25725
|
-
|
|
26277
|
+
userPrompt: buildStructureRepairPrompt({
|
|
26278
|
+
validationError: lastError?.message ?? "Schema validation failed",
|
|
26279
|
+
invalidResponse: lastInvalidResponse.text
|
|
26280
|
+
})
|
|
25726
26281
|
});
|
|
25727
|
-
const data = schema.parse(parseJsonFromText(
|
|
25728
|
-
return {
|
|
26282
|
+
const data = schema.parse(parseJsonFromText(repaired.text));
|
|
26283
|
+
return {
|
|
26284
|
+
data,
|
|
26285
|
+
providerResponse: repaired.providerResponse,
|
|
26286
|
+
tokenUsage: sumTokenUsage(lastInvalidResponse.tokenUsage, repaired.tokenUsage)
|
|
26287
|
+
};
|
|
25729
26288
|
} catch (e) {
|
|
25730
26289
|
lastError = e instanceof Error ? e : new Error(String(e));
|
|
25731
26290
|
}
|
|
25732
26291
|
}
|
|
25733
|
-
throw new Error(
|
|
26292
|
+
throw new Error(
|
|
26293
|
+
`Failed to parse evaluator response after 3 attempts and 1 structure-fix attempt: ${lastError?.message}`
|
|
26294
|
+
);
|
|
26295
|
+
}
|
|
26296
|
+
async generateStructuredResponse(options) {
|
|
26297
|
+
const { context: context2, graderProvider, systemPrompt, userPrompt, images } = options;
|
|
26298
|
+
const model = graderProvider.asLanguageModel?.();
|
|
26299
|
+
if (model) {
|
|
26300
|
+
const modelOptions = {
|
|
26301
|
+
...this.maxOutputTokens ? { maxTokens: this.maxOutputTokens } : {},
|
|
26302
|
+
...typeof this.temperature === "number" ? { temperature: this.temperature } : {}
|
|
26303
|
+
};
|
|
26304
|
+
const hasImages = images && images.length > 0;
|
|
26305
|
+
const result = hasImages ? await generateText({
|
|
26306
|
+
model,
|
|
26307
|
+
system: systemPrompt,
|
|
26308
|
+
messages: [
|
|
26309
|
+
{
|
|
26310
|
+
role: "user",
|
|
26311
|
+
content: [
|
|
26312
|
+
{ type: "text", text: userPrompt },
|
|
26313
|
+
...toAiSdkImageParts(images)
|
|
26314
|
+
]
|
|
26315
|
+
}
|
|
26316
|
+
],
|
|
26317
|
+
...modelOptions
|
|
26318
|
+
}) : await generateText({
|
|
26319
|
+
model,
|
|
26320
|
+
system: systemPrompt,
|
|
26321
|
+
prompt: userPrompt,
|
|
26322
|
+
...modelOptions
|
|
26323
|
+
});
|
|
26324
|
+
const rawUsage = result.usage;
|
|
26325
|
+
const tokenUsage = rawUsage?.inputTokens != null && rawUsage?.outputTokens != null ? { input: rawUsage.inputTokens, output: rawUsage.outputTokens } : void 0;
|
|
26326
|
+
return { text: result.text, tokenUsage };
|
|
26327
|
+
}
|
|
26328
|
+
const response = await graderProvider.invoke({
|
|
26329
|
+
question: userPrompt,
|
|
26330
|
+
systemPrompt,
|
|
26331
|
+
evalCaseId: context2.evalCase.id,
|
|
26332
|
+
attempt: context2.attempt,
|
|
26333
|
+
maxOutputTokens: this.maxOutputTokens,
|
|
26334
|
+
temperature: this.temperature
|
|
26335
|
+
});
|
|
26336
|
+
return {
|
|
26337
|
+
text: extractLastAssistantContent(response.output),
|
|
26338
|
+
providerResponse: response,
|
|
26339
|
+
tokenUsage: response.tokenUsage
|
|
26340
|
+
};
|
|
25734
26341
|
}
|
|
25735
26342
|
};
|
|
25736
26343
|
function buildOutputSchema() {
|
|
@@ -25750,6 +26357,29 @@ function buildOutputSchema() {
|
|
|
25750
26357
|
"}"
|
|
25751
26358
|
].join("\n");
|
|
25752
26359
|
}
|
|
26360
|
+
function buildStructureRepairPrompt(options) {
|
|
26361
|
+
const { validationError, invalidResponse } = options;
|
|
26362
|
+
return [
|
|
26363
|
+
"The following evaluation response has useful grading content but invalid JSON structure.",
|
|
26364
|
+
"Repair it to satisfy the schema in the system prompt.",
|
|
26365
|
+
"Preserve the evaluation meaning, do not re-grade the answer, and return only a single JSON object.",
|
|
26366
|
+
"",
|
|
26367
|
+
"Validation error:",
|
|
26368
|
+
validationError,
|
|
26369
|
+
"",
|
|
26370
|
+
"Invalid response:",
|
|
26371
|
+
invalidResponse
|
|
26372
|
+
].join("\n");
|
|
26373
|
+
}
|
|
26374
|
+
function sumTokenUsage(first, second) {
|
|
26375
|
+
if (!first && !second) {
|
|
26376
|
+
return void 0;
|
|
26377
|
+
}
|
|
26378
|
+
return {
|
|
26379
|
+
input: (first?.input ?? 0) + (second?.input ?? 0),
|
|
26380
|
+
output: (first?.output ?? 0) + (second?.output ?? 0)
|
|
26381
|
+
};
|
|
26382
|
+
}
|
|
25753
26383
|
function buildRubricOutputSchema() {
|
|
25754
26384
|
return `You are an expert evaluator. Evaluate the candidate answer against each rubric item.
|
|
25755
26385
|
You must return a valid JSON object matching this schema:
|
|
@@ -25849,19 +26479,21 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
25849
26479
|
rawScores[rubric.id] = rawScore;
|
|
25850
26480
|
totalWeight += rubric.weight;
|
|
25851
26481
|
weightedScoreSum += normalizedScore * rubric.weight;
|
|
25852
|
-
let
|
|
25853
|
-
if (rubric.
|
|
25854
|
-
|
|
26482
|
+
let minScoreThreshold;
|
|
26483
|
+
if (rubric.min_score !== void 0) {
|
|
26484
|
+
minScoreThreshold = rubric.min_score;
|
|
26485
|
+
} else if (rubric.required_min_score !== void 0) {
|
|
26486
|
+
minScoreThreshold = rubric.required_min_score / 10;
|
|
25855
26487
|
} else if (rubric.required === true) {
|
|
25856
|
-
|
|
26488
|
+
minScoreThreshold = 1;
|
|
25857
26489
|
}
|
|
25858
26490
|
const matchingRange = rubric.score_ranges?.find(
|
|
25859
26491
|
(r) => rawScore >= r.score_range[0] && rawScore <= r.score_range[1]
|
|
25860
26492
|
);
|
|
25861
26493
|
const rangeDescription = matchingRange?.outcome ?? "";
|
|
25862
26494
|
const criterionLabel = rubric.outcome ?? rubric.id;
|
|
25863
|
-
const passed = !(
|
|
25864
|
-
if (
|
|
26495
|
+
const passed = !(minScoreThreshold !== void 0 && normalizedScore < minScoreThreshold) && rawScore >= 7;
|
|
26496
|
+
if (minScoreThreshold !== void 0 && normalizedScore < minScoreThreshold) {
|
|
25865
26497
|
failedRequired = true;
|
|
25866
26498
|
}
|
|
25867
26499
|
assertions.push({
|
|
@@ -25938,11 +26570,11 @@ function createFilesystemTools(workspacePath) {
|
|
|
25938
26570
|
execute: async (input) => {
|
|
25939
26571
|
try {
|
|
25940
26572
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
25941
|
-
const
|
|
25942
|
-
if (
|
|
26573
|
+
const stat11 = await fs2.stat(resolved);
|
|
26574
|
+
if (stat11.isDirectory()) {
|
|
25943
26575
|
return { error: `'${input.path}' is a directory, not a file` };
|
|
25944
26576
|
}
|
|
25945
|
-
const buffer = Buffer.alloc(Math.min(
|
|
26577
|
+
const buffer = Buffer.alloc(Math.min(stat11.size, MAX_FILE_SIZE));
|
|
25946
26578
|
const fd = await fs2.open(resolved, "r");
|
|
25947
26579
|
try {
|
|
25948
26580
|
await fd.read(buffer, 0, buffer.length, 0);
|
|
@@ -25950,8 +26582,8 @@ function createFilesystemTools(workspacePath) {
|
|
|
25950
26582
|
await fd.close();
|
|
25951
26583
|
}
|
|
25952
26584
|
const content = buffer.toString("utf-8");
|
|
25953
|
-
const truncated =
|
|
25954
|
-
return { content, truncated, size:
|
|
26585
|
+
const truncated = stat11.size > MAX_FILE_SIZE;
|
|
26586
|
+
return { content, truncated, size: stat11.size };
|
|
25955
26587
|
} catch (error) {
|
|
25956
26588
|
return { error: error instanceof Error ? error.message : String(error) };
|
|
25957
26589
|
}
|
|
@@ -26002,8 +26634,8 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
26002
26634
|
const ext = path35.extname(entry.name).toLowerCase();
|
|
26003
26635
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
26004
26636
|
try {
|
|
26005
|
-
const
|
|
26006
|
-
if (
|
|
26637
|
+
const stat11 = await fs2.stat(fullPath);
|
|
26638
|
+
if (stat11.size > MAX_FILE_SIZE) continue;
|
|
26007
26639
|
const content = await fs2.readFile(fullPath, "utf-8");
|
|
26008
26640
|
const lines = content.split("\n");
|
|
26009
26641
|
for (let i = 0; i < lines.length; i++) {
|
|
@@ -26636,115 +27268,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
26636
27268
|
* Evaluate a single field against the expected value.
|
|
26637
27269
|
*/
|
|
26638
27270
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
26639
|
-
const { path:
|
|
26640
|
-
const candidateValue = resolvePath(candidateData,
|
|
26641
|
-
const expectedValue = resolvePath(expectedData,
|
|
27271
|
+
const { path: path50, match, required = true, weight = 1 } = fieldConfig;
|
|
27272
|
+
const candidateValue = resolvePath(candidateData, path50);
|
|
27273
|
+
const expectedValue = resolvePath(expectedData, path50);
|
|
26642
27274
|
if (expectedValue === void 0) {
|
|
26643
27275
|
return {
|
|
26644
|
-
path:
|
|
27276
|
+
path: path50,
|
|
26645
27277
|
score: 1,
|
|
26646
27278
|
// No expected value means no comparison needed
|
|
26647
27279
|
weight,
|
|
26648
27280
|
hit: true,
|
|
26649
|
-
message: `${
|
|
27281
|
+
message: `${path50}: no expected value`
|
|
26650
27282
|
};
|
|
26651
27283
|
}
|
|
26652
27284
|
if (candidateValue === void 0) {
|
|
26653
27285
|
if (required) {
|
|
26654
27286
|
return {
|
|
26655
|
-
path:
|
|
27287
|
+
path: path50,
|
|
26656
27288
|
score: 0,
|
|
26657
27289
|
weight,
|
|
26658
27290
|
hit: false,
|
|
26659
|
-
message: `${
|
|
27291
|
+
message: `${path50} (required, missing)`
|
|
26660
27292
|
};
|
|
26661
27293
|
}
|
|
26662
27294
|
return {
|
|
26663
|
-
path:
|
|
27295
|
+
path: path50,
|
|
26664
27296
|
score: 1,
|
|
26665
27297
|
// Don't penalize missing optional fields
|
|
26666
27298
|
weight: 0,
|
|
26667
27299
|
// Zero weight means it won't affect the score
|
|
26668
27300
|
hit: true,
|
|
26669
|
-
message: `${
|
|
27301
|
+
message: `${path50}: optional field missing`
|
|
26670
27302
|
};
|
|
26671
27303
|
}
|
|
26672
27304
|
switch (match) {
|
|
26673
27305
|
case "exact":
|
|
26674
|
-
return this.compareExact(
|
|
27306
|
+
return this.compareExact(path50, candidateValue, expectedValue, weight);
|
|
26675
27307
|
case "numeric_tolerance":
|
|
26676
27308
|
return this.compareNumericTolerance(
|
|
26677
|
-
|
|
27309
|
+
path50,
|
|
26678
27310
|
candidateValue,
|
|
26679
27311
|
expectedValue,
|
|
26680
27312
|
fieldConfig,
|
|
26681
27313
|
weight
|
|
26682
27314
|
);
|
|
26683
27315
|
case "date":
|
|
26684
|
-
return this.compareDate(
|
|
27316
|
+
return this.compareDate(path50, candidateValue, expectedValue, fieldConfig, weight);
|
|
26685
27317
|
default:
|
|
26686
27318
|
return {
|
|
26687
|
-
path:
|
|
27319
|
+
path: path50,
|
|
26688
27320
|
score: 0,
|
|
26689
27321
|
weight,
|
|
26690
27322
|
hit: false,
|
|
26691
|
-
message: `${
|
|
27323
|
+
message: `${path50}: unknown match type "${match}"`
|
|
26692
27324
|
};
|
|
26693
27325
|
}
|
|
26694
27326
|
}
|
|
26695
27327
|
/**
|
|
26696
27328
|
* Exact equality comparison.
|
|
26697
27329
|
*/
|
|
26698
|
-
compareExact(
|
|
27330
|
+
compareExact(path50, candidateValue, expectedValue, weight) {
|
|
26699
27331
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
26700
27332
|
return {
|
|
26701
|
-
path:
|
|
27333
|
+
path: path50,
|
|
26702
27334
|
score: 1,
|
|
26703
27335
|
weight,
|
|
26704
27336
|
hit: true,
|
|
26705
|
-
message:
|
|
27337
|
+
message: path50
|
|
26706
27338
|
};
|
|
26707
27339
|
}
|
|
26708
27340
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
26709
27341
|
return {
|
|
26710
|
-
path:
|
|
27342
|
+
path: path50,
|
|
26711
27343
|
score: 0,
|
|
26712
27344
|
weight,
|
|
26713
27345
|
hit: false,
|
|
26714
|
-
message: `${
|
|
27346
|
+
message: `${path50} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
26715
27347
|
};
|
|
26716
27348
|
}
|
|
26717
27349
|
return {
|
|
26718
|
-
path:
|
|
27350
|
+
path: path50,
|
|
26719
27351
|
score: 0,
|
|
26720
27352
|
weight,
|
|
26721
27353
|
hit: false,
|
|
26722
|
-
message: `${
|
|
27354
|
+
message: `${path50} (value mismatch)`
|
|
26723
27355
|
};
|
|
26724
27356
|
}
|
|
26725
27357
|
/**
|
|
26726
27358
|
* Numeric comparison with absolute or relative tolerance.
|
|
26727
27359
|
*/
|
|
26728
|
-
compareNumericTolerance(
|
|
27360
|
+
compareNumericTolerance(path50, candidateValue, expectedValue, fieldConfig, weight) {
|
|
26729
27361
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
26730
27362
|
const candidateNum = toNumber(candidateValue);
|
|
26731
27363
|
const expectedNum = toNumber(expectedValue);
|
|
26732
27364
|
if (candidateNum === null || expectedNum === null) {
|
|
26733
27365
|
return {
|
|
26734
|
-
path:
|
|
27366
|
+
path: path50,
|
|
26735
27367
|
score: 0,
|
|
26736
27368
|
weight,
|
|
26737
27369
|
hit: false,
|
|
26738
|
-
message: `${
|
|
27370
|
+
message: `${path50} (non-numeric value)`
|
|
26739
27371
|
};
|
|
26740
27372
|
}
|
|
26741
27373
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
26742
27374
|
return {
|
|
26743
|
-
path:
|
|
27375
|
+
path: path50,
|
|
26744
27376
|
score: 0,
|
|
26745
27377
|
weight,
|
|
26746
27378
|
hit: false,
|
|
26747
|
-
message: `${
|
|
27379
|
+
message: `${path50} (invalid numeric value)`
|
|
26748
27380
|
};
|
|
26749
27381
|
}
|
|
26750
27382
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -26757,61 +27389,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
26757
27389
|
}
|
|
26758
27390
|
if (withinTolerance) {
|
|
26759
27391
|
return {
|
|
26760
|
-
path:
|
|
27392
|
+
path: path50,
|
|
26761
27393
|
score: 1,
|
|
26762
27394
|
weight,
|
|
26763
27395
|
hit: true,
|
|
26764
|
-
message: `${
|
|
27396
|
+
message: `${path50} (within tolerance: diff=${diff.toFixed(2)})`
|
|
26765
27397
|
};
|
|
26766
27398
|
}
|
|
26767
27399
|
return {
|
|
26768
|
-
path:
|
|
27400
|
+
path: path50,
|
|
26769
27401
|
score: 0,
|
|
26770
27402
|
weight,
|
|
26771
27403
|
hit: false,
|
|
26772
|
-
message: `${
|
|
27404
|
+
message: `${path50} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
26773
27405
|
};
|
|
26774
27406
|
}
|
|
26775
27407
|
/**
|
|
26776
27408
|
* Date comparison with format normalization.
|
|
26777
27409
|
*/
|
|
26778
|
-
compareDate(
|
|
27410
|
+
compareDate(path50, candidateValue, expectedValue, fieldConfig, weight) {
|
|
26779
27411
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
26780
27412
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
26781
27413
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
26782
27414
|
if (candidateDate === null) {
|
|
26783
27415
|
return {
|
|
26784
|
-
path:
|
|
27416
|
+
path: path50,
|
|
26785
27417
|
score: 0,
|
|
26786
27418
|
weight,
|
|
26787
27419
|
hit: false,
|
|
26788
|
-
message: `${
|
|
27420
|
+
message: `${path50} (unparseable candidate date)`
|
|
26789
27421
|
};
|
|
26790
27422
|
}
|
|
26791
27423
|
if (expectedDate === null) {
|
|
26792
27424
|
return {
|
|
26793
|
-
path:
|
|
27425
|
+
path: path50,
|
|
26794
27426
|
score: 0,
|
|
26795
27427
|
weight,
|
|
26796
27428
|
hit: false,
|
|
26797
|
-
message: `${
|
|
27429
|
+
message: `${path50} (unparseable expected date)`
|
|
26798
27430
|
};
|
|
26799
27431
|
}
|
|
26800
27432
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
26801
27433
|
return {
|
|
26802
|
-
path:
|
|
27434
|
+
path: path50,
|
|
26803
27435
|
score: 1,
|
|
26804
27436
|
weight,
|
|
26805
27437
|
hit: true,
|
|
26806
|
-
message:
|
|
27438
|
+
message: path50
|
|
26807
27439
|
};
|
|
26808
27440
|
}
|
|
26809
27441
|
return {
|
|
26810
|
-
path:
|
|
27442
|
+
path: path50,
|
|
26811
27443
|
score: 0,
|
|
26812
27444
|
weight,
|
|
26813
27445
|
hit: false,
|
|
26814
|
-
message: `${
|
|
27446
|
+
message: `${path50} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
26815
27447
|
};
|
|
26816
27448
|
}
|
|
26817
27449
|
/**
|
|
@@ -26844,11 +27476,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
26844
27476
|
};
|
|
26845
27477
|
}
|
|
26846
27478
|
};
|
|
26847
|
-
function resolvePath(obj,
|
|
26848
|
-
if (!
|
|
27479
|
+
function resolvePath(obj, path50) {
|
|
27480
|
+
if (!path50 || !obj) {
|
|
26849
27481
|
return void 0;
|
|
26850
27482
|
}
|
|
26851
|
-
const parts =
|
|
27483
|
+
const parts = path50.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
26852
27484
|
let current = obj;
|
|
26853
27485
|
for (const part of parts) {
|
|
26854
27486
|
if (current === null || current === void 0) {
|
|
@@ -27330,8 +27962,8 @@ var TokenUsageEvaluator = class {
|
|
|
27330
27962
|
};
|
|
27331
27963
|
}
|
|
27332
27964
|
};
|
|
27333
|
-
function getNestedValue(obj,
|
|
27334
|
-
const parts =
|
|
27965
|
+
function getNestedValue(obj, path50) {
|
|
27966
|
+
const parts = path50.split(".");
|
|
27335
27967
|
let current = obj;
|
|
27336
27968
|
for (const part of parts) {
|
|
27337
27969
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -29054,7 +29686,7 @@ var WorkspacePoolManager = class {
|
|
|
29054
29686
|
}
|
|
29055
29687
|
/**
|
|
29056
29688
|
* Reset an existing slot for reuse:
|
|
29057
|
-
* 1. Reset repos (git reset --hard
|
|
29689
|
+
* 1. Reset repos (fetch from origin when resolve=remote, then git reset --hard && git clean per repo)
|
|
29058
29690
|
* 2. Re-copy template files (skip repo directories)
|
|
29059
29691
|
*/
|
|
29060
29692
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
@@ -29067,7 +29699,17 @@ var WorkspacePoolManager = class {
|
|
|
29067
29699
|
continue;
|
|
29068
29700
|
}
|
|
29069
29701
|
const ref = repo.checkout?.ref ?? "HEAD";
|
|
29070
|
-
|
|
29702
|
+
const resolve2 = repo.checkout?.resolve ?? "remote";
|
|
29703
|
+
if (resolve2 === "remote") {
|
|
29704
|
+
const fetchArgs = ["fetch", "origin", ref];
|
|
29705
|
+
if (repo.clone?.depth) {
|
|
29706
|
+
fetchArgs.splice(1, 0, "--depth", String(repo.clone.depth));
|
|
29707
|
+
}
|
|
29708
|
+
await git(fetchArgs, { cwd: repoDir });
|
|
29709
|
+
await git(["reset", "--hard", "FETCH_HEAD"], { cwd: repoDir });
|
|
29710
|
+
} else {
|
|
29711
|
+
await git(["reset", "--hard", ref], { cwd: repoDir });
|
|
29712
|
+
}
|
|
29071
29713
|
const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
|
|
29072
29714
|
await git(["clean", cleanFlag], { cwd: repoDir });
|
|
29073
29715
|
}
|
|
@@ -29350,7 +29992,7 @@ async function executeWorkspaceScript(config, context2, failureMode = "fatal") {
|
|
|
29350
29992
|
}
|
|
29351
29993
|
return result.stdout;
|
|
29352
29994
|
}
|
|
29353
|
-
function classifyQualityStatus(score, threshold =
|
|
29995
|
+
function classifyQualityStatus(score, threshold = DEFAULT_THRESHOLD) {
|
|
29354
29996
|
return score >= threshold ? "ok" : "quality_failure";
|
|
29355
29997
|
}
|
|
29356
29998
|
function buildSkippedEvaluatorError(scores) {
|
|
@@ -29442,7 +30084,7 @@ async function runEvaluation(options) {
|
|
|
29442
30084
|
const filteredEvalCases = filterEvalCases(evalCases, filter2);
|
|
29443
30085
|
if (filteredEvalCases.length === 0) {
|
|
29444
30086
|
if (filter2) {
|
|
29445
|
-
throw new Error(`No tests matched filter '${filter2}' in ${evalFilePath}`);
|
|
30087
|
+
throw new Error(`No tests matched filter '${formatFilter(filter2)}' in ${evalFilePath}`);
|
|
29446
30088
|
}
|
|
29447
30089
|
return [];
|
|
29448
30090
|
}
|
|
@@ -29468,20 +30110,10 @@ async function runEvaluation(options) {
|
|
|
29468
30110
|
if (resolvedTargetsByName.has(name21)) {
|
|
29469
30111
|
return resolvedTargetsByName.get(name21);
|
|
29470
30112
|
}
|
|
29471
|
-
|
|
30113
|
+
const definition = resolveDelegatedTargetDefinition(name21, targetDefinitions, envLookup);
|
|
29472
30114
|
if (!definition) {
|
|
29473
30115
|
return void 0;
|
|
29474
30116
|
}
|
|
29475
|
-
for (let depth = 0; depth < 5; depth++) {
|
|
29476
|
-
const useTarget = definition.use_target;
|
|
29477
|
-
if (typeof useTarget !== "string" || useTarget.trim().length === 0) break;
|
|
29478
|
-
const envMatch = useTarget.trim().match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
|
|
29479
|
-
const resolvedName = envMatch ? envLookup[envMatch[1]] ?? "" : useTarget.trim();
|
|
29480
|
-
if (resolvedName.length === 0) break;
|
|
29481
|
-
const next = targetDefinitions.get(resolvedName);
|
|
29482
|
-
if (!next) break;
|
|
29483
|
-
definition = next;
|
|
29484
|
-
}
|
|
29485
30117
|
const resolved = resolveTargetDefinition(definition, envLookup, evalFilePath);
|
|
29486
30118
|
resolvedTargetsByName.set(name21, resolved);
|
|
29487
30119
|
return resolved;
|
|
@@ -29504,6 +30136,9 @@ async function runEvaluation(options) {
|
|
|
29504
30136
|
const graderName = targetContext.graderTarget ?? targetContext.name;
|
|
29505
30137
|
const resolvedGrader = resolveTargetByName(graderName);
|
|
29506
30138
|
if (!resolvedGrader) {
|
|
30139
|
+
if (!LLM_GRADER_CAPABLE_KINDS.includes(targetContext.kind)) {
|
|
30140
|
+
return void 0;
|
|
30141
|
+
}
|
|
29507
30142
|
return getOrCreateProvider(targetContext);
|
|
29508
30143
|
}
|
|
29509
30144
|
return getOrCreateProvider(resolvedGrader);
|
|
@@ -29834,7 +30469,7 @@ async function runEvaluation(options) {
|
|
|
29834
30469
|
const budgetResult = {
|
|
29835
30470
|
timestamp: (now2 ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
29836
30471
|
testId: evalCase.id,
|
|
29837
|
-
|
|
30472
|
+
suite: evalCase.suite,
|
|
29838
30473
|
category: evalCase.category,
|
|
29839
30474
|
score: 0,
|
|
29840
30475
|
assertions: [],
|
|
@@ -29871,7 +30506,7 @@ async function runEvaluation(options) {
|
|
|
29871
30506
|
const haltResult = {
|
|
29872
30507
|
timestamp: (now2 ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
29873
30508
|
testId: evalCase.id,
|
|
29874
|
-
|
|
30509
|
+
suite: evalCase.suite,
|
|
29875
30510
|
category: evalCase.category,
|
|
29876
30511
|
score: 0,
|
|
29877
30512
|
assertions: [],
|
|
@@ -30183,7 +30818,7 @@ async function runBatchEvaluation(options) {
|
|
|
30183
30818
|
targetResolver,
|
|
30184
30819
|
availableTargets,
|
|
30185
30820
|
verbose,
|
|
30186
|
-
threshold: batchThreshold
|
|
30821
|
+
threshold: evalCase.threshold ?? batchThreshold
|
|
30187
30822
|
});
|
|
30188
30823
|
if (providerError) {
|
|
30189
30824
|
result = {
|
|
@@ -30645,8 +31280,9 @@ async function runEvalCase(options) {
|
|
|
30645
31280
|
fileChanges,
|
|
30646
31281
|
workspacePath,
|
|
30647
31282
|
verbose,
|
|
30648
|
-
threshold: caseThreshold
|
|
31283
|
+
threshold: evalCase.threshold ?? caseThreshold
|
|
30649
31284
|
});
|
|
31285
|
+
const effectiveThreshold = evalCase.threshold ?? caseThreshold;
|
|
30650
31286
|
const totalDurationMs = Date.now() - caseStartMs;
|
|
30651
31287
|
const graderTokens = aggregateEvaluatorTokenUsage(result.scores);
|
|
30652
31288
|
const evalRunTokenUsage = tokenUsage || graderTokens ? {
|
|
@@ -30660,7 +31296,7 @@ async function runEvalCase(options) {
|
|
|
30660
31296
|
...evalRunTokenUsage ? { tokenUsage: evalRunTokenUsage } : {}
|
|
30661
31297
|
};
|
|
30662
31298
|
const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
|
|
30663
|
-
const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score,
|
|
31299
|
+
const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, effectiveThreshold);
|
|
30664
31300
|
const targetUsedField = targetUsed ? { targetUsed } : {};
|
|
30665
31301
|
const finalResult = providerError ? {
|
|
30666
31302
|
...result,
|
|
@@ -30861,7 +31497,8 @@ async function evaluateCandidate(options) {
|
|
|
30861
31497
|
targetResolver,
|
|
30862
31498
|
availableTargets,
|
|
30863
31499
|
fileChanges,
|
|
30864
|
-
workspacePath
|
|
31500
|
+
workspacePath,
|
|
31501
|
+
threshold: evalThreshold
|
|
30865
31502
|
});
|
|
30866
31503
|
const completedAt = nowFn();
|
|
30867
31504
|
let agentRequest;
|
|
@@ -30892,7 +31529,7 @@ async function evaluateCandidate(options) {
|
|
|
30892
31529
|
return {
|
|
30893
31530
|
timestamp: completedAt.toISOString(),
|
|
30894
31531
|
testId: evalCase.id,
|
|
30895
|
-
|
|
31532
|
+
suite: evalCase.suite,
|
|
30896
31533
|
category: evalCase.category,
|
|
30897
31534
|
conversationId: evalCase.conversation_id,
|
|
30898
31535
|
score: score.score,
|
|
@@ -30935,7 +31572,8 @@ async function runEvaluatorsForCase(options) {
|
|
|
30935
31572
|
targetResolver,
|
|
30936
31573
|
availableTargets,
|
|
30937
31574
|
fileChanges,
|
|
30938
|
-
workspacePath
|
|
31575
|
+
workspacePath,
|
|
31576
|
+
threshold
|
|
30939
31577
|
} = options;
|
|
30940
31578
|
if (evalCase.assertions && evalCase.assertions.length > 0) {
|
|
30941
31579
|
return runEvaluatorList({
|
|
@@ -30961,7 +31599,8 @@ async function runEvaluatorsForCase(options) {
|
|
|
30961
31599
|
targetResolver,
|
|
30962
31600
|
availableTargets,
|
|
30963
31601
|
fileChanges,
|
|
30964
|
-
workspacePath
|
|
31602
|
+
workspacePath,
|
|
31603
|
+
threshold
|
|
30965
31604
|
});
|
|
30966
31605
|
}
|
|
30967
31606
|
const evaluatorKind = evalCase.evaluator ?? "llm-grader";
|
|
@@ -31063,7 +31702,8 @@ async function runEvaluatorList(options) {
|
|
|
31063
31702
|
name: evaluatorConfig.name,
|
|
31064
31703
|
type: evaluatorConfig.type,
|
|
31065
31704
|
weight,
|
|
31066
|
-
...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {}
|
|
31705
|
+
...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {},
|
|
31706
|
+
...evaluatorConfig.min_score !== void 0 ? { min_score: evaluatorConfig.min_score } : {}
|
|
31067
31707
|
});
|
|
31068
31708
|
scores.push({
|
|
31069
31709
|
name: evaluatorConfig.name,
|
|
@@ -31098,7 +31738,8 @@ async function runEvaluatorList(options) {
|
|
|
31098
31738
|
name: evaluatorConfig.name ?? "unknown",
|
|
31099
31739
|
type: evaluatorConfig.type ?? "llm-grader",
|
|
31100
31740
|
weight,
|
|
31101
|
-
...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {}
|
|
31741
|
+
...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {},
|
|
31742
|
+
...evaluatorConfig.min_score !== void 0 ? { min_score: evaluatorConfig.min_score } : {}
|
|
31102
31743
|
});
|
|
31103
31744
|
scores.push({
|
|
31104
31745
|
name: evaluatorConfig.name ?? "unknown",
|
|
@@ -31132,9 +31773,10 @@ async function runEvaluatorList(options) {
|
|
|
31132
31773
|
}
|
|
31133
31774
|
}
|
|
31134
31775
|
}
|
|
31776
|
+
const effectiveThreshold = options.threshold ?? DEFAULT_THRESHOLD;
|
|
31135
31777
|
const hasRequiredFailure = scored.some((entry) => {
|
|
31136
31778
|
if (!entry.required) return false;
|
|
31137
|
-
const minScore = typeof entry.required === "number" ? entry.required :
|
|
31779
|
+
const minScore = entry.min_score ?? (typeof entry.required === "number" ? entry.required : effectiveThreshold);
|
|
31138
31780
|
return entry.score.score < minScore;
|
|
31139
31781
|
});
|
|
31140
31782
|
const scorable = scored.filter((entry) => entry.score.verdict !== "skip");
|
|
@@ -31145,17 +31787,23 @@ async function runEvaluatorList(options) {
|
|
|
31145
31787
|
const expectedAspectCount = assertions.length || 1;
|
|
31146
31788
|
const score = {
|
|
31147
31789
|
score: aggregateScore,
|
|
31148
|
-
verdict: scoreToVerdict(aggregateScore),
|
|
31790
|
+
verdict: scoreToVerdict(aggregateScore, effectiveThreshold),
|
|
31149
31791
|
assertions,
|
|
31150
31792
|
expectedAspectCount
|
|
31151
31793
|
};
|
|
31152
31794
|
return { score, scores };
|
|
31153
31795
|
}
|
|
31796
|
+
function formatFilter(filter2) {
|
|
31797
|
+
return typeof filter2 === "string" ? filter2 : filter2.join(", ");
|
|
31798
|
+
}
|
|
31799
|
+
function matchesFilter3(id, filter2) {
|
|
31800
|
+
return typeof filter2 === "string" ? micromatch3.isMatch(id, filter2) : filter2.some((pattern) => micromatch3.isMatch(id, pattern));
|
|
31801
|
+
}
|
|
31154
31802
|
function filterEvalCases(evalCases, filter2) {
|
|
31155
31803
|
if (!filter2) {
|
|
31156
31804
|
return evalCases;
|
|
31157
31805
|
}
|
|
31158
|
-
return evalCases.filter((evalCase) =>
|
|
31806
|
+
return evalCases.filter((evalCase) => matchesFilter3(evalCase.id, filter2));
|
|
31159
31807
|
}
|
|
31160
31808
|
function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
|
|
31161
31809
|
const llmGrader = overrides?.["llm-grader"] ?? new LlmGraderEvaluator({
|
|
@@ -31242,7 +31890,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
31242
31890
|
return {
|
|
31243
31891
|
timestamp: timestamp.toISOString(),
|
|
31244
31892
|
testId: evalCase.id,
|
|
31245
|
-
|
|
31893
|
+
suite: evalCase.suite,
|
|
31246
31894
|
category: evalCase.category,
|
|
31247
31895
|
conversationId: evalCase.conversation_id,
|
|
31248
31896
|
score: 0,
|
|
@@ -31506,6 +32154,7 @@ async function evaluate(config) {
|
|
|
31506
32154
|
verbose: config.verbose,
|
|
31507
32155
|
maxConcurrency: config.workers ?? 3,
|
|
31508
32156
|
filter: config.filter,
|
|
32157
|
+
threshold: config.threshold,
|
|
31509
32158
|
evalCases,
|
|
31510
32159
|
onResult: async (result) => {
|
|
31511
32160
|
collectedResults.push(result);
|
|
@@ -31516,19 +32165,19 @@ async function evaluate(config) {
|
|
|
31516
32165
|
const durationMs = Date.now() - startTime;
|
|
31517
32166
|
return {
|
|
31518
32167
|
results: allResults,
|
|
31519
|
-
summary: computeSummary(allResults, durationMs)
|
|
32168
|
+
summary: computeSummary(allResults, durationMs, config.threshold)
|
|
31520
32169
|
};
|
|
31521
32170
|
}
|
|
31522
32171
|
function mapAssertionType(type) {
|
|
31523
32172
|
return type.replace(/_/g, "-");
|
|
31524
32173
|
}
|
|
31525
|
-
function computeSummary(results, durationMs) {
|
|
32174
|
+
function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
|
|
31526
32175
|
const total = results.length;
|
|
31527
32176
|
let passed = 0;
|
|
31528
32177
|
let scoreSum = 0;
|
|
31529
32178
|
for (const r of results) {
|
|
31530
32179
|
scoreSum += r.score;
|
|
31531
|
-
if (r.score >=
|
|
32180
|
+
if (r.score >= threshold) {
|
|
31532
32181
|
passed++;
|
|
31533
32182
|
}
|
|
31534
32183
|
}
|
|
@@ -31559,7 +32208,7 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
31559
32208
|
return null;
|
|
31560
32209
|
}
|
|
31561
32210
|
async function loadEnvHierarchy(repoRoot, startPath) {
|
|
31562
|
-
const { readFileSync:
|
|
32211
|
+
const { readFileSync: readFileSync4 } = await import("node:fs");
|
|
31563
32212
|
const chain = buildDirectoryChain(startPath, repoRoot);
|
|
31564
32213
|
const envFiles = [];
|
|
31565
32214
|
for (const dir of chain) {
|
|
@@ -31568,7 +32217,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
31568
32217
|
}
|
|
31569
32218
|
for (let i = 0; i < envFiles.length; i++) {
|
|
31570
32219
|
try {
|
|
31571
|
-
const content =
|
|
32220
|
+
const content = readFileSync4(envFiles[i], "utf8");
|
|
31572
32221
|
for (const line of content.split("\n")) {
|
|
31573
32222
|
const trimmed = line.trim();
|
|
31574
32223
|
if (!trimmed || trimmed.startsWith("#")) continue;
|
|
@@ -31638,7 +32287,7 @@ var CONFIG_FILE_NAMES = [
|
|
|
31638
32287
|
];
|
|
31639
32288
|
async function loadTsConfig(projectRoot) {
|
|
31640
32289
|
const { existsSync: existsSync7 } = await import("node:fs");
|
|
31641
|
-
const { pathToFileURL } = await import("node:url");
|
|
32290
|
+
const { pathToFileURL: pathToFileURL2 } = await import("node:url");
|
|
31642
32291
|
const { join: join2 } = await import("node:path");
|
|
31643
32292
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
31644
32293
|
const filePath = join2(projectRoot, fileName);
|
|
@@ -31646,7 +32295,7 @@ async function loadTsConfig(projectRoot) {
|
|
|
31646
32295
|
continue;
|
|
31647
32296
|
}
|
|
31648
32297
|
try {
|
|
31649
|
-
const fileUrl =
|
|
32298
|
+
const fileUrl = pathToFileURL2(filePath).href;
|
|
31650
32299
|
const mod = await import(fileUrl);
|
|
31651
32300
|
const config = mod.default ?? mod;
|
|
31652
32301
|
return AgentVConfigSchema.parse(config);
|
|
@@ -31779,7 +32428,7 @@ function loadProjectRegistry() {
|
|
|
31779
32428
|
return { projects: [] };
|
|
31780
32429
|
}
|
|
31781
32430
|
try {
|
|
31782
|
-
const raw =
|
|
32431
|
+
const raw = readFileSync3(registryPath, "utf-8");
|
|
31783
32432
|
const parsed = parseYaml3(raw);
|
|
31784
32433
|
if (!parsed || !Array.isArray(parsed.projects)) {
|
|
31785
32434
|
return { projects: [] };
|
|
@@ -31793,7 +32442,7 @@ function saveProjectRegistry(registry) {
|
|
|
31793
32442
|
const registryPath = getProjectsRegistryPath();
|
|
31794
32443
|
const dir = path47.dirname(registryPath);
|
|
31795
32444
|
if (!existsSync6(dir)) {
|
|
31796
|
-
|
|
32445
|
+
mkdirSync2(dir, { recursive: true });
|
|
31797
32446
|
}
|
|
31798
32447
|
writeFileSync(registryPath, stringifyYaml(registry), "utf-8");
|
|
31799
32448
|
}
|
|
@@ -32053,7 +32702,7 @@ var OtelTraceExporter = class {
|
|
|
32053
32702
|
rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
32054
32703
|
rootSpan.setAttribute("agentv.test_id", result.testId);
|
|
32055
32704
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
32056
|
-
if (result.
|
|
32705
|
+
if (result.suite) rootSpan.setAttribute("agentv.suite", result.suite);
|
|
32057
32706
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
32058
32707
|
if (captureContent && result.output.length > 0) {
|
|
32059
32708
|
const lastMsg = result.output[result.output.length - 1];
|
|
@@ -32262,7 +32911,7 @@ var OtelStreamingObserver = class {
|
|
|
32262
32911
|
this.rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
32263
32912
|
this.rootSpan.setAttribute("agentv.test_id", testId);
|
|
32264
32913
|
this.rootSpan.setAttribute("agentv.target", target);
|
|
32265
|
-
if (evalSet) this.rootSpan.setAttribute("agentv.
|
|
32914
|
+
if (evalSet) this.rootSpan.setAttribute("agentv.suite", evalSet);
|
|
32266
32915
|
this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
|
|
32267
32916
|
}
|
|
32268
32917
|
/** Create and immediately export a tool span */
|
|
@@ -32608,7 +33257,230 @@ function extractToolResultContent(content) {
|
|
|
32608
33257
|
}
|
|
32609
33258
|
return parts.length > 0 ? parts.join("") : void 0;
|
|
32610
33259
|
}
|
|
32611
|
-
|
|
33260
|
+
/**
 * Decode the argument payload attached to a Codex tool-call item.
 *
 * String payloads are parsed as JSON when possible and otherwise kept as the
 * raw string; non-string payloads are passed through untouched.
 *
 * @param {Record<string, unknown>} payload - `payload` of a tool-call response item.
 * @returns {unknown} The decoded tool input (may be `undefined`).
 */
function decodeCodexToolInput(payload) {
  // `arguments` always wins so existing behaviour is unchanged.
  // NOTE(review): Codex custom tool calls appear to carry their raw text under
  // `input` rather than `arguments` - confirm against the rollout schema.
  const raw = payload.arguments ?? payload.input;
  if (typeof raw !== "string") return raw;
  try {
    return JSON.parse(raw);
  } catch {
    // Not JSON (e.g. free-form text): keep the original string.
    return raw;
  }
}

/**
 * Parse the JSONL text of a Codex session into a transcript.
 *
 * Blank lines, lines that are not valid JSON, and JSON values that are not
 * objects with a truthy `type` are ignored. Tool calls become assistant
 * messages with a single `toolCalls` entry; a later `*_output` item with the
 * same `call_id` is merged into that entry as `output`.
 *
 * @param {string} jsonl - Raw JSONL content, one entry per line.
 * @returns {{
 *   messages: Array<object>,
 *   source: object,
 *   tokenUsage: undefined,
 *   durationMs: number | undefined,
 *   costUsd: null
 * }} `durationMs` is the span between the first and last timestamped entry,
 *   or `undefined` when it cannot be computed.
 */
function parseCodexSession(jsonl) {
  const messages = [];
  let sessionId = "";
  let cwd;
  let model;
  let version;
  let startTimestamp;
  let endTimestamp;
  // call_id -> index in `messages` of the assistant message awaiting its output.
  const pendingCalls = new Map();

  for (const line of jsonl.split("\n")) {
    if (line.trim().length === 0) continue;
    let entry;
    try {
      entry = JSON.parse(line);
    } catch {
      continue;
    }
    // A line such as `null` is valid JSON; guard before touching properties.
    if (entry === null || typeof entry !== "object" || !entry.type) continue;

    if (entry.timestamp) {
      if (!startTimestamp) startTimestamp = entry.timestamp;
      endTimestamp = entry.timestamp;
    }
    const payload = entry.payload ?? {};

    switch (entry.type) {
      case "session_meta": {
        sessionId = String(payload.id ?? "");
        cwd = payload.cwd ? String(payload.cwd) : void 0;
        version = payload.cli_version ? String(payload.cli_version) : void 0;
        if (payload.model && !model) {
          model = String(payload.model);
        }
        break;
      }
      case "turn_context": {
        // First value seen wins for both model and cwd.
        if (payload.model && !model) {
          model = String(payload.model);
        }
        if (payload.cwd && !cwd) {
          cwd = String(payload.cwd);
        }
        break;
      }
      case "response_item": {
        const itemType = String(payload.type ?? "");
        const role = String(payload.role ?? "");
        switch (itemType) {
          case "message": {
            if (role === "developer") break;
            const content = extractResponseItemContent(payload.content);
            if (content && (role === "user" || role === "assistant")) {
              messages.push({ role, content });
            }
            break;
          }
          case "function_call":
          case "custom_tool_call": {
            const callId = String(payload.call_id ?? "");
            const toolCall = {
              tool: String(payload.name ?? ""),
              input: decodeCodexToolInput(payload),
              id: callId
            };
            const msgIdx = messages.length;
            messages.push({ role: "assistant", toolCalls: [toolCall] });
            if (callId) {
              pendingCalls.set(callId, msgIdx);
            }
            break;
          }
          case "function_call_output":
          case "custom_tool_call_output": {
            const callId = String(payload.call_id ?? "");
            const msgIdx = pendingCalls.get(callId);
            if (msgIdx !== void 0) {
              const existingMsg = messages[msgIdx];
              const [firstCall, ...otherCalls] = existingMsg.toolCalls ?? [];
              // Replace rather than mutate the message and its tool call.
              messages[msgIdx] = {
                ...existingMsg,
                toolCalls: [{ ...firstCall, output: payload.output }, ...otherCalls]
              };
              pendingCalls.delete(callId);
            }
            break;
          }
          // Skip reasoning blocks (thinking tokens)
          case "reasoning":
            break;
        }
        break;
      }
    }
  }

  let durationMs;
  if (startTimestamp && endTimestamp) {
    const elapsed = new Date(endTimestamp).getTime() - new Date(startTimestamp).getTime();
    // Unparseable timestamps produce NaN; report "unknown" instead.
    if (Number.isFinite(elapsed)) durationMs = elapsed;
  }

  const source = {
    provider: "codex",
    sessionId,
    cwd,
    startedAt: startTimestamp,
    model,
    version
  };
  return {
    messages,
    source,
    // Codex rollout files don't include token counts (only rate limit info)
    tokenUsage: void 0,
    durationMs,
    costUsd: null
  };
}
|
|
33409
|
+
/**
 * Flatten the `content` of a response item into a single string.
 *
 * @param {unknown} content - Either a plain string or an array of blocks.
 * @returns {string | undefined} The string itself, the concatenation of every
 *   block's string `text` field, or `undefined` when nothing textual is found.
 */
function extractResponseItemContent(content) {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return void 0;
  const texts = content
    .filter((block) => typeof block === "object" && block !== null && typeof block.text === "string")
    .map((block) => block.text);
  if (texts.length === 0) return void 0;
  return texts.join("");
}
|
|
33423
|
+
/** Lazily builds the default Codex sessions path: `<home>/.codex/sessions`. */
var DEFAULT_SESSIONS_DIR = () => {
  const home = homedir3();
  return path48.join(home, ".codex", "sessions");
};
|
|
33424
|
+
/**
 * Find Codex rollout files laid out as `<sessionsDir>/<year>/<month>/<day>/rollout-*.jsonl`.
 *
 * Directories that cannot be listed are skipped; a file that cannot be
 * stat'ed is reported with an epoch-zero `updatedAt`.
 *
 * @param {{ sessionsDir?: string, latest?: boolean, limit?: number, date?: string }} [opts]
 *   `date` is compared against `<year>-<month>-<day>` built from directory names;
 *   `latest` forces a limit of 1, otherwise `limit` defaults to 10.
 * @returns {Promise<Array<{ sessionId: string, filePath: string, filename: string, updatedAt: Date }>>}
 *   Sessions ordered by modification time, newest first, truncated to the limit.
 */
async function discoverCodexSessions(opts) {
  const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
  const limit = opts?.latest ? 1 : opts?.limit ?? 10;

  // Best-effort directory listing: an unreadable directory behaves as empty.
  const listDir = async (dir) => {
    try {
      return await readdir8(dir);
    } catch {
      return [];
    }
  };
  // Best-effort mtime lookup: unknown times sort last.
  const modifiedAt = async (target) => {
    try {
      return (await stat9(target)).mtime;
    } catch {
      return /* @__PURE__ */ new Date(0);
    }
  };

  const found = [];
  for (const year of await listDir(sessionsDir)) {
    const yearPath = path48.join(sessionsDir, year);
    for (const month of await listDir(yearPath)) {
      const monthPath = path48.join(yearPath, month);
      for (const day of await listDir(monthPath)) {
        if (opts?.date && `${year}-${month}-${day}` !== opts.date) continue;
        const dayPath = path48.join(monthPath, day);
        for (const file of await listDir(dayPath)) {
          const isRollout = file.startsWith("rollout-") && file.endsWith(".jsonl");
          if (!isRollout) continue;
          const filePath = path48.join(dayPath, file);
          const stem = file.slice(0, -".jsonl".length);
          const segments = stem.split("-");
          // With six or more dash-separated segments, the last five form the id.
          const sessionId = segments.length >= 6 ? segments.slice(-5).join("-") : stem;
          const updatedAt = await modifiedAt(filePath);
          found.push({ sessionId, filePath, filename: file, updatedAt });
        }
      }
    }
  }

  found.sort((left, right) => right.updatedAt.getTime() - left.updatedAt.getTime());
  return found.slice(0, limit);
}
|
|
33483
|
+
/** Lazily builds the default Claude projects path: `<home>/.claude/projects`. */
var DEFAULT_PROJECTS_DIR = () => {
  const home = homedir4();
  return path49.join(home, ".claude", "projects");
};
|
|
32612
33484
|
/**
 * Turn a project path into a directory-name-safe token by swapping every
 * forward slash for a dash.
 *
 * @param {string} projectPath - Path using `/` separators.
 * @returns {string} The path with each `/` replaced by `-`.
 */
function encodeProjectPath(projectPath) {
  const segments = projectPath.split("/");
  return segments.join("-");
}
|
|
@@ -32617,7 +33489,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
32617
33489
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
32618
33490
|
let projectDirs;
|
|
32619
33491
|
try {
|
|
32620
|
-
projectDirs = await
|
|
33492
|
+
projectDirs = await readdir9(projectsDir);
|
|
32621
33493
|
} catch {
|
|
32622
33494
|
return [];
|
|
32623
33495
|
}
|
|
@@ -32627,10 +33499,10 @@ async function discoverClaudeSessions(opts) {
|
|
|
32627
33499
|
}
|
|
32628
33500
|
const sessions = [];
|
|
32629
33501
|
for (const projectDir of projectDirs) {
|
|
32630
|
-
const dirPath =
|
|
33502
|
+
const dirPath = path49.join(projectsDir, projectDir);
|
|
32631
33503
|
let entries;
|
|
32632
33504
|
try {
|
|
32633
|
-
entries = await
|
|
33505
|
+
entries = await readdir9(dirPath);
|
|
32634
33506
|
} catch {
|
|
32635
33507
|
continue;
|
|
32636
33508
|
}
|
|
@@ -32638,10 +33510,10 @@ async function discoverClaudeSessions(opts) {
|
|
|
32638
33510
|
if (!entry.endsWith(".jsonl")) continue;
|
|
32639
33511
|
const sessionId = entry.replace(/\.jsonl$/, "");
|
|
32640
33512
|
if (opts?.sessionId && sessionId !== opts.sessionId) continue;
|
|
32641
|
-
const filePath =
|
|
33513
|
+
const filePath = path49.join(dirPath, entry);
|
|
32642
33514
|
let updatedAt;
|
|
32643
33515
|
try {
|
|
32644
|
-
const fileStat = await
|
|
33516
|
+
const fileStat = await stat10(filePath);
|
|
32645
33517
|
updatedAt = fileStat.mtime;
|
|
32646
33518
|
} catch {
|
|
32647
33519
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -32657,9 +33529,82 @@ async function discoverClaudeSessions(opts) {
|
|
|
32657
33529
|
sessions.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
|
|
32658
33530
|
return sessions.slice(0, limit);
|
|
32659
33531
|
}
|
|
33532
|
+
/**
 * Convert a parsed transcript entry (camelCase) into the snake_case record
 * written to transcript JSONL files.
 *
 * @param {object} entry - Parsed session with `messages`, `tokenUsage`,
 *   `durationMs`, `costUsd` and `source`.
 * @returns {object} Record whose `input` is the first user message's string
 *   content (or `""` when there is none) and whose `output` is the full
 *   message list.
 */
function toTranscriptJsonLine(entry) {
  const { messages, tokenUsage, source } = entry;
  const openingPrompt = messages.find((message) => message.role === "user")?.content;

  let usage;
  if (tokenUsage) {
    usage = {
      input: tokenUsage.input,
      output: tokenUsage.output,
      cached: tokenUsage.cached
    };
  }

  const provenance = {
    provider: source.provider,
    session_id: source.sessionId,
    model: source.model,
    timestamp: source.startedAt,
    git_branch: source.gitBranch,
    // Fall back to the project path when no working directory was recorded.
    cwd: source.cwd ?? source.projectPath,
    version: source.version
  };

  return {
    input: typeof openingPrompt === "string" ? openingPrompt : "",
    output: messages,
    token_usage: usage,
    duration_ms: entry.durationMs,
    cost_usd: entry.costUsd,
    source: provenance
  };
}
|
|
33556
|
+
/**
 * Load a transcript JSONL file and parse every non-blank line.
 *
 * @param {string} filePath - Path of the JSONL file to read as UTF-8.
 * @returns {Promise<Array<unknown>>} One parsed value per non-blank line.
 * @throws Rejects if the file cannot be read or a line is not valid JSON.
 */
async function readTranscriptJsonl(filePath) {
  const raw = await readFile14(filePath, "utf8");
  const records = [];
  for (const row of raw.split("\n")) {
    if (row.trim().length === 0) continue;
    records.push(JSON.parse(row));
  }
  return records;
}
|
|
32660
33560
|
/**
 * Read a transcript file in full as UTF-8 text, without parsing it.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<string>} The file's contents.
 */
async function readTranscriptFile(filePath) {
  const contents = await readFile14(filePath, "utf8");
  return contents;
}
|
|
33563
|
+
/**
 * Provider that replays pre-recorded transcript lines instead of calling a
 * live target. Each call to `invoke` consumes the next line in order.
 */
var TranscriptProvider = class _TranscriptProvider {
  /** Identifier of the form `transcript:<targetName>`. */
  id;
  kind = "transcript";
  targetName;
  /** Parsed transcript records, one per expected invocation. */
  lines;
  /** Index of the next line `invoke` will return. */
  cursor = 0;
  /**
   * @param {string} targetName - Name used to build `id`.
   * @param {Array<object>} lines - Parsed transcript records to replay.
   */
  constructor(targetName, lines) {
    this.targetName = targetName;
    this.id = `transcript:${targetName}`;
    this.lines = lines;
  }
  /**
   * Create a TranscriptProvider from a JSONL file path.
   *
   * The target name is taken from the first line's `source.provider`, falling
   * back to `"transcript"` when that field (or `source` itself) is absent.
   *
   * @param {string} filePath - Path to the transcript JSONL file.
   * @returns {Promise<TranscriptProvider>}
   * @throws {Error} If the file contains no transcript lines.
   */
  static async fromFile(filePath) {
    const lines = await readTranscriptJsonl(filePath);
    if (lines.length === 0) {
      throw new Error(`Transcript file is empty: ${filePath}`);
    }
    // `source` is optional here so hand-written transcripts don't crash.
    const providerName = lines[0]?.source?.provider ?? "transcript";
    return new _TranscriptProvider(providerName, lines);
  }
  /** Number of transcript lines available for replay. */
  get lineCount() {
    return this.lines.length;
  }
  /**
   * Return the next recorded response; the request itself is ignored.
   *
   * @param {unknown} _request - Unused.
   * @returns {Promise<object>} Recorded output plus usage, timing and cost metadata.
   * @throws {Error} When every transcript line has already been consumed.
   */
  async invoke(_request) {
    if (this.cursor >= this.lines.length) {
      throw new Error(
        `Transcript exhausted: ${this.lines.length} line(s) available but ${this.cursor + 1} invocations attempted. Each transcript line maps to one test case.`
      );
    }
    const line = this.lines[this.cursor++];
    return {
      output: line.output,
      tokenUsage: line.token_usage ? {
        input: line.token_usage.input,
        output: line.token_usage.output,
        cached: line.token_usage.cached
      } : void 0,
      durationMs: line.duration_ms,
      // Stored transcripts use null for "unknown"; callers get undefined.
      costUsd: line.cost_usd ?? void 0,
      // A line without `source` simply has no start time.
      startTime: line.source?.timestamp
    };
  }
};
|
|
32663
33608
|
/**
 * Placeholder factory for the agent kernel.
 *
 * @returns {{ status: string }} A fresh object whose `status` is `"stub"`.
 */
function createAgentKernel() {
  const kernel = { status: "stub" };
  return kernel;
}
|
|
@@ -32683,7 +33628,9 @@ export {
|
|
|
32683
33628
|
buildSearchRoots,
|
|
32684
33629
|
resolveFileReference,
|
|
32685
33630
|
CLI_PLACEHOLDERS,
|
|
33631
|
+
findDeprecatedCamelCaseTargetWarnings,
|
|
32686
33632
|
COMMON_TARGET_SETTINGS,
|
|
33633
|
+
resolveDelegatedTargetDefinition,
|
|
32687
33634
|
resolveTargetDefinition,
|
|
32688
33635
|
KNOWN_PROVIDERS,
|
|
32689
33636
|
PROVIDER_ALIASES,
|
|
@@ -32726,17 +33673,18 @@ export {
|
|
|
32726
33673
|
subscribeToCodexLogEntries,
|
|
32727
33674
|
consumeCopilotCliLogEntries,
|
|
32728
33675
|
subscribeToCopilotCliLogEntries,
|
|
33676
|
+
parseCopilotEvents,
|
|
32729
33677
|
discoverCopilotSessions,
|
|
32730
33678
|
consumeCopilotSdkLogEntries,
|
|
32731
33679
|
subscribeToCopilotSdkLogEntries,
|
|
32732
33680
|
consumePiLogEntries,
|
|
32733
33681
|
subscribeToPiLogEntries,
|
|
32734
|
-
ProviderRegistry,
|
|
32735
33682
|
getAgentvHome,
|
|
32736
33683
|
getWorkspacesRoot,
|
|
32737
33684
|
getSubagentsRoot,
|
|
32738
33685
|
getTraceStateRoot,
|
|
32739
33686
|
getWorkspacePoolRoot,
|
|
33687
|
+
ProviderRegistry,
|
|
32740
33688
|
ensureVSCodeSubagents,
|
|
32741
33689
|
readTargetDefinitions,
|
|
32742
33690
|
listTargetNames,
|
|
@@ -32744,6 +33692,7 @@ export {
|
|
|
32744
33692
|
createBuiltinProviderRegistry,
|
|
32745
33693
|
createProvider,
|
|
32746
33694
|
resolveAndCreateProvider,
|
|
33695
|
+
DEFAULT_THRESHOLD,
|
|
32747
33696
|
PASS_THRESHOLD,
|
|
32748
33697
|
scoreToVerdict,
|
|
32749
33698
|
clampScore,
|
|
@@ -32831,8 +33780,13 @@ export {
|
|
|
32831
33780
|
OtelTraceExporter,
|
|
32832
33781
|
OtelStreamingObserver,
|
|
32833
33782
|
parseClaudeSession,
|
|
33783
|
+
parseCodexSession,
|
|
33784
|
+
discoverCodexSessions,
|
|
32834
33785
|
discoverClaudeSessions,
|
|
33786
|
+
toTranscriptJsonLine,
|
|
33787
|
+
readTranscriptJsonl,
|
|
32835
33788
|
readTranscriptFile,
|
|
33789
|
+
TranscriptProvider,
|
|
32836
33790
|
createAgentKernel
|
|
32837
33791
|
};
|
|
32838
|
-
//# sourceMappingURL=chunk-
|
|
33792
|
+
//# sourceMappingURL=chunk-I6UE4LHZ.js.map
|