@agentv/core 4.6.1 → 4.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-ZK4GG7PR.js → chunk-VCVVKCC4.js} +268 -128
- package/dist/chunk-VCVVKCC4.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +110 -94
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +30 -71
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +1353 -466
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +301 -74
- package/dist/index.d.ts +301 -74
- package/dist/index.js +1053 -311
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-ZK4GG7PR.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -31,12 +31,9 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
31
31
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
32
32
|
|
|
33
33
|
// ../../node_modules/.bun/tsup@8.3.5+19811ebab77a7b1c/node_modules/tsup/assets/cjs_shims.js
|
|
34
|
-
var getImportMetaUrl, importMetaUrl;
|
|
35
34
|
var init_cjs_shims = __esm({
|
|
36
35
|
"../../node_modules/.bun/tsup@8.3.5+19811ebab77a7b1c/node_modules/tsup/assets/cjs_shims.js"() {
|
|
37
36
|
"use strict";
|
|
38
|
-
getImportMetaUrl = () => typeof document === "undefined" ? new URL(`file:${__filename}`).href : document.currentScript && document.currentScript.src || new URL("main.js", document.baseURI).href;
|
|
39
|
-
importMetaUrl = /* @__PURE__ */ getImportMetaUrl();
|
|
40
37
|
}
|
|
41
38
|
});
|
|
42
39
|
|
|
@@ -1435,6 +1432,7 @@ __export(index_exports, {
|
|
|
1435
1432
|
DEFAULT_EVALUATOR_TEMPLATE: () => DEFAULT_EVALUATOR_TEMPLATE,
|
|
1436
1433
|
DEFAULT_EVAL_PATTERNS: () => DEFAULT_EVAL_PATTERNS,
|
|
1437
1434
|
DEFAULT_EXPLORATION_TOOLS: () => DEFAULT_EXPLORATION_TOOLS,
|
|
1435
|
+
DEFAULT_THRESHOLD: () => DEFAULT_THRESHOLD,
|
|
1438
1436
|
DeterministicAssertionEvaluator: () => DeterministicAssertionEvaluator,
|
|
1439
1437
|
EvaluatorRegistry: () => EvaluatorRegistry,
|
|
1440
1438
|
ExecutionMetricsEvaluator: () => ExecutionMetricsEvaluator,
|
|
@@ -1456,6 +1454,7 @@ __export(index_exports, {
|
|
|
1456
1454
|
TemplateNotFoundError: () => TemplateNotFoundError,
|
|
1457
1455
|
TokenUsageEvaluator: () => TokenUsageEvaluator,
|
|
1458
1456
|
ToolTrajectoryEvaluator: () => ToolTrajectoryEvaluator,
|
|
1457
|
+
TranscriptProvider: () => TranscriptProvider,
|
|
1459
1458
|
WorkspaceCreationError: () => WorkspaceCreationError,
|
|
1460
1459
|
WorkspacePoolManager: () => WorkspacePoolManager,
|
|
1461
1460
|
addProject: () => addProject,
|
|
@@ -1492,6 +1491,7 @@ __export(index_exports, {
|
|
|
1492
1491
|
detectFormat: () => detectFormat,
|
|
1493
1492
|
discoverAssertions: () => discoverAssertions,
|
|
1494
1493
|
discoverClaudeSessions: () => discoverClaudeSessions,
|
|
1494
|
+
discoverCodexSessions: () => discoverCodexSessions,
|
|
1495
1495
|
discoverCopilotSessions: () => discoverCopilotSessions,
|
|
1496
1496
|
discoverGraders: () => discoverGraders,
|
|
1497
1497
|
discoverJudges: () => discoverGraders,
|
|
@@ -1552,6 +1552,8 @@ __export(index_exports, {
|
|
|
1552
1552
|
normalizeLineEndings: () => normalizeLineEndings,
|
|
1553
1553
|
parseAgentSkillsEvals: () => parseAgentSkillsEvals,
|
|
1554
1554
|
parseClaudeSession: () => parseClaudeSession,
|
|
1555
|
+
parseCodexSession: () => parseCodexSession,
|
|
1556
|
+
parseCopilotEvents: () => parseCopilotEvents,
|
|
1555
1557
|
parseJsonFromText: () => parseJsonFromText,
|
|
1556
1558
|
parseJsonSafe: () => parseJsonSafe,
|
|
1557
1559
|
readJsonFile: () => readJsonFile,
|
|
@@ -1559,6 +1561,7 @@ __export(index_exports, {
|
|
|
1559
1561
|
readTestSuiteMetadata: () => readTestSuiteMetadata,
|
|
1560
1562
|
readTextFile: () => readTextFile,
|
|
1561
1563
|
readTranscriptFile: () => readTranscriptFile,
|
|
1564
|
+
readTranscriptJsonl: () => readTranscriptJsonl,
|
|
1562
1565
|
removeProject: () => removeProject,
|
|
1563
1566
|
resolveAndCreateProvider: () => resolveAndCreateProvider,
|
|
1564
1567
|
resolveDelegatedTargetDefinition: () => resolveDelegatedTargetDefinition,
|
|
@@ -1591,6 +1594,7 @@ __export(index_exports, {
|
|
|
1591
1594
|
substituteVariables: () => substituteVariables,
|
|
1592
1595
|
toCamelCaseDeep: () => toCamelCaseDeep,
|
|
1593
1596
|
toSnakeCaseDeep: () => toSnakeCaseDeep,
|
|
1597
|
+
toTranscriptJsonLine: () => toTranscriptJsonLine,
|
|
1594
1598
|
tokensPerTool: () => tokensPerTool,
|
|
1595
1599
|
touchProject: () => touchProject,
|
|
1596
1600
|
transpileEvalYaml: () => transpileEvalYaml,
|
|
@@ -2675,8 +2679,13 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2675
2679
|
const negate = rawEvaluator.negate === true ? true : void 0;
|
|
2676
2680
|
if (isCustomType) {
|
|
2677
2681
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
2678
|
-
const required2 =
|
|
2679
|
-
|
|
2682
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
2683
|
+
rawEvaluator.required,
|
|
2684
|
+
rawEvaluator.min_score,
|
|
2685
|
+
name,
|
|
2686
|
+
evalId
|
|
2687
|
+
);
|
|
2688
|
+
const knownProps2 = /* @__PURE__ */ new Set(["name", "type", "weight", "required", "min_score", "negate"]);
|
|
2680
2689
|
const config2 = {};
|
|
2681
2690
|
for (const [key, value] of Object.entries(rawEvaluator)) {
|
|
2682
2691
|
if (!knownProps2.has(key) && value !== void 0) {
|
|
@@ -2688,6 +2697,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2688
2697
|
type: customTypeName,
|
|
2689
2698
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
2690
2699
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
2700
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
2691
2701
|
...negate !== void 0 ? { negate } : {},
|
|
2692
2702
|
...Object.keys(config2).length > 0 ? { config: config2 } : {}
|
|
2693
2703
|
});
|
|
@@ -2757,7 +2767,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2757
2767
|
);
|
|
2758
2768
|
}
|
|
2759
2769
|
}
|
|
2760
|
-
const required2 =
|
|
2770
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
2771
|
+
rawEvaluator.required,
|
|
2772
|
+
rawEvaluator.min_score,
|
|
2773
|
+
name,
|
|
2774
|
+
evalId
|
|
2775
|
+
);
|
|
2761
2776
|
const knownProps2 = /* @__PURE__ */ new Set([
|
|
2762
2777
|
"name",
|
|
2763
2778
|
"type",
|
|
@@ -2783,6 +2798,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2783
2798
|
resolvedCwd,
|
|
2784
2799
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
2785
2800
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
2801
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
2786
2802
|
...negate !== void 0 ? { negate } : {},
|
|
2787
2803
|
...Object.keys(config2).length > 0 ? { config: config2 } : {},
|
|
2788
2804
|
...targetConfig !== void 0 ? { target: targetConfig } : {}
|
|
@@ -2911,7 +2927,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2911
2927
|
};
|
|
2912
2928
|
}
|
|
2913
2929
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
2914
|
-
const required2 =
|
|
2930
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
2931
|
+
rawEvaluator.required,
|
|
2932
|
+
rawEvaluator.min_score,
|
|
2933
|
+
name,
|
|
2934
|
+
evalId
|
|
2935
|
+
);
|
|
2915
2936
|
evaluators.push({
|
|
2916
2937
|
name,
|
|
2917
2938
|
type: "composite",
|
|
@@ -2919,6 +2940,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2919
2940
|
aggregator,
|
|
2920
2941
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
2921
2942
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
2943
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
2922
2944
|
...negate !== void 0 ? { negate } : {}
|
|
2923
2945
|
});
|
|
2924
2946
|
continue;
|
|
@@ -3029,7 +3051,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3029
3051
|
continue;
|
|
3030
3052
|
}
|
|
3031
3053
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3032
|
-
const required2 =
|
|
3054
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3055
|
+
rawEvaluator.required,
|
|
3056
|
+
rawEvaluator.min_score,
|
|
3057
|
+
name,
|
|
3058
|
+
evalId
|
|
3059
|
+
);
|
|
3033
3060
|
const config2 = {
|
|
3034
3061
|
name,
|
|
3035
3062
|
type: "tool-trajectory",
|
|
@@ -3038,6 +3065,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3038
3065
|
...expected ? { expected } : {},
|
|
3039
3066
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3040
3067
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3068
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3041
3069
|
...negate !== void 0 ? { negate } : {},
|
|
3042
3070
|
...argsMatch2 !== void 0 ? { argsMatch: argsMatch2 } : {}
|
|
3043
3071
|
};
|
|
@@ -3100,7 +3128,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3100
3128
|
const aggregation = asString(rawEvaluator.aggregation);
|
|
3101
3129
|
const validAggregation = isValidFieldAggregationType(aggregation) ? aggregation : void 0;
|
|
3102
3130
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3103
|
-
const required2 =
|
|
3131
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3132
|
+
rawEvaluator.required,
|
|
3133
|
+
rawEvaluator.min_score,
|
|
3134
|
+
name,
|
|
3135
|
+
evalId
|
|
3136
|
+
);
|
|
3104
3137
|
evaluators.push({
|
|
3105
3138
|
name,
|
|
3106
3139
|
type: "field-accuracy",
|
|
@@ -3108,6 +3141,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3108
3141
|
...validAggregation ? { aggregation: validAggregation } : {},
|
|
3109
3142
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3110
3143
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3144
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3111
3145
|
...negate !== void 0 ? { negate } : {}
|
|
3112
3146
|
});
|
|
3113
3147
|
continue;
|
|
@@ -3121,13 +3155,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3121
3155
|
continue;
|
|
3122
3156
|
}
|
|
3123
3157
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3124
|
-
const required2 =
|
|
3158
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3159
|
+
rawEvaluator.required,
|
|
3160
|
+
rawEvaluator.min_score,
|
|
3161
|
+
name,
|
|
3162
|
+
evalId
|
|
3163
|
+
);
|
|
3125
3164
|
evaluators.push({
|
|
3126
3165
|
name,
|
|
3127
3166
|
type: "latency",
|
|
3128
3167
|
threshold,
|
|
3129
3168
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3130
3169
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3170
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3131
3171
|
...negate !== void 0 ? { negate } : {}
|
|
3132
3172
|
});
|
|
3133
3173
|
continue;
|
|
@@ -3141,13 +3181,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3141
3181
|
continue;
|
|
3142
3182
|
}
|
|
3143
3183
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3144
|
-
const required2 =
|
|
3184
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3185
|
+
rawEvaluator.required,
|
|
3186
|
+
rawEvaluator.min_score,
|
|
3187
|
+
name,
|
|
3188
|
+
evalId
|
|
3189
|
+
);
|
|
3145
3190
|
evaluators.push({
|
|
3146
3191
|
name,
|
|
3147
3192
|
type: "cost",
|
|
3148
3193
|
budget,
|
|
3149
3194
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3150
3195
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3196
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3151
3197
|
...negate !== void 0 ? { negate } : {}
|
|
3152
3198
|
});
|
|
3153
3199
|
continue;
|
|
@@ -3179,13 +3225,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3179
3225
|
continue;
|
|
3180
3226
|
}
|
|
3181
3227
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3182
|
-
const required2 =
|
|
3228
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3229
|
+
rawEvaluator.required,
|
|
3230
|
+
rawEvaluator.min_score,
|
|
3231
|
+
name,
|
|
3232
|
+
evalId
|
|
3233
|
+
);
|
|
3183
3234
|
evaluators.push({
|
|
3184
3235
|
name,
|
|
3185
3236
|
type: "token-usage",
|
|
3186
3237
|
...validLimits,
|
|
3187
3238
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3188
3239
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3240
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3189
3241
|
...negate !== void 0 ? { negate } : {}
|
|
3190
3242
|
});
|
|
3191
3243
|
continue;
|
|
@@ -3231,13 +3283,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3231
3283
|
continue;
|
|
3232
3284
|
}
|
|
3233
3285
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3234
|
-
const required2 =
|
|
3286
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3287
|
+
rawEvaluator.required,
|
|
3288
|
+
rawEvaluator.min_score,
|
|
3289
|
+
name,
|
|
3290
|
+
evalId
|
|
3291
|
+
);
|
|
3235
3292
|
evaluators.push({
|
|
3236
3293
|
name,
|
|
3237
3294
|
type: "execution-metrics",
|
|
3238
3295
|
...validThresholds,
|
|
3239
3296
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3240
3297
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3298
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3241
3299
|
...negate !== void 0 ? { negate } : {}
|
|
3242
3300
|
});
|
|
3243
3301
|
continue;
|
|
@@ -3251,7 +3309,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3251
3309
|
const rawShouldTrigger = rawEvaluator.should_trigger;
|
|
3252
3310
|
const shouldTrigger = typeof rawShouldTrigger === "boolean" ? rawShouldTrigger : void 0;
|
|
3253
3311
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3254
|
-
const required2 =
|
|
3312
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3313
|
+
rawEvaluator.required,
|
|
3314
|
+
rawEvaluator.min_score,
|
|
3315
|
+
name,
|
|
3316
|
+
evalId
|
|
3317
|
+
);
|
|
3255
3318
|
evaluators.push({
|
|
3256
3319
|
name,
|
|
3257
3320
|
type: "skill-trigger",
|
|
@@ -3259,6 +3322,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3259
3322
|
...shouldTrigger !== void 0 ? { should_trigger: shouldTrigger } : {},
|
|
3260
3323
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3261
3324
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3325
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3262
3326
|
...negate !== void 0 ? { negate } : {}
|
|
3263
3327
|
});
|
|
3264
3328
|
continue;
|
|
@@ -3270,13 +3334,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3270
3334
|
continue;
|
|
3271
3335
|
}
|
|
3272
3336
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3273
|
-
const required2 =
|
|
3337
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3338
|
+
rawEvaluator.required,
|
|
3339
|
+
rawEvaluator.min_score,
|
|
3340
|
+
name,
|
|
3341
|
+
evalId
|
|
3342
|
+
);
|
|
3274
3343
|
evaluators.push({
|
|
3275
3344
|
name,
|
|
3276
3345
|
type: "contains",
|
|
3277
3346
|
value,
|
|
3278
3347
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3279
3348
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3349
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3280
3350
|
...negate !== void 0 ? { negate } : {}
|
|
3281
3351
|
});
|
|
3282
3352
|
continue;
|
|
@@ -3290,13 +3360,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3290
3360
|
continue;
|
|
3291
3361
|
}
|
|
3292
3362
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3293
|
-
const required2 =
|
|
3363
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3364
|
+
rawEvaluator.required,
|
|
3365
|
+
rawEvaluator.min_score,
|
|
3366
|
+
name,
|
|
3367
|
+
evalId
|
|
3368
|
+
);
|
|
3294
3369
|
evaluators.push({
|
|
3295
3370
|
name,
|
|
3296
3371
|
type: typeValue,
|
|
3297
3372
|
value,
|
|
3298
3373
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3299
3374
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3375
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3300
3376
|
...negate !== void 0 ? { negate } : {}
|
|
3301
3377
|
});
|
|
3302
3378
|
continue;
|
|
@@ -3308,13 +3384,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3308
3384
|
continue;
|
|
3309
3385
|
}
|
|
3310
3386
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3311
|
-
const required2 =
|
|
3387
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3388
|
+
rawEvaluator.required,
|
|
3389
|
+
rawEvaluator.min_score,
|
|
3390
|
+
name,
|
|
3391
|
+
evalId
|
|
3392
|
+
);
|
|
3312
3393
|
evaluators.push({
|
|
3313
3394
|
name,
|
|
3314
3395
|
type: "icontains",
|
|
3315
3396
|
value,
|
|
3316
3397
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3317
3398
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3399
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3318
3400
|
...negate !== void 0 ? { negate } : {}
|
|
3319
3401
|
});
|
|
3320
3402
|
continue;
|
|
@@ -3328,13 +3410,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3328
3410
|
continue;
|
|
3329
3411
|
}
|
|
3330
3412
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3331
|
-
const required2 =
|
|
3413
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3414
|
+
rawEvaluator.required,
|
|
3415
|
+
rawEvaluator.min_score,
|
|
3416
|
+
name,
|
|
3417
|
+
evalId
|
|
3418
|
+
);
|
|
3332
3419
|
evaluators.push({
|
|
3333
3420
|
name,
|
|
3334
3421
|
type: typeValue,
|
|
3335
3422
|
value,
|
|
3336
3423
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3337
3424
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3425
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3338
3426
|
...negate !== void 0 ? { negate } : {}
|
|
3339
3427
|
});
|
|
3340
3428
|
continue;
|
|
@@ -3346,13 +3434,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3346
3434
|
continue;
|
|
3347
3435
|
}
|
|
3348
3436
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3349
|
-
const required2 =
|
|
3437
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3438
|
+
rawEvaluator.required,
|
|
3439
|
+
rawEvaluator.min_score,
|
|
3440
|
+
name,
|
|
3441
|
+
evalId
|
|
3442
|
+
);
|
|
3350
3443
|
evaluators.push({
|
|
3351
3444
|
name,
|
|
3352
3445
|
type: typeValue,
|
|
3353
3446
|
value,
|
|
3354
3447
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3355
3448
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3449
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3356
3450
|
...negate !== void 0 ? { negate } : {}
|
|
3357
3451
|
});
|
|
3358
3452
|
continue;
|
|
@@ -3365,7 +3459,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3365
3459
|
}
|
|
3366
3460
|
const flags = asString(rawEvaluator.flags);
|
|
3367
3461
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3368
|
-
const required2 =
|
|
3462
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3463
|
+
rawEvaluator.required,
|
|
3464
|
+
rawEvaluator.min_score,
|
|
3465
|
+
name,
|
|
3466
|
+
evalId
|
|
3467
|
+
);
|
|
3369
3468
|
evaluators.push({
|
|
3370
3469
|
name,
|
|
3371
3470
|
type: "regex",
|
|
@@ -3373,18 +3472,25 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3373
3472
|
...flags !== void 0 ? { flags } : {},
|
|
3374
3473
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3375
3474
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3475
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3376
3476
|
...negate !== void 0 ? { negate } : {}
|
|
3377
3477
|
});
|
|
3378
3478
|
continue;
|
|
3379
3479
|
}
|
|
3380
3480
|
if (typeValue === "is-json") {
|
|
3381
3481
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3382
|
-
const required2 =
|
|
3482
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3483
|
+
rawEvaluator.required,
|
|
3484
|
+
rawEvaluator.min_score,
|
|
3485
|
+
name,
|
|
3486
|
+
evalId
|
|
3487
|
+
);
|
|
3383
3488
|
evaluators.push({
|
|
3384
3489
|
name,
|
|
3385
3490
|
type: "is-json",
|
|
3386
3491
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3387
3492
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3493
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3388
3494
|
...negate !== void 0 ? { negate } : {}
|
|
3389
3495
|
});
|
|
3390
3496
|
continue;
|
|
@@ -3396,13 +3502,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3396
3502
|
continue;
|
|
3397
3503
|
}
|
|
3398
3504
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3399
|
-
const required2 =
|
|
3505
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3506
|
+
rawEvaluator.required,
|
|
3507
|
+
rawEvaluator.min_score,
|
|
3508
|
+
name,
|
|
3509
|
+
evalId
|
|
3510
|
+
);
|
|
3400
3511
|
evaluators.push({
|
|
3401
3512
|
name,
|
|
3402
3513
|
type: "equals",
|
|
3403
3514
|
value,
|
|
3404
3515
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3405
3516
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3517
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3406
3518
|
...negate !== void 0 ? { negate } : {}
|
|
3407
3519
|
});
|
|
3408
3520
|
continue;
|
|
@@ -3438,7 +3550,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3438
3550
|
continue;
|
|
3439
3551
|
}
|
|
3440
3552
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3441
|
-
const required2 =
|
|
3553
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3554
|
+
rawEvaluator.required,
|
|
3555
|
+
rawEvaluator.min_score,
|
|
3556
|
+
name,
|
|
3557
|
+
evalId
|
|
3558
|
+
);
|
|
3442
3559
|
evaluators.push({
|
|
3443
3560
|
name,
|
|
3444
3561
|
type: "llm-grader",
|
|
@@ -3446,6 +3563,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3446
3563
|
...graderTargetName ? { target: graderTargetName } : {},
|
|
3447
3564
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3448
3565
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3566
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3449
3567
|
...negate !== void 0 ? { negate } : {}
|
|
3450
3568
|
});
|
|
3451
3569
|
continue;
|
|
@@ -3515,7 +3633,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3515
3633
|
continue;
|
|
3516
3634
|
}
|
|
3517
3635
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3518
|
-
const required2 =
|
|
3636
|
+
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
3637
|
+
rawEvaluator.required,
|
|
3638
|
+
rawEvaluator.min_score,
|
|
3639
|
+
name,
|
|
3640
|
+
evalId
|
|
3641
|
+
);
|
|
3519
3642
|
evaluators.push({
|
|
3520
3643
|
name,
|
|
3521
3644
|
type: "llm-grader",
|
|
@@ -3523,12 +3646,18 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3523
3646
|
...graderTargetName ? { target: graderTargetName } : {},
|
|
3524
3647
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3525
3648
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3649
|
+
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3526
3650
|
...negate !== void 0 ? { negate } : {}
|
|
3527
3651
|
});
|
|
3528
3652
|
continue;
|
|
3529
3653
|
}
|
|
3530
3654
|
const weight = validateWeight(rawEvaluator.weight, name, evalId);
|
|
3531
|
-
const required =
|
|
3655
|
+
const { required, min_score } = parseRequiredAndMinScore(
|
|
3656
|
+
rawEvaluator.required,
|
|
3657
|
+
rawEvaluator.min_score,
|
|
3658
|
+
name,
|
|
3659
|
+
evalId
|
|
3660
|
+
);
|
|
3532
3661
|
const knownProps = /* @__PURE__ */ new Set([
|
|
3533
3662
|
"name",
|
|
3534
3663
|
"type",
|
|
@@ -3539,6 +3668,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3539
3668
|
"weight",
|
|
3540
3669
|
"config",
|
|
3541
3670
|
"required",
|
|
3671
|
+
"min_score",
|
|
3542
3672
|
"negate",
|
|
3543
3673
|
"max_steps",
|
|
3544
3674
|
"maxSteps",
|
|
@@ -3568,6 +3698,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3568
3698
|
...graderTargetName ? { target: graderTargetName } : {},
|
|
3569
3699
|
...weight !== void 0 ? { weight } : {},
|
|
3570
3700
|
...required !== void 0 ? { required } : {},
|
|
3701
|
+
...min_score !== void 0 ? { min_score } : {},
|
|
3571
3702
|
...negate !== void 0 ? { negate } : {},
|
|
3572
3703
|
...finalConfig ? { config: finalConfig } : {},
|
|
3573
3704
|
...llmMaxSteps !== void 0 ? { max_steps: llmMaxSteps } : {},
|
|
@@ -3699,10 +3830,23 @@ ${detailBlock}${ANSI_RESET5}`);
|
|
|
3699
3830
|
console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET5}`);
|
|
3700
3831
|
}
|
|
3701
3832
|
}
|
|
3702
|
-
function
|
|
3703
|
-
|
|
3704
|
-
if (typeof
|
|
3705
|
-
|
|
3833
|
+
/**
 * Normalizes an evaluator's raw `required` / `min_score` settings.
 *
 * - `min_score` is kept only when it is a number in the half-open range (0, 1].
 * - `required: true` is passed through unchanged.
 * - A numeric `required` in (0, 1] is the deprecated spelling: it is kept as-is
 *   on `required`, and also becomes `min_score` unless an explicit valid
 *   `min_score` was supplied. A deprecation warning is logged.
 * - Any other value (including `required: false`) yields no property.
 *
 * Values that are present but unusable are ignored, exactly as before, but now
 * a warning is logged so a typo such as `min_score: 80` does not silently
 * disable the threshold.
 *
 * @param {unknown} rawRequired - Raw `required` value from the evaluator config.
 * @param {unknown} rawMinScore - Raw `min_score` value from the evaluator config.
 * @param {string} evaluatorName - Evaluator name, used in warning messages only.
 * @param {string} evalId - Eval identifier, used in warning messages only.
 * @returns {{ required?: true | number, min_score?: number }} Only the keys that were validly specified.
 */
function parseRequiredAndMinScore(rawRequired, rawMinScore, evaluatorName, evalId) {
  // Shared range check: a usable threshold is a number in (0, 1]. NaN fails both comparisons.
  const isUnitScore = (value) => typeof value === "number" && value > 0 && value <= 1;
  const result = {};

  if (isUnitScore(rawMinScore)) {
    result.min_score = rawMinScore;
  } else if (rawMinScore !== void 0 && rawMinScore !== null) {
    // Present but unusable: keep ignoring it (backward compatible), but say so.
    logWarning2(
      `Evaluator '${evaluatorName}' in '${evalId}': invalid 'min_score: ${String(rawMinScore)}' ignored. Must be a number in (0, 1].`
    );
  }

  if (rawRequired === true) {
    result.required = true;
  } else if (isUnitScore(rawRequired)) {
    // Deprecated numeric form: an explicit min_score takes precedence over it.
    if (result.min_score === void 0) {
      result.min_score = rawRequired;
    }
    result.required = rawRequired;
    logWarning2(
      `Evaluator '${evaluatorName}' in '${evalId}': 'required: ${rawRequired}' is deprecated. Use 'required: true' + 'min_score: ${rawRequired}' instead.`
    );
  } else if (rawRequired !== void 0 && rawRequired !== null && rawRequired !== false) {
    // Anything other than true/false/absent/valid-number was silently dropped before.
    logWarning2(
      `Evaluator '${evaluatorName}' in '${evalId}': invalid 'required: ${String(rawRequired)}' ignored. Use 'required: true' (optionally with 'min_score' in (0, 1]).`
    );
  }

  return result;
}
|
|
3707
3851
|
function validateWeight(rawWeight, evaluatorName, evalId) {
|
|
3708
3852
|
if (rawWeight === void 0) {
|
|
@@ -3745,16 +3889,30 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
|
|
|
3745
3889
|
const id = asString(rawRubric.id) ?? `rubric-${index + 1}`;
|
|
3746
3890
|
const expectedOutcome = asString(rawRubric.outcome) ?? "";
|
|
3747
3891
|
const weight = typeof rawRubric.weight === "number" ? rawRubric.weight : 1;
|
|
3892
|
+
let minScore;
|
|
3748
3893
|
let requiredMinScore;
|
|
3749
3894
|
let required;
|
|
3750
|
-
if (typeof rawRubric.
|
|
3751
|
-
const
|
|
3752
|
-
if (
|
|
3895
|
+
if (typeof rawRubric.min_score === "number") {
|
|
3896
|
+
const ms = rawRubric.min_score;
|
|
3897
|
+
if (ms <= 0 || ms > 1) {
|
|
3753
3898
|
throw new Error(
|
|
3754
|
-
`Invalid
|
|
3899
|
+
`Invalid min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be in (0, 1] (got ${ms})`
|
|
3755
3900
|
);
|
|
3756
3901
|
}
|
|
3757
|
-
|
|
3902
|
+
minScore = ms;
|
|
3903
|
+
requiredMinScore = Math.round(ms * 10);
|
|
3904
|
+
} else if (typeof rawRubric.required_min_score === "number") {
|
|
3905
|
+
const rms = rawRubric.required_min_score;
|
|
3906
|
+
if (!Number.isInteger(rms) || rms < 0 || rms > 10) {
|
|
3907
|
+
throw new Error(
|
|
3908
|
+
`Invalid required_min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be an integer 0-10 (got ${rms})`
|
|
3909
|
+
);
|
|
3910
|
+
}
|
|
3911
|
+
requiredMinScore = rms;
|
|
3912
|
+
minScore = rms / 10;
|
|
3913
|
+
logWarning2(
|
|
3914
|
+
`Rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': 'required_min_score: ${rms}' is deprecated. Use 'min_score: ${rms / 10}' (0-1 scale) instead.`
|
|
3915
|
+
);
|
|
3758
3916
|
}
|
|
3759
3917
|
if (typeof rawRubric.required === "boolean") {
|
|
3760
3918
|
required = rawRubric.required;
|
|
@@ -3774,6 +3932,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
|
|
|
3774
3932
|
weight,
|
|
3775
3933
|
...expectedOutcome.length > 0 ? { outcome: expectedOutcome } : {},
|
|
3776
3934
|
...required !== void 0 ? { required } : {},
|
|
3935
|
+
...minScore !== void 0 ? { min_score: minScore } : {},
|
|
3777
3936
|
...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {},
|
|
3778
3937
|
score_ranges: scoreRanges
|
|
3779
3938
|
});
|
|
@@ -3790,6 +3949,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
|
|
|
3790
3949
|
weight,
|
|
3791
3950
|
// Default to required: true if not specified (backward compatibility)
|
|
3792
3951
|
required: required ?? true,
|
|
3952
|
+
...minScore !== void 0 ? { min_score: minScore } : {},
|
|
3793
3953
|
...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {}
|
|
3794
3954
|
});
|
|
3795
3955
|
}
|
|
@@ -3918,12 +4078,22 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
3918
4078
|
id: asString(rubric.id) ?? `rubric-${index + 1}`,
|
|
3919
4079
|
weight: typeof rubric.weight === "number" ? rubric.weight : 1
|
|
3920
4080
|
};
|
|
4081
|
+
let inlineMinScore;
|
|
4082
|
+
let inlineRequiredMinScore;
|
|
4083
|
+
if (typeof rubric.min_score === "number") {
|
|
4084
|
+
inlineMinScore = rubric.min_score;
|
|
4085
|
+
inlineRequiredMinScore = Math.round(inlineMinScore * 10);
|
|
4086
|
+
} else if (typeof rubric.required_min_score === "number") {
|
|
4087
|
+
inlineRequiredMinScore = rubric.required_min_score;
|
|
4088
|
+
inlineMinScore = inlineRequiredMinScore / 10;
|
|
4089
|
+
}
|
|
3921
4090
|
if (scoreRanges && scoreRanges.length > 0) {
|
|
3922
4091
|
return {
|
|
3923
4092
|
...baseRubric,
|
|
3924
4093
|
...expectedOutcome.length > 0 ? { outcome: expectedOutcome } : {},
|
|
3925
4094
|
...typeof rubric.required === "boolean" ? { required: rubric.required } : {},
|
|
3926
|
-
...
|
|
4095
|
+
...inlineMinScore !== void 0 ? { min_score: inlineMinScore } : {},
|
|
4096
|
+
...inlineRequiredMinScore !== void 0 ? { required_min_score: inlineRequiredMinScore } : {},
|
|
3927
4097
|
score_ranges: scoreRanges
|
|
3928
4098
|
};
|
|
3929
4099
|
}
|
|
@@ -3931,7 +4101,8 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
3931
4101
|
...baseRubric,
|
|
3932
4102
|
outcome: expectedOutcome,
|
|
3933
4103
|
required: typeof rubric.required === "boolean" ? rubric.required : true,
|
|
3934
|
-
...
|
|
4104
|
+
...inlineMinScore !== void 0 ? { min_score: inlineMinScore } : {},
|
|
4105
|
+
...inlineRequiredMinScore !== void 0 ? { required_min_score: inlineRequiredMinScore } : {}
|
|
3935
4106
|
};
|
|
3936
4107
|
}).filter((r) => r.outcome && r.outcome.length > 0 || "score_ranges" in r && r.score_ranges);
|
|
3937
4108
|
if (rubricItems.length === 0) {
|
|
@@ -4335,6 +4506,9 @@ function resolveExpectedMessages(raw) {
|
|
|
4335
4506
|
var ANSI_YELLOW6 = "\x1B[33m";
|
|
4336
4507
|
var ANSI_RED2 = "\x1B[31m";
|
|
4337
4508
|
var ANSI_RESET7 = "\x1B[0m";
|
|
4509
|
+
function matchesFilter(id, filter) {
|
|
4510
|
+
return typeof filter === "string" ? import_micromatch.default.isMatch(id, filter) : filter.some((pattern) => import_micromatch.default.isMatch(id, pattern));
|
|
4511
|
+
}
|
|
4338
4512
|
function detectFormat(filePath) {
|
|
4339
4513
|
const ext = import_node_path7.default.extname(filePath).toLowerCase();
|
|
4340
4514
|
if (ext === ".jsonl") return "jsonl";
|
|
@@ -4402,40 +4576,40 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4402
4576
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
4403
4577
|
const rawFile = await (0, import_promises7.readFile)(absoluteTestPath, "utf8");
|
|
4404
4578
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
4405
|
-
const
|
|
4406
|
-
const
|
|
4579
|
+
const fallbackSuiteName = import_node_path7.default.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
4580
|
+
const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
|
|
4407
4581
|
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
|
|
4408
4582
|
const globalExecution = sidecar.execution;
|
|
4409
4583
|
if (verbose) {
|
|
4410
4584
|
console.log(`
|
|
4411
|
-
[JSONL
|
|
4585
|
+
[JSONL Suite: ${evalFilePath}]`);
|
|
4412
4586
|
console.log(` Cases: ${rawCases.length}`);
|
|
4413
|
-
console.log(`
|
|
4587
|
+
console.log(` Suite: ${suiteName}`);
|
|
4414
4588
|
if (sidecar.description) {
|
|
4415
4589
|
console.log(` Description: ${sidecar.description}`);
|
|
4416
4590
|
}
|
|
4417
4591
|
}
|
|
4418
4592
|
const results = [];
|
|
4419
4593
|
for (let lineIndex = 0; lineIndex < rawCases.length; lineIndex++) {
|
|
4420
|
-
const
|
|
4594
|
+
const testCaseConfig = rawCases[lineIndex];
|
|
4421
4595
|
const lineNumber = lineIndex + 1;
|
|
4422
|
-
const id = asString4(
|
|
4423
|
-
if (filterPattern && (!id || !
|
|
4596
|
+
const id = asString4(testCaseConfig.id);
|
|
4597
|
+
if (filterPattern && (!id || !matchesFilter(id, filterPattern))) {
|
|
4424
4598
|
continue;
|
|
4425
4599
|
}
|
|
4426
|
-
const conversationId = asString4(
|
|
4427
|
-
let outcome = asString4(
|
|
4428
|
-
if (!outcome &&
|
|
4429
|
-
outcome = asString4(
|
|
4600
|
+
const conversationId = asString4(testCaseConfig.conversation_id);
|
|
4601
|
+
let outcome = asString4(testCaseConfig.criteria);
|
|
4602
|
+
if (!outcome && testCaseConfig.expected_outcome !== void 0) {
|
|
4603
|
+
outcome = asString4(testCaseConfig.expected_outcome);
|
|
4430
4604
|
if (outcome) {
|
|
4431
4605
|
logWarning4(
|
|
4432
|
-
`Test '${asString4(
|
|
4606
|
+
`Test '${asString4(testCaseConfig.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
|
|
4433
4607
|
);
|
|
4434
4608
|
}
|
|
4435
4609
|
}
|
|
4436
|
-
const rawInputMessages = resolveInputMessages(
|
|
4437
|
-
const expectedMessages = resolveExpectedMessages(
|
|
4438
|
-
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 ||
|
|
4610
|
+
const rawInputMessages = resolveInputMessages(testCaseConfig);
|
|
4611
|
+
const expectedMessages = resolveExpectedMessages(testCaseConfig) ?? [];
|
|
4612
|
+
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || testCaseConfig.assert !== void 0;
|
|
4439
4613
|
if (!id || !hasEvaluationSpec || !rawInputMessages || rawInputMessages.length === 0) {
|
|
4440
4614
|
logError2(
|
|
4441
4615
|
`Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
|
|
@@ -4472,18 +4646,23 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4472
4646
|
}
|
|
4473
4647
|
}
|
|
4474
4648
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
4475
|
-
const caseExecution = isJsonObject(
|
|
4649
|
+
const caseExecution = isJsonObject(testCaseConfig.execution) ? testCaseConfig.execution : void 0;
|
|
4476
4650
|
const mergedExecution = caseExecution ?? globalExecution;
|
|
4477
|
-
const
|
|
4651
|
+
const testCaseEvaluatorKind = coerceEvaluator(testCaseConfig.evaluator, id) ?? globalEvaluator;
|
|
4478
4652
|
let evaluators;
|
|
4479
4653
|
try {
|
|
4480
|
-
evaluators = await parseEvaluators(
|
|
4654
|
+
evaluators = await parseEvaluators(
|
|
4655
|
+
testCaseConfig,
|
|
4656
|
+
mergedExecution,
|
|
4657
|
+
searchRoots,
|
|
4658
|
+
id ?? "unknown"
|
|
4659
|
+
);
|
|
4481
4660
|
} catch (error) {
|
|
4482
4661
|
const message = error instanceof Error ? error.message : String(error);
|
|
4483
4662
|
logError2(`Skipping test '${id}' at line ${lineNumber}: ${message}`);
|
|
4484
4663
|
continue;
|
|
4485
4664
|
}
|
|
4486
|
-
const inlineRubrics =
|
|
4665
|
+
const inlineRubrics = testCaseConfig.rubrics;
|
|
4487
4666
|
if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
|
|
4488
4667
|
const rubricEvaluator = parseInlineRubrics(inlineRubrics);
|
|
4489
4668
|
if (rubricEvaluator) {
|
|
@@ -4494,7 +4673,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4494
4673
|
const userFilePaths = collectResolvedInputFilePaths(inputMessages);
|
|
4495
4674
|
const testCase = {
|
|
4496
4675
|
id,
|
|
4497
|
-
|
|
4676
|
+
suite: suiteName,
|
|
4498
4677
|
conversation_id: conversationId,
|
|
4499
4678
|
question,
|
|
4500
4679
|
input: inputMessages,
|
|
@@ -4502,7 +4681,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4502
4681
|
reference_answer: referenceAnswer,
|
|
4503
4682
|
file_paths: userFilePaths,
|
|
4504
4683
|
criteria: outcome ?? "",
|
|
4505
|
-
evaluator:
|
|
4684
|
+
evaluator: testCaseEvaluatorKind,
|
|
4506
4685
|
assertions: evaluators
|
|
4507
4686
|
};
|
|
4508
4687
|
results.push(testCase);
|
|
@@ -4687,6 +4866,9 @@ function buildChatPromptFromSegments(options) {
|
|
|
4687
4866
|
var ANSI_YELLOW7 = "\x1B[33m";
|
|
4688
4867
|
var ANSI_RED3 = "\x1B[31m";
|
|
4689
4868
|
var ANSI_RESET8 = "\x1B[0m";
|
|
4869
|
+
function matchesFilter2(id, filter) {
|
|
4870
|
+
return typeof filter === "string" ? import_micromatch2.default.isMatch(id, filter) : filter.some((pattern) => import_micromatch2.default.isMatch(id, pattern));
|
|
4871
|
+
}
|
|
4690
4872
|
function resolveTests(suite) {
|
|
4691
4873
|
if (suite.tests !== void 0) return suite.tests;
|
|
4692
4874
|
if (suite.eval_cases !== void 0) {
|
|
@@ -4766,18 +4948,18 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4766
4948
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
4767
4949
|
}
|
|
4768
4950
|
const suite = interpolated;
|
|
4769
|
-
const
|
|
4770
|
-
const
|
|
4771
|
-
const
|
|
4772
|
-
const
|
|
4951
|
+
const suiteNameFromFile = asString5(suite.name)?.trim();
|
|
4952
|
+
const fallbackSuiteName = import_node_path8.default.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
|
|
4953
|
+
const suiteName = suiteNameFromFile && suiteNameFromFile.length > 0 ? suiteNameFromFile : fallbackSuiteName;
|
|
4954
|
+
const rawTestCases = resolveTests(suite);
|
|
4773
4955
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
4774
4956
|
const evalFileDir = import_node_path8.default.dirname(absoluteTestPath);
|
|
4775
|
-
let
|
|
4776
|
-
if (typeof
|
|
4777
|
-
const externalPath = import_node_path8.default.resolve(evalFileDir,
|
|
4778
|
-
|
|
4779
|
-
} else if (Array.isArray(
|
|
4780
|
-
|
|
4957
|
+
let expandedTestCases;
|
|
4958
|
+
if (typeof rawTestCases === "string") {
|
|
4959
|
+
const externalPath = import_node_path8.default.resolve(evalFileDir, rawTestCases);
|
|
4960
|
+
expandedTestCases = await loadCasesFromFile(externalPath);
|
|
4961
|
+
} else if (Array.isArray(rawTestCases)) {
|
|
4962
|
+
expandedTestCases = await expandFileReferences(rawTestCases, evalFileDir);
|
|
4781
4963
|
} else {
|
|
4782
4964
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
|
|
4783
4965
|
}
|
|
@@ -4792,32 +4974,33 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4792
4974
|
}
|
|
4793
4975
|
const globalExecution = suiteAssertions !== void 0 ? { ...rawGlobalExecution ?? {}, assertions: suiteAssertions } : rawGlobalExecution;
|
|
4794
4976
|
const results = [];
|
|
4795
|
-
for (const
|
|
4796
|
-
if (!isJsonObject(
|
|
4977
|
+
for (const rawTestCase of expandedTestCases) {
|
|
4978
|
+
if (!isJsonObject(rawTestCase)) {
|
|
4797
4979
|
logWarning5("Skipping invalid test entry (expected object)");
|
|
4798
4980
|
continue;
|
|
4799
4981
|
}
|
|
4800
|
-
const
|
|
4801
|
-
const id = asString5(
|
|
4802
|
-
if (filterPattern && (!id || !
|
|
4982
|
+
const testCaseConfig = rawTestCase;
|
|
4983
|
+
const id = asString5(testCaseConfig.id);
|
|
4984
|
+
if (filterPattern && (!id || !matchesFilter2(id, filterPattern))) {
|
|
4803
4985
|
continue;
|
|
4804
4986
|
}
|
|
4805
|
-
const conversationId = asString5(
|
|
4806
|
-
let outcome = asString5(
|
|
4807
|
-
if (!outcome &&
|
|
4808
|
-
outcome = asString5(
|
|
4987
|
+
const conversationId = asString5(testCaseConfig.conversation_id);
|
|
4988
|
+
let outcome = asString5(testCaseConfig.criteria);
|
|
4989
|
+
if (!outcome && testCaseConfig.expected_outcome !== void 0) {
|
|
4990
|
+
outcome = asString5(testCaseConfig.expected_outcome);
|
|
4809
4991
|
if (outcome) {
|
|
4810
4992
|
logWarning5(
|
|
4811
|
-
`Test '${asString5(
|
|
4993
|
+
`Test '${asString5(testCaseConfig.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
|
|
4812
4994
|
);
|
|
4813
4995
|
}
|
|
4814
4996
|
}
|
|
4815
|
-
const caseExecution = isJsonObject(
|
|
4997
|
+
const caseExecution = isJsonObject(testCaseConfig.execution) ? testCaseConfig.execution : void 0;
|
|
4816
4998
|
const skipDefaults = caseExecution?.skip_defaults === true;
|
|
4999
|
+
const caseThreshold = typeof caseExecution?.threshold === "number" && caseExecution.threshold >= 0 && caseExecution.threshold <= 1 ? caseExecution.threshold : void 0;
|
|
4817
5000
|
const effectiveSuiteInputFiles = suiteInputFiles && !skipDefaults ? suiteInputFiles : void 0;
|
|
4818
|
-
const testInputMessages = resolveInputMessages(
|
|
4819
|
-
const expectedMessages = resolveExpectedMessages(
|
|
4820
|
-
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 ||
|
|
5001
|
+
const testInputMessages = resolveInputMessages(testCaseConfig, effectiveSuiteInputFiles);
|
|
5002
|
+
const expectedMessages = resolveExpectedMessages(testCaseConfig) ?? [];
|
|
5003
|
+
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || testCaseConfig.assertions !== void 0 || testCaseConfig.assert !== void 0;
|
|
4821
5004
|
if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
|
|
4822
5005
|
logError3(
|
|
4823
5006
|
`Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assertions`
|
|
@@ -4864,16 +5047,21 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4864
5047
|
}
|
|
4865
5048
|
}
|
|
4866
5049
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
4867
|
-
const
|
|
5050
|
+
const testCaseEvaluatorKind = coerceEvaluator(testCaseConfig.evaluator, id) ?? globalEvaluator;
|
|
4868
5051
|
let evaluators;
|
|
4869
5052
|
try {
|
|
4870
|
-
evaluators = await parseEvaluators(
|
|
5053
|
+
evaluators = await parseEvaluators(
|
|
5054
|
+
testCaseConfig,
|
|
5055
|
+
globalExecution,
|
|
5056
|
+
searchRoots,
|
|
5057
|
+
id ?? "unknown"
|
|
5058
|
+
);
|
|
4871
5059
|
} catch (error) {
|
|
4872
5060
|
const message = error instanceof Error ? error.message : String(error);
|
|
4873
5061
|
logError3(`Skipping test '${id}': ${message}`);
|
|
4874
5062
|
continue;
|
|
4875
5063
|
}
|
|
4876
|
-
const inlineRubrics =
|
|
5064
|
+
const inlineRubrics = testCaseConfig.rubrics;
|
|
4877
5065
|
if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
|
|
4878
5066
|
const rubricEvaluator = parseInlineRubrics(inlineRubrics);
|
|
4879
5067
|
if (rubricEvaluator) {
|
|
@@ -4882,13 +5070,13 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4882
5070
|
}
|
|
4883
5071
|
warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
|
|
4884
5072
|
const userFilePaths = collectResolvedInputFilePaths(inputMessages);
|
|
4885
|
-
const caseWorkspace = await resolveWorkspaceConfig(
|
|
5073
|
+
const caseWorkspace = await resolveWorkspaceConfig(testCaseConfig.workspace, evalFileDir);
|
|
4886
5074
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
4887
|
-
const metadata = isJsonObject(
|
|
4888
|
-
const caseTargets = extractTargetsFromTestCase(
|
|
5075
|
+
const metadata = isJsonObject(testCaseConfig.metadata) ? testCaseConfig.metadata : void 0;
|
|
5076
|
+
const caseTargets = extractTargetsFromTestCase(testCaseConfig);
|
|
4889
5077
|
const testCase = {
|
|
4890
5078
|
id,
|
|
4891
|
-
|
|
5079
|
+
suite: suiteName,
|
|
4892
5080
|
category: options?.category,
|
|
4893
5081
|
conversation_id: conversationId,
|
|
4894
5082
|
question,
|
|
@@ -4897,11 +5085,12 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4897
5085
|
reference_answer: referenceAnswer,
|
|
4898
5086
|
file_paths: userFilePaths,
|
|
4899
5087
|
criteria: outcome ?? "",
|
|
4900
|
-
evaluator:
|
|
5088
|
+
evaluator: testCaseEvaluatorKind,
|
|
4901
5089
|
assertions: evaluators,
|
|
4902
5090
|
workspace: mergedWorkspace,
|
|
4903
5091
|
metadata,
|
|
4904
|
-
targets: caseTargets
|
|
5092
|
+
targets: caseTargets,
|
|
5093
|
+
...caseThreshold !== void 0 ? { threshold: caseThreshold } : {}
|
|
4905
5094
|
};
|
|
4906
5095
|
results.push(testCase);
|
|
4907
5096
|
}
|
|
@@ -5567,7 +5756,7 @@ var AzureProvider = class {
|
|
|
5567
5756
|
};
|
|
5568
5757
|
this.retryConfig = config.retry;
|
|
5569
5758
|
const azure = (0, import_azure2.createAzure)(buildAzureOptions(config));
|
|
5570
|
-
this.model = azure.chat(config.deploymentName);
|
|
5759
|
+
this.model = config.apiFormat === "responses" ? azure(config.deploymentName) : azure.chat(config.deploymentName);
|
|
5571
5760
|
}
|
|
5572
5761
|
id;
|
|
5573
5762
|
kind = "azure";
|
|
@@ -5693,7 +5882,9 @@ function buildAzureOptions(config) {
|
|
|
5693
5882
|
const options = {
|
|
5694
5883
|
apiKey: config.apiKey,
|
|
5695
5884
|
apiVersion: config.version,
|
|
5696
|
-
|
|
5885
|
+
// Chat completions still use deployment-scoped Azure URLs for compatibility
|
|
5886
|
+
// with existing deployments. Responses API should use the SDK's v1 path.
|
|
5887
|
+
useDeploymentBasedUrls: config.apiFormat !== "responses"
|
|
5697
5888
|
};
|
|
5698
5889
|
const baseURL = normalizeAzureBaseUrl(config.resourceName);
|
|
5699
5890
|
if (baseURL) {
|
|
@@ -8910,6 +9101,25 @@ var CopilotSdkProvider = class {
|
|
|
8910
9101
|
content: systemPrompt
|
|
8911
9102
|
};
|
|
8912
9103
|
}
|
|
9104
|
+
if (this.config.byokBaseUrl) {
|
|
9105
|
+
const byokType = this.config.byokType ?? "openai";
|
|
9106
|
+
const provider = {
|
|
9107
|
+
type: byokType,
|
|
9108
|
+
baseUrl: normalizeByokBaseUrl(this.config.byokBaseUrl, byokType)
|
|
9109
|
+
};
|
|
9110
|
+
if (this.config.byokBearerToken) {
|
|
9111
|
+
provider.bearerToken = this.config.byokBearerToken;
|
|
9112
|
+
} else if (this.config.byokApiKey) {
|
|
9113
|
+
provider.apiKey = this.config.byokApiKey;
|
|
9114
|
+
}
|
|
9115
|
+
if (this.config.byokWireApi) {
|
|
9116
|
+
provider.wireApi = this.config.byokWireApi;
|
|
9117
|
+
}
|
|
9118
|
+
if (this.config.byokType === "azure" && this.config.byokApiVersion) {
|
|
9119
|
+
provider.azure = { apiVersion: this.config.byokApiVersion };
|
|
9120
|
+
}
|
|
9121
|
+
sessionOptions.provider = provider;
|
|
9122
|
+
}
|
|
8913
9123
|
let session;
|
|
8914
9124
|
try {
|
|
8915
9125
|
session = await client.createSession(sessionOptions);
|
|
@@ -9141,6 +9351,16 @@ function resolveSkillDirectories(cwd) {
|
|
|
9141
9351
|
];
|
|
9142
9352
|
return candidates.filter((dir) => (0, import_node_fs8.existsSync)(dir));
|
|
9143
9353
|
}
|
|
9354
|
+
function normalizeByokBaseUrl(baseUrl, type) {
|
|
9355
|
+
const trimmed = baseUrl.trim().replace(/\/+$/, "");
|
|
9356
|
+
if (/^https?:\/\//i.test(trimmed)) {
|
|
9357
|
+
return trimmed;
|
|
9358
|
+
}
|
|
9359
|
+
if (type === "azure") {
|
|
9360
|
+
return `https://${trimmed}.openai.azure.com`;
|
|
9361
|
+
}
|
|
9362
|
+
return trimmed;
|
|
9363
|
+
}
|
|
9144
9364
|
function summarizeSdkEvent(eventType, data) {
|
|
9145
9365
|
if (!data || typeof data !== "object") {
|
|
9146
9366
|
return eventType;
|
|
@@ -9322,6 +9542,22 @@ function extractAzureResourceName(baseUrl) {
|
|
|
9322
9542
|
if (urlMatch) return urlMatch[1];
|
|
9323
9543
|
return baseUrl;
|
|
9324
9544
|
}
|
|
9545
|
+
function normalizeAzureSdkBaseUrl(baseUrl) {
|
|
9546
|
+
const trimmed = baseUrl.trim().replace(/\/+$/, "");
|
|
9547
|
+
if (!trimmed) {
|
|
9548
|
+
return trimmed;
|
|
9549
|
+
}
|
|
9550
|
+
if (!/^https?:\/\//i.test(trimmed)) {
|
|
9551
|
+
return `https://${trimmed}.openai.azure.com/openai/v1`;
|
|
9552
|
+
}
|
|
9553
|
+
if (/\/openai\/v1$/i.test(trimmed)) {
|
|
9554
|
+
return trimmed;
|
|
9555
|
+
}
|
|
9556
|
+
if (/\/openai$/i.test(trimmed)) {
|
|
9557
|
+
return `${trimmed}/v1`;
|
|
9558
|
+
}
|
|
9559
|
+
return `${trimmed}/openai/v1`;
|
|
9560
|
+
}
|
|
9325
9561
|
|
|
9326
9562
|
// src/evaluation/providers/pi-utils.ts
|
|
9327
9563
|
init_cjs_shims();
|
|
@@ -10156,9 +10392,40 @@ var import_node_child_process5 = require("child_process");
|
|
|
10156
10392
|
var import_node_crypto8 = require("crypto");
|
|
10157
10393
|
var import_node_fs10 = require("fs");
|
|
10158
10394
|
var import_promises19 = require("fs/promises");
|
|
10159
|
-
var
|
|
10395
|
+
var import_node_path23 = __toESM(require("path"), 1);
|
|
10160
10396
|
var import_node_readline = require("readline");
|
|
10161
10397
|
var import_node_url3 = require("url");
|
|
10398
|
+
|
|
10399
|
+
// src/paths.ts
|
|
10400
|
+
init_cjs_shims();
|
|
10401
|
+
var import_node_os6 = __toESM(require("os"), 1);
|
|
10402
|
+
var import_node_path22 = __toESM(require("path"), 1);
|
|
10403
|
+
var logged = false;
|
|
10404
|
+
function getAgentvHome() {
|
|
10405
|
+
const envHome = process.env.AGENTV_HOME;
|
|
10406
|
+
if (envHome && envHome !== "undefined") {
|
|
10407
|
+
if (!logged) {
|
|
10408
|
+
logged = true;
|
|
10409
|
+
console.warn(`Using AGENTV_HOME: ${envHome}`);
|
|
10410
|
+
}
|
|
10411
|
+
return envHome;
|
|
10412
|
+
}
|
|
10413
|
+
return import_node_path22.default.join(import_node_os6.default.homedir(), ".agentv");
|
|
10414
|
+
}
|
|
10415
|
+
function getWorkspacesRoot() {
|
|
10416
|
+
return import_node_path22.default.join(getAgentvHome(), "workspaces");
|
|
10417
|
+
}
|
|
10418
|
+
function getSubagentsRoot() {
|
|
10419
|
+
return import_node_path22.default.join(getAgentvHome(), "subagents");
|
|
10420
|
+
}
|
|
10421
|
+
function getTraceStateRoot() {
|
|
10422
|
+
return import_node_path22.default.join(getAgentvHome(), "trace-state");
|
|
10423
|
+
}
|
|
10424
|
+
function getWorkspacePoolRoot() {
|
|
10425
|
+
return import_node_path22.default.join(getAgentvHome(), "workspace-pool");
|
|
10426
|
+
}
|
|
10427
|
+
|
|
10428
|
+
// src/evaluation/providers/pi-coding-agent.ts
|
|
10162
10429
|
var piCodingAgentModule = null;
|
|
10163
10430
|
var piAiModule = null;
|
|
10164
10431
|
var loadingPromise = null;
|
|
@@ -10176,46 +10443,126 @@ async function promptInstall() {
|
|
|
10176
10443
|
rl.close();
|
|
10177
10444
|
}
|
|
10178
10445
|
}
|
|
10179
|
-
function
|
|
10180
|
-
|
|
10181
|
-
|
|
10182
|
-
|
|
10446
|
+
function findManagedSdkInstallRoot() {
|
|
10447
|
+
return import_node_path23.default.join(getAgentvHome(), "deps", "pi-sdk");
|
|
10448
|
+
}
|
|
10449
|
+
function resolveGlobalNpmRoot() {
|
|
10450
|
+
try {
|
|
10451
|
+
const root = (0, import_node_child_process5.execSync)("npm root -g", {
|
|
10452
|
+
encoding: "utf-8",
|
|
10453
|
+
stdio: ["ignore", "pipe", "ignore"]
|
|
10454
|
+
}).trim();
|
|
10455
|
+
return root.length > 0 ? root : void 0;
|
|
10456
|
+
} catch {
|
|
10457
|
+
return void 0;
|
|
10458
|
+
}
|
|
10459
|
+
}
|
|
10460
|
+
function buildGlobalModuleEntry(moduleName, globalNpmRoot) {
|
|
10461
|
+
return import_node_path23.default.join(globalNpmRoot, ...moduleName.split("/"), "dist", "index.js");
|
|
10462
|
+
}
|
|
10463
|
+
function findAccessiblePath(paths) {
|
|
10464
|
+
for (const candidate of paths) {
|
|
10183
10465
|
try {
|
|
10184
|
-
|
|
10185
|
-
|
|
10186
|
-
return dir;
|
|
10466
|
+
(0, import_node_fs10.accessSync)(candidate);
|
|
10467
|
+
return candidate;
|
|
10187
10468
|
} catch {
|
|
10188
|
-
const parent = import_node_path22.default.dirname(dir);
|
|
10189
|
-
if (parent === dir) break;
|
|
10190
|
-
dir = parent;
|
|
10191
10469
|
}
|
|
10192
10470
|
}
|
|
10193
|
-
return
|
|
10471
|
+
return void 0;
|
|
10194
10472
|
}
|
|
10195
|
-
async function
|
|
10473
|
+
async function tryImportLocalSdkModules() {
|
|
10196
10474
|
try {
|
|
10197
10475
|
[piCodingAgentModule, piAiModule] = await Promise.all([
|
|
10198
10476
|
import("@mariozechner/pi-coding-agent"),
|
|
10199
10477
|
import("@mariozechner/pi-ai")
|
|
10200
10478
|
]);
|
|
10479
|
+
return true;
|
|
10201
10480
|
} catch {
|
|
10202
|
-
|
|
10203
|
-
|
|
10204
|
-
|
|
10205
|
-
|
|
10206
|
-
|
|
10207
|
-
|
|
10208
|
-
|
|
10209
|
-
|
|
10210
|
-
|
|
10211
|
-
|
|
10212
|
-
|
|
10213
|
-
|
|
10214
|
-
|
|
10215
|
-
|
|
10216
|
-
|
|
10481
|
+
return false;
|
|
10482
|
+
}
|
|
10483
|
+
}
|
|
10484
|
+
async function tryImportManagedSdkModules() {
|
|
10485
|
+
const managedRoot = findManagedSdkInstallRoot();
|
|
10486
|
+
const piCodingAgentEntry = findAccessiblePath([
|
|
10487
|
+
import_node_path23.default.join(managedRoot, "node_modules", "@mariozechner", "pi-coding-agent", "dist", "index.js")
|
|
10488
|
+
]);
|
|
10489
|
+
const piAiEntry = findAccessiblePath([
|
|
10490
|
+
import_node_path23.default.join(managedRoot, "node_modules", "@mariozechner", "pi-ai", "dist", "index.js"),
|
|
10491
|
+
import_node_path23.default.join(
|
|
10492
|
+
managedRoot,
|
|
10493
|
+
"node_modules",
|
|
10494
|
+
"@mariozechner",
|
|
10495
|
+
"pi-coding-agent",
|
|
10496
|
+
"node_modules",
|
|
10497
|
+
"@mariozechner",
|
|
10498
|
+
"pi-ai",
|
|
10499
|
+
"dist",
|
|
10500
|
+
"index.js"
|
|
10501
|
+
)
|
|
10502
|
+
]);
|
|
10503
|
+
if (!piCodingAgentEntry || !piAiEntry) return false;
|
|
10504
|
+
try {
|
|
10505
|
+
[piCodingAgentModule, piAiModule] = await Promise.all([
|
|
10506
|
+
import((0, import_node_url3.pathToFileURL)(piCodingAgentEntry).href),
|
|
10507
|
+
import((0, import_node_url3.pathToFileURL)(piAiEntry).href)
|
|
10508
|
+
]);
|
|
10509
|
+
return true;
|
|
10510
|
+
} catch {
|
|
10511
|
+
return false;
|
|
10512
|
+
}
|
|
10513
|
+
}
|
|
10514
|
+
async function tryImportGlobalSdkModules() {
|
|
10515
|
+
const globalNpmRoot = resolveGlobalNpmRoot();
|
|
10516
|
+
if (!globalNpmRoot) return false;
|
|
10517
|
+
const piCodingAgentEntry = findAccessiblePath([
|
|
10518
|
+
buildGlobalModuleEntry("@mariozechner/pi-coding-agent", globalNpmRoot)
|
|
10519
|
+
]);
|
|
10520
|
+
const piAiEntry = findAccessiblePath([
|
|
10521
|
+
buildGlobalModuleEntry("@mariozechner/pi-ai", globalNpmRoot),
|
|
10522
|
+
import_node_path23.default.join(
|
|
10523
|
+
globalNpmRoot,
|
|
10524
|
+
"@mariozechner",
|
|
10525
|
+
"pi-coding-agent",
|
|
10526
|
+
"node_modules",
|
|
10527
|
+
"@mariozechner",
|
|
10528
|
+
"pi-ai",
|
|
10529
|
+
"dist",
|
|
10530
|
+
"index.js"
|
|
10531
|
+
)
|
|
10532
|
+
]);
|
|
10533
|
+
if (!piCodingAgentEntry || !piAiEntry) return false;
|
|
10534
|
+
try {
|
|
10535
|
+
[piCodingAgentModule, piAiModule] = await Promise.all([
|
|
10536
|
+
import((0, import_node_url3.pathToFileURL)(piCodingAgentEntry).href),
|
|
10537
|
+
import((0, import_node_url3.pathToFileURL)(piAiEntry).href)
|
|
10538
|
+
]);
|
|
10539
|
+
return true;
|
|
10540
|
+
} catch {
|
|
10541
|
+
return false;
|
|
10542
|
+
}
|
|
10543
|
+
}
|
|
10544
|
+
function installSdkModules(installDir) {
|
|
10545
|
+
console.error(`Installing @mariozechner/pi-coding-agent into ${installDir} via npm...`);
|
|
10546
|
+
(0, import_node_fs10.mkdirSync)(installDir, { recursive: true });
|
|
10547
|
+
(0, import_node_child_process5.execSync)("npm install --no-save --no-package-lock @mariozechner/pi-coding-agent", {
|
|
10548
|
+
cwd: installDir,
|
|
10549
|
+
stdio: "inherit"
|
|
10550
|
+
});
|
|
10551
|
+
}
|
|
10552
|
+
async function doLoadSdkModules() {
|
|
10553
|
+
if (await tryImportLocalSdkModules() || await tryImportManagedSdkModules() || await tryImportGlobalSdkModules()) {
|
|
10554
|
+
return;
|
|
10555
|
+
}
|
|
10556
|
+
if (await promptInstall()) {
|
|
10557
|
+
const installDir = findManagedSdkInstallRoot();
|
|
10558
|
+
installSdkModules(installDir);
|
|
10559
|
+
if (await tryImportManagedSdkModules()) {
|
|
10560
|
+
return;
|
|
10217
10561
|
}
|
|
10218
10562
|
}
|
|
10563
|
+
throw new Error(
|
|
10564
|
+
"pi-coding-agent SDK is not installed. Install it with:\n npm install @mariozechner/pi-coding-agent"
|
|
10565
|
+
);
|
|
10219
10566
|
}
|
|
10220
10567
|
async function loadSdkModules() {
|
|
10221
10568
|
if (!piCodingAgentModule || !piAiModule) {
|
|
@@ -10272,12 +10619,16 @@ var PiCodingAgentProvider = class {
|
|
|
10272
10619
|
try {
|
|
10273
10620
|
const cwd = this.resolveCwd(request.cwd);
|
|
10274
10621
|
const rawProvider = this.config.subprovider ?? "google";
|
|
10275
|
-
const
|
|
10622
|
+
const normalizedBaseUrl = this.normalizeSdkBaseUrl(rawProvider, this.config.baseUrl);
|
|
10623
|
+
const hasBaseUrl = !!normalizedBaseUrl;
|
|
10276
10624
|
const providerName = resolveSubprovider(rawProvider, hasBaseUrl);
|
|
10277
10625
|
const modelId = this.config.model ?? "gemini-2.5-flash";
|
|
10278
10626
|
this.setApiKeyEnv(rawProvider, hasBaseUrl);
|
|
10279
|
-
this.setBaseUrlEnv(rawProvider, hasBaseUrl);
|
|
10627
|
+
this.setBaseUrlEnv(rawProvider, normalizedBaseUrl, hasBaseUrl);
|
|
10280
10628
|
let model = sdk.getModel(providerName, modelId);
|
|
10629
|
+
if (model && normalizedBaseUrl) {
|
|
10630
|
+
model = { ...model, baseUrl: normalizedBaseUrl };
|
|
10631
|
+
}
|
|
10281
10632
|
if (!model) {
|
|
10282
10633
|
const envProvider = providerName.replace(/-responses$/, "");
|
|
10283
10634
|
model = {
|
|
@@ -10285,7 +10636,7 @@ var PiCodingAgentProvider = class {
|
|
|
10285
10636
|
name: modelId,
|
|
10286
10637
|
api: providerName,
|
|
10287
10638
|
provider: envProvider,
|
|
10288
|
-
baseUrl:
|
|
10639
|
+
baseUrl: normalizedBaseUrl ?? "",
|
|
10289
10640
|
reasoning: false,
|
|
10290
10641
|
input: ["text"],
|
|
10291
10642
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
@@ -10452,19 +10803,27 @@ ${fileList}`;
|
|
|
10452
10803
|
}
|
|
10453
10804
|
}
|
|
10454
10805
|
/** Maps config baseUrl to the provider-specific env var the SDK reads. */
|
|
10455
|
-
setBaseUrlEnv(providerName, hasBaseUrl = false) {
|
|
10456
|
-
|
|
10806
|
+
setBaseUrlEnv(providerName, baseUrl = this.config.baseUrl, hasBaseUrl = false) {
|
|
10807
|
+
const normalizedBaseUrl = this.normalizeSdkBaseUrl(providerName, baseUrl);
|
|
10808
|
+
if (!normalizedBaseUrl) return;
|
|
10457
10809
|
const envKey = resolveEnvBaseUrlName(providerName, hasBaseUrl);
|
|
10458
10810
|
if (envKey) {
|
|
10459
|
-
process.env[envKey] =
|
|
10811
|
+
process.env[envKey] = normalizedBaseUrl;
|
|
10812
|
+
}
|
|
10813
|
+
}
|
|
10814
|
+
normalizeSdkBaseUrl(providerName, baseUrl) {
|
|
10815
|
+
if (!baseUrl) return void 0;
|
|
10816
|
+
if (providerName.toLowerCase() === "azure") {
|
|
10817
|
+
return normalizeAzureSdkBaseUrl(baseUrl);
|
|
10460
10818
|
}
|
|
10819
|
+
return baseUrl;
|
|
10461
10820
|
}
|
|
10462
10821
|
resolveCwd(cwdOverride) {
|
|
10463
10822
|
if (cwdOverride) {
|
|
10464
|
-
return
|
|
10823
|
+
return import_node_path23.default.resolve(cwdOverride);
|
|
10465
10824
|
}
|
|
10466
10825
|
if (this.config.cwd) {
|
|
10467
|
-
return
|
|
10826
|
+
return import_node_path23.default.resolve(this.config.cwd);
|
|
10468
10827
|
}
|
|
10469
10828
|
return process.cwd();
|
|
10470
10829
|
}
|
|
@@ -10483,9 +10842,9 @@ ${fileList}`;
|
|
|
10483
10842
|
}
|
|
10484
10843
|
resolveLogDirectory() {
|
|
10485
10844
|
if (this.config.logDir) {
|
|
10486
|
-
return
|
|
10845
|
+
return import_node_path23.default.resolve(this.config.logDir);
|
|
10487
10846
|
}
|
|
10488
|
-
return
|
|
10847
|
+
return import_node_path23.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
10489
10848
|
}
|
|
10490
10849
|
async createStreamLogger(request) {
|
|
10491
10850
|
const logDir = this.resolveLogDirectory();
|
|
@@ -10499,7 +10858,7 @@ ${fileList}`;
|
|
|
10499
10858
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
10500
10859
|
return void 0;
|
|
10501
10860
|
}
|
|
10502
|
-
const filePath =
|
|
10861
|
+
const filePath = import_node_path23.default.join(logDir, buildLogFilename6(request, this.targetName));
|
|
10503
10862
|
try {
|
|
10504
10863
|
const logger = await PiStreamLogger2.create({
|
|
10505
10864
|
filePath,
|
|
@@ -10714,19 +11073,17 @@ var ProviderRegistry = class {
|
|
|
10714
11073
|
|
|
10715
11074
|
// src/evaluation/providers/targets.ts
|
|
10716
11075
|
init_cjs_shims();
|
|
10717
|
-
var
|
|
11076
|
+
var import_node_path24 = __toESM(require("path"), 1);
|
|
10718
11077
|
var import_zod3 = require("zod");
|
|
10719
11078
|
var CliHealthcheckHttpInputSchema = import_zod3.z.object({
|
|
10720
11079
|
url: import_zod3.z.string().min(1, "healthcheck URL is required"),
|
|
10721
|
-
timeout_seconds: import_zod3.z.number().positive().optional()
|
|
10722
|
-
|
|
10723
|
-
});
|
|
11080
|
+
timeout_seconds: import_zod3.z.number().positive().optional()
|
|
11081
|
+
}).passthrough();
|
|
10724
11082
|
var CliHealthcheckCommandInputSchema = import_zod3.z.object({
|
|
10725
11083
|
command: import_zod3.z.string().min(1, "healthcheck command is required"),
|
|
10726
11084
|
cwd: import_zod3.z.string().optional(),
|
|
10727
|
-
timeout_seconds: import_zod3.z.number().positive().optional()
|
|
10728
|
-
|
|
10729
|
-
});
|
|
11085
|
+
timeout_seconds: import_zod3.z.number().positive().optional()
|
|
11086
|
+
}).passthrough();
|
|
10730
11087
|
var CliHealthcheckInputSchema = import_zod3.z.union([
|
|
10731
11088
|
CliHealthcheckHttpInputSchema,
|
|
10732
11089
|
CliHealthcheckCommandInputSchema
|
|
@@ -10738,36 +11095,28 @@ var CliTargetInputSchema = import_zod3.z.object({
|
|
|
10738
11095
|
command: import_zod3.z.string(),
|
|
10739
11096
|
// Files format - optional
|
|
10740
11097
|
files_format: import_zod3.z.string().optional(),
|
|
10741
|
-
filesFormat: import_zod3.z.string().optional(),
|
|
10742
11098
|
attachments_format: import_zod3.z.string().optional(),
|
|
10743
|
-
attachmentsFormat: import_zod3.z.string().optional(),
|
|
10744
11099
|
// Working directory - optional
|
|
10745
11100
|
cwd: import_zod3.z.string().optional(),
|
|
10746
11101
|
// Workspace template directory - optional (mutually exclusive with cwd)
|
|
10747
11102
|
workspace_template: import_zod3.z.string().optional(),
|
|
10748
|
-
workspaceTemplate: import_zod3.z.string().optional(),
|
|
10749
11103
|
// Timeout in seconds - optional
|
|
10750
11104
|
timeout_seconds: import_zod3.z.number().positive().optional(),
|
|
10751
|
-
timeoutSeconds: import_zod3.z.number().positive().optional(),
|
|
10752
11105
|
// Healthcheck configuration - optional
|
|
10753
11106
|
healthcheck: CliHealthcheckInputSchema.optional(),
|
|
10754
11107
|
// Verbose mode - optional
|
|
10755
11108
|
verbose: import_zod3.z.boolean().optional(),
|
|
10756
11109
|
cli_verbose: import_zod3.z.boolean().optional(),
|
|
10757
|
-
cliVerbose: import_zod3.z.boolean().optional(),
|
|
10758
11110
|
// Keep temp files - optional
|
|
10759
11111
|
keep_temp_files: import_zod3.z.boolean().optional(),
|
|
10760
|
-
keepTempFiles: import_zod3.z.boolean().optional(),
|
|
10761
11112
|
keep_output_files: import_zod3.z.boolean().optional(),
|
|
10762
|
-
keepOutputFiles: import_zod3.z.boolean().optional(),
|
|
10763
11113
|
// Common target fields
|
|
10764
11114
|
grader_target: import_zod3.z.string().optional(),
|
|
10765
11115
|
judge_target: import_zod3.z.string().optional(),
|
|
10766
11116
|
// backward compat
|
|
10767
11117
|
workers: import_zod3.z.number().int().min(1).optional(),
|
|
10768
|
-
provider_batching: import_zod3.z.boolean().optional()
|
|
10769
|
-
|
|
10770
|
-
});
|
|
11118
|
+
provider_batching: import_zod3.z.boolean().optional()
|
|
11119
|
+
}).passthrough();
|
|
10771
11120
|
var CliHealthcheckHttpSchema = import_zod3.z.object({
|
|
10772
11121
|
url: import_zod3.z.string().min(1),
|
|
10773
11122
|
timeoutMs: import_zod3.z.number().positive().optional()
|
|
@@ -10792,7 +11141,7 @@ var CliTargetConfigSchema = import_zod3.z.object({
|
|
|
10792
11141
|
keepTempFiles: import_zod3.z.boolean().optional()
|
|
10793
11142
|
}).strict();
|
|
10794
11143
|
function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
10795
|
-
const timeoutSeconds = input.timeout_seconds
|
|
11144
|
+
const timeoutSeconds = input.timeout_seconds;
|
|
10796
11145
|
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
10797
11146
|
if ("url" in input && input.url) {
|
|
10798
11147
|
const url = resolveString(input.url, env, `${targetName} healthcheck URL`);
|
|
@@ -10811,11 +11160,11 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
|
10811
11160
|
allowLiteral: true,
|
|
10812
11161
|
optionalEnv: true
|
|
10813
11162
|
});
|
|
10814
|
-
if (cwd && evalFilePath && !
|
|
10815
|
-
cwd =
|
|
11163
|
+
if (cwd && evalFilePath && !import_node_path24.default.isAbsolute(cwd)) {
|
|
11164
|
+
cwd = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), cwd);
|
|
10816
11165
|
}
|
|
10817
11166
|
if (!cwd && evalFilePath) {
|
|
10818
|
-
cwd =
|
|
11167
|
+
cwd = import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath));
|
|
10819
11168
|
}
|
|
10820
11169
|
return {
|
|
10821
11170
|
command,
|
|
@@ -10826,9 +11175,9 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
|
10826
11175
|
function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
10827
11176
|
const targetName = input.name;
|
|
10828
11177
|
const command = resolveString(input.command, env, `${targetName} CLI command`, true);
|
|
10829
|
-
const filesFormatSource = input.files_format ?? input.
|
|
11178
|
+
const filesFormatSource = input.files_format ?? input.attachments_format;
|
|
10830
11179
|
const filesFormat = resolveOptionalLiteralString(filesFormatSource);
|
|
10831
|
-
const workspaceTemplateSource = input.workspace_template
|
|
11180
|
+
const workspaceTemplateSource = input.workspace_template;
|
|
10832
11181
|
let workspaceTemplate = resolveOptionalString(
|
|
10833
11182
|
workspaceTemplateSource,
|
|
10834
11183
|
env,
|
|
@@ -10838,15 +11187,15 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
10838
11187
|
optionalEnv: true
|
|
10839
11188
|
}
|
|
10840
11189
|
);
|
|
10841
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10842
|
-
workspaceTemplate =
|
|
11190
|
+
if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
|
|
11191
|
+
workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10843
11192
|
}
|
|
10844
11193
|
let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
|
|
10845
11194
|
allowLiteral: true,
|
|
10846
11195
|
optionalEnv: true
|
|
10847
11196
|
});
|
|
10848
|
-
if (cwd && evalFilePath && !
|
|
10849
|
-
cwd =
|
|
11197
|
+
if (cwd && evalFilePath && !import_node_path24.default.isAbsolute(cwd)) {
|
|
11198
|
+
cwd = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), cwd);
|
|
10850
11199
|
}
|
|
10851
11200
|
if (cwd && workspaceTemplate) {
|
|
10852
11201
|
throw new Error(
|
|
@@ -10854,14 +11203,12 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
10854
11203
|
);
|
|
10855
11204
|
}
|
|
10856
11205
|
if (!cwd && !workspaceTemplate && evalFilePath) {
|
|
10857
|
-
cwd =
|
|
11206
|
+
cwd = import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath));
|
|
10858
11207
|
}
|
|
10859
|
-
const timeoutSeconds = input.timeout_seconds
|
|
11208
|
+
const timeoutSeconds = input.timeout_seconds;
|
|
10860
11209
|
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
10861
|
-
const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose
|
|
10862
|
-
const keepTempFiles = resolveOptionalBoolean(
|
|
10863
|
-
input.keep_temp_files ?? input.keepTempFiles ?? input.keep_output_files ?? input.keepOutputFiles
|
|
10864
|
-
);
|
|
11210
|
+
const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose);
|
|
11211
|
+
const keepTempFiles = resolveOptionalBoolean(input.keep_temp_files ?? input.keep_output_files);
|
|
10865
11212
|
const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
|
|
10866
11213
|
return {
|
|
10867
11214
|
command,
|
|
@@ -10882,14 +11229,104 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
|
|
|
10882
11229
|
"FILES",
|
|
10883
11230
|
"OUTPUT_FILE"
|
|
10884
11231
|
]);
|
|
11232
|
+
var DEPRECATED_TARGET_CAMEL_CASE_FIELDS = /* @__PURE__ */ new Map([
|
|
11233
|
+
["providerBatching", "provider_batching"],
|
|
11234
|
+
["subagentModeAllowed", "subagent_mode_allowed"],
|
|
11235
|
+
["fallbackTargets", "fallback_targets"],
|
|
11236
|
+
["resourceName", "endpoint"],
|
|
11237
|
+
["baseUrl", "base_url"],
|
|
11238
|
+
["apiKey", "api_key"],
|
|
11239
|
+
["deploymentName", "model"],
|
|
11240
|
+
["thinkingBudget", "thinking_budget"],
|
|
11241
|
+
["maxTokens", "max_output_tokens"],
|
|
11242
|
+
["apiFormat", "api_format"],
|
|
11243
|
+
["timeoutSeconds", "timeout_seconds"],
|
|
11244
|
+
["logDir", "log_dir"],
|
|
11245
|
+
["logDirectory", "log_directory"],
|
|
11246
|
+
["logFormat", "log_format"],
|
|
11247
|
+
["logOutputFormat", "log_output_format"],
|
|
11248
|
+
["systemPrompt", "system_prompt"],
|
|
11249
|
+
["maxTurns", "max_turns"],
|
|
11250
|
+
["maxBudgetUsd", "max_budget_usd"],
|
|
11251
|
+
["dryRun", "dry_run"],
|
|
11252
|
+
["subagentRoot", "subagent_root"],
|
|
11253
|
+
["filesFormat", "files_format"],
|
|
11254
|
+
["attachmentsFormat", "attachments_format"],
|
|
11255
|
+
["cliUrl", "cli_url"],
|
|
11256
|
+
["cliPath", "cli_path"],
|
|
11257
|
+
["githubToken", "github_token"],
|
|
11258
|
+
["sessionDir", "session_dir"],
|
|
11259
|
+
["sessionId", "session_id"],
|
|
11260
|
+
["sessionStateDir", "session_state_dir"],
|
|
11261
|
+
["maxRetries", "max_retries"],
|
|
11262
|
+
["retryInitialDelayMs", "retry_initial_delay_ms"],
|
|
11263
|
+
["retryMaxDelayMs", "retry_max_delay_ms"],
|
|
11264
|
+
["retryBackoffFactor", "retry_backoff_factor"],
|
|
11265
|
+
["retryStatusCodes", "retry_status_codes"]
|
|
11266
|
+
]);
|
|
11267
|
+
var DEPRECATED_HEALTHCHECK_CAMEL_CASE_FIELDS = /* @__PURE__ */ new Map([
|
|
11268
|
+
["timeoutSeconds", "timeout_seconds"]
|
|
11269
|
+
]);
|
|
11270
|
+
function collectDeprecatedCamelCaseWarnings(value, location, aliases) {
|
|
11271
|
+
if (typeof value !== "object" || value === null || Array.isArray(value)) {
|
|
11272
|
+
return [];
|
|
11273
|
+
}
|
|
11274
|
+
const warnings = [];
|
|
11275
|
+
for (const [camelCaseField, snakeCaseField] of aliases) {
|
|
11276
|
+
if (Object.prototype.hasOwnProperty.call(value, camelCaseField)) {
|
|
11277
|
+
warnings.push({
|
|
11278
|
+
location: `${location}.${camelCaseField}`,
|
|
11279
|
+
message: `camelCase field '${camelCaseField}' is no longer supported in targets.yaml. Use '${snakeCaseField}' instead.`
|
|
11280
|
+
});
|
|
11281
|
+
}
|
|
11282
|
+
}
|
|
11283
|
+
return warnings;
|
|
11284
|
+
}
|
|
11285
|
+
function assertNoDeprecatedCamelCaseTargetFields(definition) {
|
|
11286
|
+
if (Object.prototype.hasOwnProperty.call(definition, "workspaceTemplate")) {
|
|
11287
|
+
throw new Error(
|
|
11288
|
+
`${definition.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
|
|
11289
|
+
);
|
|
11290
|
+
}
|
|
11291
|
+
const warning = findDeprecatedCamelCaseTargetWarnings(
|
|
11292
|
+
definition,
|
|
11293
|
+
`target "${definition.name}"`
|
|
11294
|
+
)[0];
|
|
11295
|
+
if (!warning) {
|
|
11296
|
+
return;
|
|
11297
|
+
}
|
|
11298
|
+
const fieldMatch = warning.message.match(/field '([^']+)'/);
|
|
11299
|
+
const replacementMatch = warning.message.match(/Use '([^']+)' instead/);
|
|
11300
|
+
const field = fieldMatch?.[1] ?? "unknown";
|
|
11301
|
+
const replacement = replacementMatch?.[1] ?? "snake_case";
|
|
11302
|
+
throw new Error(
|
|
11303
|
+
`${warning.location}: camelCase field '${field}' is no longer supported in targets.yaml. Use '${replacement}' instead.`
|
|
11304
|
+
);
|
|
11305
|
+
}
|
|
11306
|
+
function findDeprecatedCamelCaseTargetWarnings(target, location) {
|
|
11307
|
+
const warnings = collectDeprecatedCamelCaseWarnings(
|
|
11308
|
+
target,
|
|
11309
|
+
location,
|
|
11310
|
+
DEPRECATED_TARGET_CAMEL_CASE_FIELDS
|
|
11311
|
+
);
|
|
11312
|
+
if (typeof target !== "object" || target === null || Array.isArray(target)) {
|
|
11313
|
+
return warnings;
|
|
11314
|
+
}
|
|
11315
|
+
const healthcheck = target.healthcheck;
|
|
11316
|
+
warnings.push(
|
|
11317
|
+
...collectDeprecatedCamelCaseWarnings(
|
|
11318
|
+
healthcheck,
|
|
11319
|
+
`${location}.healthcheck`,
|
|
11320
|
+
DEPRECATED_HEALTHCHECK_CAMEL_CASE_FIELDS
|
|
11321
|
+
)
|
|
11322
|
+
);
|
|
11323
|
+
return warnings;
|
|
11324
|
+
}
|
|
10885
11325
|
var COMMON_TARGET_SETTINGS = [
|
|
10886
11326
|
"use_target",
|
|
10887
11327
|
"provider_batching",
|
|
10888
|
-
"providerBatching",
|
|
10889
11328
|
"subagent_mode_allowed",
|
|
10890
|
-
"
|
|
10891
|
-
"fallback_targets",
|
|
10892
|
-
"fallbackTargets"
|
|
11329
|
+
"fallback_targets"
|
|
10893
11330
|
];
|
|
10894
11331
|
var USE_TARGET_ENV_PATTERN = /^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i;
|
|
10895
11332
|
var BASE_TARGET_SCHEMA = import_zod3.z.object({
|
|
@@ -10901,43 +11338,40 @@ var BASE_TARGET_SCHEMA = import_zod3.z.object({
|
|
|
10901
11338
|
// backward compat
|
|
10902
11339
|
workers: import_zod3.z.number().int().min(1).optional(),
|
|
10903
11340
|
workspace_template: import_zod3.z.string().optional(),
|
|
10904
|
-
workspaceTemplate: import_zod3.z.string().optional(),
|
|
10905
11341
|
subagent_mode_allowed: import_zod3.z.boolean().optional(),
|
|
10906
|
-
fallback_targets: import_zod3.z.array(import_zod3.z.string().min(1)).optional()
|
|
10907
|
-
fallbackTargets: import_zod3.z.array(import_zod3.z.string().min(1)).optional()
|
|
11342
|
+
fallback_targets: import_zod3.z.array(import_zod3.z.string().min(1)).optional()
|
|
10908
11343
|
}).passthrough();
|
|
10909
11344
|
var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
|
|
11345
|
+
var DEFAULT_AZURE_RESPONSES_API_VERSION = "v1";
|
|
10910
11346
|
var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
|
|
10911
|
-
function normalizeAzureApiVersion(value) {
|
|
11347
|
+
function normalizeAzureApiVersion(value, apiFormat) {
|
|
11348
|
+
const defaultVersion = apiFormat === "responses" ? DEFAULT_AZURE_RESPONSES_API_VERSION : DEFAULT_AZURE_API_VERSION;
|
|
10912
11349
|
if (!value) {
|
|
10913
|
-
return
|
|
11350
|
+
return defaultVersion;
|
|
10914
11351
|
}
|
|
10915
11352
|
const trimmed = value.trim();
|
|
10916
11353
|
if (trimmed.length === 0) {
|
|
10917
|
-
return
|
|
11354
|
+
return defaultVersion;
|
|
10918
11355
|
}
|
|
10919
11356
|
const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
|
|
10920
|
-
return withoutPrefix.length > 0 ? withoutPrefix :
|
|
11357
|
+
return withoutPrefix.length > 0 ? withoutPrefix : defaultVersion;
|
|
10921
11358
|
}
|
|
10922
11359
|
function resolveRetryConfig(target) {
|
|
10923
|
-
const maxRetries = resolveOptionalNumber(
|
|
10924
|
-
target.max_retries ?? target.maxRetries,
|
|
10925
|
-
`${target.name} max retries`
|
|
10926
|
-
);
|
|
11360
|
+
const maxRetries = resolveOptionalNumber(target.max_retries, `${target.name} max retries`);
|
|
10927
11361
|
const initialDelayMs = resolveOptionalNumber(
|
|
10928
|
-
target.retry_initial_delay_ms
|
|
11362
|
+
target.retry_initial_delay_ms,
|
|
10929
11363
|
`${target.name} retry initial delay`
|
|
10930
11364
|
);
|
|
10931
11365
|
const maxDelayMs = resolveOptionalNumber(
|
|
10932
|
-
target.retry_max_delay_ms
|
|
11366
|
+
target.retry_max_delay_ms,
|
|
10933
11367
|
`${target.name} retry max delay`
|
|
10934
11368
|
);
|
|
10935
11369
|
const backoffFactor = resolveOptionalNumber(
|
|
10936
|
-
target.retry_backoff_factor
|
|
11370
|
+
target.retry_backoff_factor,
|
|
10937
11371
|
`${target.name} retry backoff factor`
|
|
10938
11372
|
);
|
|
10939
11373
|
const retryableStatusCodes = resolveOptionalNumberArray(
|
|
10940
|
-
target.retry_status_codes
|
|
11374
|
+
target.retry_status_codes,
|
|
10941
11375
|
`${target.name} retry status codes`
|
|
10942
11376
|
);
|
|
10943
11377
|
if (maxRetries === void 0 && initialDelayMs === void 0 && maxDelayMs === void 0 && backoffFactor === void 0 && retryableStatusCodes === void 0) {
|
|
@@ -10997,9 +11431,10 @@ function resolveDelegatedTargetDefinition(name, definitions, env = process.env)
|
|
|
10997
11431
|
`Target "${name}" exceeded the maximum use_target resolution depth (10). Check for a delegation loop or overly deep alias chain.`
|
|
10998
11432
|
);
|
|
10999
11433
|
}
|
|
11000
|
-
function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
11434
|
+
function resolveTargetDefinition(definition, env = process.env, evalFilePath, options) {
|
|
11435
|
+
assertNoDeprecatedCamelCaseTargetFields(definition);
|
|
11001
11436
|
const parsed = BASE_TARGET_SCHEMA.parse(definition);
|
|
11002
|
-
if (parsed.workspace_template !== void 0
|
|
11437
|
+
if (parsed.workspace_template !== void 0) {
|
|
11003
11438
|
throw new Error(
|
|
11004
11439
|
`${parsed.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
|
|
11005
11440
|
);
|
|
@@ -11015,13 +11450,9 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
11015
11450
|
`${parsed.name} provider`,
|
|
11016
11451
|
true
|
|
11017
11452
|
).toLowerCase();
|
|
11018
|
-
const providerBatching = resolveOptionalBoolean(
|
|
11019
|
-
|
|
11020
|
-
|
|
11021
|
-
const subagentModeAllowed = resolveOptionalBoolean(
|
|
11022
|
-
parsed.subagent_mode_allowed ?? parsed.subagentModeAllowed
|
|
11023
|
-
);
|
|
11024
|
-
const fallbackTargets = parsed.fallback_targets ?? parsed.fallbackTargets;
|
|
11453
|
+
const providerBatching = resolveOptionalBoolean(parsed.provider_batching);
|
|
11454
|
+
const subagentModeAllowed = resolveOptionalBoolean(parsed.subagent_mode_allowed);
|
|
11455
|
+
const fallbackTargets = parsed.fallback_targets;
|
|
11025
11456
|
const base = {
|
|
11026
11457
|
name: parsed.name,
|
|
11027
11458
|
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
@@ -11171,20 +11602,22 @@ function normalizeOpenAIBaseUrl(value) {
|
|
|
11171
11602
|
return trimmed.endsWith("/v1") ? trimmed : `${trimmed}/v1`;
|
|
11172
11603
|
}
|
|
11173
11604
|
function resolveAzureConfig(target, env) {
|
|
11174
|
-
const endpointSource = target.endpoint ?? target.resource
|
|
11175
|
-
const apiKeySource = target.api_key
|
|
11176
|
-
const deploymentSource = target.deployment ?? target.
|
|
11605
|
+
const endpointSource = target.endpoint ?? target.resource;
|
|
11606
|
+
const apiKeySource = target.api_key;
|
|
11607
|
+
const deploymentSource = target.deployment ?? target.model;
|
|
11177
11608
|
const versionSource = target.version ?? target.api_version;
|
|
11178
11609
|
const temperatureSource = target.temperature;
|
|
11179
|
-
const maxTokensSource = target.max_output_tokens
|
|
11610
|
+
const maxTokensSource = target.max_output_tokens;
|
|
11180
11611
|
const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
|
|
11181
11612
|
const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
|
|
11182
11613
|
const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
|
|
11614
|
+
const apiFormat = resolveApiFormat(target, env, target.name);
|
|
11183
11615
|
const version = normalizeAzureApiVersion(
|
|
11184
11616
|
resolveOptionalString(versionSource, env, `${target.name} api version`, {
|
|
11185
11617
|
allowLiteral: true,
|
|
11186
11618
|
optionalEnv: true
|
|
11187
|
-
})
|
|
11619
|
+
}),
|
|
11620
|
+
apiFormat
|
|
11188
11621
|
);
|
|
11189
11622
|
const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
|
|
11190
11623
|
const maxOutputTokens = resolveOptionalNumber(
|
|
@@ -11197,13 +11630,17 @@ function resolveAzureConfig(target, env) {
|
|
|
11197
11630
|
deploymentName,
|
|
11198
11631
|
apiKey,
|
|
11199
11632
|
version,
|
|
11633
|
+
apiFormat,
|
|
11200
11634
|
temperature,
|
|
11201
11635
|
maxOutputTokens,
|
|
11202
11636
|
retry
|
|
11203
11637
|
};
|
|
11204
11638
|
}
|
|
11205
|
-
function resolveApiFormat(target, targetName) {
|
|
11206
|
-
const raw = target.api_format
|
|
11639
|
+
function resolveApiFormat(target, env, targetName) {
|
|
11640
|
+
const raw = resolveOptionalString(target.api_format, env, `${targetName} api format`, {
|
|
11641
|
+
allowLiteral: true,
|
|
11642
|
+
optionalEnv: true
|
|
11643
|
+
});
|
|
11207
11644
|
if (raw === void 0) return void 0;
|
|
11208
11645
|
if (raw === "chat" || raw === "responses") return raw;
|
|
11209
11646
|
throw new Error(
|
|
@@ -11211,11 +11648,11 @@ function resolveApiFormat(target, targetName) {
|
|
|
11211
11648
|
);
|
|
11212
11649
|
}
|
|
11213
11650
|
function resolveOpenAIConfig(target, env) {
|
|
11214
|
-
const endpointSource = target.endpoint ?? target.base_url
|
|
11215
|
-
const apiKeySource = target.api_key
|
|
11651
|
+
const endpointSource = target.endpoint ?? target.base_url;
|
|
11652
|
+
const apiKeySource = target.api_key;
|
|
11216
11653
|
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
11217
11654
|
const temperatureSource = target.temperature;
|
|
11218
|
-
const maxTokensSource = target.max_output_tokens
|
|
11655
|
+
const maxTokensSource = target.max_output_tokens;
|
|
11219
11656
|
const baseURL = normalizeOpenAIBaseUrl(
|
|
11220
11657
|
resolveOptionalString(endpointSource, env, `${target.name} endpoint`, {
|
|
11221
11658
|
allowLiteral: true,
|
|
@@ -11229,17 +11666,17 @@ function resolveOpenAIConfig(target, env) {
|
|
|
11229
11666
|
baseURL,
|
|
11230
11667
|
apiKey,
|
|
11231
11668
|
model,
|
|
11232
|
-
apiFormat: resolveApiFormat(target, target.name),
|
|
11669
|
+
apiFormat: resolveApiFormat(target, env, target.name),
|
|
11233
11670
|
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
11234
11671
|
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
11235
11672
|
retry
|
|
11236
11673
|
};
|
|
11237
11674
|
}
|
|
11238
11675
|
function resolveOpenRouterConfig(target, env) {
|
|
11239
|
-
const apiKeySource = target.api_key
|
|
11676
|
+
const apiKeySource = target.api_key;
|
|
11240
11677
|
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
11241
11678
|
const temperatureSource = target.temperature;
|
|
11242
|
-
const maxTokensSource = target.max_output_tokens
|
|
11679
|
+
const maxTokensSource = target.max_output_tokens;
|
|
11243
11680
|
const retry = resolveRetryConfig(target);
|
|
11244
11681
|
return {
|
|
11245
11682
|
apiKey: resolveString(apiKeySource, env, `${target.name} OpenRouter api key`),
|
|
@@ -11250,11 +11687,11 @@ function resolveOpenRouterConfig(target, env) {
|
|
|
11250
11687
|
};
|
|
11251
11688
|
}
|
|
11252
11689
|
function resolveAnthropicConfig(target, env) {
|
|
11253
|
-
const apiKeySource = target.api_key
|
|
11690
|
+
const apiKeySource = target.api_key;
|
|
11254
11691
|
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
11255
11692
|
const temperatureSource = target.temperature;
|
|
11256
|
-
const maxTokensSource = target.max_output_tokens
|
|
11257
|
-
const thinkingBudgetSource = target.thinking_budget
|
|
11693
|
+
const maxTokensSource = target.max_output_tokens;
|
|
11694
|
+
const thinkingBudgetSource = target.thinking_budget;
|
|
11258
11695
|
const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
|
|
11259
11696
|
const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
|
|
11260
11697
|
const retry = resolveRetryConfig(target);
|
|
@@ -11268,10 +11705,10 @@ function resolveAnthropicConfig(target, env) {
|
|
|
11268
11705
|
};
|
|
11269
11706
|
}
|
|
11270
11707
|
function resolveGeminiConfig(target, env) {
|
|
11271
|
-
const apiKeySource = target.api_key
|
|
11708
|
+
const apiKeySource = target.api_key;
|
|
11272
11709
|
const modelSource = target.model ?? target.deployment ?? target.variant;
|
|
11273
11710
|
const temperatureSource = target.temperature;
|
|
11274
|
-
const maxTokensSource = target.max_output_tokens
|
|
11711
|
+
const maxTokensSource = target.max_output_tokens;
|
|
11275
11712
|
const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
|
|
11276
11713
|
const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
|
|
11277
11714
|
allowLiteral: true,
|
|
@@ -11291,11 +11728,11 @@ function resolveCodexConfig(target, env, evalFilePath) {
|
|
|
11291
11728
|
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
11292
11729
|
const argsSource = target.args ?? target.arguments;
|
|
11293
11730
|
const cwdSource = target.cwd;
|
|
11294
|
-
const workspaceTemplateSource = target.workspace_template
|
|
11295
|
-
const timeoutSource = target.timeout_seconds
|
|
11296
|
-
const logDirSource = target.log_dir ?? target.
|
|
11297
|
-
const logFormatSource = target.log_format ?? target.
|
|
11298
|
-
const systemPromptSource = target.system_prompt
|
|
11731
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
11732
|
+
const timeoutSource = target.timeout_seconds;
|
|
11733
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
11734
|
+
const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
11735
|
+
const systemPromptSource = target.system_prompt;
|
|
11299
11736
|
const model = resolveOptionalString(modelSource, env, `${target.name} codex model`, {
|
|
11300
11737
|
allowLiteral: true,
|
|
11301
11738
|
optionalEnv: true
|
|
@@ -11318,8 +11755,8 @@ function resolveCodexConfig(target, env, evalFilePath) {
|
|
|
11318
11755
|
optionalEnv: true
|
|
11319
11756
|
}
|
|
11320
11757
|
);
|
|
11321
|
-
if (workspaceTemplate && evalFilePath && !
|
|
11322
|
-
workspaceTemplate =
|
|
11758
|
+
if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
|
|
11759
|
+
workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
|
|
11323
11760
|
}
|
|
11324
11761
|
if (cwd && workspaceTemplate) {
|
|
11325
11762
|
throw new Error(
|
|
@@ -11359,16 +11796,16 @@ function normalizeCodexLogFormat(value) {
|
|
|
11359
11796
|
throw new Error("codex log format must be 'summary' or 'json'");
|
|
11360
11797
|
}
|
|
11361
11798
|
function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
11362
|
-
const cliUrlSource = target.cli_url
|
|
11363
|
-
const cliPathSource = target.cli_path
|
|
11364
|
-
const githubTokenSource = target.github_token
|
|
11799
|
+
const cliUrlSource = target.cli_url;
|
|
11800
|
+
const cliPathSource = target.cli_path;
|
|
11801
|
+
const githubTokenSource = target.github_token;
|
|
11365
11802
|
const modelSource = target.model;
|
|
11366
11803
|
const cwdSource = target.cwd;
|
|
11367
|
-
const workspaceTemplateSource = target.workspace_template
|
|
11368
|
-
const timeoutSource = target.timeout_seconds
|
|
11369
|
-
const logDirSource = target.log_dir ?? target.
|
|
11370
|
-
const logFormatSource = target.log_format
|
|
11371
|
-
const systemPromptSource = target.system_prompt
|
|
11804
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
11805
|
+
const timeoutSource = target.timeout_seconds;
|
|
11806
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
11807
|
+
const logFormatSource = target.log_format;
|
|
11808
|
+
const systemPromptSource = target.system_prompt;
|
|
11372
11809
|
const cliUrl = resolveOptionalString(cliUrlSource, env, `${target.name} copilot-sdk cli URL`, {
|
|
11373
11810
|
allowLiteral: true,
|
|
11374
11811
|
optionalEnv: true
|
|
@@ -11403,8 +11840,8 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
|
11403
11840
|
optionalEnv: true
|
|
11404
11841
|
}
|
|
11405
11842
|
);
|
|
11406
|
-
if (workspaceTemplate && evalFilePath && !
|
|
11407
|
-
workspaceTemplate =
|
|
11843
|
+
if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
|
|
11844
|
+
workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
|
|
11408
11845
|
}
|
|
11409
11846
|
if (cwd && workspaceTemplate) {
|
|
11410
11847
|
throw new Error(
|
|
@@ -11423,6 +11860,52 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
|
11423
11860
|
);
|
|
11424
11861
|
const logFormat = normalizeCopilotLogFormat(logFormatSource);
|
|
11425
11862
|
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
11863
|
+
const byok = target.byok;
|
|
11864
|
+
let byokType;
|
|
11865
|
+
let byokBaseUrl;
|
|
11866
|
+
let byokApiKey;
|
|
11867
|
+
let byokBearerToken;
|
|
11868
|
+
let byokApiVersion;
|
|
11869
|
+
let byokWireApi;
|
|
11870
|
+
if (byok && typeof byok === "object") {
|
|
11871
|
+
byokType = resolveOptionalString(byok.type, env, `${target.name} byok type`, {
|
|
11872
|
+
allowLiteral: true,
|
|
11873
|
+
optionalEnv: true
|
|
11874
|
+
});
|
|
11875
|
+
byokBaseUrl = resolveOptionalString(byok.base_url, env, `${target.name} byok base URL`, {
|
|
11876
|
+
allowLiteral: true,
|
|
11877
|
+
optionalEnv: true
|
|
11878
|
+
});
|
|
11879
|
+
byokApiKey = resolveOptionalString(byok.api_key, env, `${target.name} byok API key`, {
|
|
11880
|
+
allowLiteral: false,
|
|
11881
|
+
optionalEnv: true
|
|
11882
|
+
});
|
|
11883
|
+
byokBearerToken = resolveOptionalString(
|
|
11884
|
+
byok.bearer_token,
|
|
11885
|
+
env,
|
|
11886
|
+
`${target.name} byok bearer token`,
|
|
11887
|
+
{
|
|
11888
|
+
allowLiteral: false,
|
|
11889
|
+
optionalEnv: true
|
|
11890
|
+
}
|
|
11891
|
+
);
|
|
11892
|
+
byokApiVersion = resolveOptionalString(
|
|
11893
|
+
byok.api_version,
|
|
11894
|
+
env,
|
|
11895
|
+
`${target.name} byok API version`,
|
|
11896
|
+
{
|
|
11897
|
+
allowLiteral: true,
|
|
11898
|
+
optionalEnv: true
|
|
11899
|
+
}
|
|
11900
|
+
);
|
|
11901
|
+
byokWireApi = resolveOptionalString(byok.wire_api, env, `${target.name} byok wire API`, {
|
|
11902
|
+
allowLiteral: true,
|
|
11903
|
+
optionalEnv: true
|
|
11904
|
+
});
|
|
11905
|
+
if (!byokBaseUrl) {
|
|
11906
|
+
throw new Error(`${target.name}: 'byok.base_url' is required when 'byok' is specified`);
|
|
11907
|
+
}
|
|
11908
|
+
}
|
|
11426
11909
|
return {
|
|
11427
11910
|
cliUrl,
|
|
11428
11911
|
cliPath,
|
|
@@ -11433,7 +11916,13 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
|
11433
11916
|
timeoutMs,
|
|
11434
11917
|
logDir,
|
|
11435
11918
|
logFormat,
|
|
11436
|
-
systemPrompt
|
|
11919
|
+
systemPrompt,
|
|
11920
|
+
byokType,
|
|
11921
|
+
byokBaseUrl,
|
|
11922
|
+
byokApiKey,
|
|
11923
|
+
byokBearerToken,
|
|
11924
|
+
byokApiVersion,
|
|
11925
|
+
byokWireApi
|
|
11437
11926
|
};
|
|
11438
11927
|
}
|
|
11439
11928
|
function resolveCopilotCliConfig(target, env, evalFilePath) {
|
|
@@ -11441,11 +11930,11 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
|
|
|
11441
11930
|
const modelSource = target.model;
|
|
11442
11931
|
const argsSource = target.args ?? target.arguments;
|
|
11443
11932
|
const cwdSource = target.cwd;
|
|
11444
|
-
const workspaceTemplateSource = target.workspace_template
|
|
11445
|
-
const timeoutSource = target.timeout_seconds
|
|
11446
|
-
const logDirSource = target.log_dir ?? target.
|
|
11447
|
-
const logFormatSource = target.log_format
|
|
11448
|
-
const systemPromptSource = target.system_prompt
|
|
11933
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
11934
|
+
const timeoutSource = target.timeout_seconds;
|
|
11935
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
11936
|
+
const logFormatSource = target.log_format;
|
|
11937
|
+
const systemPromptSource = target.system_prompt;
|
|
11449
11938
|
const executable = resolveOptionalString(executableSource, env, `${target.name} copilot-cli executable`, {
|
|
11450
11939
|
allowLiteral: true,
|
|
11451
11940
|
optionalEnv: true
|
|
@@ -11468,8 +11957,8 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
|
|
|
11468
11957
|
optionalEnv: true
|
|
11469
11958
|
}
|
|
11470
11959
|
);
|
|
11471
|
-
if (workspaceTemplate && evalFilePath && !
|
|
11472
|
-
workspaceTemplate =
|
|
11960
|
+
if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
|
|
11961
|
+
workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
|
|
11473
11962
|
}
|
|
11474
11963
|
if (cwd && workspaceTemplate) {
|
|
11475
11964
|
throw new Error(
|
|
@@ -11509,16 +11998,16 @@ function normalizeCopilotLogFormat(value) {
|
|
|
11509
11998
|
}
|
|
11510
11999
|
function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
11511
12000
|
const subproviderSource = target.subprovider;
|
|
11512
|
-
const modelSource = target.model ?? target.pi_model
|
|
11513
|
-
const apiKeySource = target.api_key
|
|
11514
|
-
const toolsSource = target.tools ?? target.pi_tools
|
|
11515
|
-
const thinkingSource = target.thinking ?? target.pi_thinking
|
|
12001
|
+
const modelSource = target.model ?? target.pi_model;
|
|
12002
|
+
const apiKeySource = target.api_key;
|
|
12003
|
+
const toolsSource = target.tools ?? target.pi_tools;
|
|
12004
|
+
const thinkingSource = target.thinking ?? target.pi_thinking;
|
|
11516
12005
|
const cwdSource = target.cwd;
|
|
11517
|
-
const workspaceTemplateSource = target.workspace_template
|
|
11518
|
-
const timeoutSource = target.timeout_seconds
|
|
11519
|
-
const logDirSource = target.log_dir ?? target.
|
|
11520
|
-
const logFormatSource = target.log_format
|
|
11521
|
-
const systemPromptSource = target.system_prompt
|
|
12006
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
12007
|
+
const timeoutSource = target.timeout_seconds;
|
|
12008
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
12009
|
+
const logFormatSource = target.log_format;
|
|
12010
|
+
const systemPromptSource = target.system_prompt;
|
|
11522
12011
|
const subprovider = resolveOptionalString(
|
|
11523
12012
|
subproviderSource,
|
|
11524
12013
|
env,
|
|
@@ -11536,7 +12025,7 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
11536
12025
|
allowLiteral: false,
|
|
11537
12026
|
optionalEnv: true
|
|
11538
12027
|
});
|
|
11539
|
-
const baseUrlSource = target.base_url ?? target.
|
|
12028
|
+
const baseUrlSource = target.base_url ?? target.endpoint;
|
|
11540
12029
|
const baseUrl = resolveOptionalString(baseUrlSource, env, `${target.name} pi base url`, {
|
|
11541
12030
|
allowLiteral: true,
|
|
11542
12031
|
optionalEnv: true
|
|
@@ -11562,8 +12051,8 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
11562
12051
|
optionalEnv: true
|
|
11563
12052
|
}
|
|
11564
12053
|
);
|
|
11565
|
-
if (workspaceTemplate && evalFilePath && !
|
|
11566
|
-
workspaceTemplate =
|
|
12054
|
+
if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
|
|
12055
|
+
workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
|
|
11567
12056
|
}
|
|
11568
12057
|
if (cwd && workspaceTemplate) {
|
|
11569
12058
|
throw new Error(
|
|
@@ -11595,16 +12084,16 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
11595
12084
|
function resolvePiCliConfig(target, env, evalFilePath) {
|
|
11596
12085
|
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
11597
12086
|
const subproviderSource = target.subprovider;
|
|
11598
|
-
const modelSource = target.model ?? target.pi_model
|
|
11599
|
-
const apiKeySource = target.api_key
|
|
11600
|
-
const toolsSource = target.tools ?? target.pi_tools
|
|
11601
|
-
const thinkingSource = target.thinking ?? target.pi_thinking
|
|
12087
|
+
const modelSource = target.model ?? target.pi_model;
|
|
12088
|
+
const apiKeySource = target.api_key;
|
|
12089
|
+
const toolsSource = target.tools ?? target.pi_tools;
|
|
12090
|
+
const thinkingSource = target.thinking ?? target.pi_thinking;
|
|
11602
12091
|
const cwdSource = target.cwd;
|
|
11603
|
-
const workspaceTemplateSource = target.workspace_template
|
|
11604
|
-
const timeoutSource = target.timeout_seconds
|
|
11605
|
-
const logDirSource = target.log_dir ?? target.
|
|
11606
|
-
const logFormatSource = target.log_format
|
|
11607
|
-
const systemPromptSource = target.system_prompt
|
|
12092
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
12093
|
+
const timeoutSource = target.timeout_seconds;
|
|
12094
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
12095
|
+
const logFormatSource = target.log_format;
|
|
12096
|
+
const systemPromptSource = target.system_prompt;
|
|
11608
12097
|
const executable = resolveOptionalString(executableSource, env, `${target.name} pi-cli executable`, {
|
|
11609
12098
|
allowLiteral: true,
|
|
11610
12099
|
optionalEnv: true
|
|
@@ -11623,7 +12112,7 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
11623
12112
|
allowLiteral: false,
|
|
11624
12113
|
optionalEnv: true
|
|
11625
12114
|
});
|
|
11626
|
-
const baseUrlSource = target.base_url ?? target.
|
|
12115
|
+
const baseUrlSource = target.base_url ?? target.endpoint;
|
|
11627
12116
|
const baseUrl = resolveOptionalString(baseUrlSource, env, `${target.name} pi-cli base url`, {
|
|
11628
12117
|
allowLiteral: true,
|
|
11629
12118
|
optionalEnv: true
|
|
@@ -11648,8 +12137,8 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
11648
12137
|
`${target.name} pi-cli workspace template`,
|
|
11649
12138
|
{ allowLiteral: true, optionalEnv: true }
|
|
11650
12139
|
);
|
|
11651
|
-
if (workspaceTemplate && evalFilePath && !
|
|
11652
|
-
workspaceTemplate =
|
|
12140
|
+
if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
|
|
12141
|
+
workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
|
|
11653
12142
|
}
|
|
11654
12143
|
if (cwd && workspaceTemplate) {
|
|
11655
12144
|
throw new Error(`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive.`);
|
|
@@ -11681,11 +12170,11 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
11681
12170
|
function resolveClaudeConfig(target, env, evalFilePath) {
|
|
11682
12171
|
const modelSource = target.model;
|
|
11683
12172
|
const cwdSource = target.cwd;
|
|
11684
|
-
const workspaceTemplateSource = target.workspace_template
|
|
11685
|
-
const timeoutSource = target.timeout_seconds
|
|
11686
|
-
const logDirSource = target.log_dir ?? target.
|
|
11687
|
-
const logFormatSource = target.log_format ?? target.
|
|
11688
|
-
const systemPromptSource = target.system_prompt
|
|
12173
|
+
const workspaceTemplateSource = target.workspace_template;
|
|
12174
|
+
const timeoutSource = target.timeout_seconds;
|
|
12175
|
+
const logDirSource = target.log_dir ?? target.log_directory;
|
|
12176
|
+
const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CLAUDE_LOG_FORMAT;
|
|
12177
|
+
const systemPromptSource = target.system_prompt;
|
|
11689
12178
|
const model = resolveOptionalString(modelSource, env, `${target.name} claude model`, {
|
|
11690
12179
|
allowLiteral: true,
|
|
11691
12180
|
optionalEnv: true
|
|
@@ -11703,8 +12192,8 @@ function resolveClaudeConfig(target, env, evalFilePath) {
|
|
|
11703
12192
|
optionalEnv: true
|
|
11704
12193
|
}
|
|
11705
12194
|
);
|
|
11706
|
-
if (workspaceTemplate && evalFilePath && !
|
|
11707
|
-
workspaceTemplate =
|
|
12195
|
+
if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
|
|
12196
|
+
workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
|
|
11708
12197
|
}
|
|
11709
12198
|
if (cwd && workspaceTemplate) {
|
|
11710
12199
|
throw new Error(
|
|
@@ -11718,8 +12207,8 @@ function resolveClaudeConfig(target, env, evalFilePath) {
|
|
|
11718
12207
|
});
|
|
11719
12208
|
const logFormat = normalizeClaudeLogFormat(logFormatSource);
|
|
11720
12209
|
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
11721
|
-
const maxTurns = typeof target.max_turns === "number" ? target.max_turns :
|
|
11722
|
-
const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd :
|
|
12210
|
+
const maxTurns = typeof target.max_turns === "number" ? target.max_turns : void 0;
|
|
12211
|
+
const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd : void 0;
|
|
11723
12212
|
return {
|
|
11724
12213
|
model,
|
|
11725
12214
|
systemPrompt,
|
|
@@ -11750,9 +12239,7 @@ function resolveMockConfig(target) {
|
|
|
11750
12239
|
return { response };
|
|
11751
12240
|
}
|
|
11752
12241
|
function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
|
|
11753
|
-
const workspaceTemplateEnvVar = resolveOptionalLiteralString(
|
|
11754
|
-
target.workspace_template ?? target.workspaceTemplate
|
|
11755
|
-
);
|
|
12242
|
+
const workspaceTemplateEnvVar = resolveOptionalLiteralString(target.workspace_template);
|
|
11756
12243
|
let workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(
|
|
11757
12244
|
workspaceTemplateEnvVar,
|
|
11758
12245
|
env,
|
|
@@ -11762,14 +12249,14 @@ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
|
|
|
11762
12249
|
optionalEnv: true
|
|
11763
12250
|
}
|
|
11764
12251
|
) : void 0;
|
|
11765
|
-
if (workspaceTemplate && evalFilePath && !
|
|
11766
|
-
workspaceTemplate =
|
|
12252
|
+
if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
|
|
12253
|
+
workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
|
|
11767
12254
|
}
|
|
11768
12255
|
const executableSource = target.executable;
|
|
11769
12256
|
const waitSource = target.wait;
|
|
11770
|
-
const dryRunSource = target.dry_run
|
|
11771
|
-
const subagentRootSource = target.subagent_root
|
|
11772
|
-
const timeoutSource = target.timeout_seconds
|
|
12257
|
+
const dryRunSource = target.dry_run;
|
|
12258
|
+
const subagentRootSource = target.subagent_root;
|
|
12259
|
+
const timeoutSource = target.timeout_seconds;
|
|
11773
12260
|
const defaultCommand = insiders ? "code-insiders" : "code";
|
|
11774
12261
|
const executable = resolveOptionalString(executableSource, env, `${target.name} vscode executable`, {
|
|
11775
12262
|
allowLiteral: true,
|
|
@@ -11804,8 +12291,8 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
11804
12291
|
const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
|
|
11805
12292
|
if (!parseResult.success) {
|
|
11806
12293
|
const firstError = parseResult.error.errors[0];
|
|
11807
|
-
const
|
|
11808
|
-
const prefix =
|
|
12294
|
+
const path53 = firstError?.path.join(".") || "";
|
|
12295
|
+
const prefix = path53 ? `${target.name} ${path53}: ` : `${target.name}: `;
|
|
11809
12296
|
throw new Error(`${prefix}${firstError?.message}`);
|
|
11810
12297
|
}
|
|
11811
12298
|
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
@@ -11820,17 +12307,17 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
11820
12307
|
}
|
|
11821
12308
|
function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath) {
|
|
11822
12309
|
const command = target.command ? resolveString(target.command, env, `${target.name} command`, true) : `bun run .agentv/providers/${providerKind}.ts {PROMPT}`;
|
|
11823
|
-
const timeoutSeconds = target.timeout_seconds
|
|
12310
|
+
const timeoutSeconds = target.timeout_seconds;
|
|
11824
12311
|
const timeoutMs = resolveTimeoutMs(timeoutSeconds, `${target.name} timeout`);
|
|
11825
12312
|
let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
|
|
11826
12313
|
allowLiteral: true,
|
|
11827
12314
|
optionalEnv: true
|
|
11828
12315
|
});
|
|
11829
|
-
if (cwd && evalFilePath && !
|
|
11830
|
-
cwd =
|
|
12316
|
+
if (cwd && evalFilePath && !import_node_path24.default.isAbsolute(cwd)) {
|
|
12317
|
+
cwd = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), cwd);
|
|
11831
12318
|
}
|
|
11832
12319
|
if (!cwd && evalFilePath) {
|
|
11833
|
-
cwd =
|
|
12320
|
+
cwd = import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath));
|
|
11834
12321
|
}
|
|
11835
12322
|
return {
|
|
11836
12323
|
command,
|
|
@@ -11884,10 +12371,10 @@ function resolveDiscover(value, targetName) {
|
|
|
11884
12371
|
throw new Error(`Target "${targetName}": discover must be "latest" (got "${String(value)}")`);
|
|
11885
12372
|
}
|
|
11886
12373
|
function resolveCopilotLogConfig(target, env) {
|
|
11887
|
-
const sessionDirSource = target.session_dir
|
|
11888
|
-
const sessionIdSource = target.session_id
|
|
12374
|
+
const sessionDirSource = target.session_dir;
|
|
12375
|
+
const sessionIdSource = target.session_id;
|
|
11889
12376
|
const discoverSource = target.discover;
|
|
11890
|
-
const sessionStateDirSource = target.session_state_dir
|
|
12377
|
+
const sessionStateDirSource = target.session_state_dir;
|
|
11891
12378
|
const cwdSource = target.cwd;
|
|
11892
12379
|
return {
|
|
11893
12380
|
sessionDir: resolveOptionalString(
|
|
@@ -12068,7 +12555,7 @@ var import_node_path33 = __toESM(require("path"), 1);
|
|
|
12068
12555
|
init_cjs_shims();
|
|
12069
12556
|
var import_node_fs11 = require("fs");
|
|
12070
12557
|
var import_promises20 = require("fs/promises");
|
|
12071
|
-
var
|
|
12558
|
+
var import_node_path25 = __toESM(require("path"), 1);
|
|
12072
12559
|
async function pathExists(target) {
|
|
12073
12560
|
try {
|
|
12074
12561
|
await (0, import_promises20.access)(target, import_node_fs11.constants.F_OK);
|
|
@@ -12084,7 +12571,7 @@ async function readDirEntries(target) {
|
|
|
12084
12571
|
const entries = await (0, import_promises20.readdir)(target, { withFileTypes: true });
|
|
12085
12572
|
return entries.map((entry) => ({
|
|
12086
12573
|
name: entry.name,
|
|
12087
|
-
absolutePath:
|
|
12574
|
+
absolutePath: import_node_path25.default.join(target, entry.name),
|
|
12088
12575
|
isDirectory: entry.isDirectory()
|
|
12089
12576
|
}));
|
|
12090
12577
|
}
|
|
@@ -12100,9 +12587,9 @@ async function removeIfExists(target) {
|
|
|
12100
12587
|
|
|
12101
12588
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
12102
12589
|
init_cjs_shims();
|
|
12103
|
-
var
|
|
12590
|
+
var import_node_path26 = __toESM(require("path"), 1);
|
|
12104
12591
|
function pathToFileUri2(filePath) {
|
|
12105
|
-
const absolutePath =
|
|
12592
|
+
const absolutePath = import_node_path26.default.isAbsolute(filePath) ? filePath : import_node_path26.default.resolve(filePath);
|
|
12106
12593
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
12107
12594
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
12108
12595
|
return `file:///${normalizedPath}`;
|
|
@@ -12112,7 +12599,7 @@ function pathToFileUri2(filePath) {
|
|
|
12112
12599
|
|
|
12113
12600
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
12114
12601
|
init_cjs_shims();
|
|
12115
|
-
var
|
|
12602
|
+
var import_node_path27 = __toESM(require("path"), 1);
|
|
12116
12603
|
|
|
12117
12604
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
12118
12605
|
init_cjs_shims();
|
|
@@ -12206,8 +12693,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
12206
12693
|
});
|
|
12207
12694
|
}
|
|
12208
12695
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
12209
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
12210
|
-
const responseList = responseFiles.map((file) => `"${
|
|
12696
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${import_node_path27.default.basename(file)}`).join("\n");
|
|
12697
|
+
const responseList = responseFiles.map((file) => `"${import_node_path27.default.basename(file)}"`).join(", ");
|
|
12211
12698
|
return renderTemplate2(templateContent, {
|
|
12212
12699
|
requestFiles: requestLines,
|
|
12213
12700
|
responseList
|
|
@@ -12217,7 +12704,7 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
12217
12704
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
12218
12705
|
init_cjs_shims();
|
|
12219
12706
|
var import_promises21 = require("fs/promises");
|
|
12220
|
-
var
|
|
12707
|
+
var import_node_path28 = __toESM(require("path"), 1);
|
|
12221
12708
|
|
|
12222
12709
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
12223
12710
|
init_cjs_shims();
|
|
@@ -12277,7 +12764,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
12277
12764
|
}
|
|
12278
12765
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
12279
12766
|
if (!silent) {
|
|
12280
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
12767
|
+
const fileList = responseFilesFinal.map((file) => import_node_path28.default.basename(file)).join(", ");
|
|
12281
12768
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
12282
12769
|
}
|
|
12283
12770
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -12286,7 +12773,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
12286
12773
|
while (pending.size > 0) {
|
|
12287
12774
|
if (Date.now() >= deadline) {
|
|
12288
12775
|
if (!silent) {
|
|
12289
|
-
const remaining = [...pending].map((f) =>
|
|
12776
|
+
const remaining = [...pending].map((f) => import_node_path28.default.basename(f)).join(", ");
|
|
12290
12777
|
console.error(
|
|
12291
12778
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
12292
12779
|
);
|
|
@@ -12344,37 +12831,6 @@ var import_node_util2 = require("util");
|
|
|
12344
12831
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
12345
12832
|
init_cjs_shims();
|
|
12346
12833
|
var import_node_path29 = __toESM(require("path"), 1);
|
|
12347
|
-
|
|
12348
|
-
// src/paths.ts
|
|
12349
|
-
init_cjs_shims();
|
|
12350
|
-
var import_node_os6 = __toESM(require("os"), 1);
|
|
12351
|
-
var import_node_path28 = __toESM(require("path"), 1);
|
|
12352
|
-
var logged = false;
|
|
12353
|
-
function getAgentvHome() {
|
|
12354
|
-
const envHome = process.env.AGENTV_HOME;
|
|
12355
|
-
if (envHome && envHome !== "undefined") {
|
|
12356
|
-
if (!logged) {
|
|
12357
|
-
logged = true;
|
|
12358
|
-
console.warn(`Using AGENTV_HOME: ${envHome}`);
|
|
12359
|
-
}
|
|
12360
|
-
return envHome;
|
|
12361
|
-
}
|
|
12362
|
-
return import_node_path28.default.join(import_node_os6.default.homedir(), ".agentv");
|
|
12363
|
-
}
|
|
12364
|
-
function getWorkspacesRoot() {
|
|
12365
|
-
return import_node_path28.default.join(getAgentvHome(), "workspaces");
|
|
12366
|
-
}
|
|
12367
|
-
function getSubagentsRoot() {
|
|
12368
|
-
return import_node_path28.default.join(getAgentvHome(), "subagents");
|
|
12369
|
-
}
|
|
12370
|
-
function getTraceStateRoot() {
|
|
12371
|
-
return import_node_path28.default.join(getAgentvHome(), "trace-state");
|
|
12372
|
-
}
|
|
12373
|
-
function getWorkspacePoolRoot() {
|
|
12374
|
-
return import_node_path28.default.join(getAgentvHome(), "workspace-pool");
|
|
12375
|
-
}
|
|
12376
|
-
|
|
12377
|
-
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
12378
12834
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
12379
12835
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
12380
12836
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
@@ -13527,6 +13983,15 @@ var AGENT_PROVIDER_KINDS = [
|
|
|
13527
13983
|
"vscode",
|
|
13528
13984
|
"vscode-insiders"
|
|
13529
13985
|
];
|
|
13986
|
+
var LLM_GRADER_CAPABLE_KINDS = [
|
|
13987
|
+
"openai",
|
|
13988
|
+
"openrouter",
|
|
13989
|
+
"azure",
|
|
13990
|
+
"anthropic",
|
|
13991
|
+
"gemini",
|
|
13992
|
+
"agentv",
|
|
13993
|
+
"mock"
|
|
13994
|
+
];
|
|
13530
13995
|
function extractLastAssistantContent(messages) {
|
|
13531
13996
|
if (!messages || messages.length === 0) {
|
|
13532
13997
|
return "";
|
|
@@ -13680,9 +14145,10 @@ init_cjs_shims();
|
|
|
13680
14145
|
|
|
13681
14146
|
// src/evaluation/evaluators/scoring.ts
|
|
13682
14147
|
init_cjs_shims();
|
|
13683
|
-
var
|
|
13684
|
-
|
|
13685
|
-
|
|
14148
|
+
var DEFAULT_THRESHOLD = 0.8;
|
|
14149
|
+
var PASS_THRESHOLD = DEFAULT_THRESHOLD;
|
|
14150
|
+
function scoreToVerdict(score, threshold = DEFAULT_THRESHOLD) {
|
|
14151
|
+
return score >= threshold ? "pass" : "fail";
|
|
13686
14152
|
}
|
|
13687
14153
|
function clampScore(value) {
|
|
13688
14154
|
if (Number.isNaN(value) || !Number.isFinite(value)) {
|
|
@@ -13873,13 +14339,13 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
13873
14339
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
13874
14340
|
const { mkdir: mkdir17, readFile: readFile17, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
|
|
13875
14341
|
const { tmpdir: tmpdir3 } = await import("os");
|
|
13876
|
-
const
|
|
14342
|
+
const path53 = await import("path");
|
|
13877
14343
|
const { randomUUID: randomUUID10 } = await import("crypto");
|
|
13878
|
-
const dir =
|
|
14344
|
+
const dir = path53.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
13879
14345
|
await mkdir17(dir, { recursive: true });
|
|
13880
|
-
const stdinPath =
|
|
13881
|
-
const stdoutPath =
|
|
13882
|
-
const stderrPath =
|
|
14346
|
+
const stdinPath = path53.join(dir, "stdin.txt");
|
|
14347
|
+
const stdoutPath = path53.join(dir, "stdout.txt");
|
|
14348
|
+
const stderrPath = path53.join(dir, "stderr.txt");
|
|
13883
14349
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
13884
14350
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
13885
14351
|
const { spawn: spawn5 } = await import("child_process");
|
|
@@ -15081,7 +15547,7 @@ ${outputSchema}`;
|
|
|
15081
15547
|
parts.push("[[ ## scoring_criteria ## ]]");
|
|
15082
15548
|
for (const rubric of rubrics) {
|
|
15083
15549
|
const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
|
|
15084
|
-
const minScoreLabel = rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
|
|
15550
|
+
const minScoreLabel = rubric.min_score !== void 0 ? ` [REQUIRED: min score ${rubric.min_score}]` : rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
|
|
15085
15551
|
parts.push("", `### Criterion: ${rubric.id}${weightLabel}${minScoreLabel}`);
|
|
15086
15552
|
if (rubric.outcome) {
|
|
15087
15553
|
parts.push(`Description: ${rubric.outcome}`);
|
|
@@ -15135,54 +15601,106 @@ ${outputSchema}`;
|
|
|
15135
15601
|
async runWithRetry(options) {
|
|
15136
15602
|
const { context: context2, graderProvider, systemPrompt, userPrompt, schema, images } = options;
|
|
15137
15603
|
let lastError;
|
|
15604
|
+
let lastInvalidResponse;
|
|
15605
|
+
let shouldAttemptStructureFix = false;
|
|
15138
15606
|
for (let attempt = 1; attempt <= 3; attempt++) {
|
|
15139
15607
|
try {
|
|
15140
|
-
const
|
|
15141
|
-
|
|
15142
|
-
|
|
15143
|
-
|
|
15144
|
-
|
|
15145
|
-
|
|
15146
|
-
|
|
15147
|
-
|
|
15148
|
-
|
|
15149
|
-
|
|
15150
|
-
|
|
15151
|
-
|
|
15152
|
-
|
|
15153
|
-
|
|
15154
|
-
|
|
15155
|
-
|
|
15156
|
-
]
|
|
15157
|
-
}
|
|
15158
|
-
],
|
|
15159
|
-
...modelOptions
|
|
15160
|
-
}) : await (0, import_ai2.generateText)({
|
|
15161
|
-
model,
|
|
15162
|
-
system: systemPrompt,
|
|
15163
|
-
prompt: userPrompt,
|
|
15164
|
-
...modelOptions
|
|
15165
|
-
});
|
|
15166
|
-
const data2 = schema.parse(parseJsonFromText(result.text));
|
|
15167
|
-
const rawUsage = result.usage;
|
|
15168
|
-
const tokenUsage = rawUsage?.inputTokens != null && rawUsage?.outputTokens != null ? { input: rawUsage.inputTokens, output: rawUsage.outputTokens } : void 0;
|
|
15169
|
-
return { data: data2, tokenUsage };
|
|
15608
|
+
const result = await this.generateStructuredResponse({
|
|
15609
|
+
context: context2,
|
|
15610
|
+
graderProvider,
|
|
15611
|
+
systemPrompt,
|
|
15612
|
+
userPrompt,
|
|
15613
|
+
images
|
|
15614
|
+
});
|
|
15615
|
+
const canRepairResponse = result.text.trim().length > 0;
|
|
15616
|
+
lastInvalidResponse = canRepairResponse ? result : void 0;
|
|
15617
|
+
let data;
|
|
15618
|
+
try {
|
|
15619
|
+
data = schema.parse(parseJsonFromText(result.text));
|
|
15620
|
+
} catch (e) {
|
|
15621
|
+
lastError = e instanceof Error ? e : new Error(String(e));
|
|
15622
|
+
shouldAttemptStructureFix = canRepairResponse;
|
|
15623
|
+
continue;
|
|
15170
15624
|
}
|
|
15171
|
-
|
|
15172
|
-
|
|
15625
|
+
return {
|
|
15626
|
+
data,
|
|
15627
|
+
providerResponse: result.providerResponse,
|
|
15628
|
+
tokenUsage: result.tokenUsage
|
|
15629
|
+
};
|
|
15630
|
+
} catch (e) {
|
|
15631
|
+
lastError = e instanceof Error ? e : new Error(String(e));
|
|
15632
|
+
}
|
|
15633
|
+
}
|
|
15634
|
+
if (shouldAttemptStructureFix && lastInvalidResponse) {
|
|
15635
|
+
try {
|
|
15636
|
+
const repaired = await this.generateStructuredResponse({
|
|
15637
|
+
context: context2,
|
|
15638
|
+
graderProvider,
|
|
15173
15639
|
systemPrompt,
|
|
15174
|
-
|
|
15175
|
-
|
|
15176
|
-
|
|
15177
|
-
|
|
15640
|
+
userPrompt: buildStructureRepairPrompt({
|
|
15641
|
+
validationError: lastError?.message ?? "Schema validation failed",
|
|
15642
|
+
invalidResponse: lastInvalidResponse.text
|
|
15643
|
+
})
|
|
15178
15644
|
});
|
|
15179
|
-
const data = schema.parse(parseJsonFromText(
|
|
15180
|
-
return {
|
|
15645
|
+
const data = schema.parse(parseJsonFromText(repaired.text));
|
|
15646
|
+
return {
|
|
15647
|
+
data,
|
|
15648
|
+
providerResponse: repaired.providerResponse,
|
|
15649
|
+
tokenUsage: sumTokenUsage(lastInvalidResponse.tokenUsage, repaired.tokenUsage)
|
|
15650
|
+
};
|
|
15181
15651
|
} catch (e) {
|
|
15182
15652
|
lastError = e instanceof Error ? e : new Error(String(e));
|
|
15183
15653
|
}
|
|
15184
15654
|
}
|
|
15185
|
-
throw new Error(
|
|
15655
|
+
throw new Error(
|
|
15656
|
+
`Failed to parse evaluator response after 3 attempts and 1 structure-fix attempt: ${lastError?.message}`
|
|
15657
|
+
);
|
|
15658
|
+
}
|
|
15659
|
+
async generateStructuredResponse(options) {
|
|
15660
|
+
const { context: context2, graderProvider, systemPrompt, userPrompt, images } = options;
|
|
15661
|
+
const model = graderProvider.asLanguageModel?.();
|
|
15662
|
+
if (model) {
|
|
15663
|
+
const modelOptions = {
|
|
15664
|
+
...this.maxOutputTokens ? { maxTokens: this.maxOutputTokens } : {},
|
|
15665
|
+
...typeof this.temperature === "number" ? { temperature: this.temperature } : {}
|
|
15666
|
+
};
|
|
15667
|
+
const hasImages = images && images.length > 0;
|
|
15668
|
+
const result = hasImages ? await (0, import_ai2.generateText)({
|
|
15669
|
+
model,
|
|
15670
|
+
system: systemPrompt,
|
|
15671
|
+
messages: [
|
|
15672
|
+
{
|
|
15673
|
+
role: "user",
|
|
15674
|
+
content: [
|
|
15675
|
+
{ type: "text", text: userPrompt },
|
|
15676
|
+
...toAiSdkImageParts(images)
|
|
15677
|
+
]
|
|
15678
|
+
}
|
|
15679
|
+
],
|
|
15680
|
+
...modelOptions
|
|
15681
|
+
}) : await (0, import_ai2.generateText)({
|
|
15682
|
+
model,
|
|
15683
|
+
system: systemPrompt,
|
|
15684
|
+
prompt: userPrompt,
|
|
15685
|
+
...modelOptions
|
|
15686
|
+
});
|
|
15687
|
+
const rawUsage = result.usage;
|
|
15688
|
+
const tokenUsage = rawUsage?.inputTokens != null && rawUsage?.outputTokens != null ? { input: rawUsage.inputTokens, output: rawUsage.outputTokens } : void 0;
|
|
15689
|
+
return { text: result.text, tokenUsage };
|
|
15690
|
+
}
|
|
15691
|
+
const response = await graderProvider.invoke({
|
|
15692
|
+
question: userPrompt,
|
|
15693
|
+
systemPrompt,
|
|
15694
|
+
evalCaseId: context2.evalCase.id,
|
|
15695
|
+
attempt: context2.attempt,
|
|
15696
|
+
maxOutputTokens: this.maxOutputTokens,
|
|
15697
|
+
temperature: this.temperature
|
|
15698
|
+
});
|
|
15699
|
+
return {
|
|
15700
|
+
text: extractLastAssistantContent(response.output),
|
|
15701
|
+
providerResponse: response,
|
|
15702
|
+
tokenUsage: response.tokenUsage
|
|
15703
|
+
};
|
|
15186
15704
|
}
|
|
15187
15705
|
};
|
|
15188
15706
|
function buildOutputSchema() {
|
|
@@ -15202,6 +15720,29 @@ function buildOutputSchema() {
|
|
|
15202
15720
|
"}"
|
|
15203
15721
|
].join("\n");
|
|
15204
15722
|
}
|
|
15723
|
+
function buildStructureRepairPrompt(options) {
|
|
15724
|
+
const { validationError, invalidResponse } = options;
|
|
15725
|
+
return [
|
|
15726
|
+
"The following evaluation response has useful grading content but invalid JSON structure.",
|
|
15727
|
+
"Repair it to satisfy the schema in the system prompt.",
|
|
15728
|
+
"Preserve the evaluation meaning, do not re-grade the answer, and return only a single JSON object.",
|
|
15729
|
+
"",
|
|
15730
|
+
"Validation error:",
|
|
15731
|
+
validationError,
|
|
15732
|
+
"",
|
|
15733
|
+
"Invalid response:",
|
|
15734
|
+
invalidResponse
|
|
15735
|
+
].join("\n");
|
|
15736
|
+
}
|
|
15737
|
+
function sumTokenUsage(first, second) {
|
|
15738
|
+
if (!first && !second) {
|
|
15739
|
+
return void 0;
|
|
15740
|
+
}
|
|
15741
|
+
return {
|
|
15742
|
+
input: (first?.input ?? 0) + (second?.input ?? 0),
|
|
15743
|
+
output: (first?.output ?? 0) + (second?.output ?? 0)
|
|
15744
|
+
};
|
|
15745
|
+
}
|
|
15205
15746
|
function buildRubricOutputSchema() {
|
|
15206
15747
|
return `You are an expert evaluator. Evaluate the candidate answer against each rubric item.
|
|
15207
15748
|
You must return a valid JSON object matching this schema:
|
|
@@ -15301,19 +15842,21 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
15301
15842
|
rawScores[rubric.id] = rawScore;
|
|
15302
15843
|
totalWeight += rubric.weight;
|
|
15303
15844
|
weightedScoreSum += normalizedScore * rubric.weight;
|
|
15304
|
-
let
|
|
15305
|
-
if (rubric.
|
|
15306
|
-
|
|
15845
|
+
let minScoreThreshold;
|
|
15846
|
+
if (rubric.min_score !== void 0) {
|
|
15847
|
+
minScoreThreshold = rubric.min_score;
|
|
15848
|
+
} else if (rubric.required_min_score !== void 0) {
|
|
15849
|
+
minScoreThreshold = rubric.required_min_score / 10;
|
|
15307
15850
|
} else if (rubric.required === true) {
|
|
15308
|
-
|
|
15851
|
+
minScoreThreshold = 1;
|
|
15309
15852
|
}
|
|
15310
15853
|
const matchingRange = rubric.score_ranges?.find(
|
|
15311
15854
|
(r) => rawScore >= r.score_range[0] && rawScore <= r.score_range[1]
|
|
15312
15855
|
);
|
|
15313
15856
|
const rangeDescription = matchingRange?.outcome ?? "";
|
|
15314
15857
|
const criterionLabel = rubric.outcome ?? rubric.id;
|
|
15315
|
-
const passed = !(
|
|
15316
|
-
if (
|
|
15858
|
+
const passed = !(minScoreThreshold !== void 0 && normalizedScore < minScoreThreshold) && rawScore >= 7;
|
|
15859
|
+
if (minScoreThreshold !== void 0 && normalizedScore < minScoreThreshold) {
|
|
15317
15860
|
failedRequired = true;
|
|
15318
15861
|
}
|
|
15319
15862
|
assertions.push({
|
|
@@ -15390,11 +15933,11 @@ function createFilesystemTools(workspacePath) {
|
|
|
15390
15933
|
execute: async (input) => {
|
|
15391
15934
|
try {
|
|
15392
15935
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
15393
|
-
const
|
|
15394
|
-
if (
|
|
15936
|
+
const stat11 = await import_promises29.default.stat(resolved);
|
|
15937
|
+
if (stat11.isDirectory()) {
|
|
15395
15938
|
return { error: `'${input.path}' is a directory, not a file` };
|
|
15396
15939
|
}
|
|
15397
|
-
const buffer = Buffer.alloc(Math.min(
|
|
15940
|
+
const buffer = Buffer.alloc(Math.min(stat11.size, MAX_FILE_SIZE));
|
|
15398
15941
|
const fd = await import_promises29.default.open(resolved, "r");
|
|
15399
15942
|
try {
|
|
15400
15943
|
await fd.read(buffer, 0, buffer.length, 0);
|
|
@@ -15402,8 +15945,8 @@ function createFilesystemTools(workspacePath) {
|
|
|
15402
15945
|
await fd.close();
|
|
15403
15946
|
}
|
|
15404
15947
|
const content = buffer.toString("utf-8");
|
|
15405
|
-
const truncated =
|
|
15406
|
-
return { content, truncated, size:
|
|
15948
|
+
const truncated = stat11.size > MAX_FILE_SIZE;
|
|
15949
|
+
return { content, truncated, size: stat11.size };
|
|
15407
15950
|
} catch (error) {
|
|
15408
15951
|
return { error: error instanceof Error ? error.message : String(error) };
|
|
15409
15952
|
}
|
|
@@ -15454,8 +15997,8 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
15454
15997
|
const ext = import_node_path39.default.extname(entry.name).toLowerCase();
|
|
15455
15998
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
15456
15999
|
try {
|
|
15457
|
-
const
|
|
15458
|
-
if (
|
|
16000
|
+
const stat11 = await import_promises29.default.stat(fullPath);
|
|
16001
|
+
if (stat11.size > MAX_FILE_SIZE) continue;
|
|
15459
16002
|
const content = await import_promises29.default.readFile(fullPath, "utf-8");
|
|
15460
16003
|
const lines = content.split("\n");
|
|
15461
16004
|
for (let i = 0; i < lines.length; i++) {
|
|
@@ -16099,115 +16642,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
16099
16642
|
* Evaluate a single field against the expected value.
|
|
16100
16643
|
*/
|
|
16101
16644
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
16102
|
-
const { path:
|
|
16103
|
-
const candidateValue = resolvePath(candidateData,
|
|
16104
|
-
const expectedValue = resolvePath(expectedData,
|
|
16645
|
+
const { path: path53, match, required = true, weight = 1 } = fieldConfig;
|
|
16646
|
+
const candidateValue = resolvePath(candidateData, path53);
|
|
16647
|
+
const expectedValue = resolvePath(expectedData, path53);
|
|
16105
16648
|
if (expectedValue === void 0) {
|
|
16106
16649
|
return {
|
|
16107
|
-
path:
|
|
16650
|
+
path: path53,
|
|
16108
16651
|
score: 1,
|
|
16109
16652
|
// No expected value means no comparison needed
|
|
16110
16653
|
weight,
|
|
16111
16654
|
hit: true,
|
|
16112
|
-
message: `${
|
|
16655
|
+
message: `${path53}: no expected value`
|
|
16113
16656
|
};
|
|
16114
16657
|
}
|
|
16115
16658
|
if (candidateValue === void 0) {
|
|
16116
16659
|
if (required) {
|
|
16117
16660
|
return {
|
|
16118
|
-
path:
|
|
16661
|
+
path: path53,
|
|
16119
16662
|
score: 0,
|
|
16120
16663
|
weight,
|
|
16121
16664
|
hit: false,
|
|
16122
|
-
message: `${
|
|
16665
|
+
message: `${path53} (required, missing)`
|
|
16123
16666
|
};
|
|
16124
16667
|
}
|
|
16125
16668
|
return {
|
|
16126
|
-
path:
|
|
16669
|
+
path: path53,
|
|
16127
16670
|
score: 1,
|
|
16128
16671
|
// Don't penalize missing optional fields
|
|
16129
16672
|
weight: 0,
|
|
16130
16673
|
// Zero weight means it won't affect the score
|
|
16131
16674
|
hit: true,
|
|
16132
|
-
message: `${
|
|
16675
|
+
message: `${path53}: optional field missing`
|
|
16133
16676
|
};
|
|
16134
16677
|
}
|
|
16135
16678
|
switch (match) {
|
|
16136
16679
|
case "exact":
|
|
16137
|
-
return this.compareExact(
|
|
16680
|
+
return this.compareExact(path53, candidateValue, expectedValue, weight);
|
|
16138
16681
|
case "numeric_tolerance":
|
|
16139
16682
|
return this.compareNumericTolerance(
|
|
16140
|
-
|
|
16683
|
+
path53,
|
|
16141
16684
|
candidateValue,
|
|
16142
16685
|
expectedValue,
|
|
16143
16686
|
fieldConfig,
|
|
16144
16687
|
weight
|
|
16145
16688
|
);
|
|
16146
16689
|
case "date":
|
|
16147
|
-
return this.compareDate(
|
|
16690
|
+
return this.compareDate(path53, candidateValue, expectedValue, fieldConfig, weight);
|
|
16148
16691
|
default:
|
|
16149
16692
|
return {
|
|
16150
|
-
path:
|
|
16693
|
+
path: path53,
|
|
16151
16694
|
score: 0,
|
|
16152
16695
|
weight,
|
|
16153
16696
|
hit: false,
|
|
16154
|
-
message: `${
|
|
16697
|
+
message: `${path53}: unknown match type "${match}"`
|
|
16155
16698
|
};
|
|
16156
16699
|
}
|
|
16157
16700
|
}
|
|
16158
16701
|
/**
|
|
16159
16702
|
* Exact equality comparison.
|
|
16160
16703
|
*/
|
|
16161
|
-
compareExact(
|
|
16704
|
+
compareExact(path53, candidateValue, expectedValue, weight) {
|
|
16162
16705
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
16163
16706
|
return {
|
|
16164
|
-
path:
|
|
16707
|
+
path: path53,
|
|
16165
16708
|
score: 1,
|
|
16166
16709
|
weight,
|
|
16167
16710
|
hit: true,
|
|
16168
|
-
message:
|
|
16711
|
+
message: path53
|
|
16169
16712
|
};
|
|
16170
16713
|
}
|
|
16171
16714
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
16172
16715
|
return {
|
|
16173
|
-
path:
|
|
16716
|
+
path: path53,
|
|
16174
16717
|
score: 0,
|
|
16175
16718
|
weight,
|
|
16176
16719
|
hit: false,
|
|
16177
|
-
message: `${
|
|
16720
|
+
message: `${path53} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
16178
16721
|
};
|
|
16179
16722
|
}
|
|
16180
16723
|
return {
|
|
16181
|
-
path:
|
|
16724
|
+
path: path53,
|
|
16182
16725
|
score: 0,
|
|
16183
16726
|
weight,
|
|
16184
16727
|
hit: false,
|
|
16185
|
-
message: `${
|
|
16728
|
+
message: `${path53} (value mismatch)`
|
|
16186
16729
|
};
|
|
16187
16730
|
}
|
|
16188
16731
|
/**
|
|
16189
16732
|
* Numeric comparison with absolute or relative tolerance.
|
|
16190
16733
|
*/
|
|
16191
|
-
compareNumericTolerance(
|
|
16734
|
+
compareNumericTolerance(path53, candidateValue, expectedValue, fieldConfig, weight) {
|
|
16192
16735
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
16193
16736
|
const candidateNum = toNumber(candidateValue);
|
|
16194
16737
|
const expectedNum = toNumber(expectedValue);
|
|
16195
16738
|
if (candidateNum === null || expectedNum === null) {
|
|
16196
16739
|
return {
|
|
16197
|
-
path:
|
|
16740
|
+
path: path53,
|
|
16198
16741
|
score: 0,
|
|
16199
16742
|
weight,
|
|
16200
16743
|
hit: false,
|
|
16201
|
-
message: `${
|
|
16744
|
+
message: `${path53} (non-numeric value)`
|
|
16202
16745
|
};
|
|
16203
16746
|
}
|
|
16204
16747
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
16205
16748
|
return {
|
|
16206
|
-
path:
|
|
16749
|
+
path: path53,
|
|
16207
16750
|
score: 0,
|
|
16208
16751
|
weight,
|
|
16209
16752
|
hit: false,
|
|
16210
|
-
message: `${
|
|
16753
|
+
message: `${path53} (invalid numeric value)`
|
|
16211
16754
|
};
|
|
16212
16755
|
}
|
|
16213
16756
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -16220,61 +16763,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
16220
16763
|
}
|
|
16221
16764
|
if (withinTolerance) {
|
|
16222
16765
|
return {
|
|
16223
|
-
path:
|
|
16766
|
+
path: path53,
|
|
16224
16767
|
score: 1,
|
|
16225
16768
|
weight,
|
|
16226
16769
|
hit: true,
|
|
16227
|
-
message: `${
|
|
16770
|
+
message: `${path53} (within tolerance: diff=${diff.toFixed(2)})`
|
|
16228
16771
|
};
|
|
16229
16772
|
}
|
|
16230
16773
|
return {
|
|
16231
|
-
path:
|
|
16774
|
+
path: path53,
|
|
16232
16775
|
score: 0,
|
|
16233
16776
|
weight,
|
|
16234
16777
|
hit: false,
|
|
16235
|
-
message: `${
|
|
16778
|
+
message: `${path53} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
16236
16779
|
};
|
|
16237
16780
|
}
|
|
16238
16781
|
/**
|
|
16239
16782
|
* Date comparison with format normalization.
|
|
16240
16783
|
*/
|
|
16241
|
-
compareDate(
|
|
16784
|
+
compareDate(path53, candidateValue, expectedValue, fieldConfig, weight) {
|
|
16242
16785
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
16243
16786
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
16244
16787
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
16245
16788
|
if (candidateDate === null) {
|
|
16246
16789
|
return {
|
|
16247
|
-
path:
|
|
16790
|
+
path: path53,
|
|
16248
16791
|
score: 0,
|
|
16249
16792
|
weight,
|
|
16250
16793
|
hit: false,
|
|
16251
|
-
message: `${
|
|
16794
|
+
message: `${path53} (unparseable candidate date)`
|
|
16252
16795
|
};
|
|
16253
16796
|
}
|
|
16254
16797
|
if (expectedDate === null) {
|
|
16255
16798
|
return {
|
|
16256
|
-
path:
|
|
16799
|
+
path: path53,
|
|
16257
16800
|
score: 0,
|
|
16258
16801
|
weight,
|
|
16259
16802
|
hit: false,
|
|
16260
|
-
message: `${
|
|
16803
|
+
message: `${path53} (unparseable expected date)`
|
|
16261
16804
|
};
|
|
16262
16805
|
}
|
|
16263
16806
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
16264
16807
|
return {
|
|
16265
|
-
path:
|
|
16808
|
+
path: path53,
|
|
16266
16809
|
score: 1,
|
|
16267
16810
|
weight,
|
|
16268
16811
|
hit: true,
|
|
16269
|
-
message:
|
|
16812
|
+
message: path53
|
|
16270
16813
|
};
|
|
16271
16814
|
}
|
|
16272
16815
|
return {
|
|
16273
|
-
path:
|
|
16816
|
+
path: path53,
|
|
16274
16817
|
score: 0,
|
|
16275
16818
|
weight,
|
|
16276
16819
|
hit: false,
|
|
16277
|
-
message: `${
|
|
16820
|
+
message: `${path53} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
16278
16821
|
};
|
|
16279
16822
|
}
|
|
16280
16823
|
/**
|
|
@@ -16307,11 +16850,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
16307
16850
|
};
|
|
16308
16851
|
}
|
|
16309
16852
|
};
|
|
16310
|
-
function resolvePath(obj,
|
|
16311
|
-
if (!
|
|
16853
|
+
function resolvePath(obj, path53) {
|
|
16854
|
+
if (!path53 || !obj) {
|
|
16312
16855
|
return void 0;
|
|
16313
16856
|
}
|
|
16314
|
-
const parts =
|
|
16857
|
+
const parts = path53.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
16315
16858
|
let current = obj;
|
|
16316
16859
|
for (const part of parts) {
|
|
16317
16860
|
if (current === null || current === void 0) {
|
|
@@ -16808,8 +17351,8 @@ var TokenUsageEvaluator = class {
|
|
|
16808
17351
|
|
|
16809
17352
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
16810
17353
|
init_cjs_shims();
|
|
16811
|
-
function getNestedValue(obj,
|
|
16812
|
-
const parts =
|
|
17354
|
+
function getNestedValue(obj, path53) {
|
|
17355
|
+
const parts = path53.split(".");
|
|
16813
17356
|
let current = obj;
|
|
16814
17357
|
for (const part of parts) {
|
|
16815
17358
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -18602,7 +19145,7 @@ var WorkspacePoolManager = class {
|
|
|
18602
19145
|
}
|
|
18603
19146
|
/**
|
|
18604
19147
|
* Reset an existing slot for reuse:
|
|
18605
|
-
* 1. Reset repos (git reset --hard
|
|
19148
|
+
* 1. Reset repos (fetch from origin when resolve=remote, then git reset --hard && git clean per repo)
|
|
18606
19149
|
* 2. Re-copy template files (skip repo directories)
|
|
18607
19150
|
*/
|
|
18608
19151
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
@@ -18615,7 +19158,17 @@ var WorkspacePoolManager = class {
|
|
|
18615
19158
|
continue;
|
|
18616
19159
|
}
|
|
18617
19160
|
const ref = repo.checkout?.ref ?? "HEAD";
|
|
18618
|
-
|
|
19161
|
+
const resolve = repo.checkout?.resolve ?? "remote";
|
|
19162
|
+
if (resolve === "remote") {
|
|
19163
|
+
const fetchArgs = ["fetch", "origin", ref];
|
|
19164
|
+
if (repo.clone?.depth) {
|
|
19165
|
+
fetchArgs.splice(1, 0, "--depth", String(repo.clone.depth));
|
|
19166
|
+
}
|
|
19167
|
+
await git(fetchArgs, { cwd: repoDir });
|
|
19168
|
+
await git(["reset", "--hard", "FETCH_HEAD"], { cwd: repoDir });
|
|
19169
|
+
} else {
|
|
19170
|
+
await git(["reset", "--hard", ref], { cwd: repoDir });
|
|
19171
|
+
}
|
|
18619
19172
|
const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
|
|
18620
19173
|
await git(["clean", cleanFlag], { cwd: repoDir });
|
|
18621
19174
|
}
|
|
@@ -18915,7 +19468,7 @@ async function executeWorkspaceScript(config, context2, failureMode = "fatal") {
|
|
|
18915
19468
|
}
|
|
18916
19469
|
|
|
18917
19470
|
// src/evaluation/orchestrator.ts
|
|
18918
|
-
function classifyQualityStatus(score, threshold =
|
|
19471
|
+
function classifyQualityStatus(score, threshold = DEFAULT_THRESHOLD) {
|
|
18919
19472
|
return score >= threshold ? "ok" : "quality_failure";
|
|
18920
19473
|
}
|
|
18921
19474
|
function buildSkippedEvaluatorError(scores) {
|
|
@@ -19007,7 +19560,7 @@ async function runEvaluation(options) {
|
|
|
19007
19560
|
const filteredEvalCases = filterEvalCases(evalCases, filter);
|
|
19008
19561
|
if (filteredEvalCases.length === 0) {
|
|
19009
19562
|
if (filter) {
|
|
19010
|
-
throw new Error(`No tests matched filter '${filter}' in ${evalFilePath}`);
|
|
19563
|
+
throw new Error(`No tests matched filter '${formatFilter(filter)}' in ${evalFilePath}`);
|
|
19011
19564
|
}
|
|
19012
19565
|
return [];
|
|
19013
19566
|
}
|
|
@@ -19059,6 +19612,9 @@ async function runEvaluation(options) {
|
|
|
19059
19612
|
const graderName = targetContext.graderTarget ?? targetContext.name;
|
|
19060
19613
|
const resolvedGrader = resolveTargetByName(graderName);
|
|
19061
19614
|
if (!resolvedGrader) {
|
|
19615
|
+
if (!LLM_GRADER_CAPABLE_KINDS.includes(targetContext.kind)) {
|
|
19616
|
+
return void 0;
|
|
19617
|
+
}
|
|
19062
19618
|
return getOrCreateProvider(targetContext);
|
|
19063
19619
|
}
|
|
19064
19620
|
return getOrCreateProvider(resolvedGrader);
|
|
@@ -19389,7 +19945,7 @@ async function runEvaluation(options) {
|
|
|
19389
19945
|
const budgetResult = {
|
|
19390
19946
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
19391
19947
|
testId: evalCase.id,
|
|
19392
|
-
|
|
19948
|
+
suite: evalCase.suite,
|
|
19393
19949
|
category: evalCase.category,
|
|
19394
19950
|
score: 0,
|
|
19395
19951
|
assertions: [],
|
|
@@ -19426,7 +19982,7 @@ async function runEvaluation(options) {
|
|
|
19426
19982
|
const haltResult = {
|
|
19427
19983
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
19428
19984
|
testId: evalCase.id,
|
|
19429
|
-
|
|
19985
|
+
suite: evalCase.suite,
|
|
19430
19986
|
category: evalCase.category,
|
|
19431
19987
|
score: 0,
|
|
19432
19988
|
assertions: [],
|
|
@@ -19738,7 +20294,7 @@ async function runBatchEvaluation(options) {
|
|
|
19738
20294
|
targetResolver,
|
|
19739
20295
|
availableTargets,
|
|
19740
20296
|
verbose,
|
|
19741
|
-
threshold: batchThreshold
|
|
20297
|
+
threshold: evalCase.threshold ?? batchThreshold
|
|
19742
20298
|
});
|
|
19743
20299
|
if (providerError) {
|
|
19744
20300
|
result = {
|
|
@@ -20200,8 +20756,9 @@ async function runEvalCase(options) {
|
|
|
20200
20756
|
fileChanges,
|
|
20201
20757
|
workspacePath,
|
|
20202
20758
|
verbose,
|
|
20203
|
-
threshold: caseThreshold
|
|
20759
|
+
threshold: evalCase.threshold ?? caseThreshold
|
|
20204
20760
|
});
|
|
20761
|
+
const effectiveThreshold = evalCase.threshold ?? caseThreshold;
|
|
20205
20762
|
const totalDurationMs = Date.now() - caseStartMs;
|
|
20206
20763
|
const graderTokens = aggregateEvaluatorTokenUsage(result.scores);
|
|
20207
20764
|
const evalRunTokenUsage = tokenUsage || graderTokens ? {
|
|
@@ -20215,7 +20772,7 @@ async function runEvalCase(options) {
|
|
|
20215
20772
|
...evalRunTokenUsage ? { tokenUsage: evalRunTokenUsage } : {}
|
|
20216
20773
|
};
|
|
20217
20774
|
const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
|
|
20218
|
-
const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score,
|
|
20775
|
+
const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, effectiveThreshold);
|
|
20219
20776
|
const targetUsedField = targetUsed ? { targetUsed } : {};
|
|
20220
20777
|
const finalResult = providerError ? {
|
|
20221
20778
|
...result,
|
|
@@ -20416,7 +20973,8 @@ async function evaluateCandidate(options) {
|
|
|
20416
20973
|
targetResolver,
|
|
20417
20974
|
availableTargets,
|
|
20418
20975
|
fileChanges,
|
|
20419
|
-
workspacePath
|
|
20976
|
+
workspacePath,
|
|
20977
|
+
threshold: evalThreshold
|
|
20420
20978
|
});
|
|
20421
20979
|
const completedAt = nowFn();
|
|
20422
20980
|
let agentRequest;
|
|
@@ -20447,7 +21005,7 @@ async function evaluateCandidate(options) {
|
|
|
20447
21005
|
return {
|
|
20448
21006
|
timestamp: completedAt.toISOString(),
|
|
20449
21007
|
testId: evalCase.id,
|
|
20450
|
-
|
|
21008
|
+
suite: evalCase.suite,
|
|
20451
21009
|
category: evalCase.category,
|
|
20452
21010
|
conversationId: evalCase.conversation_id,
|
|
20453
21011
|
score: score.score,
|
|
@@ -20490,7 +21048,8 @@ async function runEvaluatorsForCase(options) {
|
|
|
20490
21048
|
targetResolver,
|
|
20491
21049
|
availableTargets,
|
|
20492
21050
|
fileChanges,
|
|
20493
|
-
workspacePath
|
|
21051
|
+
workspacePath,
|
|
21052
|
+
threshold
|
|
20494
21053
|
} = options;
|
|
20495
21054
|
if (evalCase.assertions && evalCase.assertions.length > 0) {
|
|
20496
21055
|
return runEvaluatorList({
|
|
@@ -20516,7 +21075,8 @@ async function runEvaluatorsForCase(options) {
|
|
|
20516
21075
|
targetResolver,
|
|
20517
21076
|
availableTargets,
|
|
20518
21077
|
fileChanges,
|
|
20519
|
-
workspacePath
|
|
21078
|
+
workspacePath,
|
|
21079
|
+
threshold
|
|
20520
21080
|
});
|
|
20521
21081
|
}
|
|
20522
21082
|
const evaluatorKind = evalCase.evaluator ?? "llm-grader";
|
|
@@ -20618,7 +21178,8 @@ async function runEvaluatorList(options) {
|
|
|
20618
21178
|
name: evaluatorConfig.name,
|
|
20619
21179
|
type: evaluatorConfig.type,
|
|
20620
21180
|
weight,
|
|
20621
|
-
...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {}
|
|
21181
|
+
...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {},
|
|
21182
|
+
...evaluatorConfig.min_score !== void 0 ? { min_score: evaluatorConfig.min_score } : {}
|
|
20622
21183
|
});
|
|
20623
21184
|
scores.push({
|
|
20624
21185
|
name: evaluatorConfig.name,
|
|
@@ -20653,7 +21214,8 @@ async function runEvaluatorList(options) {
|
|
|
20653
21214
|
name: evaluatorConfig.name ?? "unknown",
|
|
20654
21215
|
type: evaluatorConfig.type ?? "llm-grader",
|
|
20655
21216
|
weight,
|
|
20656
|
-
...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {}
|
|
21217
|
+
...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {},
|
|
21218
|
+
...evaluatorConfig.min_score !== void 0 ? { min_score: evaluatorConfig.min_score } : {}
|
|
20657
21219
|
});
|
|
20658
21220
|
scores.push({
|
|
20659
21221
|
name: evaluatorConfig.name ?? "unknown",
|
|
@@ -20687,9 +21249,10 @@ async function runEvaluatorList(options) {
|
|
|
20687
21249
|
}
|
|
20688
21250
|
}
|
|
20689
21251
|
}
|
|
21252
|
+
const effectiveThreshold = options.threshold ?? DEFAULT_THRESHOLD;
|
|
20690
21253
|
const hasRequiredFailure = scored.some((entry) => {
|
|
20691
21254
|
if (!entry.required) return false;
|
|
20692
|
-
const minScore = typeof entry.required === "number" ? entry.required :
|
|
21255
|
+
const minScore = entry.min_score ?? (typeof entry.required === "number" ? entry.required : effectiveThreshold);
|
|
20693
21256
|
return entry.score.score < minScore;
|
|
20694
21257
|
});
|
|
20695
21258
|
const scorable = scored.filter((entry) => entry.score.verdict !== "skip");
|
|
@@ -20700,17 +21263,23 @@ async function runEvaluatorList(options) {
|
|
|
20700
21263
|
const expectedAspectCount = assertions.length || 1;
|
|
20701
21264
|
const score = {
|
|
20702
21265
|
score: aggregateScore,
|
|
20703
|
-
verdict: scoreToVerdict(aggregateScore),
|
|
21266
|
+
verdict: scoreToVerdict(aggregateScore, effectiveThreshold),
|
|
20704
21267
|
assertions,
|
|
20705
21268
|
expectedAspectCount
|
|
20706
21269
|
};
|
|
20707
21270
|
return { score, scores };
|
|
20708
21271
|
}
|
|
21272
|
+
function formatFilter(filter) {
|
|
21273
|
+
return typeof filter === "string" ? filter : filter.join(", ");
|
|
21274
|
+
}
|
|
21275
|
+
function matchesFilter3(id, filter) {
|
|
21276
|
+
return typeof filter === "string" ? import_micromatch3.default.isMatch(id, filter) : filter.some((pattern) => import_micromatch3.default.isMatch(id, pattern));
|
|
21277
|
+
}
|
|
20709
21278
|
function filterEvalCases(evalCases, filter) {
|
|
20710
21279
|
if (!filter) {
|
|
20711
21280
|
return evalCases;
|
|
20712
21281
|
}
|
|
20713
|
-
return evalCases.filter((evalCase) =>
|
|
21282
|
+
return evalCases.filter((evalCase) => matchesFilter3(evalCase.id, filter));
|
|
20714
21283
|
}
|
|
20715
21284
|
function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
|
|
20716
21285
|
const llmGrader = overrides?.["llm-grader"] ?? new LlmGraderEvaluator({
|
|
@@ -20797,7 +21366,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
20797
21366
|
return {
|
|
20798
21367
|
timestamp: timestamp.toISOString(),
|
|
20799
21368
|
testId: evalCase.id,
|
|
20800
|
-
|
|
21369
|
+
suite: evalCase.suite,
|
|
20801
21370
|
category: evalCase.category,
|
|
20802
21371
|
conversationId: evalCase.conversation_id,
|
|
20803
21372
|
score: 0,
|
|
@@ -21071,6 +21640,7 @@ async function evaluate(config) {
|
|
|
21071
21640
|
verbose: config.verbose,
|
|
21072
21641
|
maxConcurrency: config.workers ?? 3,
|
|
21073
21642
|
filter: config.filter,
|
|
21643
|
+
threshold: config.threshold,
|
|
21074
21644
|
evalCases,
|
|
21075
21645
|
onResult: async (result) => {
|
|
21076
21646
|
collectedResults.push(result);
|
|
@@ -21081,19 +21651,19 @@ async function evaluate(config) {
|
|
|
21081
21651
|
const durationMs = Date.now() - startTime;
|
|
21082
21652
|
return {
|
|
21083
21653
|
results: allResults,
|
|
21084
|
-
summary: computeSummary(allResults, durationMs)
|
|
21654
|
+
summary: computeSummary(allResults, durationMs, config.threshold)
|
|
21085
21655
|
};
|
|
21086
21656
|
}
|
|
21087
21657
|
function mapAssertionType(type) {
|
|
21088
21658
|
return type.replace(/_/g, "-");
|
|
21089
21659
|
}
|
|
21090
|
-
function computeSummary(results, durationMs) {
|
|
21660
|
+
function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
|
|
21091
21661
|
const total = results.length;
|
|
21092
21662
|
let passed = 0;
|
|
21093
21663
|
let scoreSum = 0;
|
|
21094
21664
|
for (const r of results) {
|
|
21095
21665
|
scoreSum += r.score;
|
|
21096
|
-
if (r.score >=
|
|
21666
|
+
if (r.score >= threshold) {
|
|
21097
21667
|
passed++;
|
|
21098
21668
|
}
|
|
21099
21669
|
}
|
|
@@ -21207,7 +21777,7 @@ var CONFIG_FILE_NAMES = [
|
|
|
21207
21777
|
];
|
|
21208
21778
|
async function loadTsConfig(projectRoot) {
|
|
21209
21779
|
const { existsSync: existsSync7 } = await import("fs");
|
|
21210
|
-
const { pathToFileURL } = await import("url");
|
|
21780
|
+
const { pathToFileURL: pathToFileURL2 } = await import("url");
|
|
21211
21781
|
const { join: join2 } = await import("path");
|
|
21212
21782
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
21213
21783
|
const filePath = join2(projectRoot, fileName);
|
|
@@ -21215,7 +21785,7 @@ async function loadTsConfig(projectRoot) {
|
|
|
21215
21785
|
continue;
|
|
21216
21786
|
}
|
|
21217
21787
|
try {
|
|
21218
|
-
const fileUrl =
|
|
21788
|
+
const fileUrl = pathToFileURL2(filePath).href;
|
|
21219
21789
|
const mod = await import(fileUrl);
|
|
21220
21790
|
const config = mod.default ?? mod;
|
|
21221
21791
|
return AgentVConfigSchema.parse(config);
|
|
@@ -21656,7 +22226,7 @@ var OtelTraceExporter = class {
|
|
|
21656
22226
|
rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
21657
22227
|
rootSpan.setAttribute("agentv.test_id", result.testId);
|
|
21658
22228
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
21659
|
-
if (result.
|
|
22229
|
+
if (result.suite) rootSpan.setAttribute("agentv.suite", result.suite);
|
|
21660
22230
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
21661
22231
|
if (captureContent && result.output.length > 0) {
|
|
21662
22232
|
const lastMsg = result.output[result.output.length - 1];
|
|
@@ -21865,7 +22435,7 @@ var OtelStreamingObserver = class {
|
|
|
21865
22435
|
this.rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
21866
22436
|
this.rootSpan.setAttribute("agentv.test_id", testId);
|
|
21867
22437
|
this.rootSpan.setAttribute("agentv.target", target);
|
|
21868
|
-
if (evalSet) this.rootSpan.setAttribute("agentv.
|
|
22438
|
+
if (evalSet) this.rootSpan.setAttribute("agentv.suite", evalSet);
|
|
21869
22439
|
this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
|
|
21870
22440
|
}
|
|
21871
22441
|
/** Create and immediately export a tool span */
|
|
@@ -22221,12 +22791,244 @@ function extractToolResultContent(content) {
|
|
|
22221
22791
|
return parts.length > 0 ? parts.join("") : void 0;
|
|
22222
22792
|
}
|
|
22223
22793
|
|
|
22224
|
-
// src/import/
|
|
22794
|
+
// src/import/codex-parser.ts
|
|
22795
|
+
init_cjs_shims();
|
|
22796
|
+
function parseCodexSession(jsonl) {
|
|
22797
|
+
const messages = [];
|
|
22798
|
+
let sessionId = "";
|
|
22799
|
+
let cwd;
|
|
22800
|
+
let model;
|
|
22801
|
+
let version;
|
|
22802
|
+
let startTimestamp;
|
|
22803
|
+
let endTimestamp;
|
|
22804
|
+
const pendingCalls = /* @__PURE__ */ new Map();
|
|
22805
|
+
const lines = jsonl.split("\n").filter((l) => l.trim().length > 0);
|
|
22806
|
+
for (const line of lines) {
|
|
22807
|
+
let entry;
|
|
22808
|
+
try {
|
|
22809
|
+
entry = JSON.parse(line);
|
|
22810
|
+
} catch {
|
|
22811
|
+
continue;
|
|
22812
|
+
}
|
|
22813
|
+
if (!entry.type) continue;
|
|
22814
|
+
if (entry.timestamp) {
|
|
22815
|
+
if (!startTimestamp) startTimestamp = entry.timestamp;
|
|
22816
|
+
endTimestamp = entry.timestamp;
|
|
22817
|
+
}
|
|
22818
|
+
const payload = entry.payload ?? {};
|
|
22819
|
+
switch (entry.type) {
|
|
22820
|
+
case "session_meta": {
|
|
22821
|
+
sessionId = String(payload.id ?? "");
|
|
22822
|
+
cwd = payload.cwd ? String(payload.cwd) : void 0;
|
|
22823
|
+
version = payload.cli_version ? String(payload.cli_version) : void 0;
|
|
22824
|
+
if (payload.model && !model) {
|
|
22825
|
+
model = String(payload.model);
|
|
22826
|
+
}
|
|
22827
|
+
break;
|
|
22828
|
+
}
|
|
22829
|
+
case "turn_context": {
|
|
22830
|
+
if (payload.model && !model) {
|
|
22831
|
+
model = String(payload.model);
|
|
22832
|
+
}
|
|
22833
|
+
if (payload.cwd && !cwd) {
|
|
22834
|
+
cwd = String(payload.cwd);
|
|
22835
|
+
}
|
|
22836
|
+
break;
|
|
22837
|
+
}
|
|
22838
|
+
case "response_item": {
|
|
22839
|
+
const itemType = String(payload.type ?? "");
|
|
22840
|
+
const role = String(payload.role ?? "");
|
|
22841
|
+
switch (itemType) {
|
|
22842
|
+
case "message": {
|
|
22843
|
+
if (role === "developer") break;
|
|
22844
|
+
const content = extractResponseItemContent(payload.content);
|
|
22845
|
+
if (role === "user" && content) {
|
|
22846
|
+
messages.push({ role: "user", content });
|
|
22847
|
+
} else if (role === "assistant" && content) {
|
|
22848
|
+
messages.push({ role: "assistant", content });
|
|
22849
|
+
}
|
|
22850
|
+
break;
|
|
22851
|
+
}
|
|
22852
|
+
case "function_call": {
|
|
22853
|
+
const toolName = String(payload.name ?? "");
|
|
22854
|
+
const callId = String(payload.call_id ?? "");
|
|
22855
|
+
let input;
|
|
22856
|
+
if (typeof payload.arguments === "string") {
|
|
22857
|
+
try {
|
|
22858
|
+
input = JSON.parse(payload.arguments);
|
|
22859
|
+
} catch {
|
|
22860
|
+
input = payload.arguments;
|
|
22861
|
+
}
|
|
22862
|
+
} else {
|
|
22863
|
+
input = payload.arguments;
|
|
22864
|
+
}
|
|
22865
|
+
const toolCall = { tool: toolName, input, id: callId };
|
|
22866
|
+
const msgIdx = messages.length;
|
|
22867
|
+
messages.push({
|
|
22868
|
+
role: "assistant",
|
|
22869
|
+
toolCalls: [toolCall]
|
|
22870
|
+
});
|
|
22871
|
+
if (callId) {
|
|
22872
|
+
pendingCalls.set(callId, { msgIdx, toolIdx: 0 });
|
|
22873
|
+
}
|
|
22874
|
+
break;
|
|
22875
|
+
}
|
|
22876
|
+
case "custom_tool_call": {
|
|
22877
|
+
const toolName = String(payload.name ?? "");
|
|
22878
|
+
const callId = String(payload.call_id ?? "");
|
|
22879
|
+
let input;
|
|
22880
|
+
if (typeof payload.arguments === "string") {
|
|
22881
|
+
try {
|
|
22882
|
+
input = JSON.parse(payload.arguments);
|
|
22883
|
+
} catch {
|
|
22884
|
+
input = payload.arguments;
|
|
22885
|
+
}
|
|
22886
|
+
} else {
|
|
22887
|
+
input = payload.arguments;
|
|
22888
|
+
}
|
|
22889
|
+
const toolCall = { tool: toolName, input, id: callId };
|
|
22890
|
+
const msgIdx = messages.length;
|
|
22891
|
+
messages.push({
|
|
22892
|
+
role: "assistant",
|
|
22893
|
+
toolCalls: [toolCall]
|
|
22894
|
+
});
|
|
22895
|
+
if (callId) {
|
|
22896
|
+
pendingCalls.set(callId, { msgIdx, toolIdx: 0 });
|
|
22897
|
+
}
|
|
22898
|
+
break;
|
|
22899
|
+
}
|
|
22900
|
+
case "function_call_output":
|
|
22901
|
+
case "custom_tool_call_output": {
|
|
22902
|
+
const callId = String(payload.call_id ?? "");
|
|
22903
|
+
const pending = pendingCalls.get(callId);
|
|
22904
|
+
if (pending) {
|
|
22905
|
+
const existingMsg = messages[pending.msgIdx];
|
|
22906
|
+
const existingCalls = [...existingMsg.toolCalls ?? []];
|
|
22907
|
+
existingCalls[pending.toolIdx] = {
|
|
22908
|
+
...existingCalls[pending.toolIdx],
|
|
22909
|
+
output: payload.output
|
|
22910
|
+
};
|
|
22911
|
+
messages[pending.msgIdx] = { ...existingMsg, toolCalls: existingCalls };
|
|
22912
|
+
pendingCalls.delete(callId);
|
|
22913
|
+
}
|
|
22914
|
+
break;
|
|
22915
|
+
}
|
|
22916
|
+
// Skip reasoning blocks (thinking tokens)
|
|
22917
|
+
case "reasoning":
|
|
22918
|
+
break;
|
|
22919
|
+
}
|
|
22920
|
+
break;
|
|
22921
|
+
}
|
|
22922
|
+
}
|
|
22923
|
+
}
|
|
22924
|
+
let durationMs;
|
|
22925
|
+
if (startTimestamp && endTimestamp) {
|
|
22926
|
+
durationMs = new Date(endTimestamp).getTime() - new Date(startTimestamp).getTime();
|
|
22927
|
+
}
|
|
22928
|
+
const source = {
|
|
22929
|
+
provider: "codex",
|
|
22930
|
+
sessionId,
|
|
22931
|
+
cwd,
|
|
22932
|
+
startedAt: startTimestamp,
|
|
22933
|
+
model,
|
|
22934
|
+
version
|
|
22935
|
+
};
|
|
22936
|
+
return {
|
|
22937
|
+
messages,
|
|
22938
|
+
source,
|
|
22939
|
+
// Codex rollout files don't include token counts (only rate limit info)
|
|
22940
|
+
tokenUsage: void 0,
|
|
22941
|
+
durationMs,
|
|
22942
|
+
costUsd: null
|
|
22943
|
+
};
|
|
22944
|
+
}
|
|
22945
|
+
function extractResponseItemContent(content) {
|
|
22946
|
+
if (typeof content === "string") return content;
|
|
22947
|
+
if (!Array.isArray(content)) return void 0;
|
|
22948
|
+
const parts = [];
|
|
22949
|
+
for (const block of content) {
|
|
22950
|
+
if (typeof block === "object" && block !== null) {
|
|
22951
|
+
const b = block;
|
|
22952
|
+
if (typeof b.text === "string") {
|
|
22953
|
+
parts.push(b.text);
|
|
22954
|
+
}
|
|
22955
|
+
}
|
|
22956
|
+
}
|
|
22957
|
+
return parts.length > 0 ? parts.join("") : void 0;
|
|
22958
|
+
}
|
|
22959
|
+
|
|
22960
|
+
// src/import/codex-session-discovery.ts
|
|
22225
22961
|
init_cjs_shims();
|
|
22226
22962
|
var import_promises36 = require("fs/promises");
|
|
22227
22963
|
var import_node_os8 = require("os");
|
|
22228
22964
|
var import_node_path53 = __toESM(require("path"), 1);
|
|
22229
|
-
var
|
|
22965
|
+
var DEFAULT_SESSIONS_DIR = () => import_node_path53.default.join((0, import_node_os8.homedir)(), ".codex", "sessions");
|
|
22966
|
+
/**
 * Discover Codex session rollout files laid out as
 * `<sessionsDir>/<year>/<month>/<day>/rollout-*.jsonl`.
 *
 * @param {object} [opts]
 * @param {string} [opts.sessionsDir] Root directory to scan; defaults to `DEFAULT_SESSIONS_DIR()`.
 * @param {boolean} [opts.latest] When truthy, return only the most recently modified session.
 * @param {number} [opts.limit] Maximum number of sessions to return (default 10). Negative
 *   values are treated as 0.
 * @param {string} [opts.date] Only include sessions whose `<year>-<month>-<day>` directory
 *   name equals this value.
 * @returns {Promise<Array<{sessionId: string, filePath: string, filename: string, updatedAt: Date}>>}
 *   Sessions sorted newest first by file mtime. Unreadable directories are skipped and a
 *   missing root yields an empty array; this function does not reject for filesystem errors.
 */
async function discoverCodexSessions(opts) {
  const { readdir, stat } = import_promises36;
  const joinPath = (...segments) => import_node_path53.default.join(...segments);

  const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
  const requestedLimit = opts?.latest ? 1 : opts?.limit ?? 10;
  // A negative limit would make slice(0, limit) drop entries from the end instead of returning none.
  const limit = Math.max(0, requestedLimit);
  const dateFilter = opts?.date ? String(opts.date) : void 0;

  // Discovery is best-effort: a missing or unreadable directory (or a plain file
  // sitting where a directory is expected) simply contributes no entries.
  const listEntries = async (dir) => {
    try {
      return await readdir(dir);
    } catch {
      return [];
    }
  };

  // The session id is the trailing five dash-separated segments of the file name
  // (presumably a UUID); shorter names fall back to the whole name without extension.
  const toSessionId = (filename) => {
    const nameWithoutExt = filename.replace(/\.jsonl$/, "");
    const parts = nameWithoutExt.split("-");
    return parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
  };

  const describeSession = async (dayPath, filename) => {
    const filePath = joinPath(dayPath, filename);
    let updatedAt;
    try {
      updatedAt = (await stat(filePath)).mtime;
    } catch {
      // Keep the session listed but sort it last when its mtime cannot be read.
      updatedAt = /* @__PURE__ */ new Date(0);
    }
    return { sessionId: toSessionId(filename), filePath, filename, updatedAt };
  };

  const sessions = [];
  for (const year of await listEntries(sessionsDir)) {
    // Prune early: a year/month that is not a prefix of the requested date can never match.
    if (dateFilter && !dateFilter.startsWith(`${year}-`)) continue;
    const yearPath = joinPath(sessionsDir, year);
    for (const month of await listEntries(yearPath)) {
      if (dateFilter && !dateFilter.startsWith(`${year}-${month}-`)) continue;
      const monthPath = joinPath(yearPath, month);
      for (const day of await listEntries(monthPath)) {
        if (dateFilter && `${year}-${month}-${day}` !== dateFilter) continue;
        const dayPath = joinPath(monthPath, day);
        const rolloutFiles = (await listEntries(dayPath)).filter(
          (file) => file.startsWith("rollout-") && file.endsWith(".jsonl")
        );
        // The stat calls within one day directory are independent, so run them concurrently.
        const found = await Promise.all(rolloutFiles.map((file) => describeSession(dayPath, file)));
        for (const session of found) {
          sessions.push(session);
        }
      }
    }
  }

  sessions.sort((a, b) => {
    const byMtime = b.updatedAt.getTime() - a.updatedAt.getTime();
    if (byMtime !== 0) return byMtime;
    // Deterministic tie-break (descending path) so equal mtimes do not depend on readdir order.
    if (a.filePath === b.filePath) return 0;
    return a.filePath < b.filePath ? 1 : -1;
  });
  return sessions.slice(0, limit);
}
|
|
23025
|
+
|
|
23026
|
+
// src/import/session-discovery.ts
|
|
23027
|
+
init_cjs_shims();
|
|
23028
|
+
var import_promises37 = require("fs/promises");
|
|
23029
|
+
var import_node_os9 = require("os");
|
|
23030
|
+
var import_node_path54 = __toESM(require("path"), 1);
|
|
23031
|
+
/**
 * Resolve the default Claude projects directory: `<home>/.claude/projects`.
 * Evaluated lazily so the home directory is looked up at call time.
 */
var DEFAULT_PROJECTS_DIR = () => {
  const homeDirectory = (0, import_node_os9.homedir)();
  return import_node_path54.default.join(homeDirectory, ".claude", "projects");
};
|
|
22230
23032
|
/**
 * Encode a project path as a single directory name by turning every
 * forward slash into a dash.
 *
 * @param {string} projectPath Path to encode.
 * @returns {string} The path with each "/" replaced by "-".
 */
function encodeProjectPath(projectPath) {
  const slashPattern = /\//g;
  const encoded = projectPath.replace(slashPattern, "-");
  return encoded;
}
|
|
@@ -22235,7 +23037,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
22235
23037
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
22236
23038
|
let projectDirs;
|
|
22237
23039
|
try {
|
|
22238
|
-
projectDirs = await (0,
|
|
23040
|
+
projectDirs = await (0, import_promises37.readdir)(projectsDir);
|
|
22239
23041
|
} catch {
|
|
22240
23042
|
return [];
|
|
22241
23043
|
}
|
|
@@ -22245,10 +23047,10 @@ async function discoverClaudeSessions(opts) {
|
|
|
22245
23047
|
}
|
|
22246
23048
|
const sessions = [];
|
|
22247
23049
|
for (const projectDir of projectDirs) {
|
|
22248
|
-
const dirPath =
|
|
23050
|
+
const dirPath = import_node_path54.default.join(projectsDir, projectDir);
|
|
22249
23051
|
let entries;
|
|
22250
23052
|
try {
|
|
22251
|
-
entries = await (0,
|
|
23053
|
+
entries = await (0, import_promises37.readdir)(dirPath);
|
|
22252
23054
|
} catch {
|
|
22253
23055
|
continue;
|
|
22254
23056
|
}
|
|
@@ -22256,10 +23058,10 @@ async function discoverClaudeSessions(opts) {
|
|
|
22256
23058
|
if (!entry.endsWith(".jsonl")) continue;
|
|
22257
23059
|
const sessionId = entry.replace(/\.jsonl$/, "");
|
|
22258
23060
|
if (opts?.sessionId && sessionId !== opts.sessionId) continue;
|
|
22259
|
-
const filePath =
|
|
23061
|
+
const filePath = import_node_path54.default.join(dirPath, entry);
|
|
22260
23062
|
let updatedAt;
|
|
22261
23063
|
try {
|
|
22262
|
-
const fileStat = await (0,
|
|
23064
|
+
const fileStat = await (0, import_promises37.stat)(filePath);
|
|
22263
23065
|
updatedAt = fileStat.mtime;
|
|
22264
23066
|
} catch {
|
|
22265
23067
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -22276,13 +23078,91 @@ async function discoverClaudeSessions(opts) {
|
|
|
22276
23078
|
return sessions.slice(0, limit);
|
|
22277
23079
|
}
|
|
22278
23080
|
|
|
23081
|
+
// src/import/transcript-provider.ts
|
|
23082
|
+
init_cjs_shims();
|
|
23083
|
+
|
|
22279
23084
|
// src/import/types.ts
|
|
22280
23085
|
init_cjs_shims();
|
|
22281
|
-
var
|
|
23086
|
+
var import_promises38 = require("fs/promises");
|
|
23087
|
+
/**
 * Convert a parsed transcript entry (camelCase) into the snake_case record
 * written as one line of a transcript JSONL file.
 *
 * `input` is the content of the first message whose role is "user" when that
 * content is a string, and "" otherwise. `source.cwd` falls back to
 * `source.projectPath` when `cwd` is null or undefined.
 *
 * @param {object} entry Transcript entry with `messages`, `source` and optional
 *   `tokenUsage`, `durationMs`, `costUsd`.
 * @returns {object} Record with `input`, `output`, `token_usage`, `duration_ms`,
 *   `cost_usd` and `source`.
 */
function toTranscriptJsonLine(entry) {
  const openingUserMessage = entry.messages.find(({ role }) => role === "user");
  const openingContent = openingUserMessage?.content;

  let usage = void 0;
  if (entry.tokenUsage) {
    usage = {
      input: entry.tokenUsage.input,
      output: entry.tokenUsage.output,
      cached: entry.tokenUsage.cached
    };
  }

  const origin = entry.source;
  const source = {
    provider: origin.provider,
    session_id: origin.sessionId,
    model: origin.model,
    timestamp: origin.startedAt,
    git_branch: origin.gitBranch,
    cwd: origin.cwd ?? origin.projectPath,
    version: origin.version
  };

  return {
    input: typeof openingContent === "string" ? openingContent : "",
    output: entry.messages,
    token_usage: usage,
    duration_ms: entry.durationMs,
    cost_usd: entry.costUsd,
    source
  };
}
|
|
23111
|
+
/**
 * Read a transcript JSONL file and parse every non-blank line as JSON.
 *
 * A leading UTF-8 byte-order mark is stripped before parsing, because
 * `JSON.parse` rejects it even though `trim()` treats it as whitespace.
 * Blank and whitespace-only lines are skipped; CRLF line endings are accepted.
 *
 * @param {string} filePath Path of the JSONL file to read.
 * @returns {Promise<unknown[]>} Parsed records in file order.
 * @throws {SyntaxError} When a line is not valid JSON. The message names the
 *   file and the 1-based line number, and the original parser error is
 *   attached as `cause`.
 * @throws Rejects with the underlying filesystem error when the file cannot be read.
 */
async function readTranscriptJsonl(filePath) {
  const raw = await (0, import_promises38.readFile)(filePath, "utf8");
  const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
  const rows = text.split("\n");
  const records = [];
  for (let index = 0; index < rows.length; index++) {
    const row = rows[index];
    if (row.trim().length === 0) continue;
    try {
      records.push(JSON.parse(row));
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new SyntaxError(
        `Invalid JSON in transcript file ${filePath} at line ${index + 1}: ${reason}`,
        { cause: err }
      );
    }
  }
  return records;
}
|
|
22282
23115
|
/**
 * Read a transcript file and return its raw contents decoded as UTF-8.
 *
 * @param {string} filePath Path of the file to read.
 * @returns {Promise<string>} The file's text; rejects if the file cannot be read.
 */
async function readTranscriptFile(filePath) {
  const { readFile } = import_promises38;
  const contents = await readFile(filePath, "utf8");
  return contents;
}
|
|
22285
23118
|
|
|
23119
|
+
// src/import/transcript-provider.ts
|
|
23120
|
+
/**
 * Provider that replays pre-recorded transcript lines instead of calling a
 * live target. Every `invoke` call consumes the next line in order, so each
 * transcript line maps to one test case.
 */
var TranscriptProvider = class _TranscriptProvider {
  id;
  kind = "transcript";
  targetName;
  lines;
  // Index of the next line to hand out from `invoke`.
  cursor = 0;
  /**
   * @param {string} targetName Name used for `targetName` and for the `transcript:<name>` id.
   * @param {object[]} lines Parsed transcript records (snake_case fields such as
   *   `output`, `token_usage`, `duration_ms`, `cost_usd`, `source`).
   */
  constructor(targetName, lines) {
    this.targetName = targetName;
    this.id = `transcript:${targetName}`;
    this.lines = lines;
  }
  /**
   * Create a TranscriptProvider from a JSONL file path.
   *
   * The target name is the first line's `source.provider`, falling back to
   * "transcript" when the line has no `source` or no provider.
   *
   * @param {string} filePath Path of the transcript JSONL file.
   * @returns {Promise<_TranscriptProvider>}
   * @throws {Error} When the file contains no transcript lines.
   */
  static async fromFile(filePath) {
    const lines = await readTranscriptJsonl(filePath);
    if (lines.length === 0) {
      throw new Error(`Transcript file is empty: ${filePath}`);
    }
    // `source` may be absent in hand-written transcripts; without `?.` the
    // fallback below could never apply because the property access would throw.
    const providerName = lines[0].source?.provider ?? "transcript";
    return new _TranscriptProvider(providerName, lines);
  }
  /** Number of transcript lines held by this provider. */
  get lineCount() {
    return this.lines.length;
  }
  /**
   * Return the next recorded response. The request itself is ignored.
   *
   * @param {unknown} _request Unused.
   * @returns {Promise<{output: unknown, tokenUsage: object|undefined, durationMs: unknown, costUsd: unknown, startTime: unknown}>}
   * @throws {Error} When more invocations are made than there are transcript lines.
   */
  async invoke(_request) {
    if (this.cursor >= this.lines.length) {
      throw new Error(
        `Transcript exhausted: ${this.lines.length} line(s) available but ${this.cursor + 1} invocations attempted. Each transcript line maps to one test case.`
      );
    }
    const line = this.lines[this.cursor++];
    return {
      output: line.output,
      tokenUsage: line.token_usage ? {
        input: line.token_usage.input,
        output: line.token_usage.output,
        cached: line.token_usage.cached
      } : void 0,
      durationMs: line.duration_ms,
      // Normalize a JSON `null` cost to undefined.
      costUsd: line.cost_usd ?? void 0,
      // Tolerate lines without a `source` object instead of throwing a TypeError.
      startTime: line.source?.timestamp
    };
  }
};
|
|
23165
|
+
|
|
22286
23166
|
// src/index.ts
|
|
22287
23167
|
function createAgentKernel() {
|
|
22288
23168
|
return { status: "stub" };
|
|
@@ -22297,6 +23177,7 @@ function createAgentKernel() {
|
|
|
22297
23177
|
DEFAULT_EVALUATOR_TEMPLATE,
|
|
22298
23178
|
DEFAULT_EVAL_PATTERNS,
|
|
22299
23179
|
DEFAULT_EXPLORATION_TOOLS,
|
|
23180
|
+
DEFAULT_THRESHOLD,
|
|
22300
23181
|
DeterministicAssertionEvaluator,
|
|
22301
23182
|
EvaluatorRegistry,
|
|
22302
23183
|
ExecutionMetricsEvaluator,
|
|
@@ -22318,6 +23199,7 @@ function createAgentKernel() {
|
|
|
22318
23199
|
TemplateNotFoundError,
|
|
22319
23200
|
TokenUsageEvaluator,
|
|
22320
23201
|
ToolTrajectoryEvaluator,
|
|
23202
|
+
TranscriptProvider,
|
|
22321
23203
|
WorkspaceCreationError,
|
|
22322
23204
|
WorkspacePoolManager,
|
|
22323
23205
|
addProject,
|
|
@@ -22354,6 +23236,7 @@ function createAgentKernel() {
|
|
|
22354
23236
|
detectFormat,
|
|
22355
23237
|
discoverAssertions,
|
|
22356
23238
|
discoverClaudeSessions,
|
|
23239
|
+
discoverCodexSessions,
|
|
22357
23240
|
discoverCopilotSessions,
|
|
22358
23241
|
discoverGraders,
|
|
22359
23242
|
discoverJudges,
|
|
@@ -22414,6 +23297,8 @@ function createAgentKernel() {
|
|
|
22414
23297
|
normalizeLineEndings,
|
|
22415
23298
|
parseAgentSkillsEvals,
|
|
22416
23299
|
parseClaudeSession,
|
|
23300
|
+
parseCodexSession,
|
|
23301
|
+
parseCopilotEvents,
|
|
22417
23302
|
parseJsonFromText,
|
|
22418
23303
|
parseJsonSafe,
|
|
22419
23304
|
readJsonFile,
|
|
@@ -22421,6 +23306,7 @@ function createAgentKernel() {
|
|
|
22421
23306
|
readTestSuiteMetadata,
|
|
22422
23307
|
readTextFile,
|
|
22423
23308
|
readTranscriptFile,
|
|
23309
|
+
readTranscriptJsonl,
|
|
22424
23310
|
removeProject,
|
|
22425
23311
|
resolveAndCreateProvider,
|
|
22426
23312
|
resolveDelegatedTargetDefinition,
|
|
@@ -22453,6 +23339,7 @@ function createAgentKernel() {
|
|
|
22453
23339
|
substituteVariables,
|
|
22454
23340
|
toCamelCaseDeep,
|
|
22455
23341
|
toSnakeCaseDeep,
|
|
23342
|
+
toTranscriptJsonLine,
|
|
22456
23343
|
tokensPerTool,
|
|
22457
23344
|
touchProject,
|
|
22458
23345
|
transpileEvalYaml,
|