@agentv/core 4.29.2-next.1 → 4.30.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-YFXMMBUG.js → chunk-5RQMJZDJ.js} +57 -1
- package/dist/{chunk-YFXMMBUG.js.map → chunk-5RQMJZDJ.js.map} +1 -1
- package/dist/{chunk-SCC35F3L.js → chunk-Z2BBOGE4.js} +69 -30
- package/dist/{chunk-SCC35F3L.js.map → chunk-Z2BBOGE4.js.map} +1 -1
- package/dist/evaluation/validation/index.cjs +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +2 -1
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +122 -29
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +2 -2
- package/dist/{ts-eval-loader-EMSGL2BQ.js → ts-eval-loader-JL5DGTJL.js} +3 -3
- package/package.json +1 -1
- /package/dist/{ts-eval-loader-EMSGL2BQ.js.map → ts-eval-loader-JL5DGTJL.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -369,6 +369,35 @@ function coercePrimitive(value) {
|
|
|
369
369
|
if (PLAIN_NUMBER_PATTERN.test(value)) return Number(value);
|
|
370
370
|
return value;
|
|
371
371
|
}
|
|
372
|
+
/**
 * Report whether `value` is a non-null object that is not an array.
 *
 * This is a structural check only: any non-array object qualifies, including
 * class instances, not just object literals.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for non-null, non-array objects.
 */
function isPlainObject(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Recursively copy arrays and objects so a resolved template variable can be
 * inserted without sharing references with the variable table.
 *
 * @param {unknown} value - Value to copy.
 * @returns {unknown} A deep copy of arrays/objects; any other value unchanged.
 */
function cloneTemplateValue(value) {
  if (Array.isArray(value)) {
    return value.map((item) => cloneTemplateValue(item));
  }
  if (isPlainObject(value)) {
    // Object.fromEntries defines own data properties, so a key literally named
    // "__proto__" is copied as data instead of re-parenting the new object
    // (which is what `result[key] = ...` on a `{}` would do).
    return Object.fromEntries(
      Object.entries(value).map(([key, nested]) => [key, cloneTemplateValue(nested)])
    );
  }
  return value;
}

/**
 * Render a resolved template value as text for inline substitution.
 *
 * @param {unknown} value - Resolved variable value.
 * @returns {string} The string itself, or its JSON representation otherwise.
 */
function stringifyTemplateValue(value) {
  if (typeof value === "string") return value;
  // JSON.stringify yields undefined for values with no JSON form (functions,
  // symbols); fall back to String() so the result is always a string.
  return JSON.stringify(value) ?? String(value);
}

/**
 * Resolve a dotted path such as `user.name` against the variable table.
 *
 * Only own properties are followed, so paths like `constructor` or
 * `a.__proto__` never leak inherited `Object.prototype` members.
 *
 * @param {unknown} vars - Variable table to search.
 * @param {string} expression - Dot-separated property path.
 * @returns {unknown} The value found, or `undefined` when the expression is
 *   empty or any segment cannot be resolved.
 */
function lookupTemplateVar(vars, expression) {
  if (!expression) return undefined;
  let current = vars;
  for (const segment of expression.split(".")) {
    if (!isPlainObject(current) || !Object.prototype.hasOwnProperty.call(current, segment)) {
      return undefined;
    }
    current = current[segment];
  }
  return current;
}
|
|
372
401
|
function interpolateEnv(value, env) {
|
|
373
402
|
if (typeof value === "string") {
|
|
374
403
|
const wholeMatch = WHOLE_VAR_PATTERN.exec(value);
|
|
@@ -390,12 +419,38 @@ function interpolateEnv(value, env) {
|
|
|
390
419
|
}
|
|
391
420
|
return value;
|
|
392
421
|
}
|
|
393
|
-
|
|
422
|
+
/**
 * Substitute `{{ name }}` / `{{ path.to.value }}` placeholders throughout a
 * value, recursing into arrays and objects.
 *
 * - A string that consists solely of one placeholder is replaced by a copy of
 *   the resolved value itself, keeping its type (object, array, number, ...).
 * - Placeholders embedded in a longer string are replaced by the stringified
 *   value.
 * - Placeholders that do not resolve are left in the text untouched.
 *
 * NOTE(review): TEMPLATE_VAR_PATTERN also matches the `{{ NAME }}` tail of an
 * env placeholder written as `${{ NAME }}`; presumably env interpolation has
 * already run before this is called — confirm the ordering at the call sites.
 *
 * @param {unknown} value - String, array, object, or primitive to render.
 * @param {unknown} vars - Variable table used to resolve placeholders.
 * @returns {unknown} A rendered copy; non-container, non-string values are
 *   returned unchanged.
 */
function interpolateTemplateVars(value, vars) {
  if (typeof value === "string") {
    const wholeMatch = WHOLE_TEMPLATE_VAR_PATTERN.exec(value);
    if (wholeMatch) {
      const resolved = lookupTemplateVar(vars, wholeMatch[1]);
      return resolved === undefined ? value : cloneTemplateValue(resolved);
    }
    return value.replace(TEMPLATE_VAR_PATTERN, (match, expression) => {
      const resolved = lookupTemplateVar(vars, expression);
      return resolved === undefined ? match : stringifyTemplateValue(resolved);
    });
  }
  if (Array.isArray(value)) {
    return value.map((item) => interpolateTemplateVars(item, vars));
  }
  if (isPlainObject(value)) {
    // Object.fromEntries defines own data properties, so an input key named
    // "__proto__" is preserved as data rather than changing the prototype of
    // the rendered object.
    return Object.fromEntries(
      Object.entries(value).map(([key, nested]) => [key, interpolateTemplateVars(nested, vars)])
    );
  }
  return value;
}
|
|
446
|
+
// Regular expressions used by the interpolation helpers. They are only declared
// here; the values are assigned inside the init_interpolation initializer.
var ENV_VAR_PATTERN, TEMPLATE_VAR_PATTERN, WHOLE_TEMPLATE_VAR_PATTERN, WHOLE_VAR_PATTERN, PLAIN_NUMBER_PATTERN;
|
|
394
447
|
var init_interpolation = __esm({
|
|
395
448
|
"src/evaluation/interpolation.ts"() {
|
|
396
449
|
"use strict";
|
|
397
450
|
init_cjs_shims();
|
|
398
451
|
ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
|
|
452
|
+
TEMPLATE_VAR_PATTERN = /\{\{\s*([A-Za-z_][A-Za-z0-9_.]*)\s*\}\}/g;
|
|
453
|
+
WHOLE_TEMPLATE_VAR_PATTERN = /^\{\{\s*([A-Za-z_][A-Za-z0-9_.]*)\s*\}\}$/;
|
|
399
454
|
WHOLE_VAR_PATTERN = /^\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}$/;
|
|
400
455
|
PLAIN_NUMBER_PATTERN = /^-?(?:0|[1-9]\d*)(?:\.\d+)?$/;
|
|
401
456
|
}
|
|
@@ -22740,6 +22795,41 @@ function resolveTests(suite) {
|
|
|
22740
22795
|
}
|
|
22741
22796
|
return void 0;
|
|
22742
22797
|
}
|
|
22798
|
+
/**
 * Render template placeholders in a single test-case field.
 *
 * The field is passed through untouched when there is no variable table or
 * when the field itself is undefined.
 *
 * @param {unknown} value - Field value to render.
 * @param {unknown} vars - Variable table, or a falsy value when none exists.
 * @returns {unknown} The rendered field, or `value` itself when skipped.
 */
function interpolateCaseField(value, vars) {
  const shouldRender = Boolean(vars) && value !== undefined;
  return shouldRender ? interpolateTemplateVars(value, vars) : value;
}
|
|
22804
|
+
/**
 * Render template placeholders in the `input` and `expected_output` of each
 * conversation turn.
 *
 * Only fields that are actually present on a turn are rewritten, so a turn
 * without `expected_output` does not gain an `expected_output: undefined` key.
 * This mirrors how interpolateRawEvalCase treats case-level fields.
 *
 * @param {unknown} turns - Raw `turns` value from the test case.
 * @param {unknown} vars - Variable table, or a falsy value when none exists.
 * @returns {unknown} A new array of rendered turns; `turns` itself when there
 *   are no vars or `turns` is not an array. Turns that are not JSON objects
 *   are passed through unchanged.
 */
function interpolateCaseTurns(turns, vars) {
  if (!vars || !Array.isArray(turns)) {
    return turns;
  }
  return turns.map((rawTurn) => {
    if (!isJsonObject(rawTurn)) {
      return rawTurn;
    }
    const renderedTurn = { ...rawTurn };
    for (const field of ["input", "expected_output"]) {
      if (rawTurn[field] !== undefined) {
        renderedTurn[field] = interpolateCaseField(rawTurn[field], vars);
      }
    }
    return renderedTurn;
  });
}
|
|
22819
|
+
/**
 * Produce a copy of a raw test case with template placeholders rendered in its
 * content fields.
 *
 * Rendered fields: `criteria`, `expected_outcome`, `input`, `input_files`,
 * `expected_output`, and (per turn) `turns`. A field is only written when it
 * is defined on the raw case; every other property is copied as-is.
 *
 * @param {object} raw - Raw test-case object.
 * @param {unknown} vars - Variable table, or a falsy value when none exists.
 * @returns {object} `raw` itself when there are no vars, otherwise a shallow
 *   copy with the listed fields rendered.
 */
function interpolateRawEvalCase(raw, vars) {
  if (!vars) {
    return raw;
  }
  const rendered = { ...raw };
  const templatedFields = ["criteria", "expected_outcome", "input", "input_files", "expected_output"];
  for (const field of templatedFields) {
    if (raw[field] !== undefined) {
      rendered[field] = interpolateCaseField(raw[field], vars);
    }
  }
  // Turns are rendered per turn rather than as one opaque value.
  if (raw.turns !== undefined) {
    rendered.turns = interpolateCaseTurns(raw.turns, vars);
  }
  return rendered;
}
|
|
22743
22833
|
async function readTestSuiteMetadata(testFilePath) {
|
|
22744
22834
|
try {
|
|
22745
22835
|
const absolutePath = import_node_path50.default.resolve(testFilePath);
|
|
@@ -22854,8 +22944,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
22854
22944
|
}
|
|
22855
22945
|
const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
|
|
22856
22946
|
const suiteGovernance = extractSuiteGovernance(suite);
|
|
22857
|
-
const
|
|
22858
|
-
const
|
|
22947
|
+
const rawSuiteInput = suite.input;
|
|
22948
|
+
const rawSuiteInputFiles = suite.input_files;
|
|
22859
22949
|
const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
22860
22950
|
const _globalTarget = asString5(rawGlobalExecution?.target) ?? asString5(suite.target);
|
|
22861
22951
|
const suiteAssertions = suite.assertions ?? suite.assert;
|
|
@@ -22874,30 +22964,33 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
22874
22964
|
if (filterPattern && (!id || !matchesFilter4(id, filterPattern))) {
|
|
22875
22965
|
continue;
|
|
22876
22966
|
}
|
|
22877
|
-
const
|
|
22878
|
-
|
|
22879
|
-
|
|
22880
|
-
|
|
22967
|
+
const caseVars = isJsonObject(testCaseConfig.vars) ? testCaseConfig.vars : void 0;
|
|
22968
|
+
const renderedCase = interpolateRawEvalCase(testCaseConfig, caseVars);
|
|
22969
|
+
const conversationId = asString5(renderedCase.conversation_id);
|
|
22970
|
+
let outcome = asString5(renderedCase.criteria);
|
|
22971
|
+
if (!outcome && renderedCase.expected_outcome !== void 0) {
|
|
22972
|
+
outcome = asString5(renderedCase.expected_outcome);
|
|
22881
22973
|
if (outcome) {
|
|
22882
22974
|
logWarning5(
|
|
22883
|
-
`Test '${asString5(
|
|
22975
|
+
`Test '${asString5(renderedCase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
|
|
22884
22976
|
);
|
|
22885
22977
|
}
|
|
22886
22978
|
}
|
|
22887
|
-
const caseExecution = isJsonObject(
|
|
22979
|
+
const caseExecution = isJsonObject(renderedCase.execution) ? renderedCase.execution : void 0;
|
|
22888
22980
|
const skipDefaults = caseExecution?.skip_defaults === true;
|
|
22889
22981
|
const caseThreshold = typeof caseExecution?.threshold === "number" && caseExecution.threshold >= 0 && caseExecution.threshold <= 1 ? caseExecution.threshold : void 0;
|
|
22890
|
-
const effectiveSuiteInputFiles =
|
|
22891
|
-
const testInputMessages = resolveInputMessages(
|
|
22892
|
-
const expectedMessages = resolveExpectedMessages(
|
|
22893
|
-
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 ||
|
|
22982
|
+
const effectiveSuiteInputFiles = rawSuiteInputFiles && !skipDefaults ? interpolateCaseField(rawSuiteInputFiles, caseVars) : void 0;
|
|
22983
|
+
const testInputMessages = resolveInputMessages(renderedCase, effectiveSuiteInputFiles);
|
|
22984
|
+
const expectedMessages = resolveExpectedMessages(renderedCase) ?? [];
|
|
22985
|
+
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || renderedCase.assertions !== void 0 || renderedCase.assert !== void 0 || Array.isArray(renderedCase.turns) && renderedCase.turns.length > 0;
|
|
22894
22986
|
if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
|
|
22895
22987
|
logError3(
|
|
22896
22988
|
`Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assertions/turns`
|
|
22897
22989
|
);
|
|
22898
22990
|
continue;
|
|
22899
22991
|
}
|
|
22900
|
-
const
|
|
22992
|
+
const effectiveSuiteInputValue = rawSuiteInput && !skipDefaults ? interpolateCaseField(rawSuiteInput, caseVars) : void 0;
|
|
22993
|
+
const effectiveSuiteInputMessages = expandInputShorthand(effectiveSuiteInputValue);
|
|
22901
22994
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
22902
22995
|
const inputTextParts = [];
|
|
22903
22996
|
const suiteResolvedInputMessages = effectiveSuiteInputMessages ? await processMessages({
|
|
@@ -22937,11 +23030,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
22937
23030
|
}
|
|
22938
23031
|
}
|
|
22939
23032
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
22940
|
-
const testCaseEvaluatorKind = coerceEvaluator(
|
|
23033
|
+
const testCaseEvaluatorKind = coerceEvaluator(renderedCase.evaluator, id) ?? globalEvaluator;
|
|
22941
23034
|
let evaluators;
|
|
22942
23035
|
try {
|
|
22943
23036
|
evaluators = await parseGraders(
|
|
22944
|
-
|
|
23037
|
+
renderedCase,
|
|
22945
23038
|
globalExecution,
|
|
22946
23039
|
searchRoots,
|
|
22947
23040
|
id ?? "unknown",
|
|
@@ -22952,7 +23045,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
22952
23045
|
logError3(`Skipping test '${id}': ${message}`);
|
|
22953
23046
|
continue;
|
|
22954
23047
|
}
|
|
22955
|
-
const inlineRubrics =
|
|
23048
|
+
const inlineRubrics = renderedCase.rubrics;
|
|
22956
23049
|
if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
|
|
22957
23050
|
const rubricEvaluator = parseInlineRubrics(inlineRubrics);
|
|
22958
23051
|
if (rubricEvaluator) {
|
|
@@ -22961,25 +23054,25 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
22961
23054
|
}
|
|
22962
23055
|
warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
|
|
22963
23056
|
const userFilePaths = collectResolvedInputFilePaths(inputMessages);
|
|
22964
|
-
const caseWorkspace = await resolveWorkspaceConfig(
|
|
23057
|
+
const caseWorkspace = await resolveWorkspaceConfig(renderedCase.workspace, evalFileDir);
|
|
22965
23058
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
22966
|
-
const rawCaseMetadata = isJsonObject(
|
|
23059
|
+
const rawCaseMetadata = isJsonObject(renderedCase.metadata) ? renderedCase.metadata : void 0;
|
|
22967
23060
|
const suitePayload = suiteGovernance !== void 0 ? { governance: suiteGovernance } : void 0;
|
|
22968
23061
|
const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suitePayload);
|
|
22969
|
-
const caseTargets = extractTargetsFromTestCase(
|
|
22970
|
-
const dependsOn = Array.isArray(
|
|
23062
|
+
const caseTargets = extractTargetsFromTestCase(renderedCase);
|
|
23063
|
+
const dependsOn = Array.isArray(renderedCase.depends_on) ? renderedCase.depends_on.filter(
|
|
22971
23064
|
(v) => typeof v === "string"
|
|
22972
23065
|
) : void 0;
|
|
22973
|
-
const onDependencyFailureRaw = asString5(
|
|
23066
|
+
const onDependencyFailureRaw = asString5(renderedCase.on_dependency_failure);
|
|
22974
23067
|
const onDependencyFailure = onDependencyFailureRaw === "skip" || onDependencyFailureRaw === "fail" || onDependencyFailureRaw === "run" ? onDependencyFailureRaw : void 0;
|
|
22975
|
-
const modeRaw = asString5(
|
|
23068
|
+
const modeRaw = asString5(renderedCase.mode);
|
|
22976
23069
|
const mode = modeRaw === "conversation" ? "conversation" : void 0;
|
|
22977
|
-
const turns = Array.isArray(
|
|
22978
|
-
const aggregationRaw = asString5(
|
|
23070
|
+
const turns = Array.isArray(renderedCase.turns) ? parseTurns(renderedCase.turns) : void 0;
|
|
23071
|
+
const aggregationRaw = asString5(renderedCase.aggregation);
|
|
22979
23072
|
const aggregation = aggregationRaw === "mean" || aggregationRaw === "min" || aggregationRaw === "max" ? aggregationRaw : void 0;
|
|
22980
|
-
const onTurnFailureRaw = asString5(
|
|
23073
|
+
const onTurnFailureRaw = asString5(renderedCase.on_turn_failure);
|
|
22981
23074
|
const onTurnFailure = onTurnFailureRaw === "continue" || onTurnFailureRaw === "stop" ? onTurnFailureRaw : void 0;
|
|
22982
|
-
const windowSize = typeof
|
|
23075
|
+
const windowSize = typeof renderedCase.window_size === "number" && renderedCase.window_size >= 1 ? renderedCase.window_size : void 0;
|
|
22983
23076
|
const testCase = {
|
|
22984
23077
|
id,
|
|
22985
23078
|
suite: suiteName,
|
|
@@ -24111,7 +24204,7 @@ var init_rpc_metadata = __esm({
|
|
|
24111
24204
|
});
|
|
24112
24205
|
|
|
24113
24206
|
// ../../node_modules/.bun/@opentelemetry+core@2.5.1+460773ef8ff1e07c/node_modules/@opentelemetry/core/build/esm/utils/lodash.merge.js
|
|
24114
|
-
function
|
|
24207
|
+
function isPlainObject2(value) {
|
|
24115
24208
|
if (!isObjectLike(value) || baseGetTag(value) !== objectTag) {
|
|
24116
24209
|
return false;
|
|
24117
24210
|
}
|
|
@@ -24270,7 +24363,7 @@ function isPrimitive(value) {
|
|
|
24270
24363
|
return typeof value === "string" || typeof value === "number" || typeof value === "boolean" || typeof value === "undefined" || value instanceof Date || value instanceof RegExp || value === null;
|
|
24271
24364
|
}
|
|
24272
24365
|
function shouldMerge(one, two) {
|
|
24273
|
-
if (!
|
|
24366
|
+
if (!isPlainObject2(one) || !isPlainObject2(two)) {
|
|
24274
24367
|
return false;
|
|
24275
24368
|
}
|
|
24276
24369
|
return true;
|