@wix/evalforge-evaluator 0.169.0 → 0.171.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +205 -105
- package/build/index.js.map +4 -4
- package/build/index.mjs +158 -48
- package/build/index.mjs.map +4 -4
- package/build/types/run-scenario/install-dependencies.d.ts +11 -0
- package/package.json +3 -3
package/build/index.mjs
CHANGED
|
@@ -67,8 +67,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
67
67
|
}
|
|
68
68
|
return headers;
|
|
69
69
|
}
|
|
70
|
-
async function fetchJson(
|
|
71
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
70
|
+
async function fetchJson(path3) {
|
|
71
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
|
|
72
72
|
console.error(`[API] GET ${url}`);
|
|
73
73
|
const headers = buildHeaders();
|
|
74
74
|
const response = await fetch(url, {
|
|
@@ -82,8 +82,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
82
82
|
}
|
|
83
83
|
return response.json();
|
|
84
84
|
}
|
|
85
|
-
async function postJson(
|
|
86
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
85
|
+
async function postJson(path3, body) {
|
|
86
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
|
|
87
87
|
console.error(`[API] POST ${url}`);
|
|
88
88
|
const response = await fetch(url, {
|
|
89
89
|
method: "POST",
|
|
@@ -97,8 +97,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
97
97
|
);
|
|
98
98
|
}
|
|
99
99
|
}
|
|
100
|
-
async function deleteRequest(
|
|
101
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
100
|
+
async function deleteRequest(path3) {
|
|
101
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
|
|
102
102
|
console.error(`[API] DELETE ${url}`);
|
|
103
103
|
const headers = buildHeaders();
|
|
104
104
|
const response = await fetch(url, {
|
|
@@ -112,8 +112,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
112
112
|
);
|
|
113
113
|
}
|
|
114
114
|
}
|
|
115
|
-
async function putJson(
|
|
116
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
115
|
+
async function putJson(path3, body) {
|
|
116
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
|
|
117
117
|
console.error(`[API] PUT ${url}`);
|
|
118
118
|
const response = await fetch(url, {
|
|
119
119
|
method: "PUT",
|
|
@@ -415,17 +415,19 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
|
|
|
415
415
|
// src/run-scenario/index.ts
|
|
416
416
|
import {
|
|
417
417
|
AssertionResultStatus,
|
|
418
|
-
DEFAULT_JUDGE_MODEL
|
|
418
|
+
DEFAULT_JUDGE_MODEL,
|
|
419
|
+
LiveTraceEventType as LiveTraceEventType4,
|
|
420
|
+
formatTraceEventLine
|
|
419
421
|
} from "@wix/evalforge-types";
|
|
420
422
|
import {
|
|
421
423
|
evaluateAssertions as evaluateAssertionsBase
|
|
422
424
|
} from "@wix/eval-assertions";
|
|
423
425
|
|
|
424
426
|
// src/run-scenario/environment.ts
|
|
425
|
-
import { mkdirSync, existsSync, rmSync, readFileSync, writeFileSync } from "fs";
|
|
427
|
+
import { mkdirSync as mkdirSync2, existsSync as existsSync2, rmSync, readFileSync as readFileSync2, writeFileSync as writeFileSync2 } from "fs";
|
|
426
428
|
import { mkdir as mkdir2, writeFile as writeFile2 } from "fs/promises";
|
|
427
429
|
import { tmpdir } from "os";
|
|
428
|
-
import
|
|
430
|
+
import path2, { sep as sep2 } from "path";
|
|
429
431
|
import {
|
|
430
432
|
fetchGitHubFolder,
|
|
431
433
|
fetchGitHubFile
|
|
@@ -448,6 +450,94 @@ async function writeFilesToDirectory(targetDir, files) {
|
|
|
448
450
|
}
|
|
449
451
|
}
|
|
450
452
|
|
|
453
|
+
// src/run-scenario/install-dependencies.ts
|
|
454
|
+
import {
|
|
455
|
+
mkdirSync,
|
|
456
|
+
existsSync,
|
|
457
|
+
readFileSync,
|
|
458
|
+
writeFileSync,
|
|
459
|
+
copyFileSync,
|
|
460
|
+
cpSync,
|
|
461
|
+
renameSync
|
|
462
|
+
} from "fs";
|
|
463
|
+
import { createHash } from "crypto";
|
|
464
|
+
import path from "path";
|
|
465
|
+
import { execFileSync } from "child_process";
|
|
466
|
+
function detectPackageManager(workDir) {
|
|
467
|
+
if (existsSync(path.join(workDir, "pnpm-lock.yaml"))) {
|
|
468
|
+
return { cmd: "pnpm", args: ["install", "--frozen-lockfile"], cacheSourceFile: "pnpm-lock.yaml" };
|
|
469
|
+
}
|
|
470
|
+
if (existsSync(path.join(workDir, "package-lock.json"))) {
|
|
471
|
+
return { cmd: "npm", args: ["ci"], cacheSourceFile: "package-lock.json" };
|
|
472
|
+
}
|
|
473
|
+
if (existsSync(path.join(workDir, "yarn.lock"))) {
|
|
474
|
+
return { cmd: "yarn", args: ["install", "--frozen-lockfile"], cacheSourceFile: "yarn.lock" };
|
|
475
|
+
}
|
|
476
|
+
writeFileSync(path.join(workDir, "yarn.lock"), "", "utf-8");
|
|
477
|
+
return { cmd: "yarn", args: ["install"], cacheSourceFile: "package.json" };
|
|
478
|
+
}
|
|
479
|
+
function cloneDirectory(src, dest) {
|
|
480
|
+
if (process.platform === "darwin") {
|
|
481
|
+
execFileSync("cp", ["-rc", src, dest]);
|
|
482
|
+
} else {
|
|
483
|
+
cpSync(src, dest, { recursive: true });
|
|
484
|
+
}
|
|
485
|
+
}
|
|
486
|
+
function installWithCache(workDir, exec, cacheBase, pm) {
|
|
487
|
+
const sourceContent = readFileSync(path.join(workDir, pm.cacheSourceFile), "utf-8");
|
|
488
|
+
const cacheKey = createHash("sha256").update(sourceContent).digest("hex").slice(0, 16);
|
|
489
|
+
const cachedNodeModules = path.join(cacheBase, cacheKey, "node_modules");
|
|
490
|
+
const targetNodeModules = path.join(workDir, "node_modules");
|
|
491
|
+
const cacheDir = path.dirname(cachedNodeModules);
|
|
492
|
+
const cachedYarnLock = path.join(cacheDir, "yarn.lock");
|
|
493
|
+
if (existsSync(cachedNodeModules)) {
|
|
494
|
+
console.log(`[environment] Restoring node_modules from cache (key: ${cacheKey})`);
|
|
495
|
+
if (!existsSync(targetNodeModules)) {
|
|
496
|
+
cloneDirectory(cachedNodeModules, targetNodeModules);
|
|
497
|
+
}
|
|
498
|
+
if (existsSync(cachedYarnLock)) {
|
|
499
|
+
copyFileSync(cachedYarnLock, path.join(workDir, "yarn.lock"));
|
|
500
|
+
}
|
|
501
|
+
return;
|
|
502
|
+
}
|
|
503
|
+
console.log(`[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir} (cache key: ${cacheKey})`);
|
|
504
|
+
try {
|
|
505
|
+
exec(pm.cmd, pm.args, { cwd: workDir, stdio: "inherit", timeout: 18e4 });
|
|
506
|
+
} catch (err) {
|
|
507
|
+
console.error("[environment] Dependency installation failed:", err instanceof Error ? err.message : String(err));
|
|
508
|
+
return;
|
|
509
|
+
}
|
|
510
|
+
console.log("[environment] Dependency installation complete \u2014 saving to cache");
|
|
511
|
+
try {
|
|
512
|
+
mkdirSync(cacheDir, { recursive: true });
|
|
513
|
+
const yarnLockPath = path.join(workDir, "yarn.lock");
|
|
514
|
+
if (existsSync(yarnLockPath)) {
|
|
515
|
+
copyFileSync(yarnLockPath, cachedYarnLock);
|
|
516
|
+
}
|
|
517
|
+
renameSync(targetNodeModules, cachedNodeModules);
|
|
518
|
+
cloneDirectory(cachedNodeModules, targetNodeModules);
|
|
519
|
+
} catch (err) {
|
|
520
|
+
console.error("[environment] Failed to save to cache (installation still succeeded):", err instanceof Error ? err.message : String(err));
|
|
521
|
+
}
|
|
522
|
+
}
|
|
523
|
+
async function installDependencies(workDir, exec = execFileSync, cacheBase) {
|
|
524
|
+
if (!existsSync(path.join(workDir, "package.json"))) {
|
|
525
|
+
return;
|
|
526
|
+
}
|
|
527
|
+
const pm = detectPackageManager(workDir);
|
|
528
|
+
if (cacheBase) {
|
|
529
|
+
installWithCache(workDir, exec, cacheBase, pm);
|
|
530
|
+
return;
|
|
531
|
+
}
|
|
532
|
+
console.log(`[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir}`);
|
|
533
|
+
try {
|
|
534
|
+
exec(pm.cmd, pm.args, { cwd: workDir, stdio: "inherit", timeout: 18e4 });
|
|
535
|
+
console.log("[environment] Dependency installation complete");
|
|
536
|
+
} catch (err) {
|
|
537
|
+
console.error("[environment] Dependency installation failed:", err instanceof Error ? err.message : String(err));
|
|
538
|
+
}
|
|
539
|
+
}
|
|
540
|
+
|
|
451
541
|
// src/run-scenario/environment.ts
|
|
452
542
|
async function fetchAndWriteTemplateFiles(template, workDir) {
|
|
453
543
|
let sourceFiles = [];
|
|
@@ -468,27 +558,27 @@ async function fetchAndWriteTemplateFiles(template, workDir) {
|
|
|
468
558
|
const content = ef.gitSource ? await fetchGitHubFile(ef.gitSource, {
|
|
469
559
|
userAgent: "EvalForge-Evaluator"
|
|
470
560
|
}) : ef.content ?? "";
|
|
471
|
-
const dest =
|
|
561
|
+
const dest = path2.resolve(workDir, ef.path);
|
|
472
562
|
if (!dest.startsWith(workDir + sep2)) {
|
|
473
563
|
throw new Error(
|
|
474
564
|
`Extra file path escapes working directory: "${ef.path}"`
|
|
475
565
|
);
|
|
476
566
|
}
|
|
477
|
-
await mkdir2(
|
|
567
|
+
await mkdir2(path2.dirname(dest), { recursive: true });
|
|
478
568
|
await writeFile2(dest, content, "utf8");
|
|
479
569
|
})
|
|
480
570
|
);
|
|
481
571
|
}
|
|
482
572
|
function writeWixEnvFile(workDir) {
|
|
483
|
-
const configPath =
|
|
484
|
-
if (!
|
|
573
|
+
const configPath = path2.join(workDir, "wix.config.json");
|
|
574
|
+
if (!existsSync2(configPath)) {
|
|
485
575
|
return;
|
|
486
576
|
}
|
|
487
577
|
try {
|
|
488
|
-
const config = JSON.parse(
|
|
578
|
+
const config = JSON.parse(readFileSync2(configPath, "utf-8"));
|
|
489
579
|
if (config.appId) {
|
|
490
|
-
|
|
491
|
-
|
|
580
|
+
writeFileSync2(
|
|
581
|
+
path2.join(workDir, ".env"),
|
|
492
582
|
`WIX_CLIENT_ID=${config.appId}
|
|
493
583
|
`,
|
|
494
584
|
"utf-8"
|
|
@@ -500,28 +590,30 @@ function writeWixEnvFile(workDir) {
|
|
|
500
590
|
}
|
|
501
591
|
}
|
|
502
592
|
async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId, template) {
|
|
503
|
-
const baseDir = config.evaluationsDir ??
|
|
593
|
+
const baseDir = config.evaluationsDir ?? path2.join(tmpdir(), "evalforge-evaluations");
|
|
594
|
+
const nodeModulesCacheDir = path2.join(baseDir, "_node_modules_cache");
|
|
504
595
|
if (template) {
|
|
505
596
|
if (!config.evaluationsDir) {
|
|
506
597
|
console.warn(
|
|
507
598
|
"Template specified but EVALUATIONS_DIR not set, using temp directory"
|
|
508
599
|
);
|
|
509
600
|
}
|
|
510
|
-
const workDir2 =
|
|
511
|
-
if (
|
|
601
|
+
const workDir2 = path2.join(baseDir, `${evalRunId2}_${targetId}`);
|
|
602
|
+
if (existsSync2(workDir2)) {
|
|
512
603
|
rmSync(workDir2, { recursive: true });
|
|
513
604
|
}
|
|
514
|
-
|
|
605
|
+
mkdirSync2(workDir2, { recursive: true });
|
|
515
606
|
await fetchAndWriteTemplateFiles(template, workDir2);
|
|
516
607
|
console.log(`Template files written to ${workDir2}`);
|
|
517
608
|
writeWixEnvFile(workDir2);
|
|
609
|
+
await installDependencies(workDir2, void 0, nodeModulesCacheDir);
|
|
518
610
|
return workDir2;
|
|
519
611
|
}
|
|
520
|
-
const workDir =
|
|
521
|
-
if (
|
|
612
|
+
const workDir = path2.join(baseDir, `${evalRunId2}_${targetId}_${scenarioId}`);
|
|
613
|
+
if (existsSync2(workDir)) {
|
|
522
614
|
rmSync(workDir, { recursive: true });
|
|
523
615
|
}
|
|
524
|
-
|
|
616
|
+
mkdirSync2(workDir, { recursive: true });
|
|
525
617
|
console.log(`Empty working directory created at ${workDir}`);
|
|
526
618
|
return workDir;
|
|
527
619
|
}
|
|
@@ -1066,8 +1158,8 @@ function extractToolActionDescription(toolName, toolArgs) {
|
|
|
1066
1158
|
}
|
|
1067
1159
|
}
|
|
1068
1160
|
if (toolName === "LS" || toolName === "ls" || toolName === "ListFiles") {
|
|
1069
|
-
const
|
|
1070
|
-
return `Listing: ${String(
|
|
1161
|
+
const path3 = args.path || args.directory || ".";
|
|
1162
|
+
return `Listing: ${String(path3).slice(0, 50)}`;
|
|
1071
1163
|
}
|
|
1072
1164
|
if ((toolName === "Read" || toolName === "read" || toolName === "View") && (args.file_path || args.path || args.target_file)) {
|
|
1073
1165
|
const filePath = String(
|
|
@@ -4209,7 +4301,7 @@ var simpleAgentAdapter = new SimpleAgentAdapter();
|
|
|
4209
4301
|
defaultRegistry.register(simpleAgentAdapter);
|
|
4210
4302
|
|
|
4211
4303
|
// src/run-scenario/file-diff.ts
|
|
4212
|
-
import { readdirSync, readFileSync as
|
|
4304
|
+
import { readdirSync, readFileSync as readFileSync3, statSync, existsSync as existsSync3 } from "fs";
|
|
4213
4305
|
import { join as join10, relative } from "path";
|
|
4214
4306
|
|
|
4215
4307
|
// ../../node_modules/diff/lib/index.mjs
|
|
@@ -4314,11 +4406,11 @@ Diff.prototype = {
|
|
|
4314
4406
|
}
|
|
4315
4407
|
}
|
|
4316
4408
|
},
|
|
4317
|
-
addToPath: function addToPath(
|
|
4318
|
-
var last =
|
|
4409
|
+
addToPath: function addToPath(path3, added, removed, oldPosInc, options) {
|
|
4410
|
+
var last = path3.lastComponent;
|
|
4319
4411
|
if (last && !options.oneChangePerToken && last.added === added && last.removed === removed) {
|
|
4320
4412
|
return {
|
|
4321
|
-
oldPos:
|
|
4413
|
+
oldPos: path3.oldPos + oldPosInc,
|
|
4322
4414
|
lastComponent: {
|
|
4323
4415
|
count: last.count + 1,
|
|
4324
4416
|
added,
|
|
@@ -4328,7 +4420,7 @@ Diff.prototype = {
|
|
|
4328
4420
|
};
|
|
4329
4421
|
} else {
|
|
4330
4422
|
return {
|
|
4331
|
-
oldPos:
|
|
4423
|
+
oldPos: path3.oldPos + oldPosInc,
|
|
4332
4424
|
lastComponent: {
|
|
4333
4425
|
count: 1,
|
|
4334
4426
|
added,
|
|
@@ -4768,9 +4860,9 @@ arrayDiff.join = arrayDiff.removeEmpty = function(value) {
|
|
|
4768
4860
|
// src/run-scenario/file-diff.ts
|
|
4769
4861
|
function deriveInfrastructurePaths(prePrep, postPrep) {
|
|
4770
4862
|
const infraPaths = /* @__PURE__ */ new Set();
|
|
4771
|
-
for (const
|
|
4772
|
-
if (prePrep[
|
|
4773
|
-
infraPaths.add(
|
|
4863
|
+
for (const path3 of Object.keys(postPrep)) {
|
|
4864
|
+
if (prePrep[path3] === void 0 || prePrep[path3] !== postPrep[path3]) {
|
|
4865
|
+
infraPaths.add(path3);
|
|
4774
4866
|
}
|
|
4775
4867
|
}
|
|
4776
4868
|
return infraPaths;
|
|
@@ -4830,7 +4922,7 @@ function isBinaryFile(filename) {
|
|
|
4830
4922
|
function snapshotDirectory(dir, baseDir) {
|
|
4831
4923
|
const snapshot = {};
|
|
4832
4924
|
const base = baseDir || dir;
|
|
4833
|
-
if (!
|
|
4925
|
+
if (!existsSync3(dir)) {
|
|
4834
4926
|
return snapshot;
|
|
4835
4927
|
}
|
|
4836
4928
|
const entries = readdirSync(dir, { withFileTypes: true });
|
|
@@ -4852,7 +4944,7 @@ function snapshotDirectory(dir, baseDir) {
|
|
|
4852
4944
|
if (stats.size > MAX_FILE_SIZE) {
|
|
4853
4945
|
continue;
|
|
4854
4946
|
}
|
|
4855
|
-
const content =
|
|
4947
|
+
const content = readFileSync3(fullPath, "utf-8");
|
|
4856
4948
|
snapshot[relativePath] = content;
|
|
4857
4949
|
} catch {
|
|
4858
4950
|
continue;
|
|
@@ -4881,19 +4973,19 @@ function generateDiffLines(before, after) {
|
|
|
4881
4973
|
function diffSnapshots(before, after, infrastructurePaths) {
|
|
4882
4974
|
const diffs = [];
|
|
4883
4975
|
const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
|
|
4884
|
-
for (const
|
|
4885
|
-
const beforeContent = before[
|
|
4886
|
-
const afterContent = after[
|
|
4887
|
-
if (before[
|
|
4976
|
+
for (const path3 of allPaths) {
|
|
4977
|
+
const beforeContent = before[path3] ?? "";
|
|
4978
|
+
const afterContent = after[path3] ?? "";
|
|
4979
|
+
if (before[path3] !== void 0 && beforeContent === afterContent) {
|
|
4888
4980
|
continue;
|
|
4889
4981
|
}
|
|
4890
4982
|
const diffLines2 = generateDiffLines(beforeContent, afterContent);
|
|
4891
4983
|
diffs.push({
|
|
4892
|
-
path:
|
|
4984
|
+
path: path3,
|
|
4893
4985
|
expected: beforeContent,
|
|
4894
4986
|
actual: afterContent,
|
|
4895
4987
|
diffLines: diffLines2,
|
|
4896
|
-
...infrastructurePaths?.has(
|
|
4988
|
+
...infrastructurePaths?.has(path3) && { isInfrastructure: true }
|
|
4897
4989
|
});
|
|
4898
4990
|
}
|
|
4899
4991
|
const deletedPaths = [...allPaths].filter((p) => after[p] === void 0);
|
|
@@ -4920,9 +5012,9 @@ function diffSnapshots(before, after, infrastructurePaths) {
|
|
|
4920
5012
|
function extractTemplateFiles(before, after, infrastructurePaths) {
|
|
4921
5013
|
const files = [];
|
|
4922
5014
|
const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
|
|
4923
|
-
for (const
|
|
4924
|
-
const beforeContent = before[
|
|
4925
|
-
const afterContent = after[
|
|
5015
|
+
for (const path3 of allPaths) {
|
|
5016
|
+
const beforeContent = before[path3];
|
|
5017
|
+
const afterContent = after[path3];
|
|
4926
5018
|
if (afterContent === void 0) {
|
|
4927
5019
|
continue;
|
|
4928
5020
|
}
|
|
@@ -4935,10 +5027,10 @@ function extractTemplateFiles(before, after, infrastructurePaths) {
|
|
|
4935
5027
|
status = "unchanged";
|
|
4936
5028
|
}
|
|
4937
5029
|
files.push({
|
|
4938
|
-
path:
|
|
5030
|
+
path: path3,
|
|
4939
5031
|
content: afterContent,
|
|
4940
5032
|
status,
|
|
4941
|
-
...infrastructurePaths?.has(
|
|
5033
|
+
...infrastructurePaths?.has(path3) && { isInfrastructure: true }
|
|
4942
5034
|
});
|
|
4943
5035
|
}
|
|
4944
5036
|
files.sort((a, b) => a.path.localeCompare(b.path));
|
|
@@ -5043,6 +5135,24 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
|
|
|
5043
5135
|
// src/run-scenario/index.ts
|
|
5044
5136
|
async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions) {
|
|
5045
5137
|
const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
|
|
5138
|
+
const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
|
|
5139
|
+
if (template) {
|
|
5140
|
+
console.log(
|
|
5141
|
+
formatTraceEventLine({
|
|
5142
|
+
evalRunId: evalRunId2,
|
|
5143
|
+
scenarioId: scenario.id,
|
|
5144
|
+
scenarioName: scenario.name,
|
|
5145
|
+
targetId,
|
|
5146
|
+
targetName,
|
|
5147
|
+
stepNumber: 0,
|
|
5148
|
+
type: LiveTraceEventType4.PROGRESS,
|
|
5149
|
+
outputPreview: "Setting up environment (installing dependencies)...",
|
|
5150
|
+
elapsedMs: 0,
|
|
5151
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
5152
|
+
isComplete: false
|
|
5153
|
+
})
|
|
5154
|
+
);
|
|
5155
|
+
}
|
|
5046
5156
|
const workDir = await prepareWorkingDirectory(
|
|
5047
5157
|
config,
|
|
5048
5158
|
evalRunId2,
|