@wix/evalforge-evaluator 0.170.0 → 0.172.0
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- package/build/index.js +203 -105
- package/build/index.js.map +4 -4
- package/build/index.mjs +153 -47
- package/build/index.mjs.map +4 -4
- package/build/types/run-scenario/install-dependencies.d.ts +11 -0
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -67,8 +67,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
67
67
|
}
|
|
68
68
|
return headers;
|
|
69
69
|
}
|
|
70
|
-
async function fetchJson(
|
|
71
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
70
|
+
async function fetchJson(path3) {
|
|
71
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
|
|
72
72
|
console.error(`[API] GET ${url}`);
|
|
73
73
|
const headers = buildHeaders();
|
|
74
74
|
const response = await fetch(url, {
|
|
@@ -82,8 +82,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
82
82
|
}
|
|
83
83
|
return response.json();
|
|
84
84
|
}
|
|
85
|
-
async function postJson(
|
|
86
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
85
|
+
async function postJson(path3, body) {
|
|
86
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
|
|
87
87
|
console.error(`[API] POST ${url}`);
|
|
88
88
|
const response = await fetch(url, {
|
|
89
89
|
method: "POST",
|
|
@@ -97,8 +97,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
97
97
|
);
|
|
98
98
|
}
|
|
99
99
|
}
|
|
100
|
-
async function deleteRequest(
|
|
101
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
100
|
+
async function deleteRequest(path3) {
|
|
101
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
|
|
102
102
|
console.error(`[API] DELETE ${url}`);
|
|
103
103
|
const headers = buildHeaders();
|
|
104
104
|
const response = await fetch(url, {
|
|
@@ -112,8 +112,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
112
112
|
);
|
|
113
113
|
}
|
|
114
114
|
}
|
|
115
|
-
async function putJson(
|
|
116
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
115
|
+
async function putJson(path3, body) {
|
|
116
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
|
|
117
117
|
console.error(`[API] PUT ${url}`);
|
|
118
118
|
const response = await fetch(url, {
|
|
119
119
|
method: "PUT",
|
|
@@ -415,17 +415,19 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
|
|
|
415
415
|
// src/run-scenario/index.ts
|
|
416
416
|
import {
|
|
417
417
|
AssertionResultStatus,
|
|
418
|
-
DEFAULT_JUDGE_MODEL
|
|
418
|
+
DEFAULT_JUDGE_MODEL,
|
|
419
|
+
LiveTraceEventType as LiveTraceEventType4,
|
|
420
|
+
formatTraceEventLine
|
|
419
421
|
} from "@wix/evalforge-types";
|
|
420
422
|
import {
|
|
421
423
|
evaluateAssertions as evaluateAssertionsBase
|
|
422
424
|
} from "@wix/eval-assertions";
|
|
423
425
|
|
|
424
426
|
// src/run-scenario/environment.ts
|
|
425
|
-
import { mkdirSync, existsSync, rmSync, readFileSync, writeFileSync } from "fs";
|
|
427
|
+
import { mkdirSync as mkdirSync2, existsSync as existsSync2, rmSync, readFileSync as readFileSync2, writeFileSync } from "fs";
|
|
426
428
|
import { mkdir as mkdir2, writeFile as writeFile2 } from "fs/promises";
|
|
427
429
|
import { tmpdir } from "os";
|
|
428
|
-
import
|
|
430
|
+
import path2, { sep as sep2 } from "path";
|
|
429
431
|
import {
|
|
430
432
|
fetchGitHubFolder,
|
|
431
433
|
fetchGitHubFile
|
|
@@ -448,6 +450,90 @@ async function writeFilesToDirectory(targetDir, files) {
|
|
|
448
450
|
}
|
|
449
451
|
}
|
|
450
452
|
|
|
453
|
+
// src/run-scenario/install-dependencies.ts
|
|
454
|
+
import {
|
|
455
|
+
mkdirSync,
|
|
456
|
+
existsSync,
|
|
457
|
+
readFileSync,
|
|
458
|
+
copyFileSync,
|
|
459
|
+
cpSync
|
|
460
|
+
} from "fs";
|
|
461
|
+
import { createHash } from "crypto";
|
|
462
|
+
import path from "path";
|
|
463
|
+
import { execFileSync } from "child_process";
|
|
464
|
+
function detectPackageManager(workDir) {
|
|
465
|
+
if (existsSync(path.join(workDir, "pnpm-lock.yaml"))) {
|
|
466
|
+
return { cmd: "pnpm", args: ["install", "--frozen-lockfile"], cacheSourceFile: "pnpm-lock.yaml" };
|
|
467
|
+
}
|
|
468
|
+
if (existsSync(path.join(workDir, "package-lock.json"))) {
|
|
469
|
+
return { cmd: "npm", args: ["ci"], cacheSourceFile: "package-lock.json" };
|
|
470
|
+
}
|
|
471
|
+
if (existsSync(path.join(workDir, "yarn.lock"))) {
|
|
472
|
+
return { cmd: "yarn", args: ["install", "--frozen-lockfile"], cacheSourceFile: "yarn.lock" };
|
|
473
|
+
}
|
|
474
|
+
return { cmd: "npm", args: ["install", "--legacy-peer-deps", "--prefer-offline", "--no-fund", "--no-audit"], cacheSourceFile: "package.json" };
|
|
475
|
+
}
|
|
476
|
+
function cloneDirectory(src, dest) {
|
|
477
|
+
if (process.platform === "darwin") {
|
|
478
|
+
execFileSync("cp", ["-rc", src, dest]);
|
|
479
|
+
} else {
|
|
480
|
+
cpSync(src, dest, { recursive: true });
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
function installWithCache(workDir, exec, cacheBase, pm) {
|
|
484
|
+
const sourceContent = readFileSync(path.join(workDir, pm.cacheSourceFile), "utf-8");
|
|
485
|
+
const cacheKey = createHash("sha256").update(sourceContent).digest("hex").slice(0, 16);
|
|
486
|
+
const cachedNodeModules = path.join(cacheBase, cacheKey, "node_modules");
|
|
487
|
+
const targetNodeModules = path.join(workDir, "node_modules");
|
|
488
|
+
const cacheDir = path.dirname(cachedNodeModules);
|
|
489
|
+
const cachedYarnLock = path.join(cacheDir, "yarn.lock");
|
|
490
|
+
if (existsSync(cachedNodeModules)) {
|
|
491
|
+
console.log(`[environment] Restoring node_modules from cache (key: ${cacheKey})`);
|
|
492
|
+
if (!existsSync(targetNodeModules)) {
|
|
493
|
+
cloneDirectory(cachedNodeModules, targetNodeModules);
|
|
494
|
+
}
|
|
495
|
+
if (existsSync(cachedYarnLock)) {
|
|
496
|
+
copyFileSync(cachedYarnLock, path.join(workDir, "yarn.lock"));
|
|
497
|
+
}
|
|
498
|
+
return;
|
|
499
|
+
}
|
|
500
|
+
console.log(`[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir} (cache key: ${cacheKey})`);
|
|
501
|
+
try {
|
|
502
|
+
exec(pm.cmd, pm.args, { cwd: workDir, stdio: "inherit", timeout: 18e4, env: { ...process.env, NODE_ENV: "development" } });
|
|
503
|
+
} catch (err) {
|
|
504
|
+
console.error("[environment] Dependency installation failed:", err instanceof Error ? err.message : String(err));
|
|
505
|
+
return;
|
|
506
|
+
}
|
|
507
|
+
console.log("[environment] Dependency installation complete \u2014 saving to cache");
|
|
508
|
+
try {
|
|
509
|
+
mkdirSync(cacheDir, { recursive: true });
|
|
510
|
+
const yarnLockPath = path.join(workDir, "yarn.lock");
|
|
511
|
+
if (existsSync(yarnLockPath)) {
|
|
512
|
+
copyFileSync(yarnLockPath, cachedYarnLock);
|
|
513
|
+
}
|
|
514
|
+
cloneDirectory(targetNodeModules, cachedNodeModules);
|
|
515
|
+
} catch (err) {
|
|
516
|
+
console.error("[environment] Failed to save to cache (installation still succeeded):", err instanceof Error ? err.message : String(err));
|
|
517
|
+
}
|
|
518
|
+
}
|
|
519
|
+
async function installDependencies(workDir, exec = execFileSync, cacheBase) {
|
|
520
|
+
if (!existsSync(path.join(workDir, "package.json"))) {
|
|
521
|
+
return;
|
|
522
|
+
}
|
|
523
|
+
const pm = detectPackageManager(workDir);
|
|
524
|
+
if (cacheBase) {
|
|
525
|
+
installWithCache(workDir, exec, cacheBase, pm);
|
|
526
|
+
return;
|
|
527
|
+
}
|
|
528
|
+
console.log(`[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir}`);
|
|
529
|
+
try {
|
|
530
|
+
exec(pm.cmd, pm.args, { cwd: workDir, stdio: "inherit", timeout: 18e4, env: { ...process.env, NODE_ENV: "development" } });
|
|
531
|
+
console.log("[environment] Dependency installation complete");
|
|
532
|
+
} catch (err) {
|
|
533
|
+
console.error("[environment] Dependency installation failed:", err instanceof Error ? err.message : String(err));
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
|
|
451
537
|
// src/run-scenario/environment.ts
|
|
452
538
|
async function fetchAndWriteTemplateFiles(template, workDir) {
|
|
453
539
|
let sourceFiles = [];
|
|
@@ -468,27 +554,27 @@ async function fetchAndWriteTemplateFiles(template, workDir) {
|
|
|
468
554
|
const content = ef.gitSource ? await fetchGitHubFile(ef.gitSource, {
|
|
469
555
|
userAgent: "EvalForge-Evaluator"
|
|
470
556
|
}) : ef.content ?? "";
|
|
471
|
-
const dest =
|
|
557
|
+
const dest = path2.resolve(workDir, ef.path);
|
|
472
558
|
if (!dest.startsWith(workDir + sep2)) {
|
|
473
559
|
throw new Error(
|
|
474
560
|
`Extra file path escapes working directory: "${ef.path}"`
|
|
475
561
|
);
|
|
476
562
|
}
|
|
477
|
-
await mkdir2(
|
|
563
|
+
await mkdir2(path2.dirname(dest), { recursive: true });
|
|
478
564
|
await writeFile2(dest, content, "utf8");
|
|
479
565
|
})
|
|
480
566
|
);
|
|
481
567
|
}
|
|
482
568
|
function writeWixEnvFile(workDir) {
|
|
483
|
-
const configPath =
|
|
484
|
-
if (!
|
|
569
|
+
const configPath = path2.join(workDir, "wix.config.json");
|
|
570
|
+
if (!existsSync2(configPath)) {
|
|
485
571
|
return;
|
|
486
572
|
}
|
|
487
573
|
try {
|
|
488
|
-
const config = JSON.parse(
|
|
574
|
+
const config = JSON.parse(readFileSync2(configPath, "utf-8"));
|
|
489
575
|
if (config.appId) {
|
|
490
576
|
writeFileSync(
|
|
491
|
-
|
|
577
|
+
path2.join(workDir, ".env"),
|
|
492
578
|
`WIX_CLIENT_ID=${config.appId}
|
|
493
579
|
`,
|
|
494
580
|
"utf-8"
|
|
@@ -500,28 +586,30 @@ function writeWixEnvFile(workDir) {
|
|
|
500
586
|
}
|
|
501
587
|
}
|
|
502
588
|
async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId, template) {
|
|
503
|
-
const baseDir = config.evaluationsDir ??
|
|
589
|
+
const baseDir = config.evaluationsDir ?? path2.join(tmpdir(), "evalforge-evaluations");
|
|
590
|
+
const nodeModulesCacheDir = path2.join(baseDir, "_node_modules_cache");
|
|
504
591
|
if (template) {
|
|
505
592
|
if (!config.evaluationsDir) {
|
|
506
593
|
console.warn(
|
|
507
594
|
"Template specified but EVALUATIONS_DIR not set, using temp directory"
|
|
508
595
|
);
|
|
509
596
|
}
|
|
510
|
-
const workDir2 =
|
|
511
|
-
if (
|
|
597
|
+
const workDir2 = path2.join(baseDir, `${evalRunId2}_${targetId}`);
|
|
598
|
+
if (existsSync2(workDir2)) {
|
|
512
599
|
rmSync(workDir2, { recursive: true });
|
|
513
600
|
}
|
|
514
|
-
|
|
601
|
+
mkdirSync2(workDir2, { recursive: true });
|
|
515
602
|
await fetchAndWriteTemplateFiles(template, workDir2);
|
|
516
603
|
console.log(`Template files written to ${workDir2}`);
|
|
517
604
|
writeWixEnvFile(workDir2);
|
|
605
|
+
await installDependencies(workDir2, void 0, nodeModulesCacheDir);
|
|
518
606
|
return workDir2;
|
|
519
607
|
}
|
|
520
|
-
const workDir =
|
|
521
|
-
if (
|
|
608
|
+
const workDir = path2.join(baseDir, `${evalRunId2}_${targetId}_${scenarioId}`);
|
|
609
|
+
if (existsSync2(workDir)) {
|
|
522
610
|
rmSync(workDir, { recursive: true });
|
|
523
611
|
}
|
|
524
|
-
|
|
612
|
+
mkdirSync2(workDir, { recursive: true });
|
|
525
613
|
console.log(`Empty working directory created at ${workDir}`);
|
|
526
614
|
return workDir;
|
|
527
615
|
}
|
|
@@ -1066,8 +1154,8 @@ function extractToolActionDescription(toolName, toolArgs) {
|
|
|
1066
1154
|
}
|
|
1067
1155
|
}
|
|
1068
1156
|
if (toolName === "LS" || toolName === "ls" || toolName === "ListFiles") {
|
|
1069
|
-
const
|
|
1070
|
-
return `Listing: ${String(
|
|
1157
|
+
const path3 = args.path || args.directory || ".";
|
|
1158
|
+
return `Listing: ${String(path3).slice(0, 50)}`;
|
|
1071
1159
|
}
|
|
1072
1160
|
if ((toolName === "Read" || toolName === "read" || toolName === "View") && (args.file_path || args.path || args.target_file)) {
|
|
1073
1161
|
const filePath = String(
|
|
@@ -4209,7 +4297,7 @@ var simpleAgentAdapter = new SimpleAgentAdapter();
|
|
|
4209
4297
|
defaultRegistry.register(simpleAgentAdapter);
|
|
4210
4298
|
|
|
4211
4299
|
// src/run-scenario/file-diff.ts
|
|
4212
|
-
import { readdirSync, readFileSync as
|
|
4300
|
+
import { readdirSync, readFileSync as readFileSync3, statSync, existsSync as existsSync3 } from "fs";
|
|
4213
4301
|
import { join as join10, relative } from "path";
|
|
4214
4302
|
|
|
4215
4303
|
// ../../node_modules/diff/lib/index.mjs
|
|
@@ -4314,11 +4402,11 @@ Diff.prototype = {
|
|
|
4314
4402
|
}
|
|
4315
4403
|
}
|
|
4316
4404
|
},
|
|
4317
|
-
addToPath: function addToPath(
|
|
4318
|
-
var last =
|
|
4405
|
+
addToPath: function addToPath(path3, added, removed, oldPosInc, options) {
|
|
4406
|
+
var last = path3.lastComponent;
|
|
4319
4407
|
if (last && !options.oneChangePerToken && last.added === added && last.removed === removed) {
|
|
4320
4408
|
return {
|
|
4321
|
-
oldPos:
|
|
4409
|
+
oldPos: path3.oldPos + oldPosInc,
|
|
4322
4410
|
lastComponent: {
|
|
4323
4411
|
count: last.count + 1,
|
|
4324
4412
|
added,
|
|
@@ -4328,7 +4416,7 @@ Diff.prototype = {
|
|
|
4328
4416
|
};
|
|
4329
4417
|
} else {
|
|
4330
4418
|
return {
|
|
4331
|
-
oldPos:
|
|
4419
|
+
oldPos: path3.oldPos + oldPosInc,
|
|
4332
4420
|
lastComponent: {
|
|
4333
4421
|
count: 1,
|
|
4334
4422
|
added,
|
|
@@ -4768,9 +4856,9 @@ arrayDiff.join = arrayDiff.removeEmpty = function(value) {
|
|
|
4768
4856
|
// src/run-scenario/file-diff.ts
|
|
4769
4857
|
function deriveInfrastructurePaths(prePrep, postPrep) {
|
|
4770
4858
|
const infraPaths = /* @__PURE__ */ new Set();
|
|
4771
|
-
for (const
|
|
4772
|
-
if (prePrep[
|
|
4773
|
-
infraPaths.add(
|
|
4859
|
+
for (const path3 of Object.keys(postPrep)) {
|
|
4860
|
+
if (prePrep[path3] === void 0 || prePrep[path3] !== postPrep[path3]) {
|
|
4861
|
+
infraPaths.add(path3);
|
|
4774
4862
|
}
|
|
4775
4863
|
}
|
|
4776
4864
|
return infraPaths;
|
|
@@ -4830,7 +4918,7 @@ function isBinaryFile(filename) {
|
|
|
4830
4918
|
function snapshotDirectory(dir, baseDir) {
|
|
4831
4919
|
const snapshot = {};
|
|
4832
4920
|
const base = baseDir || dir;
|
|
4833
|
-
if (!
|
|
4921
|
+
if (!existsSync3(dir)) {
|
|
4834
4922
|
return snapshot;
|
|
4835
4923
|
}
|
|
4836
4924
|
const entries = readdirSync(dir, { withFileTypes: true });
|
|
@@ -4852,7 +4940,7 @@ function snapshotDirectory(dir, baseDir) {
|
|
|
4852
4940
|
if (stats.size > MAX_FILE_SIZE) {
|
|
4853
4941
|
continue;
|
|
4854
4942
|
}
|
|
4855
|
-
const content =
|
|
4943
|
+
const content = readFileSync3(fullPath, "utf-8");
|
|
4856
4944
|
snapshot[relativePath] = content;
|
|
4857
4945
|
} catch {
|
|
4858
4946
|
continue;
|
|
@@ -4881,19 +4969,19 @@ function generateDiffLines(before, after) {
|
|
|
4881
4969
|
function diffSnapshots(before, after, infrastructurePaths) {
|
|
4882
4970
|
const diffs = [];
|
|
4883
4971
|
const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
|
|
4884
|
-
for (const
|
|
4885
|
-
const beforeContent = before[
|
|
4886
|
-
const afterContent = after[
|
|
4887
|
-
if (before[
|
|
4972
|
+
for (const path3 of allPaths) {
|
|
4973
|
+
const beforeContent = before[path3] ?? "";
|
|
4974
|
+
const afterContent = after[path3] ?? "";
|
|
4975
|
+
if (before[path3] !== void 0 && beforeContent === afterContent) {
|
|
4888
4976
|
continue;
|
|
4889
4977
|
}
|
|
4890
4978
|
const diffLines2 = generateDiffLines(beforeContent, afterContent);
|
|
4891
4979
|
diffs.push({
|
|
4892
|
-
path:
|
|
4980
|
+
path: path3,
|
|
4893
4981
|
expected: beforeContent,
|
|
4894
4982
|
actual: afterContent,
|
|
4895
4983
|
diffLines: diffLines2,
|
|
4896
|
-
...infrastructurePaths?.has(
|
|
4984
|
+
...infrastructurePaths?.has(path3) && { isInfrastructure: true }
|
|
4897
4985
|
});
|
|
4898
4986
|
}
|
|
4899
4987
|
const deletedPaths = [...allPaths].filter((p) => after[p] === void 0);
|
|
@@ -4920,9 +5008,9 @@ function diffSnapshots(before, after, infrastructurePaths) {
|
|
|
4920
5008
|
function extractTemplateFiles(before, after, infrastructurePaths) {
|
|
4921
5009
|
const files = [];
|
|
4922
5010
|
const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
|
|
4923
|
-
for (const
|
|
4924
|
-
const beforeContent = before[
|
|
4925
|
-
const afterContent = after[
|
|
5011
|
+
for (const path3 of allPaths) {
|
|
5012
|
+
const beforeContent = before[path3];
|
|
5013
|
+
const afterContent = after[path3];
|
|
4926
5014
|
if (afterContent === void 0) {
|
|
4927
5015
|
continue;
|
|
4928
5016
|
}
|
|
@@ -4935,10 +5023,10 @@ function extractTemplateFiles(before, after, infrastructurePaths) {
|
|
|
4935
5023
|
status = "unchanged";
|
|
4936
5024
|
}
|
|
4937
5025
|
files.push({
|
|
4938
|
-
path:
|
|
5026
|
+
path: path3,
|
|
4939
5027
|
content: afterContent,
|
|
4940
5028
|
status,
|
|
4941
|
-
...infrastructurePaths?.has(
|
|
5029
|
+
...infrastructurePaths?.has(path3) && { isInfrastructure: true }
|
|
4942
5030
|
});
|
|
4943
5031
|
}
|
|
4944
5032
|
files.sort((a, b) => a.path.localeCompare(b.path));
|
|
@@ -5043,6 +5131,24 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
|
|
|
5043
5131
|
// src/run-scenario/index.ts
|
|
5044
5132
|
async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions) {
|
|
5045
5133
|
const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
|
|
5134
|
+
const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
|
|
5135
|
+
if (template) {
|
|
5136
|
+
console.log(
|
|
5137
|
+
formatTraceEventLine({
|
|
5138
|
+
evalRunId: evalRunId2,
|
|
5139
|
+
scenarioId: scenario.id,
|
|
5140
|
+
scenarioName: scenario.name,
|
|
5141
|
+
targetId,
|
|
5142
|
+
targetName,
|
|
5143
|
+
stepNumber: 0,
|
|
5144
|
+
type: LiveTraceEventType4.PROGRESS,
|
|
5145
|
+
outputPreview: "Setting up environment (installing dependencies)...",
|
|
5146
|
+
elapsedMs: 0,
|
|
5147
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
5148
|
+
isComplete: false
|
|
5149
|
+
})
|
|
5150
|
+
);
|
|
5151
|
+
}
|
|
5046
5152
|
const workDir = await prepareWorkingDirectory(
|
|
5047
5153
|
config,
|
|
5048
5154
|
evalRunId2,
|