@wix/evalforge-evaluator 0.170.0 → 0.171.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -67,8 +67,8 @@ function createApiClient(serverUrl, options = "") {
67
67
  }
68
68
  return headers;
69
69
  }
70
- async function fetchJson(path2) {
71
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path2}`;
70
+ async function fetchJson(path3) {
71
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
72
72
  console.error(`[API] GET ${url}`);
73
73
  const headers = buildHeaders();
74
74
  const response = await fetch(url, {
@@ -82,8 +82,8 @@ function createApiClient(serverUrl, options = "") {
82
82
  }
83
83
  return response.json();
84
84
  }
85
- async function postJson(path2, body) {
86
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path2}`;
85
+ async function postJson(path3, body) {
86
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
87
87
  console.error(`[API] POST ${url}`);
88
88
  const response = await fetch(url, {
89
89
  method: "POST",
@@ -97,8 +97,8 @@ function createApiClient(serverUrl, options = "") {
97
97
  );
98
98
  }
99
99
  }
100
- async function deleteRequest(path2) {
101
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path2}`;
100
+ async function deleteRequest(path3) {
101
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
102
102
  console.error(`[API] DELETE ${url}`);
103
103
  const headers = buildHeaders();
104
104
  const response = await fetch(url, {
@@ -112,8 +112,8 @@ function createApiClient(serverUrl, options = "") {
112
112
  );
113
113
  }
114
114
  }
115
- async function putJson(path2, body) {
116
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path2}`;
115
+ async function putJson(path3, body) {
116
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
117
117
  console.error(`[API] PUT ${url}`);
118
118
  const response = await fetch(url, {
119
119
  method: "PUT",
@@ -415,17 +415,19 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
415
415
  // src/run-scenario/index.ts
416
416
  import {
417
417
  AssertionResultStatus,
418
- DEFAULT_JUDGE_MODEL
418
+ DEFAULT_JUDGE_MODEL,
419
+ LiveTraceEventType as LiveTraceEventType4,
420
+ formatTraceEventLine
419
421
  } from "@wix/evalforge-types";
420
422
  import {
421
423
  evaluateAssertions as evaluateAssertionsBase
422
424
  } from "@wix/eval-assertions";
423
425
 
424
426
  // src/run-scenario/environment.ts
425
- import { mkdirSync, existsSync, rmSync, readFileSync, writeFileSync } from "fs";
427
+ import { mkdirSync as mkdirSync2, existsSync as existsSync2, rmSync, readFileSync as readFileSync2, writeFileSync as writeFileSync2 } from "fs";
426
428
  import { mkdir as mkdir2, writeFile as writeFile2 } from "fs/promises";
427
429
  import { tmpdir } from "os";
428
- import path, { sep as sep2 } from "path";
430
+ import path2, { sep as sep2 } from "path";
429
431
  import {
430
432
  fetchGitHubFolder,
431
433
  fetchGitHubFile
@@ -448,6 +450,94 @@ async function writeFilesToDirectory(targetDir, files) {
448
450
  }
449
451
  }
450
452
 
453
+ // src/run-scenario/install-dependencies.ts
454
+ import {
455
+ mkdirSync,
456
+ existsSync,
457
+ readFileSync,
458
+ writeFileSync,
459
+ copyFileSync,
460
+ cpSync,
461
+ renameSync
462
+ } from "fs";
463
+ import { createHash } from "crypto";
464
+ import path from "path";
465
+ import { execFileSync } from "child_process";
466
/**
 * Chooses the install command for `workDir` from the lockfile found there.
 *
 * Lockfiles are probed in priority order; the first one that exists wins:
 *   1. pnpm-lock.yaml      -> `pnpm install --frozen-lockfile`
 *   2. package-lock.json   -> `npm ci`
 *   3. yarn.lock           -> `yarn install --frozen-lockfile`
 *   4. npm-shrinkwrap.json -> `npm ci`
 *
 * npm-shrinkwrap.json is probed last so that every combination of the first
 * three lockfiles resolves exactly as before; it only changes the outcome for
 * projects that ship a shrinkwrap and nothing else, which previously fell
 * through to an unpinned `yarn install`.
 *
 * When no lockfile exists, an empty `yarn.lock` is written into `workDir` and a
 * plain `yarn install` is returned, with `package.json` as the cache source.
 *
 * @param {string} workDir - Directory that contains the project's package.json.
 * @returns {{ cmd: string, args: string[], cacheSourceFile: string }}
 *   Executable, its arguments, and the file (relative to `workDir`) whose
 *   content identifies the dependency set for caching.
 */
function detectPackageManager(workDir) {
  // Built per call so callers always receive a fresh, unshared result object.
  const lockfileStrategies = [
    { cmd: "pnpm", args: ["install", "--frozen-lockfile"], cacheSourceFile: "pnpm-lock.yaml" },
    { cmd: "npm", args: ["ci"], cacheSourceFile: "package-lock.json" },
    { cmd: "yarn", args: ["install", "--frozen-lockfile"], cacheSourceFile: "yarn.lock" },
    { cmd: "npm", args: ["ci"], cacheSourceFile: "npm-shrinkwrap.json" }
  ];
  const detected = lockfileStrategies.find(
    ({ cacheSourceFile }) => existsSync(path.join(workDir, cacheSourceFile))
  );
  if (detected) {
    return detected;
  }
  // NOTE(review): the empty yarn.lock presumably makes yarn treat workDir as its
  // own project root instead of walking up to a parent project — confirm.
  writeFileSync(path.join(workDir, "yarn.lock"), "", "utf-8");
  return { cmd: "yarn", args: ["install"], cacheSourceFile: "package.json" };
}
479
/**
 * Recursively copies the directory tree at `src` to `dest`, keeping symbolic
 * links as links with their original (possibly relative) targets.
 *
 * On macOS the copy is delegated to `cp -Rc`, where `-c` asks cp to use
 * clone(2) for file data. `-R` is used rather than `-r` because BSD cp
 * documents `-r` as discouraged and implementation-dependent (it may follow
 * symlinks instead of copying them). On every other platform `fs.cpSync` is
 * used with `verbatimSymlinks: true`; without it, relative links are rewritten
 * to absolute paths that point back into `src`.
 *
 * @param {string} src - Existing directory to copy.
 * @param {string} dest - Destination path.
 * @returns {void}
 * @throws if the `cp` process fails or `cpSync` raises.
 */
function cloneDirectory(src, dest) {
  if (process.platform === "darwin") {
    execFileSync("cp", ["-Rc", src, dest]);
    return;
  }
  cpSync(src, dest, { recursive: true, verbatimSymlinks: true });
}
486
/**
 * Installs dependencies for `workDir`, reusing a previously cached
 * `node_modules` tree when one exists for the same dependency description.
 *
 * The cache key is the first 16 hex characters of the SHA-256 of the content of
 * `pm.cacheSourceFile`; the cache entry lives at `<cacheBase>/<key>/` and holds
 * `node_modules` plus, when present, the `yarn.lock` that was in `workDir`
 * after the install.
 *
 * Cache hit: `node_modules` is cloned into `workDir` (unless one is already
 * there) and the cached `yarn.lock`, if any, is copied over.
 * Cache miss: the package manager is run; on success the resulting
 * `node_modules` is copied into the cache. A failed install or a failed cache
 * save is logged and swallowed.
 *
 * @param {string} workDir - Project directory containing `pm.cacheSourceFile`.
 * @param {Function} exec - Called as `exec(cmd, args, options)` to run the
 *   package manager (same call shape as `child_process.execFileSync`).
 * @param {string} cacheBase - Root directory of the node_modules cache.
 * @param {{ cmd: string, args: string[], cacheSourceFile: string }} pm
 * @returns {void}
 * @throws if `pm.cacheSourceFile` cannot be read, or if restoring from the
 *   cache fails.
 */
function installWithCache(workDir, exec, cacheBase, pm) {
  const sourceContent = readFileSync(path.join(workDir, pm.cacheSourceFile), "utf-8");
  const cacheKey = createHash("sha256").update(sourceContent).digest("hex").slice(0, 16);
  const cacheDir = path.join(cacheBase, cacheKey);
  const cachedNodeModules = path.join(cacheDir, "node_modules");
  const cachedYarnLock = path.join(cacheDir, "yarn.lock");
  const targetNodeModules = path.join(workDir, "node_modules");

  if (existsSync(cachedNodeModules)) {
    console.log(`[environment] Restoring node_modules from cache (key: ${cacheKey})`);
    if (!existsSync(targetNodeModules)) {
      cloneDirectory(cachedNodeModules, targetNodeModules);
    }
    if (existsSync(cachedYarnLock)) {
      copyFileSync(cachedYarnLock, path.join(workDir, "yarn.lock"));
    }
    return;
  }

  console.log(`[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir} (cache key: ${cacheKey})`);
  try {
    // 18e4 ms = 180 000 ms (3 minutes).
    exec(pm.cmd, pm.args, { cwd: workDir, stdio: "inherit", timeout: 18e4 });
  } catch (err) {
    console.error("[environment] Dependency installation failed:", err instanceof Error ? err.message : String(err));
    return;
  }

  console.log("[environment] Dependency installation complete \u2014 saving to cache");
  // Copy into a staging directory next to the final cache location and publish
  // it with a single rename. The freshly installed node_modules is never moved
  // out of workDir, so a failure while saving cannot leave the project without
  // its dependencies, and readers never observe a half-written cache entry.
  const stagingDir = `${cachedNodeModules}.tmp-${process.pid}-${Date.now()}`;
  try {
    mkdirSync(cacheDir, { recursive: true });
    const yarnLockPath = path.join(workDir, "yarn.lock");
    if (existsSync(yarnLockPath)) {
      copyFileSync(yarnLockPath, cachedYarnLock);
    }
    cloneDirectory(targetNodeModules, stagingDir);
    renameSync(stagingDir, cachedNodeModules);
  } catch (err) {
    try {
      rmSync(stagingDir, { recursive: true, force: true });
    } catch {
      // Best-effort cleanup; the original error reported below is what matters.
    }
    console.error("[environment] Failed to save to cache (installation still succeeded):", err instanceof Error ? err.message : String(err));
  }
}
523
/**
 * Installs the dependencies of the project in `workDir`, if it has a
 * `package.json`.
 *
 * With `cacheBase` set, the work is delegated to `installWithCache`; otherwise
 * the detected package manager is run directly. A failing install in the
 * direct path is logged and not rethrown.
 *
 * @param {string} workDir - Project directory.
 * @param {Function} [exec=execFileSync] - Called as `exec(cmd, args, options)`
 *   to run the package manager.
 * @param {string} [cacheBase] - Root of the node_modules cache; falsy disables
 *   caching.
 * @returns {Promise<void>}
 */
async function installDependencies(workDir, exec = execFileSync, cacheBase) {
  const manifestPath = path.join(workDir, "package.json");
  if (!existsSync(manifestPath)) {
    return;
  }

  const pm = detectPackageManager(workDir);

  if (cacheBase) {
    installWithCache(workDir, exec, cacheBase, pm);
  } else {
    console.log(`[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir}`);
    try {
      // 18e4 ms = 180 000 ms (3 minutes).
      const runOptions = { cwd: workDir, stdio: "inherit", timeout: 18e4 };
      exec(pm.cmd, pm.args, runOptions);
      console.log("[environment] Dependency installation complete");
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      console.error("[environment] Dependency installation failed:", reason);
    }
  }
}
540
+
451
541
  // src/run-scenario/environment.ts
452
542
  async function fetchAndWriteTemplateFiles(template, workDir) {
453
543
  let sourceFiles = [];
@@ -468,27 +558,27 @@ async function fetchAndWriteTemplateFiles(template, workDir) {
468
558
  const content = ef.gitSource ? await fetchGitHubFile(ef.gitSource, {
469
559
  userAgent: "EvalForge-Evaluator"
470
560
  }) : ef.content ?? "";
471
- const dest = path.resolve(workDir, ef.path);
561
+ const dest = path2.resolve(workDir, ef.path);
472
562
  if (!dest.startsWith(workDir + sep2)) {
473
563
  throw new Error(
474
564
  `Extra file path escapes working directory: "${ef.path}"`
475
565
  );
476
566
  }
477
- await mkdir2(path.dirname(dest), { recursive: true });
567
+ await mkdir2(path2.dirname(dest), { recursive: true });
478
568
  await writeFile2(dest, content, "utf8");
479
569
  })
480
570
  );
481
571
  }
482
572
  function writeWixEnvFile(workDir) {
483
- const configPath = path.join(workDir, "wix.config.json");
484
- if (!existsSync(configPath)) {
573
+ const configPath = path2.join(workDir, "wix.config.json");
574
+ if (!existsSync2(configPath)) {
485
575
  return;
486
576
  }
487
577
  try {
488
- const config = JSON.parse(readFileSync(configPath, "utf-8"));
578
+ const config = JSON.parse(readFileSync2(configPath, "utf-8"));
489
579
  if (config.appId) {
490
- writeFileSync(
491
- path.join(workDir, ".env"),
580
+ writeFileSync2(
581
+ path2.join(workDir, ".env"),
492
582
  `WIX_CLIENT_ID=${config.appId}
493
583
  `,
494
584
  "utf-8"
@@ -500,28 +590,30 @@ function writeWixEnvFile(workDir) {
500
590
  }
501
591
  }
502
592
  async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId, template) {
503
- const baseDir = config.evaluationsDir ?? path.join(tmpdir(), "evalforge-evaluations");
593
+ const baseDir = config.evaluationsDir ?? path2.join(tmpdir(), "evalforge-evaluations");
594
+ const nodeModulesCacheDir = path2.join(baseDir, "_node_modules_cache");
504
595
  if (template) {
505
596
  if (!config.evaluationsDir) {
506
597
  console.warn(
507
598
  "Template specified but EVALUATIONS_DIR not set, using temp directory"
508
599
  );
509
600
  }
510
- const workDir2 = path.join(baseDir, `${evalRunId2}_${targetId}`);
511
- if (existsSync(workDir2)) {
601
+ const workDir2 = path2.join(baseDir, `${evalRunId2}_${targetId}`);
602
+ if (existsSync2(workDir2)) {
512
603
  rmSync(workDir2, { recursive: true });
513
604
  }
514
- mkdirSync(workDir2, { recursive: true });
605
+ mkdirSync2(workDir2, { recursive: true });
515
606
  await fetchAndWriteTemplateFiles(template, workDir2);
516
607
  console.log(`Template files written to ${workDir2}`);
517
608
  writeWixEnvFile(workDir2);
609
+ await installDependencies(workDir2, void 0, nodeModulesCacheDir);
518
610
  return workDir2;
519
611
  }
520
- const workDir = path.join(baseDir, `${evalRunId2}_${targetId}_${scenarioId}`);
521
- if (existsSync(workDir)) {
612
+ const workDir = path2.join(baseDir, `${evalRunId2}_${targetId}_${scenarioId}`);
613
+ if (existsSync2(workDir)) {
522
614
  rmSync(workDir, { recursive: true });
523
615
  }
524
- mkdirSync(workDir, { recursive: true });
616
+ mkdirSync2(workDir, { recursive: true });
525
617
  console.log(`Empty working directory created at ${workDir}`);
526
618
  return workDir;
527
619
  }
@@ -1066,8 +1158,8 @@ function extractToolActionDescription(toolName, toolArgs) {
1066
1158
  }
1067
1159
  }
1068
1160
  if (toolName === "LS" || toolName === "ls" || toolName === "ListFiles") {
1069
- const path2 = args.path || args.directory || ".";
1070
- return `Listing: ${String(path2).slice(0, 50)}`;
1161
+ const path3 = args.path || args.directory || ".";
1162
+ return `Listing: ${String(path3).slice(0, 50)}`;
1071
1163
  }
1072
1164
  if ((toolName === "Read" || toolName === "read" || toolName === "View") && (args.file_path || args.path || args.target_file)) {
1073
1165
  const filePath = String(
@@ -4209,7 +4301,7 @@ var simpleAgentAdapter = new SimpleAgentAdapter();
4209
4301
  defaultRegistry.register(simpleAgentAdapter);
4210
4302
 
4211
4303
  // src/run-scenario/file-diff.ts
4212
- import { readdirSync, readFileSync as readFileSync2, statSync, existsSync as existsSync2 } from "fs";
4304
+ import { readdirSync, readFileSync as readFileSync3, statSync, existsSync as existsSync3 } from "fs";
4213
4305
  import { join as join10, relative } from "path";
4214
4306
 
4215
4307
  // ../../node_modules/diff/lib/index.mjs
@@ -4314,11 +4406,11 @@ Diff.prototype = {
4314
4406
  }
4315
4407
  }
4316
4408
  },
4317
- addToPath: function addToPath(path2, added, removed, oldPosInc, options) {
4318
- var last = path2.lastComponent;
4409
+ addToPath: function addToPath(path3, added, removed, oldPosInc, options) {
4410
+ var last = path3.lastComponent;
4319
4411
  if (last && !options.oneChangePerToken && last.added === added && last.removed === removed) {
4320
4412
  return {
4321
- oldPos: path2.oldPos + oldPosInc,
4413
+ oldPos: path3.oldPos + oldPosInc,
4322
4414
  lastComponent: {
4323
4415
  count: last.count + 1,
4324
4416
  added,
@@ -4328,7 +4420,7 @@ Diff.prototype = {
4328
4420
  };
4329
4421
  } else {
4330
4422
  return {
4331
- oldPos: path2.oldPos + oldPosInc,
4423
+ oldPos: path3.oldPos + oldPosInc,
4332
4424
  lastComponent: {
4333
4425
  count: 1,
4334
4426
  added,
@@ -4768,9 +4860,9 @@ arrayDiff.join = arrayDiff.removeEmpty = function(value) {
4768
4860
  // src/run-scenario/file-diff.ts
4769
4861
  function deriveInfrastructurePaths(prePrep, postPrep) {
4770
4862
  const infraPaths = /* @__PURE__ */ new Set();
4771
- for (const path2 of Object.keys(postPrep)) {
4772
- if (prePrep[path2] === void 0 || prePrep[path2] !== postPrep[path2]) {
4773
- infraPaths.add(path2);
4863
+ for (const path3 of Object.keys(postPrep)) {
4864
+ if (prePrep[path3] === void 0 || prePrep[path3] !== postPrep[path3]) {
4865
+ infraPaths.add(path3);
4774
4866
  }
4775
4867
  }
4776
4868
  return infraPaths;
@@ -4830,7 +4922,7 @@ function isBinaryFile(filename) {
4830
4922
  function snapshotDirectory(dir, baseDir) {
4831
4923
  const snapshot = {};
4832
4924
  const base = baseDir || dir;
4833
- if (!existsSync2(dir)) {
4925
+ if (!existsSync3(dir)) {
4834
4926
  return snapshot;
4835
4927
  }
4836
4928
  const entries = readdirSync(dir, { withFileTypes: true });
@@ -4852,7 +4944,7 @@ function snapshotDirectory(dir, baseDir) {
4852
4944
  if (stats.size > MAX_FILE_SIZE) {
4853
4945
  continue;
4854
4946
  }
4855
- const content = readFileSync2(fullPath, "utf-8");
4947
+ const content = readFileSync3(fullPath, "utf-8");
4856
4948
  snapshot[relativePath] = content;
4857
4949
  } catch {
4858
4950
  continue;
@@ -4881,19 +4973,19 @@ function generateDiffLines(before, after) {
4881
4973
  function diffSnapshots(before, after, infrastructurePaths) {
4882
4974
  const diffs = [];
4883
4975
  const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
4884
- for (const path2 of allPaths) {
4885
- const beforeContent = before[path2] ?? "";
4886
- const afterContent = after[path2] ?? "";
4887
- if (before[path2] !== void 0 && beforeContent === afterContent) {
4976
+ for (const path3 of allPaths) {
4977
+ const beforeContent = before[path3] ?? "";
4978
+ const afterContent = after[path3] ?? "";
4979
+ if (before[path3] !== void 0 && beforeContent === afterContent) {
4888
4980
  continue;
4889
4981
  }
4890
4982
  const diffLines2 = generateDiffLines(beforeContent, afterContent);
4891
4983
  diffs.push({
4892
- path: path2,
4984
+ path: path3,
4893
4985
  expected: beforeContent,
4894
4986
  actual: afterContent,
4895
4987
  diffLines: diffLines2,
4896
- ...infrastructurePaths?.has(path2) && { isInfrastructure: true }
4988
+ ...infrastructurePaths?.has(path3) && { isInfrastructure: true }
4897
4989
  });
4898
4990
  }
4899
4991
  const deletedPaths = [...allPaths].filter((p) => after[p] === void 0);
@@ -4920,9 +5012,9 @@ function diffSnapshots(before, after, infrastructurePaths) {
4920
5012
  function extractTemplateFiles(before, after, infrastructurePaths) {
4921
5013
  const files = [];
4922
5014
  const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
4923
- for (const path2 of allPaths) {
4924
- const beforeContent = before[path2];
4925
- const afterContent = after[path2];
5015
+ for (const path3 of allPaths) {
5016
+ const beforeContent = before[path3];
5017
+ const afterContent = after[path3];
4926
5018
  if (afterContent === void 0) {
4927
5019
  continue;
4928
5020
  }
@@ -4935,10 +5027,10 @@ function extractTemplateFiles(before, after, infrastructurePaths) {
4935
5027
  status = "unchanged";
4936
5028
  }
4937
5029
  files.push({
4938
- path: path2,
5030
+ path: path3,
4939
5031
  content: afterContent,
4940
5032
  status,
4941
- ...infrastructurePaths?.has(path2) && { isInfrastructure: true }
5033
+ ...infrastructurePaths?.has(path3) && { isInfrastructure: true }
4942
5034
  });
4943
5035
  }
4944
5036
  files.sort((a, b) => a.path.localeCompare(b.path));
@@ -5043,6 +5135,24 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
5043
5135
  // src/run-scenario/index.ts
5044
5136
  async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions) {
5045
5137
  const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
5138
+ const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
5139
+ if (template) {
5140
+ console.log(
5141
+ formatTraceEventLine({
5142
+ evalRunId: evalRunId2,
5143
+ scenarioId: scenario.id,
5144
+ scenarioName: scenario.name,
5145
+ targetId,
5146
+ targetName,
5147
+ stepNumber: 0,
5148
+ type: LiveTraceEventType4.PROGRESS,
5149
+ outputPreview: "Setting up environment (installing dependencies)...",
5150
+ elapsedMs: 0,
5151
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
5152
+ isComplete: false
5153
+ })
5154
+ );
5155
+ }
5046
5156
  const workDir = await prepareWorkingDirectory(
5047
5157
  config,
5048
5158
  evalRunId2,