@wix/evalforge-evaluator 0.170.0 → 0.172.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -67,8 +67,8 @@ function createApiClient(serverUrl, options = "") {
67
67
  }
68
68
  return headers;
69
69
  }
70
- async function fetchJson(path2) {
71
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path2}`;
70
+ async function fetchJson(path3) {
71
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
72
72
  console.error(`[API] GET ${url}`);
73
73
  const headers = buildHeaders();
74
74
  const response = await fetch(url, {
@@ -82,8 +82,8 @@ function createApiClient(serverUrl, options = "") {
82
82
  }
83
83
  return response.json();
84
84
  }
85
- async function postJson(path2, body) {
86
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path2}`;
85
+ async function postJson(path3, body) {
86
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
87
87
  console.error(`[API] POST ${url}`);
88
88
  const response = await fetch(url, {
89
89
  method: "POST",
@@ -97,8 +97,8 @@ function createApiClient(serverUrl, options = "") {
97
97
  );
98
98
  }
99
99
  }
100
- async function deleteRequest(path2) {
101
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path2}`;
100
+ async function deleteRequest(path3) {
101
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
102
102
  console.error(`[API] DELETE ${url}`);
103
103
  const headers = buildHeaders();
104
104
  const response = await fetch(url, {
@@ -112,8 +112,8 @@ function createApiClient(serverUrl, options = "") {
112
112
  );
113
113
  }
114
114
  }
115
- async function putJson(path2, body) {
116
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path2}`;
115
+ async function putJson(path3, body) {
116
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path3}`;
117
117
  console.error(`[API] PUT ${url}`);
118
118
  const response = await fetch(url, {
119
119
  method: "PUT",
@@ -415,17 +415,19 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
415
415
  // src/run-scenario/index.ts
416
416
  import {
417
417
  AssertionResultStatus,
418
- DEFAULT_JUDGE_MODEL
418
+ DEFAULT_JUDGE_MODEL,
419
+ LiveTraceEventType as LiveTraceEventType4,
420
+ formatTraceEventLine
419
421
  } from "@wix/evalforge-types";
420
422
  import {
421
423
  evaluateAssertions as evaluateAssertionsBase
422
424
  } from "@wix/eval-assertions";
423
425
 
424
426
  // src/run-scenario/environment.ts
425
- import { mkdirSync, existsSync, rmSync, readFileSync, writeFileSync } from "fs";
427
+ import { mkdirSync as mkdirSync2, existsSync as existsSync2, rmSync, readFileSync as readFileSync2, writeFileSync } from "fs";
426
428
  import { mkdir as mkdir2, writeFile as writeFile2 } from "fs/promises";
427
429
  import { tmpdir } from "os";
428
- import path, { sep as sep2 } from "path";
430
+ import path2, { sep as sep2 } from "path";
429
431
  import {
430
432
  fetchGitHubFolder,
431
433
  fetchGitHubFile
@@ -448,6 +450,90 @@ async function writeFilesToDirectory(targetDir, files) {
448
450
  }
449
451
  }
450
452
 
453
+ // src/run-scenario/install-dependencies.ts
454
+ import {
455
+ mkdirSync,
456
+ existsSync,
457
+ readFileSync,
458
+ copyFileSync,
459
+ cpSync
460
+ } from "fs";
461
+ import { createHash } from "crypto";
462
+ import path from "path";
463
+ import { execFileSync } from "child_process";
464
+ function detectPackageManager(workDir) {
465
+ if (existsSync(path.join(workDir, "pnpm-lock.yaml"))) {
466
+ return { cmd: "pnpm", args: ["install", "--frozen-lockfile"], cacheSourceFile: "pnpm-lock.yaml" };
467
+ }
468
+ if (existsSync(path.join(workDir, "package-lock.json"))) {
469
+ return { cmd: "npm", args: ["ci"], cacheSourceFile: "package-lock.json" };
470
+ }
471
+ if (existsSync(path.join(workDir, "yarn.lock"))) {
472
+ return { cmd: "yarn", args: ["install", "--frozen-lockfile"], cacheSourceFile: "yarn.lock" };
473
+ }
474
+ return { cmd: "npm", args: ["install", "--legacy-peer-deps", "--prefer-offline", "--no-fund", "--no-audit"], cacheSourceFile: "package.json" };
475
+ }
476
+ function cloneDirectory(src, dest) {
477
+ if (process.platform === "darwin") {
478
+ execFileSync("cp", ["-rc", src, dest]);
479
+ } else {
480
+ cpSync(src, dest, { recursive: true });
481
+ }
482
+ }
483
+ function installWithCache(workDir, exec, cacheBase, pm) {
484
+ const sourceContent = readFileSync(path.join(workDir, pm.cacheSourceFile), "utf-8");
485
+ const cacheKey = createHash("sha256").update(sourceContent).digest("hex").slice(0, 16);
486
+ const cachedNodeModules = path.join(cacheBase, cacheKey, "node_modules");
487
+ const targetNodeModules = path.join(workDir, "node_modules");
488
+ const cacheDir = path.dirname(cachedNodeModules);
489
+ const cachedYarnLock = path.join(cacheDir, "yarn.lock");
490
+ if (existsSync(cachedNodeModules)) {
491
+ console.log(`[environment] Restoring node_modules from cache (key: ${cacheKey})`);
492
+ if (!existsSync(targetNodeModules)) {
493
+ cloneDirectory(cachedNodeModules, targetNodeModules);
494
+ }
495
+ if (existsSync(cachedYarnLock)) {
496
+ copyFileSync(cachedYarnLock, path.join(workDir, "yarn.lock"));
497
+ }
498
+ return;
499
+ }
500
+ console.log(`[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir} (cache key: ${cacheKey})`);
501
+ try {
502
+ exec(pm.cmd, pm.args, { cwd: workDir, stdio: "inherit", timeout: 18e4, env: { ...process.env, NODE_ENV: "development" } });
503
+ } catch (err) {
504
+ console.error("[environment] Dependency installation failed:", err instanceof Error ? err.message : String(err));
505
+ return;
506
+ }
507
+ console.log("[environment] Dependency installation complete \u2014 saving to cache");
508
+ try {
509
+ mkdirSync(cacheDir, { recursive: true });
510
+ const yarnLockPath = path.join(workDir, "yarn.lock");
511
+ if (existsSync(yarnLockPath)) {
512
+ copyFileSync(yarnLockPath, cachedYarnLock);
513
+ }
514
+ cloneDirectory(targetNodeModules, cachedNodeModules);
515
+ } catch (err) {
516
+ console.error("[environment] Failed to save to cache (installation still succeeded):", err instanceof Error ? err.message : String(err));
517
+ }
518
+ }
519
+ async function installDependencies(workDir, exec = execFileSync, cacheBase) {
520
+ if (!existsSync(path.join(workDir, "package.json"))) {
521
+ return;
522
+ }
523
+ const pm = detectPackageManager(workDir);
524
+ if (cacheBase) {
525
+ installWithCache(workDir, exec, cacheBase, pm);
526
+ return;
527
+ }
528
+ console.log(`[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir}`);
529
+ try {
530
+ exec(pm.cmd, pm.args, { cwd: workDir, stdio: "inherit", timeout: 18e4, env: { ...process.env, NODE_ENV: "development" } });
531
+ console.log("[environment] Dependency installation complete");
532
+ } catch (err) {
533
+ console.error("[environment] Dependency installation failed:", err instanceof Error ? err.message : String(err));
534
+ }
535
+ }
536
+
451
537
  // src/run-scenario/environment.ts
452
538
  async function fetchAndWriteTemplateFiles(template, workDir) {
453
539
  let sourceFiles = [];
@@ -468,27 +554,27 @@ async function fetchAndWriteTemplateFiles(template, workDir) {
468
554
  const content = ef.gitSource ? await fetchGitHubFile(ef.gitSource, {
469
555
  userAgent: "EvalForge-Evaluator"
470
556
  }) : ef.content ?? "";
471
- const dest = path.resolve(workDir, ef.path);
557
+ const dest = path2.resolve(workDir, ef.path);
472
558
  if (!dest.startsWith(workDir + sep2)) {
473
559
  throw new Error(
474
560
  `Extra file path escapes working directory: "${ef.path}"`
475
561
  );
476
562
  }
477
- await mkdir2(path.dirname(dest), { recursive: true });
563
+ await mkdir2(path2.dirname(dest), { recursive: true });
478
564
  await writeFile2(dest, content, "utf8");
479
565
  })
480
566
  );
481
567
  }
482
568
  function writeWixEnvFile(workDir) {
483
- const configPath = path.join(workDir, "wix.config.json");
484
- if (!existsSync(configPath)) {
569
+ const configPath = path2.join(workDir, "wix.config.json");
570
+ if (!existsSync2(configPath)) {
485
571
  return;
486
572
  }
487
573
  try {
488
- const config = JSON.parse(readFileSync(configPath, "utf-8"));
574
+ const config = JSON.parse(readFileSync2(configPath, "utf-8"));
489
575
  if (config.appId) {
490
576
  writeFileSync(
491
- path.join(workDir, ".env"),
577
+ path2.join(workDir, ".env"),
492
578
  `WIX_CLIENT_ID=${config.appId}
493
579
  `,
494
580
  "utf-8"
@@ -500,28 +586,30 @@ function writeWixEnvFile(workDir) {
500
586
  }
501
587
  }
502
588
  async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId, template) {
503
- const baseDir = config.evaluationsDir ?? path.join(tmpdir(), "evalforge-evaluations");
589
+ const baseDir = config.evaluationsDir ?? path2.join(tmpdir(), "evalforge-evaluations");
590
+ const nodeModulesCacheDir = path2.join(baseDir, "_node_modules_cache");
504
591
  if (template) {
505
592
  if (!config.evaluationsDir) {
506
593
  console.warn(
507
594
  "Template specified but EVALUATIONS_DIR not set, using temp directory"
508
595
  );
509
596
  }
510
- const workDir2 = path.join(baseDir, `${evalRunId2}_${targetId}`);
511
- if (existsSync(workDir2)) {
597
+ const workDir2 = path2.join(baseDir, `${evalRunId2}_${targetId}`);
598
+ if (existsSync2(workDir2)) {
512
599
  rmSync(workDir2, { recursive: true });
513
600
  }
514
- mkdirSync(workDir2, { recursive: true });
601
+ mkdirSync2(workDir2, { recursive: true });
515
602
  await fetchAndWriteTemplateFiles(template, workDir2);
516
603
  console.log(`Template files written to ${workDir2}`);
517
604
  writeWixEnvFile(workDir2);
605
+ await installDependencies(workDir2, void 0, nodeModulesCacheDir);
518
606
  return workDir2;
519
607
  }
520
- const workDir = path.join(baseDir, `${evalRunId2}_${targetId}_${scenarioId}`);
521
- if (existsSync(workDir)) {
608
+ const workDir = path2.join(baseDir, `${evalRunId2}_${targetId}_${scenarioId}`);
609
+ if (existsSync2(workDir)) {
522
610
  rmSync(workDir, { recursive: true });
523
611
  }
524
- mkdirSync(workDir, { recursive: true });
612
+ mkdirSync2(workDir, { recursive: true });
525
613
  console.log(`Empty working directory created at ${workDir}`);
526
614
  return workDir;
527
615
  }
@@ -1066,8 +1154,8 @@ function extractToolActionDescription(toolName, toolArgs) {
1066
1154
  }
1067
1155
  }
1068
1156
  if (toolName === "LS" || toolName === "ls" || toolName === "ListFiles") {
1069
- const path2 = args.path || args.directory || ".";
1070
- return `Listing: ${String(path2).slice(0, 50)}`;
1157
+ const path3 = args.path || args.directory || ".";
1158
+ return `Listing: ${String(path3).slice(0, 50)}`;
1071
1159
  }
1072
1160
  if ((toolName === "Read" || toolName === "read" || toolName === "View") && (args.file_path || args.path || args.target_file)) {
1073
1161
  const filePath = String(
@@ -4209,7 +4297,7 @@ var simpleAgentAdapter = new SimpleAgentAdapter();
4209
4297
  defaultRegistry.register(simpleAgentAdapter);
4210
4298
 
4211
4299
  // src/run-scenario/file-diff.ts
4212
- import { readdirSync, readFileSync as readFileSync2, statSync, existsSync as existsSync2 } from "fs";
4300
+ import { readdirSync, readFileSync as readFileSync3, statSync, existsSync as existsSync3 } from "fs";
4213
4301
  import { join as join10, relative } from "path";
4214
4302
 
4215
4303
  // ../../node_modules/diff/lib/index.mjs
@@ -4314,11 +4402,11 @@ Diff.prototype = {
4314
4402
  }
4315
4403
  }
4316
4404
  },
4317
- addToPath: function addToPath(path2, added, removed, oldPosInc, options) {
4318
- var last = path2.lastComponent;
4405
+ addToPath: function addToPath(path3, added, removed, oldPosInc, options) {
4406
+ var last = path3.lastComponent;
4319
4407
  if (last && !options.oneChangePerToken && last.added === added && last.removed === removed) {
4320
4408
  return {
4321
- oldPos: path2.oldPos + oldPosInc,
4409
+ oldPos: path3.oldPos + oldPosInc,
4322
4410
  lastComponent: {
4323
4411
  count: last.count + 1,
4324
4412
  added,
@@ -4328,7 +4416,7 @@ Diff.prototype = {
4328
4416
  };
4329
4417
  } else {
4330
4418
  return {
4331
- oldPos: path2.oldPos + oldPosInc,
4419
+ oldPos: path3.oldPos + oldPosInc,
4332
4420
  lastComponent: {
4333
4421
  count: 1,
4334
4422
  added,
@@ -4768,9 +4856,9 @@ arrayDiff.join = arrayDiff.removeEmpty = function(value) {
4768
4856
  // src/run-scenario/file-diff.ts
4769
4857
  function deriveInfrastructurePaths(prePrep, postPrep) {
4770
4858
  const infraPaths = /* @__PURE__ */ new Set();
4771
- for (const path2 of Object.keys(postPrep)) {
4772
- if (prePrep[path2] === void 0 || prePrep[path2] !== postPrep[path2]) {
4773
- infraPaths.add(path2);
4859
+ for (const path3 of Object.keys(postPrep)) {
4860
+ if (prePrep[path3] === void 0 || prePrep[path3] !== postPrep[path3]) {
4861
+ infraPaths.add(path3);
4774
4862
  }
4775
4863
  }
4776
4864
  return infraPaths;
@@ -4830,7 +4918,7 @@ function isBinaryFile(filename) {
4830
4918
  function snapshotDirectory(dir, baseDir) {
4831
4919
  const snapshot = {};
4832
4920
  const base = baseDir || dir;
4833
- if (!existsSync2(dir)) {
4921
+ if (!existsSync3(dir)) {
4834
4922
  return snapshot;
4835
4923
  }
4836
4924
  const entries = readdirSync(dir, { withFileTypes: true });
@@ -4852,7 +4940,7 @@ function snapshotDirectory(dir, baseDir) {
4852
4940
  if (stats.size > MAX_FILE_SIZE) {
4853
4941
  continue;
4854
4942
  }
4855
- const content = readFileSync2(fullPath, "utf-8");
4943
+ const content = readFileSync3(fullPath, "utf-8");
4856
4944
  snapshot[relativePath] = content;
4857
4945
  } catch {
4858
4946
  continue;
@@ -4881,19 +4969,19 @@ function generateDiffLines(before, after) {
4881
4969
  function diffSnapshots(before, after, infrastructurePaths) {
4882
4970
  const diffs = [];
4883
4971
  const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
4884
- for (const path2 of allPaths) {
4885
- const beforeContent = before[path2] ?? "";
4886
- const afterContent = after[path2] ?? "";
4887
- if (before[path2] !== void 0 && beforeContent === afterContent) {
4972
+ for (const path3 of allPaths) {
4973
+ const beforeContent = before[path3] ?? "";
4974
+ const afterContent = after[path3] ?? "";
4975
+ if (before[path3] !== void 0 && beforeContent === afterContent) {
4888
4976
  continue;
4889
4977
  }
4890
4978
  const diffLines2 = generateDiffLines(beforeContent, afterContent);
4891
4979
  diffs.push({
4892
- path: path2,
4980
+ path: path3,
4893
4981
  expected: beforeContent,
4894
4982
  actual: afterContent,
4895
4983
  diffLines: diffLines2,
4896
- ...infrastructurePaths?.has(path2) && { isInfrastructure: true }
4984
+ ...infrastructurePaths?.has(path3) && { isInfrastructure: true }
4897
4985
  });
4898
4986
  }
4899
4987
  const deletedPaths = [...allPaths].filter((p) => after[p] === void 0);
@@ -4920,9 +5008,9 @@ function diffSnapshots(before, after, infrastructurePaths) {
4920
5008
  function extractTemplateFiles(before, after, infrastructurePaths) {
4921
5009
  const files = [];
4922
5010
  const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
4923
- for (const path2 of allPaths) {
4924
- const beforeContent = before[path2];
4925
- const afterContent = after[path2];
5011
+ for (const path3 of allPaths) {
5012
+ const beforeContent = before[path3];
5013
+ const afterContent = after[path3];
4926
5014
  if (afterContent === void 0) {
4927
5015
  continue;
4928
5016
  }
@@ -4935,10 +5023,10 @@ function extractTemplateFiles(before, after, infrastructurePaths) {
4935
5023
  status = "unchanged";
4936
5024
  }
4937
5025
  files.push({
4938
- path: path2,
5026
+ path: path3,
4939
5027
  content: afterContent,
4940
5028
  status,
4941
- ...infrastructurePaths?.has(path2) && { isInfrastructure: true }
5029
+ ...infrastructurePaths?.has(path3) && { isInfrastructure: true }
4942
5030
  });
4943
5031
  }
4944
5032
  files.sort((a, b) => a.path.localeCompare(b.path));
@@ -5043,6 +5131,24 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
5043
5131
  // src/run-scenario/index.ts
5044
5132
  async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions) {
5045
5133
  const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
5134
+ const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
5135
+ if (template) {
5136
+ console.log(
5137
+ formatTraceEventLine({
5138
+ evalRunId: evalRunId2,
5139
+ scenarioId: scenario.id,
5140
+ scenarioName: scenario.name,
5141
+ targetId,
5142
+ targetName,
5143
+ stepNumber: 0,
5144
+ type: LiveTraceEventType4.PROGRESS,
5145
+ outputPreview: "Setting up environment (installing dependencies)...",
5146
+ elapsedMs: 0,
5147
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
5148
+ isComplete: false
5149
+ })
5150
+ );
5151
+ }
5046
5152
  const workDir = await prepareWorkingDirectory(
5047
5153
  config,
5048
5154
  evalRunId2,