@wix/evalforge-evaluator 0.190.0 → 0.192.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -7393,7 +7393,38 @@ async function writeFilesToDirectory(targetDir, files) {
7393
7393
  import { mkdirSync, existsSync, readFileSync, copyFileSync, cpSync } from "fs";
7394
7394
  import { createHash } from "crypto";
7395
7395
  import path from "path";
7396
- import { execFileSync } from "child_process";
7396
+ import { spawn, execFileSync } from "child_process";
7397
+ var INSTALL_TIMEOUT_MS = 9e4;
7398
+ var HEARTBEAT_INTERVAL_MS = 5e3;
7399
+ function defaultExec(cmd, args, opts) {
7400
+ return new Promise((resolve3, reject) => {
7401
+ const child = spawn(cmd, args, {
7402
+ cwd: opts.cwd,
7403
+ env: opts.env,
7404
+ stdio: "inherit"
7405
+ });
7406
+ const timer = setTimeout(() => {
7407
+ child.kill("SIGKILL");
7408
+ reject(
7409
+ new Error(
7410
+ `${cmd} ${args.join(" ")} timed out after ${opts.timeoutMs}ms`
7411
+ )
7412
+ );
7413
+ }, opts.timeoutMs);
7414
+ child.on("error", (err) => {
7415
+ clearTimeout(timer);
7416
+ reject(err);
7417
+ });
7418
+ child.on("close", (code) => {
7419
+ clearTimeout(timer);
7420
+ if (code === 0) {
7421
+ resolve3();
7422
+ } else {
7423
+ reject(new Error(`${cmd} exited with code ${code}`));
7424
+ }
7425
+ });
7426
+ });
7427
+ }
7397
7428
  function detectPackageManager(workDir) {
7398
7429
  if (existsSync(path.join(workDir, "pnpm-lock.yaml"))) {
7399
7430
  return {
@@ -7431,7 +7462,34 @@ function cloneDirectory(src, dest) {
7431
7462
  cpSync(src, dest, { recursive: true });
7432
7463
  }
7433
7464
  }
7434
- function installWithCache(workDir, exec, cacheBase, pm) {
7465
+ async function runInstall(exec, pm, workDir, onProgress) {
7466
+ onProgress(`[diag] npm install starting: ${pm.cmd} ${pm.args.join(" ")}`);
7467
+ const startedAt = Date.now();
7468
+ const heartbeat = setInterval(() => {
7469
+ onProgress(
7470
+ `[diag] npm install still running: ${Math.round(
7471
+ (Date.now() - startedAt) / 1e3
7472
+ )}s elapsed`
7473
+ );
7474
+ }, HEARTBEAT_INTERVAL_MS);
7475
+ try {
7476
+ await exec(pm.cmd, pm.args, {
7477
+ cwd: workDir,
7478
+ timeoutMs: INSTALL_TIMEOUT_MS,
7479
+ env: { ...process.env, NODE_ENV: "development" }
7480
+ });
7481
+ onProgress(`[diag] npm install finished in ${Date.now() - startedAt}ms`);
7482
+ return true;
7483
+ } catch (err) {
7484
+ onProgress(
7485
+ `[diag] npm install FAILED after ${Date.now() - startedAt}ms: ${err instanceof Error ? err.message : String(err)}`
7486
+ );
7487
+ return false;
7488
+ } finally {
7489
+ clearInterval(heartbeat);
7490
+ }
7491
+ }
7492
+ async function installWithCache(workDir, exec, cacheBase, pm, onProgress) {
7435
7493
  const sourceContent = readFileSync(
7436
7494
  path.join(workDir, pm.cacheSourceFile),
7437
7495
  "utf-8"
@@ -7445,29 +7503,19 @@ function installWithCache(workDir, exec, cacheBase, pm) {
7445
7503
  console.log(
7446
7504
  `[environment] Restoring node_modules from cache (key: ${cacheKey})`
7447
7505
  );
7506
+ onProgress(`[diag] node_modules cache HIT (key: ${cacheKey}) \u2014 restoring`);
7448
7507
  if (!existsSync(targetNodeModules)) {
7449
7508
  cloneDirectory(cachedNodeModules, targetNodeModules);
7450
7509
  }
7451
7510
  if (existsSync(cachedYarnLock)) {
7452
7511
  copyFileSync(cachedYarnLock, path.join(workDir, "yarn.lock"));
7453
7512
  }
7513
+ onProgress("[diag] node_modules cache restore complete");
7454
7514
  return;
7455
7515
  }
7456
- console.log(
7457
- `[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir} (cache key: ${cacheKey})`
7458
- );
7459
- try {
7460
- exec(pm.cmd, pm.args, {
7461
- cwd: workDir,
7462
- stdio: "inherit",
7463
- timeout: 18e4,
7464
- env: { ...process.env, NODE_ENV: "development" }
7465
- });
7466
- } catch (err) {
7467
- console.error(
7468
- "[environment] Dependency installation failed:",
7469
- err instanceof Error ? err.message : String(err)
7470
- );
7516
+ onProgress(`[diag] node_modules cache MISS (key: ${cacheKey})`);
7517
+ const ok = await runInstall(exec, pm, workDir, onProgress);
7518
+ if (!ok) {
7471
7519
  return;
7472
7520
  }
7473
7521
  console.log(
@@ -7491,31 +7539,15 @@ async function installDependencies(workDir, onProgress, options = {}) {
7491
7539
  if (!existsSync(path.join(workDir, "package.json"))) {
7492
7540
  return;
7493
7541
  }
7494
- const exec = options.exec ?? execFileSync;
7542
+ const exec = options.exec ?? defaultExec;
7495
7543
  const cacheBase = options.cacheBase;
7496
7544
  onProgress("Installing dependencies...");
7497
7545
  const pm = detectPackageManager(workDir);
7498
7546
  if (cacheBase) {
7499
- installWithCache(workDir, exec, cacheBase, pm);
7547
+ await installWithCache(workDir, exec, cacheBase, pm, onProgress);
7500
7548
  return;
7501
7549
  }
7502
- console.log(
7503
- `[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir}`
7504
- );
7505
- try {
7506
- exec(pm.cmd, pm.args, {
7507
- cwd: workDir,
7508
- stdio: "inherit",
7509
- timeout: 18e4,
7510
- env: { ...process.env, NODE_ENV: "development" }
7511
- });
7512
- console.log("[environment] Dependency installation complete");
7513
- } catch (err) {
7514
- console.error(
7515
- "[environment] Dependency installation failed:",
7516
- err instanceof Error ? err.message : String(err)
7517
- );
7518
- }
7550
+ await runInstall(exec, pm, workDir, onProgress);
7519
7551
  }
7520
7552
 
7521
7553
  // src/run-scenario/environment.ts
@@ -7591,12 +7623,39 @@ async function fetchAndWriteTemplateFiles(template, workDir, onProgress) {
7591
7623
  `Template "${template.name}" has no source configured, creating empty directory`
7592
7624
  );
7593
7625
  }
7626
+ onProgress(`[diag] writing ${sourceFiles.length} source file(s) to workDir`);
7594
7627
  await writeFilesToDirectory(workDir, sourceFiles);
7628
+ onProgress(`[diag] ${sourceFiles.length} source file(s) written`);
7629
+ const extraFiles = template.extraFiles ?? [];
7630
+ onProgress(`[diag] resolving ${extraFiles.length} extra file(s)`);
7595
7631
  await Promise.all(
7596
- (template.extraFiles ?? []).map(async (ef) => {
7597
- const content = ef.gitSource ? await fetchGitHubFile(ef.gitSource, {
7598
- userAgent: "EvalForge-Evaluator"
7599
- }) : ef.content ?? "";
7632
+ extraFiles.map(async (ef) => {
7633
+ let content;
7634
+ if (ef.gitSource) {
7635
+ const { owner, repo, path: gitPath, ref } = ef.gitSource;
7636
+ const startedAt = Date.now();
7637
+ onProgress(
7638
+ `[diag] extra fetch start: ${ef.path} <- ${owner}/${repo}/${gitPath ?? ""}@${ref ?? "default"}`
7639
+ );
7640
+ try {
7641
+ content = await withTimeout(
7642
+ fetchGitHubFile(ef.gitSource, { userAgent: "EvalForge-Evaluator" }),
7643
+ 3e4,
7644
+ `extra file fetch (${ef.path})`
7645
+ );
7646
+ onProgress(
7647
+ `[diag] extra fetch done: ${ef.path} (${content.length} bytes) in ${Date.now() - startedAt}ms`
7648
+ );
7649
+ } catch (err) {
7650
+ onProgress(
7651
+ `[diag] extra fetch FAILED: ${ef.path} after ${Date.now() - startedAt}ms: ${err instanceof Error ? err.message : String(err)}`
7652
+ );
7653
+ throw err;
7654
+ }
7655
+ } else {
7656
+ content = ef.content ?? "";
7657
+ onProgress(`[diag] extra inline: ${ef.path} (${content.length} bytes)`);
7658
+ }
7600
7659
  const dest = path2.resolve(workDir, ef.path);
7601
7660
  if (!dest.startsWith(workDir + sep2)) {
7602
7661
  throw new Error(
@@ -7607,6 +7666,7 @@ async function fetchAndWriteTemplateFiles(template, workDir, onProgress) {
7607
7666
  await writeFile2(dest, content, "utf8");
7608
7667
  })
7609
7668
  );
7669
+ onProgress("[diag] all extra files written");
7610
7670
  }
7611
7671
  function writeWixEnvFile(workDir) {
7612
7672
  const configPath = path2.join(workDir, "wix.config.json");
@@ -7646,10 +7706,13 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
7646
7706
  onProgress("Fetching template files...");
7647
7707
  await fetchAndWriteTemplateFiles(template, workDir2, onProgress);
7648
7708
  console.log(`Template files written to ${workDir2}`);
7709
+ onProgress("[diag] writing wix env file");
7649
7710
  writeWixEnvFile(workDir2);
7711
+ onProgress("[diag] entering installDependencies");
7650
7712
  await installDependencies(workDir2, onProgress, {
7651
7713
  cacheBase: nodeModulesCacheDir
7652
7714
  });
7715
+ onProgress("[diag] installDependencies returned");
7653
7716
  onProgress("Environment ready");
7654
7717
  return workDir2;
7655
7718
  }
@@ -8463,7 +8526,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
8463
8526
  const SDK_TIMEOUT_MS = resolveTimeoutMs(maxTurns, options.maxDurationMs);
8464
8527
  let timeoutHandle;
8465
8528
  let timedOut = false;
8466
- const HEARTBEAT_INTERVAL_MS = 1e4;
8529
+ const HEARTBEAT_INTERVAL_MS2 = 1e4;
8467
8530
  let heartbeatHandle;
8468
8531
  const executionStartTime = Date.now();
8469
8532
  try {
@@ -8515,7 +8578,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
8515
8578
  isComplete: false
8516
8579
  };
8517
8580
  emitTraceEvent(progressEvent, traceContext.pushEvent);
8518
- }, HEARTBEAT_INTERVAL_MS);
8581
+ }, HEARTBEAT_INTERVAL_MS2);
8519
8582
  }
8520
8583
  const sdkPromise = (async () => {
8521
8584
  const hasImages = scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0;
@@ -9281,7 +9344,7 @@ defaultRegistry.register(claudeCodeAdapter);
9281
9344
  import { AgentRunCommand as AgentRunCommand2, OpenCodeConfigSchema as OpenCodeConfigSchema2 } from "@wix/evalforge-types";
9282
9345
 
9283
9346
  // src/run-scenario/agents/opencode/execute.ts
9284
- import { spawn } from "child_process";
9347
+ import { spawn as spawn2 } from "child_process";
9285
9348
  import {
9286
9349
  DEFAULT_EVALUATOR_SYSTEM_PROMPT as DEFAULT_EVALUATOR_SYSTEM_PROMPT2,
9287
9350
  LiveTraceEventType as LiveTraceEventType2
@@ -10088,7 +10151,7 @@ function spawnOpenCodeProcess(opts) {
10088
10151
  };
10089
10152
  let child;
10090
10153
  try {
10091
- child = spawn("opencode", args, {
10154
+ child = spawn2("opencode", args, {
10092
10155
  cwd,
10093
10156
  env,
10094
10157
  stdio: ["ignore", "pipe", "pipe"],
@@ -12132,11 +12195,15 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
12132
12195
  );
12133
12196
  let provisionedSite;
12134
12197
  if (apiClient && projectId2 && scenario.siteSetup && scenario.siteSetup.mode !== "none") {
12198
+ emitSetupProgress(
12199
+ `[diag] provisioning scenario site (mode: ${scenario.siteSetup.mode})...`
12200
+ );
12135
12201
  provisionedSite = await apiClient.provisionScenarioSite(
12136
12202
  projectId2,
12137
12203
  evalRunId2,
12138
12204
  scenario.id
12139
12205
  );
12206
+ emitSetupProgress("[diag] scenario site provisioned");
12140
12207
  }
12141
12208
  const failedStep = provisionedSite?.bootstrapResult?.steps.find((s) => !s.ok);
12142
12209
  if (failedStep) {
@@ -12169,6 +12236,7 @@ Site ID: ${provisionedSite.id}` : scenario.triggerPrompt;
12169
12236
  emitSetupProgress,
12170
12237
  { template }
12171
12238
  );
12239
+ emitSetupProgress("[diag] starting agent run");
12172
12240
  const partialResult = await runAgentWithContext(
12173
12241
  config,
12174
12242
  evalRunId2,