@wix/evalforge-evaluator 0.190.0 → 0.192.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +109 -41
- package/build/index.js.map +3 -3
- package/build/index.mjs +112 -44
- package/build/index.mjs.map +3 -3
- package/build/types/run-scenario/install-dependencies.d.ts +14 -2
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -7372,6 +7372,37 @@ var import_fs = require("fs");
|
|
|
7372
7372
|
var import_crypto = require("crypto");
|
|
7373
7373
|
var import_path2 = __toESM(require("path"));
|
|
7374
7374
|
var import_child_process = require("child_process");
|
|
7375
|
+
var INSTALL_TIMEOUT_MS = 9e4;
|
|
7376
|
+
var HEARTBEAT_INTERVAL_MS = 5e3;
|
|
7377
|
+
/**
 * Runs a command as a child process and settles when it finishes.
 *
 * The child inherits this process's stdio. The returned promise resolves with
 * no value when the child exits with code 0 and rejects otherwise.
 *
 * @param {string} cmd - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {{ cwd?: string, env?: object, timeoutMs?: number }} opts - Spawn
 *   options. `timeoutMs` is enforced only when it is a finite positive number.
 * @returns {Promise<void>} Resolves on exit code 0; rejects on spawn error,
 *   non-zero exit, termination by signal, or timeout.
 */
function defaultExec(cmd, args, opts) {
  return new Promise((resolve3, reject) => {
    const child = (0, import_child_process.spawn)(cmd, args, {
      cwd: opts.cwd,
      env: opts.env,
      stdio: "inherit"
    });
    // setTimeout coerces a missing, NaN or infinite delay to 1ms, which would
    // SIGKILL the child almost immediately. Only arm the timer for a real timeout.
    const hasTimeout = Number.isFinite(opts.timeoutMs) && opts.timeoutMs > 0;
    const timer = hasTimeout ? setTimeout(() => {
      child.kill("SIGKILL");
      reject(
        new Error(
          `${cmd} ${args.join(" ")} timed out after ${opts.timeoutMs}ms`
        )
      );
    }, opts.timeoutMs) : void 0;
    child.on("error", (err) => {
      clearTimeout(timer);
      reject(err);
    });
    child.on("close", (code, signal) => {
      clearTimeout(timer);
      if (code === 0) {
        resolve3();
      } else if (code === null) {
        // A null exit code means the child was ended by a signal. After a
        // timeout this reject is a no-op because the promise already settled.
        reject(new Error(`${cmd} was terminated by signal ${signal}`));
      } else {
        reject(new Error(`${cmd} exited with code ${code}`));
      }
    });
  });
}
|
|
7375
7406
|
function detectPackageManager(workDir) {
|
|
7376
7407
|
if ((0, import_fs.existsSync)(import_path2.default.join(workDir, "pnpm-lock.yaml"))) {
|
|
7377
7408
|
return {
|
|
@@ -7409,7 +7440,34 @@ function cloneDirectory(src, dest) {
|
|
|
7409
7440
|
(0, import_fs.cpSync)(src, dest, { recursive: true });
|
|
7410
7441
|
}
|
|
7411
7442
|
}
|
|
7412
|
-
function
|
|
7443
|
+
/**
 * Runs the package manager's install command, reporting progress as it goes.
 *
 * Emits a start message, a periodic "still running" heartbeat while the
 * command is in flight, and a final finished/FAILED message. A failure of
 * `exec` is reported through `onProgress` and turned into a `false` result
 * rather than being rethrown.
 *
 * @param {Function} exec - Async runner called as `exec(cmd, args, opts)`.
 * @param {{ cmd: string, args: string[] }} pm - Install command and arguments.
 * @param {string} workDir - Directory the install command runs in.
 * @param {(message: string) => void} onProgress - Receives diagnostic messages.
 * @returns {Promise<boolean>} `true` if the install completed, `false` if it failed.
 */
async function runInstall(exec, pm, workDir, onProgress) {
  const commandLine = `${pm.cmd} ${pm.args.join(" ")}`;
  onProgress(`[diag] npm install starting: ${commandLine}`);

  const startedAt = Date.now();
  const elapsedMs = () => Date.now() - startedAt;
  const reportStillRunning = () => {
    const elapsedSeconds = Math.round(elapsedMs() / 1e3);
    onProgress(`[diag] npm install still running: ${elapsedSeconds}s elapsed`);
  };
  const heartbeat = setInterval(reportStillRunning, HEARTBEAT_INTERVAL_MS);

  let succeeded = false;
  try {
    const execOptions = {
      cwd: workDir,
      timeoutMs: INSTALL_TIMEOUT_MS,
      env: { ...process.env, NODE_ENV: "development" }
    };
    await exec(pm.cmd, pm.args, execOptions);
    onProgress(`[diag] npm install finished in ${elapsedMs()}ms`);
    // Set only after the success message is delivered, so a throwing
    // onProgress is still reported as a failure.
    succeeded = true;
  } catch (err) {
    const failedAfterMs = elapsedMs();
    const reason = err instanceof Error ? err.message : String(err);
    onProgress(`[diag] npm install FAILED after ${failedAfterMs}ms: ${reason}`);
  } finally {
    // Always stop the heartbeat so the interval cannot keep the process alive.
    clearInterval(heartbeat);
  }
  return succeeded;
}
|
|
7470
|
+
async function installWithCache(workDir, exec, cacheBase, pm, onProgress) {
|
|
7413
7471
|
const sourceContent = (0, import_fs.readFileSync)(
|
|
7414
7472
|
import_path2.default.join(workDir, pm.cacheSourceFile),
|
|
7415
7473
|
"utf-8"
|
|
@@ -7423,29 +7481,19 @@ function installWithCache(workDir, exec, cacheBase, pm) {
|
|
|
7423
7481
|
console.log(
|
|
7424
7482
|
`[environment] Restoring node_modules from cache (key: ${cacheKey})`
|
|
7425
7483
|
);
|
|
7484
|
+
onProgress(`[diag] node_modules cache HIT (key: ${cacheKey}) \u2014 restoring`);
|
|
7426
7485
|
if (!(0, import_fs.existsSync)(targetNodeModules)) {
|
|
7427
7486
|
cloneDirectory(cachedNodeModules, targetNodeModules);
|
|
7428
7487
|
}
|
|
7429
7488
|
if ((0, import_fs.existsSync)(cachedYarnLock)) {
|
|
7430
7489
|
(0, import_fs.copyFileSync)(cachedYarnLock, import_path2.default.join(workDir, "yarn.lock"));
|
|
7431
7490
|
}
|
|
7491
|
+
onProgress("[diag] node_modules cache restore complete");
|
|
7432
7492
|
return;
|
|
7433
7493
|
}
|
|
7434
|
-
|
|
7435
|
-
|
|
7436
|
-
)
|
|
7437
|
-
try {
|
|
7438
|
-
exec(pm.cmd, pm.args, {
|
|
7439
|
-
cwd: workDir,
|
|
7440
|
-
stdio: "inherit",
|
|
7441
|
-
timeout: 18e4,
|
|
7442
|
-
env: { ...process.env, NODE_ENV: "development" }
|
|
7443
|
-
});
|
|
7444
|
-
} catch (err) {
|
|
7445
|
-
console.error(
|
|
7446
|
-
"[environment] Dependency installation failed:",
|
|
7447
|
-
err instanceof Error ? err.message : String(err)
|
|
7448
|
-
);
|
|
7494
|
+
onProgress(`[diag] node_modules cache MISS (key: ${cacheKey})`);
|
|
7495
|
+
const ok = await runInstall(exec, pm, workDir, onProgress);
|
|
7496
|
+
if (!ok) {
|
|
7449
7497
|
return;
|
|
7450
7498
|
}
|
|
7451
7499
|
console.log(
|
|
@@ -7469,31 +7517,15 @@ async function installDependencies(workDir, onProgress, options = {}) {
|
|
|
7469
7517
|
if (!(0, import_fs.existsSync)(import_path2.default.join(workDir, "package.json"))) {
|
|
7470
7518
|
return;
|
|
7471
7519
|
}
|
|
7472
|
-
const exec = options.exec ??
|
|
7520
|
+
const exec = options.exec ?? defaultExec;
|
|
7473
7521
|
const cacheBase = options.cacheBase;
|
|
7474
7522
|
onProgress("Installing dependencies...");
|
|
7475
7523
|
const pm = detectPackageManager(workDir);
|
|
7476
7524
|
if (cacheBase) {
|
|
7477
|
-
installWithCache(workDir, exec, cacheBase, pm);
|
|
7525
|
+
await installWithCache(workDir, exec, cacheBase, pm, onProgress);
|
|
7478
7526
|
return;
|
|
7479
7527
|
}
|
|
7480
|
-
|
|
7481
|
-
`[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir}`
|
|
7482
|
-
);
|
|
7483
|
-
try {
|
|
7484
|
-
exec(pm.cmd, pm.args, {
|
|
7485
|
-
cwd: workDir,
|
|
7486
|
-
stdio: "inherit",
|
|
7487
|
-
timeout: 18e4,
|
|
7488
|
-
env: { ...process.env, NODE_ENV: "development" }
|
|
7489
|
-
});
|
|
7490
|
-
console.log("[environment] Dependency installation complete");
|
|
7491
|
-
} catch (err) {
|
|
7492
|
-
console.error(
|
|
7493
|
-
"[environment] Dependency installation failed:",
|
|
7494
|
-
err instanceof Error ? err.message : String(err)
|
|
7495
|
-
);
|
|
7496
|
-
}
|
|
7528
|
+
await runInstall(exec, pm, workDir, onProgress);
|
|
7497
7529
|
}
|
|
7498
7530
|
|
|
7499
7531
|
// src/run-scenario/environment.ts
|
|
@@ -7569,12 +7601,39 @@ async function fetchAndWriteTemplateFiles(template, workDir, onProgress) {
|
|
|
7569
7601
|
`Template "${template.name}" has no source configured, creating empty directory`
|
|
7570
7602
|
);
|
|
7571
7603
|
}
|
|
7604
|
+
onProgress(`[diag] writing ${sourceFiles.length} source file(s) to workDir`);
|
|
7572
7605
|
await writeFilesToDirectory(workDir, sourceFiles);
|
|
7606
|
+
onProgress(`[diag] ${sourceFiles.length} source file(s) written`);
|
|
7607
|
+
const extraFiles = template.extraFiles ?? [];
|
|
7608
|
+
onProgress(`[diag] resolving ${extraFiles.length} extra file(s)`);
|
|
7573
7609
|
await Promise.all(
|
|
7574
|
-
|
|
7575
|
-
|
|
7576
|
-
|
|
7577
|
-
|
|
7610
|
+
extraFiles.map(async (ef) => {
|
|
7611
|
+
let content;
|
|
7612
|
+
if (ef.gitSource) {
|
|
7613
|
+
const { owner, repo, path: gitPath, ref } = ef.gitSource;
|
|
7614
|
+
const startedAt = Date.now();
|
|
7615
|
+
onProgress(
|
|
7616
|
+
`[diag] extra fetch start: ${ef.path} <- ${owner}/${repo}/${gitPath ?? ""}@${ref ?? "default"}`
|
|
7617
|
+
);
|
|
7618
|
+
try {
|
|
7619
|
+
content = await withTimeout(
|
|
7620
|
+
(0, import_evalforge_github_client.fetchGitHubFile)(ef.gitSource, { userAgent: "EvalForge-Evaluator" }),
|
|
7621
|
+
3e4,
|
|
7622
|
+
`extra file fetch (${ef.path})`
|
|
7623
|
+
);
|
|
7624
|
+
onProgress(
|
|
7625
|
+
`[diag] extra fetch done: ${ef.path} (${content.length} bytes) in ${Date.now() - startedAt}ms`
|
|
7626
|
+
);
|
|
7627
|
+
} catch (err) {
|
|
7628
|
+
onProgress(
|
|
7629
|
+
`[diag] extra fetch FAILED: ${ef.path} after ${Date.now() - startedAt}ms: ${err instanceof Error ? err.message : String(err)}`
|
|
7630
|
+
);
|
|
7631
|
+
throw err;
|
|
7632
|
+
}
|
|
7633
|
+
} else {
|
|
7634
|
+
content = ef.content ?? "";
|
|
7635
|
+
onProgress(`[diag] extra inline: ${ef.path} (${content.length} bytes)`);
|
|
7636
|
+
}
|
|
7578
7637
|
const dest = import_path3.default.resolve(workDir, ef.path);
|
|
7579
7638
|
if (!dest.startsWith(workDir + import_path3.sep)) {
|
|
7580
7639
|
throw new Error(
|
|
@@ -7585,6 +7644,7 @@ async function fetchAndWriteTemplateFiles(template, workDir, onProgress) {
|
|
|
7585
7644
|
await (0, import_promises2.writeFile)(dest, content, "utf8");
|
|
7586
7645
|
})
|
|
7587
7646
|
);
|
|
7647
|
+
onProgress("[diag] all extra files written");
|
|
7588
7648
|
}
|
|
7589
7649
|
function writeWixEnvFile(workDir) {
|
|
7590
7650
|
const configPath = import_path3.default.join(workDir, "wix.config.json");
|
|
@@ -7624,10 +7684,13 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
|
|
|
7624
7684
|
onProgress("Fetching template files...");
|
|
7625
7685
|
await fetchAndWriteTemplateFiles(template, workDir2, onProgress);
|
|
7626
7686
|
console.log(`Template files written to ${workDir2}`);
|
|
7687
|
+
onProgress("[diag] writing wix env file");
|
|
7627
7688
|
writeWixEnvFile(workDir2);
|
|
7689
|
+
onProgress("[diag] entering installDependencies");
|
|
7628
7690
|
await installDependencies(workDir2, onProgress, {
|
|
7629
7691
|
cacheBase: nodeModulesCacheDir
|
|
7630
7692
|
});
|
|
7693
|
+
onProgress("[diag] installDependencies returned");
|
|
7631
7694
|
onProgress("Environment ready");
|
|
7632
7695
|
return workDir2;
|
|
7633
7696
|
}
|
|
@@ -8434,7 +8497,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
8434
8497
|
const SDK_TIMEOUT_MS = resolveTimeoutMs(maxTurns, options.maxDurationMs);
|
|
8435
8498
|
let timeoutHandle;
|
|
8436
8499
|
let timedOut = false;
|
|
8437
|
-
const
|
|
8500
|
+
const HEARTBEAT_INTERVAL_MS2 = 1e4;
|
|
8438
8501
|
let heartbeatHandle;
|
|
8439
8502
|
const executionStartTime = Date.now();
|
|
8440
8503
|
try {
|
|
@@ -8486,7 +8549,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
8486
8549
|
isComplete: false
|
|
8487
8550
|
};
|
|
8488
8551
|
emitTraceEvent(progressEvent, traceContext.pushEvent);
|
|
8489
|
-
},
|
|
8552
|
+
}, HEARTBEAT_INTERVAL_MS2);
|
|
8490
8553
|
}
|
|
8491
8554
|
const sdkPromise = (async () => {
|
|
8492
8555
|
const hasImages = scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0;
|
|
@@ -12080,11 +12143,15 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
|
|
|
12080
12143
|
);
|
|
12081
12144
|
let provisionedSite;
|
|
12082
12145
|
if (apiClient && projectId2 && scenario.siteSetup && scenario.siteSetup.mode !== "none") {
|
|
12146
|
+
emitSetupProgress(
|
|
12147
|
+
`[diag] provisioning scenario site (mode: ${scenario.siteSetup.mode})...`
|
|
12148
|
+
);
|
|
12083
12149
|
provisionedSite = await apiClient.provisionScenarioSite(
|
|
12084
12150
|
projectId2,
|
|
12085
12151
|
evalRunId2,
|
|
12086
12152
|
scenario.id
|
|
12087
12153
|
);
|
|
12154
|
+
emitSetupProgress("[diag] scenario site provisioned");
|
|
12088
12155
|
}
|
|
12089
12156
|
const failedStep = provisionedSite?.bootstrapResult?.steps.find((s) => !s.ok);
|
|
12090
12157
|
if (failedStep) {
|
|
@@ -12117,6 +12184,7 @@ Site ID: ${provisionedSite.id}` : scenario.triggerPrompt;
|
|
|
12117
12184
|
emitSetupProgress,
|
|
12118
12185
|
{ template }
|
|
12119
12186
|
);
|
|
12187
|
+
emitSetupProgress("[diag] starting agent run");
|
|
12120
12188
|
const partialResult = await runAgentWithContext(
|
|
12121
12189
|
config,
|
|
12122
12190
|
evalRunId2,
|