@wix/evalforge-evaluator 0.191.0 → 0.192.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +79 -37
- package/build/index.js.map +3 -3
- package/build/index.mjs +82 -40
- package/build/index.mjs.map +3 -3
- package/build/types/run-scenario/install-dependencies.d.ts +14 -2
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -7393,7 +7393,38 @@ async function writeFilesToDirectory(targetDir, files) {
|
|
|
7393
7393
|
import { mkdirSync, existsSync, readFileSync, copyFileSync, cpSync } from "fs";
|
|
7394
7394
|
import { createHash } from "crypto";
|
|
7395
7395
|
import path from "path";
|
|
7396
|
-
import { execFileSync } from "child_process";
|
|
7396
|
+
import { spawn, execFileSync } from "child_process";
|
|
7397
|
+
var INSTALL_TIMEOUT_MS = 9e4;
|
|
7398
|
+
var HEARTBEAT_INTERVAL_MS = 5e3;
|
|
7399
|
+
/**
 * Default command runner: spawns `cmd` with `args` and settles once the child
 * process has finished.
 *
 * The child inherits this process's stdio, so its output is streamed straight
 * through rather than captured.
 *
 * @param {string} cmd - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {{ cwd?: string, env?: object, timeoutMs?: number }} opts - Working
 *   directory, environment and optional timeout. When `timeoutMs` is a
 *   positive finite number the child is killed with SIGKILL after that many
 *   milliseconds; otherwise no timeout is applied.
 * @returns {Promise<void>} Resolves when the child exits with code 0. Rejects
 *   with an `Error` on spawn failure, timeout, non-zero exit code, or
 *   termination by a signal.
 */
function defaultExec(cmd, args, opts) {
  return new Promise((resolve3, reject) => {
    const child = spawn(cmd, args, {
      cwd: opts.cwd,
      env: opts.env,
      stdio: "inherit"
    });
    // Only arm the watchdog for a usable delay. setTimeout coerces a missing,
    // NaN or out-of-range delay to ~1ms, which would kill the child at once.
    const hasTimeout = Number.isFinite(opts.timeoutMs) && opts.timeoutMs > 0;
    const timer = hasTimeout ? setTimeout(() => {
      child.kill("SIGKILL");
      reject(
        new Error(
          `${cmd} ${args.join(" ")} timed out after ${opts.timeoutMs}ms`
        )
      );
    }, opts.timeoutMs) : void 0;
    child.on("error", (err) => {
      clearTimeout(timer);
      reject(err);
    });
    child.on("close", (code, signal) => {
      clearTimeout(timer);
      if (code === 0) {
        resolve3();
      } else if (code === null && signal) {
        // A signal-terminated child has no exit code; report the signal
        // instead of the uninformative "exited with code null". After a
        // timeout the promise is already rejected, so this call is a no-op.
        reject(new Error(`${cmd} was terminated by signal ${signal}`));
      } else {
        reject(new Error(`${cmd} exited with code ${code}`));
      }
    });
  });
}
|
|
7397
7428
|
function detectPackageManager(workDir) {
|
|
7398
7429
|
if (existsSync(path.join(workDir, "pnpm-lock.yaml"))) {
|
|
7399
7430
|
return {
|
|
@@ -7431,7 +7462,34 @@ function cloneDirectory(src, dest) {
|
|
|
7431
7462
|
cpSync(src, dest, { recursive: true });
|
|
7432
7463
|
}
|
|
7433
7464
|
}
|
|
7434
|
-
function
|
|
7465
|
+
/**
 * Runs the package-manager install command through `exec`, reporting
 * diagnostics via `onProgress`: a start message, a periodic "still running"
 * heartbeat, and a final finished/FAILED message.
 *
 * Failures are reported through `onProgress` and signalled by the return
 * value; nothing is rethrown to the caller.
 *
 * @param {Function} exec - Runner invoked as `exec(cmd, args, options)`; awaited.
 * @param {{ cmd: string, args: string[] }} pm - Install command and its arguments.
 * @param {string} workDir - Directory passed to `exec` as `cwd`.
 * @param {(message: string) => void} onProgress - Receives diagnostic messages.
 * @returns {Promise<boolean>} `true` when the install completed, `false` otherwise.
 */
async function runInstall(exec, pm, workDir, onProgress) {
  const commandLine = `${pm.cmd} ${pm.args.join(" ")}`;
  onProgress(`[diag] npm install starting: ${commandLine}`);

  const startedAt = Date.now();
  const elapsedMs = () => Date.now() - startedAt;

  // Periodic sign of life while the install is in flight.
  const heartbeat = setInterval(() => {
    const elapsedSeconds = Math.round(elapsedMs() / 1e3);
    onProgress(`[diag] npm install still running: ${elapsedSeconds}s elapsed`);
  }, HEARTBEAT_INTERVAL_MS);

  let succeeded = false;
  try {
    const execOptions = {
      cwd: workDir,
      timeoutMs: INSTALL_TIMEOUT_MS,
      env: { ...process.env, NODE_ENV: "development" }
    };
    await exec(pm.cmd, pm.args, execOptions);
    onProgress(`[diag] npm install finished in ${elapsedMs()}ms`);
    succeeded = true;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    onProgress(`[diag] npm install FAILED after ${elapsedMs()}ms: ${reason}`);
  } finally {
    // Always stop the heartbeat, whether the install succeeded or failed.
    clearInterval(heartbeat);
  }
  return succeeded;
}
|
|
7492
|
+
async function installWithCache(workDir, exec, cacheBase, pm, onProgress) {
|
|
7435
7493
|
const sourceContent = readFileSync(
|
|
7436
7494
|
path.join(workDir, pm.cacheSourceFile),
|
|
7437
7495
|
"utf-8"
|
|
@@ -7445,29 +7503,19 @@ function installWithCache(workDir, exec, cacheBase, pm) {
|
|
|
7445
7503
|
console.log(
|
|
7446
7504
|
`[environment] Restoring node_modules from cache (key: ${cacheKey})`
|
|
7447
7505
|
);
|
|
7506
|
+
onProgress(`[diag] node_modules cache HIT (key: ${cacheKey}) \u2014 restoring`);
|
|
7448
7507
|
if (!existsSync(targetNodeModules)) {
|
|
7449
7508
|
cloneDirectory(cachedNodeModules, targetNodeModules);
|
|
7450
7509
|
}
|
|
7451
7510
|
if (existsSync(cachedYarnLock)) {
|
|
7452
7511
|
copyFileSync(cachedYarnLock, path.join(workDir, "yarn.lock"));
|
|
7453
7512
|
}
|
|
7513
|
+
onProgress("[diag] node_modules cache restore complete");
|
|
7454
7514
|
return;
|
|
7455
7515
|
}
|
|
7456
|
-
|
|
7457
|
-
|
|
7458
|
-
)
|
|
7459
|
-
try {
|
|
7460
|
-
exec(pm.cmd, pm.args, {
|
|
7461
|
-
cwd: workDir,
|
|
7462
|
-
stdio: "inherit",
|
|
7463
|
-
timeout: 18e4,
|
|
7464
|
-
env: { ...process.env, NODE_ENV: "development" }
|
|
7465
|
-
});
|
|
7466
|
-
} catch (err) {
|
|
7467
|
-
console.error(
|
|
7468
|
-
"[environment] Dependency installation failed:",
|
|
7469
|
-
err instanceof Error ? err.message : String(err)
|
|
7470
|
-
);
|
|
7516
|
+
onProgress(`[diag] node_modules cache MISS (key: ${cacheKey})`);
|
|
7517
|
+
const ok = await runInstall(exec, pm, workDir, onProgress);
|
|
7518
|
+
if (!ok) {
|
|
7471
7519
|
return;
|
|
7472
7520
|
}
|
|
7473
7521
|
console.log(
|
|
@@ -7491,31 +7539,15 @@ async function installDependencies(workDir, onProgress, options = {}) {
|
|
|
7491
7539
|
if (!existsSync(path.join(workDir, "package.json"))) {
|
|
7492
7540
|
return;
|
|
7493
7541
|
}
|
|
7494
|
-
const exec = options.exec ??
|
|
7542
|
+
const exec = options.exec ?? defaultExec;
|
|
7495
7543
|
const cacheBase = options.cacheBase;
|
|
7496
7544
|
onProgress("Installing dependencies...");
|
|
7497
7545
|
const pm = detectPackageManager(workDir);
|
|
7498
7546
|
if (cacheBase) {
|
|
7499
|
-
installWithCache(workDir, exec, cacheBase, pm);
|
|
7547
|
+
await installWithCache(workDir, exec, cacheBase, pm, onProgress);
|
|
7500
7548
|
return;
|
|
7501
7549
|
}
|
|
7502
|
-
|
|
7503
|
-
`[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir}`
|
|
7504
|
-
);
|
|
7505
|
-
try {
|
|
7506
|
-
exec(pm.cmd, pm.args, {
|
|
7507
|
-
cwd: workDir,
|
|
7508
|
-
stdio: "inherit",
|
|
7509
|
-
timeout: 18e4,
|
|
7510
|
-
env: { ...process.env, NODE_ENV: "development" }
|
|
7511
|
-
});
|
|
7512
|
-
console.log("[environment] Dependency installation complete");
|
|
7513
|
-
} catch (err) {
|
|
7514
|
-
console.error(
|
|
7515
|
-
"[environment] Dependency installation failed:",
|
|
7516
|
-
err instanceof Error ? err.message : String(err)
|
|
7517
|
-
);
|
|
7518
|
-
}
|
|
7550
|
+
await runInstall(exec, pm, workDir, onProgress);
|
|
7519
7551
|
}
|
|
7520
7552
|
|
|
7521
7553
|
// src/run-scenario/environment.ts
|
|
@@ -7591,7 +7623,9 @@ async function fetchAndWriteTemplateFiles(template, workDir, onProgress) {
|
|
|
7591
7623
|
`Template "${template.name}" has no source configured, creating empty directory`
|
|
7592
7624
|
);
|
|
7593
7625
|
}
|
|
7626
|
+
onProgress(`[diag] writing ${sourceFiles.length} source file(s) to workDir`);
|
|
7594
7627
|
await writeFilesToDirectory(workDir, sourceFiles);
|
|
7628
|
+
onProgress(`[diag] ${sourceFiles.length} source file(s) written`);
|
|
7595
7629
|
const extraFiles = template.extraFiles ?? [];
|
|
7596
7630
|
onProgress(`[diag] resolving ${extraFiles.length} extra file(s)`);
|
|
7597
7631
|
await Promise.all(
|
|
@@ -7672,10 +7706,13 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
|
|
|
7672
7706
|
onProgress("Fetching template files...");
|
|
7673
7707
|
await fetchAndWriteTemplateFiles(template, workDir2, onProgress);
|
|
7674
7708
|
console.log(`Template files written to ${workDir2}`);
|
|
7709
|
+
onProgress("[diag] writing wix env file");
|
|
7675
7710
|
writeWixEnvFile(workDir2);
|
|
7711
|
+
onProgress("[diag] entering installDependencies");
|
|
7676
7712
|
await installDependencies(workDir2, onProgress, {
|
|
7677
7713
|
cacheBase: nodeModulesCacheDir
|
|
7678
7714
|
});
|
|
7715
|
+
onProgress("[diag] installDependencies returned");
|
|
7679
7716
|
onProgress("Environment ready");
|
|
7680
7717
|
return workDir2;
|
|
7681
7718
|
}
|
|
@@ -8489,7 +8526,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
8489
8526
|
const SDK_TIMEOUT_MS = resolveTimeoutMs(maxTurns, options.maxDurationMs);
|
|
8490
8527
|
let timeoutHandle;
|
|
8491
8528
|
let timedOut = false;
|
|
8492
|
-
const
|
|
8529
|
+
const HEARTBEAT_INTERVAL_MS2 = 1e4;
|
|
8493
8530
|
let heartbeatHandle;
|
|
8494
8531
|
const executionStartTime = Date.now();
|
|
8495
8532
|
try {
|
|
@@ -8541,7 +8578,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
8541
8578
|
isComplete: false
|
|
8542
8579
|
};
|
|
8543
8580
|
emitTraceEvent(progressEvent, traceContext.pushEvent);
|
|
8544
|
-
},
|
|
8581
|
+
}, HEARTBEAT_INTERVAL_MS2);
|
|
8545
8582
|
}
|
|
8546
8583
|
const sdkPromise = (async () => {
|
|
8547
8584
|
const hasImages = scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0;
|
|
@@ -9307,7 +9344,7 @@ defaultRegistry.register(claudeCodeAdapter);
|
|
|
9307
9344
|
import { AgentRunCommand as AgentRunCommand2, OpenCodeConfigSchema as OpenCodeConfigSchema2 } from "@wix/evalforge-types";
|
|
9308
9345
|
|
|
9309
9346
|
// src/run-scenario/agents/opencode/execute.ts
|
|
9310
|
-
import { spawn } from "child_process";
|
|
9347
|
+
import { spawn as spawn2 } from "child_process";
|
|
9311
9348
|
import {
|
|
9312
9349
|
DEFAULT_EVALUATOR_SYSTEM_PROMPT as DEFAULT_EVALUATOR_SYSTEM_PROMPT2,
|
|
9313
9350
|
LiveTraceEventType as LiveTraceEventType2
|
|
@@ -10114,7 +10151,7 @@ function spawnOpenCodeProcess(opts) {
|
|
|
10114
10151
|
};
|
|
10115
10152
|
let child;
|
|
10116
10153
|
try {
|
|
10117
|
-
child =
|
|
10154
|
+
child = spawn2("opencode", args, {
|
|
10118
10155
|
cwd,
|
|
10119
10156
|
env,
|
|
10120
10157
|
stdio: ["ignore", "pipe", "pipe"],
|
|
@@ -12158,11 +12195,15 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
|
|
|
12158
12195
|
);
|
|
12159
12196
|
let provisionedSite;
|
|
12160
12197
|
if (apiClient && projectId2 && scenario.siteSetup && scenario.siteSetup.mode !== "none") {
|
|
12198
|
+
emitSetupProgress(
|
|
12199
|
+
`[diag] provisioning scenario site (mode: ${scenario.siteSetup.mode})...`
|
|
12200
|
+
);
|
|
12161
12201
|
provisionedSite = await apiClient.provisionScenarioSite(
|
|
12162
12202
|
projectId2,
|
|
12163
12203
|
evalRunId2,
|
|
12164
12204
|
scenario.id
|
|
12165
12205
|
);
|
|
12206
|
+
emitSetupProgress("[diag] scenario site provisioned");
|
|
12166
12207
|
}
|
|
12167
12208
|
const failedStep = provisionedSite?.bootstrapResult?.steps.find((s) => !s.ok);
|
|
12168
12209
|
if (failedStep) {
|
|
@@ -12195,6 +12236,7 @@ Site ID: ${provisionedSite.id}` : scenario.triggerPrompt;
|
|
|
12195
12236
|
emitSetupProgress,
|
|
12196
12237
|
{ template }
|
|
12197
12238
|
);
|
|
12239
|
+
emitSetupProgress("[diag] starting agent run");
|
|
12198
12240
|
const partialResult = await runAgentWithContext(
|
|
12199
12241
|
config,
|
|
12200
12242
|
evalRunId2,
|