@wix/evalforge-evaluator 0.191.0 → 0.193.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +103 -37
- package/build/index.js.map +3 -3
- package/build/index.mjs +116 -43
- package/build/index.mjs.map +3 -3
- package/build/types/run-scenario/install-dependencies.d.ts +14 -2
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -7390,10 +7390,69 @@ async function writeFilesToDirectory(targetDir, files) {
|
|
|
7390
7390
|
}
|
|
7391
7391
|
|
|
7392
7392
|
// src/run-scenario/install-dependencies.ts
|
|
7393
|
-
import {
|
|
7393
|
+
import {
|
|
7394
|
+
mkdirSync,
|
|
7395
|
+
existsSync,
|
|
7396
|
+
readFileSync,
|
|
7397
|
+
readdirSync,
|
|
7398
|
+
copyFileSync,
|
|
7399
|
+
cpSync
|
|
7400
|
+
} from "fs";
|
|
7394
7401
|
import { createHash } from "crypto";
|
|
7395
7402
|
import path from "path";
|
|
7396
|
-
import { execFileSync } from "child_process";
|
|
7403
|
+
import { spawn, execFileSync } from "child_process";
|
|
7404
|
+
// Time limit in milliseconds (1e5 ms = 100 s) that runInstall passes as
// `timeoutMs` to the exec function running the package-manager install.
var INSTALL_TIMEOUT_MS = 1e5;
|
|
7405
|
+
// Delay in milliseconds (5e3 ms = 5 s) between the "still running" progress
// messages that runInstall emits while the install is in flight.
var HEARTBEAT_INTERVAL_MS = 5e3;
|
|
7406
|
+
/**
 * Emits one diagnostic progress line naming the npm registry that is
 * configured for `workDir`.
 *
 * The registry is read by running `npm config get registry` synchronously in
 * `workDir`. A failed lookup is never propagated: it is reported through
 * `onProgress` as `<unreadable: …>` instead.
 *
 * @param {string} workDir - Directory used as the working directory for npm.
 * @param {(message: string) => void} onProgress - Receives the diagnostic line.
 * @returns {void}
 */
function reportRegistry(workDir, onProgress) {
  // This is a purely diagnostic, synchronous call. Bound it so a stuck npm
  // process cannot block the caller indefinitely; on expiry execFileSync
  // throws and the failure is reported below like any other lookup error.
  const REGISTRY_LOOKUP_TIMEOUT_MS = 15e3;
  try {
    const registry = execFileSync("npm", ["config", "get", "registry"], {
      cwd: workDir,
      encoding: "utf8",
      timeout: REGISTRY_LOOKUP_TIMEOUT_MS
    }).trim();
    onProgress(`[diag] npm registry = ${registry}`);
  } catch (err) {
    onProgress(
      `[diag] npm registry = <unreadable: ${err instanceof Error ? err.message : String(err)}>`
    );
  }
}
|
|
7419
|
+
/**
 * Emits one diagnostic progress line with the number of top-level entries in
 * `<workDir>/node_modules` (0 when the directory does not exist).
 *
 * This is best-effort diagnostics and never throws. If the directory cannot
 * be read, the failure is reported as `<unreadable: …>` rather than being
 * dropped silently, so the progress log still shows that the check ran.
 *
 * @param {string} workDir - Directory expected to contain `node_modules`.
 * @param {(message: string) => void} onProgress - Receives the diagnostic line.
 * @returns {void}
 */
function reportNodeModules(workDir, onProgress) {
  try {
    const nm = path.join(workDir, "node_modules");
    const count = existsSync(nm) ? readdirSync(nm).length : 0;
    onProgress(`[diag] node_modules top-level entries: ${count}`);
  } catch (err) {
    try {
      onProgress(
        `[diag] node_modules top-level entries: <unreadable: ${err instanceof Error ? err.message : String(err)}>`
      );
    } catch {
      // Deliberately ignored: the failure came from (or also affects) the
      // progress sink itself, and diagnostics must not break the install flow.
    }
  }
}
|
|
7427
|
+
/**
 * Runs `cmd args…` as a child process with inherited stdio and resolves when
 * it exits with code 0.
 *
 * The returned promise rejects when:
 *  - the process cannot be spawned or emits an `error` event,
 *  - it exits with a non-zero code,
 *  - it is terminated by a signal (the signal name is included in the message),
 *  - it is still running after `opts.timeoutMs`, in which case it is sent
 *    SIGKILL and the rejection says it timed out.
 *
 * @param {string} cmd - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {{ cwd: string, env: object, timeoutMs: number }} opts - Working
 *   directory, environment and time limit in milliseconds.
 * @returns {Promise<void>}
 */
function defaultExec(cmd, args, opts) {
  return new Promise((resolve3, reject) => {
    let timer;
    let settled = false;
    // Single exit point: the timeout, "error" and "close" paths can all fire
    // for the same child (e.g. "close" always follows the SIGKILL sent on
    // timeout), so only the first outcome is allowed to settle the promise.
    const settle = (error) => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timer);
      if (error === void 0) {
        resolve3();
      } else {
        reject(error);
      }
    };
    const child = spawn(cmd, args, {
      cwd: opts.cwd,
      env: opts.env,
      stdio: "inherit"
    });
    timer = setTimeout(() => {
      child.kill("SIGKILL");
      settle(
        new Error(
          `${cmd} ${args.join(" ")} timed out after ${opts.timeoutMs}ms`
        )
      );
    }, opts.timeoutMs);
    child.on("error", (err) => {
      settle(err);
    });
    child.on("close", (code, signal) => {
      if (code === 0) {
        settle();
      } else if (code === null) {
        // A null exit code means the child was ended by a signal; report the
        // signal instead of the misleading "exited with code null".
        settle(new Error(`${cmd} was terminated by signal ${signal}`));
      } else {
        settle(new Error(`${cmd} exited with code ${code}`));
      }
    });
  });
}
|
|
7397
7456
|
function detectPackageManager(workDir) {
|
|
7398
7457
|
if (existsSync(path.join(workDir, "pnpm-lock.yaml"))) {
|
|
7399
7458
|
return {
|
|
@@ -7431,7 +7490,37 @@ function cloneDirectory(src, dest) {
|
|
|
7431
7490
|
cpSync(src, dest, { recursive: true });
|
|
7432
7491
|
}
|
|
7433
7492
|
}
|
|
7434
|
-
function
|
|
7493
|
+
/**
 * Runs the package-manager install command and narrates it through
 * `onProgress`.
 *
 * Sequence: report the npm registry, announce the command, emit a periodic
 * "still running" heartbeat while `exec` is pending, then report either
 * completion or failure followed by a node_modules entry count. Failures of
 * `exec` are reported, not thrown.
 *
 * @param {(cmd: string, args: string[], opts: object) => Promise<void>} exec -
 *   Function that runs the command; called with `cwd`, `timeoutMs` and `env`.
 * @param {{ cmd: string, args: string[] }} pm - Command and arguments to run.
 * @param {string} workDir - Directory in which the install runs.
 * @param {(message: string) => void} onProgress - Receives progress lines.
 * @returns {Promise<boolean>} `true` when the install step completed, `false`
 *   when it failed.
 */
async function runInstall(exec, pm, workDir, onProgress) {
  reportRegistry(workDir, onProgress);

  const commandLine = `${pm.cmd} ${pm.args.join(" ")}`;
  onProgress(`[diag] npm install starting: ${commandLine}`);

  const t0 = Date.now();
  const elapsedMs = () => Date.now() - t0;

  // Periodic sign of life so a long install is distinguishable from a hang.
  const ticker = setInterval(() => {
    const seconds = Math.round(elapsedMs() / 1e3);
    onProgress(`[diag] npm install still running: ${seconds}s elapsed`);
  }, HEARTBEAT_INTERVAL_MS);

  let succeeded;
  try {
    await exec(pm.cmd, pm.args, {
      cwd: workDir,
      timeoutMs: INSTALL_TIMEOUT_MS,
      env: { ...process.env, NODE_ENV: "development" }
    });
    onProgress(`[diag] npm install finished in ${elapsedMs()}ms`);
    reportNodeModules(workDir, onProgress);
    succeeded = true;
  } catch (failure) {
    const tookMs = elapsedMs();
    const reason = failure instanceof Error ? failure.message : String(failure);
    onProgress(`[diag] npm install FAILED after ${tookMs}ms: ${reason}`);
    reportNodeModules(workDir, onProgress);
    succeeded = false;
  } finally {
    // Always stop the heartbeat, whichever way the install ended.
    clearInterval(ticker);
  }
  return succeeded;
}
|
|
7523
|
+
async function installWithCache(workDir, exec, cacheBase, pm, onProgress) {
|
|
7435
7524
|
const sourceContent = readFileSync(
|
|
7436
7525
|
path.join(workDir, pm.cacheSourceFile),
|
|
7437
7526
|
"utf-8"
|
|
@@ -7445,29 +7534,19 @@ function installWithCache(workDir, exec, cacheBase, pm) {
|
|
|
7445
7534
|
console.log(
|
|
7446
7535
|
`[environment] Restoring node_modules from cache (key: ${cacheKey})`
|
|
7447
7536
|
);
|
|
7537
|
+
onProgress(`[diag] node_modules cache HIT (key: ${cacheKey}) \u2014 restoring`);
|
|
7448
7538
|
if (!existsSync(targetNodeModules)) {
|
|
7449
7539
|
cloneDirectory(cachedNodeModules, targetNodeModules);
|
|
7450
7540
|
}
|
|
7451
7541
|
if (existsSync(cachedYarnLock)) {
|
|
7452
7542
|
copyFileSync(cachedYarnLock, path.join(workDir, "yarn.lock"));
|
|
7453
7543
|
}
|
|
7544
|
+
onProgress("[diag] node_modules cache restore complete");
|
|
7454
7545
|
return;
|
|
7455
7546
|
}
|
|
7456
|
-
|
|
7457
|
-
|
|
7458
|
-
)
|
|
7459
|
-
try {
|
|
7460
|
-
exec(pm.cmd, pm.args, {
|
|
7461
|
-
cwd: workDir,
|
|
7462
|
-
stdio: "inherit",
|
|
7463
|
-
timeout: 18e4,
|
|
7464
|
-
env: { ...process.env, NODE_ENV: "development" }
|
|
7465
|
-
});
|
|
7466
|
-
} catch (err) {
|
|
7467
|
-
console.error(
|
|
7468
|
-
"[environment] Dependency installation failed:",
|
|
7469
|
-
err instanceof Error ? err.message : String(err)
|
|
7470
|
-
);
|
|
7547
|
+
onProgress(`[diag] node_modules cache MISS (key: ${cacheKey})`);
|
|
7548
|
+
const ok = await runInstall(exec, pm, workDir, onProgress);
|
|
7549
|
+
if (!ok) {
|
|
7471
7550
|
return;
|
|
7472
7551
|
}
|
|
7473
7552
|
console.log(
|
|
@@ -7491,31 +7570,15 @@ async function installDependencies(workDir, onProgress, options = {}) {
|
|
|
7491
7570
|
if (!existsSync(path.join(workDir, "package.json"))) {
|
|
7492
7571
|
return;
|
|
7493
7572
|
}
|
|
7494
|
-
const exec = options.exec ??
|
|
7573
|
+
const exec = options.exec ?? defaultExec;
|
|
7495
7574
|
const cacheBase = options.cacheBase;
|
|
7496
7575
|
onProgress("Installing dependencies...");
|
|
7497
7576
|
const pm = detectPackageManager(workDir);
|
|
7498
7577
|
if (cacheBase) {
|
|
7499
|
-
installWithCache(workDir, exec, cacheBase, pm);
|
|
7578
|
+
await installWithCache(workDir, exec, cacheBase, pm, onProgress);
|
|
7500
7579
|
return;
|
|
7501
7580
|
}
|
|
7502
|
-
|
|
7503
|
-
`[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir}`
|
|
7504
|
-
);
|
|
7505
|
-
try {
|
|
7506
|
-
exec(pm.cmd, pm.args, {
|
|
7507
|
-
cwd: workDir,
|
|
7508
|
-
stdio: "inherit",
|
|
7509
|
-
timeout: 18e4,
|
|
7510
|
-
env: { ...process.env, NODE_ENV: "development" }
|
|
7511
|
-
});
|
|
7512
|
-
console.log("[environment] Dependency installation complete");
|
|
7513
|
-
} catch (err) {
|
|
7514
|
-
console.error(
|
|
7515
|
-
"[environment] Dependency installation failed:",
|
|
7516
|
-
err instanceof Error ? err.message : String(err)
|
|
7517
|
-
);
|
|
7518
|
-
}
|
|
7581
|
+
await runInstall(exec, pm, workDir, onProgress);
|
|
7519
7582
|
}
|
|
7520
7583
|
|
|
7521
7584
|
// src/run-scenario/environment.ts
|
|
@@ -7591,7 +7654,9 @@ async function fetchAndWriteTemplateFiles(template, workDir, onProgress) {
|
|
|
7591
7654
|
`Template "${template.name}" has no source configured, creating empty directory`
|
|
7592
7655
|
);
|
|
7593
7656
|
}
|
|
7657
|
+
onProgress(`[diag] writing ${sourceFiles.length} source file(s) to workDir`);
|
|
7594
7658
|
await writeFilesToDirectory(workDir, sourceFiles);
|
|
7659
|
+
onProgress(`[diag] ${sourceFiles.length} source file(s) written`);
|
|
7595
7660
|
const extraFiles = template.extraFiles ?? [];
|
|
7596
7661
|
onProgress(`[diag] resolving ${extraFiles.length} extra file(s)`);
|
|
7597
7662
|
await Promise.all(
|
|
@@ -7672,10 +7737,13 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
|
|
|
7672
7737
|
onProgress("Fetching template files...");
|
|
7673
7738
|
await fetchAndWriteTemplateFiles(template, workDir2, onProgress);
|
|
7674
7739
|
console.log(`Template files written to ${workDir2}`);
|
|
7740
|
+
onProgress("[diag] writing wix env file");
|
|
7675
7741
|
writeWixEnvFile(workDir2);
|
|
7742
|
+
onProgress("[diag] entering installDependencies");
|
|
7676
7743
|
await installDependencies(workDir2, onProgress, {
|
|
7677
7744
|
cacheBase: nodeModulesCacheDir
|
|
7678
7745
|
});
|
|
7746
|
+
onProgress("[diag] installDependencies returned");
|
|
7679
7747
|
onProgress("Environment ready");
|
|
7680
7748
|
return workDir2;
|
|
7681
7749
|
}
|
|
@@ -8489,7 +8557,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
8489
8557
|
const SDK_TIMEOUT_MS = resolveTimeoutMs(maxTurns, options.maxDurationMs);
|
|
8490
8558
|
let timeoutHandle;
|
|
8491
8559
|
let timedOut = false;
|
|
8492
|
-
const
|
|
8560
|
+
const HEARTBEAT_INTERVAL_MS2 = 1e4;
|
|
8493
8561
|
let heartbeatHandle;
|
|
8494
8562
|
const executionStartTime = Date.now();
|
|
8495
8563
|
try {
|
|
@@ -8541,7 +8609,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
8541
8609
|
isComplete: false
|
|
8542
8610
|
};
|
|
8543
8611
|
emitTraceEvent(progressEvent, traceContext.pushEvent);
|
|
8544
|
-
},
|
|
8612
|
+
}, HEARTBEAT_INTERVAL_MS2);
|
|
8545
8613
|
}
|
|
8546
8614
|
const sdkPromise = (async () => {
|
|
8547
8615
|
const hasImages = scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0;
|
|
@@ -9307,7 +9375,7 @@ defaultRegistry.register(claudeCodeAdapter);
|
|
|
9307
9375
|
import { AgentRunCommand as AgentRunCommand2, OpenCodeConfigSchema as OpenCodeConfigSchema2 } from "@wix/evalforge-types";
|
|
9308
9376
|
|
|
9309
9377
|
// src/run-scenario/agents/opencode/execute.ts
|
|
9310
|
-
import { spawn } from "child_process";
|
|
9378
|
+
import { spawn as spawn2 } from "child_process";
|
|
9311
9379
|
import {
|
|
9312
9380
|
DEFAULT_EVALUATOR_SYSTEM_PROMPT as DEFAULT_EVALUATOR_SYSTEM_PROMPT2,
|
|
9313
9381
|
LiveTraceEventType as LiveTraceEventType2
|
|
@@ -10114,7 +10182,7 @@ function spawnOpenCodeProcess(opts) {
|
|
|
10114
10182
|
};
|
|
10115
10183
|
let child;
|
|
10116
10184
|
try {
|
|
10117
|
-
child =
|
|
10185
|
+
child = spawn2("opencode", args, {
|
|
10118
10186
|
cwd,
|
|
10119
10187
|
env,
|
|
10120
10188
|
stdio: ["ignore", "pipe", "pipe"],
|
|
@@ -11307,7 +11375,7 @@ var simpleAgentAdapter = new SimpleAgentAdapter();
|
|
|
11307
11375
|
defaultRegistry.register(simpleAgentAdapter);
|
|
11308
11376
|
|
|
11309
11377
|
// src/run-scenario/file-diff.ts
|
|
11310
|
-
import { readdirSync, readFileSync as readFileSync3, statSync, existsSync as existsSync3 } from "fs";
|
|
11378
|
+
import { readdirSync as readdirSync2, readFileSync as readFileSync3, statSync, existsSync as existsSync3 } from "fs";
|
|
11311
11379
|
import { join as join10, relative } from "path";
|
|
11312
11380
|
|
|
11313
11381
|
// ../../node_modules/diff/lib/index.mjs
|
|
@@ -11931,7 +11999,7 @@ function snapshotDirectory(dir, baseDir) {
|
|
|
11931
11999
|
if (!existsSync3(dir)) {
|
|
11932
12000
|
return snapshot;
|
|
11933
12001
|
}
|
|
11934
|
-
const entries =
|
|
12002
|
+
const entries = readdirSync2(dir, { withFileTypes: true });
|
|
11935
12003
|
for (const entry of entries) {
|
|
11936
12004
|
const fullPath = join10(dir, entry.name);
|
|
11937
12005
|
const relativePath = relative(base, fullPath);
|
|
@@ -12158,11 +12226,15 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
|
|
|
12158
12226
|
);
|
|
12159
12227
|
let provisionedSite;
|
|
12160
12228
|
if (apiClient && projectId2 && scenario.siteSetup && scenario.siteSetup.mode !== "none") {
|
|
12229
|
+
emitSetupProgress(
|
|
12230
|
+
`[diag] provisioning scenario site (mode: ${scenario.siteSetup.mode})...`
|
|
12231
|
+
);
|
|
12161
12232
|
provisionedSite = await apiClient.provisionScenarioSite(
|
|
12162
12233
|
projectId2,
|
|
12163
12234
|
evalRunId2,
|
|
12164
12235
|
scenario.id
|
|
12165
12236
|
);
|
|
12237
|
+
emitSetupProgress("[diag] scenario site provisioned");
|
|
12166
12238
|
}
|
|
12167
12239
|
const failedStep = provisionedSite?.bootstrapResult?.steps.find((s) => !s.ok);
|
|
12168
12240
|
if (failedStep) {
|
|
@@ -12195,6 +12267,7 @@ Site ID: ${provisionedSite.id}` : scenario.triggerPrompt;
|
|
|
12195
12267
|
emitSetupProgress,
|
|
12196
12268
|
{ template }
|
|
12197
12269
|
);
|
|
12270
|
+
emitSetupProgress("[diag] starting agent run");
|
|
12198
12271
|
const partialResult = await runAgentWithContext(
|
|
12199
12272
|
config,
|
|
12200
12273
|
evalRunId2,
|