@wix/evalforge-evaluator 0.190.0 → 0.192.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +109 -41
- package/build/index.js.map +3 -3
- package/build/index.mjs +112 -44
- package/build/index.mjs.map +3 -3
- package/build/types/run-scenario/install-dependencies.d.ts +14 -2
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -7393,7 +7393,38 @@ async function writeFilesToDirectory(targetDir, files) {
|
|
|
7393
7393
|
import { mkdirSync, existsSync, readFileSync, copyFileSync, cpSync } from "fs";
|
|
7394
7394
|
import { createHash } from "crypto";
|
|
7395
7395
|
import path from "path";
|
|
7396
|
-
import { execFileSync } from "child_process";
|
|
7396
|
+
import { spawn, execFileSync } from "child_process";
|
|
7397
|
+
// Upper bound, in milliseconds, for a single dependency-install command (90 s).
var INSTALL_TIMEOUT_MS = 9e4;
// Interval, in milliseconds, between "still running" progress heartbeats (5 s).
var HEARTBEAT_INTERVAL_MS = 5e3;

/**
 * Runs `cmd` with `args` as a child process whose stdio is inherited from the
 * current process, and enforces a hard timeout.
 *
 * @param {string} cmd - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {{cwd?: string, env?: object, timeoutMs?: number}} [opts] - Spawn
 *   options. When `timeoutMs` is missing or not a positive finite number,
 *   `INSTALL_TIMEOUT_MS` is used instead.
 * @returns {Promise<void>} Resolves when the child exits with code 0. Rejects
 *   with an `Error` when the child cannot be spawned, exits with a non-zero
 *   code, is terminated by a signal, or exceeds the timeout (in which case it
 *   is sent SIGKILL).
 */
function defaultExec(cmd, args, opts = {}) {
  // setTimeout treats an undefined/NaN delay as ~0ms, which would SIGKILL the
  // child immediately; fall back to the module default instead.
  const timeoutMs =
    Number.isFinite(opts.timeoutMs) && opts.timeoutMs > 0
      ? opts.timeoutMs
      : INSTALL_TIMEOUT_MS;
  return new Promise((resolve3, reject) => {
    let settled = false;
    let timer;
    // Settle the promise exactly once: after a timeout kill the child still
    // emits "close", and a failed spawn may emit both "error" and "close".
    const settle = (err) => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timer);
      if (err) {
        reject(err);
      } else {
        resolve3();
      }
    };
    const child = spawn(cmd, args, {
      cwd: opts.cwd,
      env: opts.env,
      stdio: "inherit"
    });
    timer = setTimeout(() => {
      child.kill("SIGKILL");
      settle(
        new Error(`${cmd} ${args.join(" ")} timed out after ${timeoutMs}ms`)
      );
    }, timeoutMs);
    child.on("error", (err) => {
      settle(err);
    });
    child.on("close", (code, signal) => {
      if (code === 0) {
        settle();
      } else if (code === null) {
        // A null exit code means the child was terminated by a signal.
        settle(new Error(`${cmd} was terminated by signal ${signal}`));
      } else {
        settle(new Error(`${cmd} exited with code ${code}`));
      }
    });
  });
}
|
|
7397
7428
|
function detectPackageManager(workDir) {
|
|
7398
7429
|
if (existsSync(path.join(workDir, "pnpm-lock.yaml"))) {
|
|
7399
7430
|
return {
|
|
@@ -7431,7 +7462,34 @@ function cloneDirectory(src, dest) {
|
|
|
7431
7462
|
cpSync(src, dest, { recursive: true });
|
|
7432
7463
|
}
|
|
7433
7464
|
}
|
|
7434
|
-
function
|
|
7465
|
+
/**
 * Runs the detected package manager's install command and reports progress,
 * including a periodic "still running" heartbeat, through `onProgress`.
 *
 * Failures are reported through `onProgress` and signalled by the return
 * value; this function does not rethrow errors raised by `exec`.
 *
 * @param {Function} exec - Called as `exec(cmd, args, { cwd, timeoutMs, env })`;
 *   its result is awaited.
 * @param {{cmd: string, args: string[]}} pm - Package manager command and arguments.
 * @param {string} workDir - Directory in which the install command runs.
 * @param {(message: string) => void} onProgress - Receives diagnostic messages.
 * @returns {Promise<boolean>} `true` when `exec` completed, `false` when it threw or rejected.
 */
async function runInstall(exec, pm, workDir, onProgress) {
  // Label diagnostics with the package manager actually in use (npm, pnpm,
  // yarn, ...) instead of always claiming "npm install".
  const label = `${pm.cmd} install`;
  onProgress(`[diag] ${label} starting: ${pm.cmd} ${pm.args.join(" ")}`);
  const startedAt = Date.now();
  const heartbeat = setInterval(() => {
    const elapsedSeconds = Math.round((Date.now() - startedAt) / 1e3);
    onProgress(`[diag] ${label} still running: ${elapsedSeconds}s elapsed`);
  }, HEARTBEAT_INTERVAL_MS);
  try {
    await exec(pm.cmd, pm.args, {
      cwd: workDir,
      timeoutMs: INSTALL_TIMEOUT_MS,
      // Force a development install (overrides any inherited NODE_ENV).
      env: { ...process.env, NODE_ENV: "development" }
    });
    onProgress(`[diag] ${label} finished in ${Date.now() - startedAt}ms`);
    return true;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    onProgress(
      `[diag] ${label} FAILED after ${Date.now() - startedAt}ms: ${reason}`
    );
    return false;
  } finally {
    // Always stop the heartbeat so the timer cannot outlive the install.
    clearInterval(heartbeat);
  }
}
|
|
7492
|
+
async function installWithCache(workDir, exec, cacheBase, pm, onProgress) {
|
|
7435
7493
|
const sourceContent = readFileSync(
|
|
7436
7494
|
path.join(workDir, pm.cacheSourceFile),
|
|
7437
7495
|
"utf-8"
|
|
@@ -7445,29 +7503,19 @@ function installWithCache(workDir, exec, cacheBase, pm) {
|
|
|
7445
7503
|
console.log(
|
|
7446
7504
|
`[environment] Restoring node_modules from cache (key: ${cacheKey})`
|
|
7447
7505
|
);
|
|
7506
|
+
onProgress(`[diag] node_modules cache HIT (key: ${cacheKey}) \u2014 restoring`);
|
|
7448
7507
|
if (!existsSync(targetNodeModules)) {
|
|
7449
7508
|
cloneDirectory(cachedNodeModules, targetNodeModules);
|
|
7450
7509
|
}
|
|
7451
7510
|
if (existsSync(cachedYarnLock)) {
|
|
7452
7511
|
copyFileSync(cachedYarnLock, path.join(workDir, "yarn.lock"));
|
|
7453
7512
|
}
|
|
7513
|
+
onProgress("[diag] node_modules cache restore complete");
|
|
7454
7514
|
return;
|
|
7455
7515
|
}
|
|
7456
|
-
|
|
7457
|
-
|
|
7458
|
-
)
|
|
7459
|
-
try {
|
|
7460
|
-
exec(pm.cmd, pm.args, {
|
|
7461
|
-
cwd: workDir,
|
|
7462
|
-
stdio: "inherit",
|
|
7463
|
-
timeout: 18e4,
|
|
7464
|
-
env: { ...process.env, NODE_ENV: "development" }
|
|
7465
|
-
});
|
|
7466
|
-
} catch (err) {
|
|
7467
|
-
console.error(
|
|
7468
|
-
"[environment] Dependency installation failed:",
|
|
7469
|
-
err instanceof Error ? err.message : String(err)
|
|
7470
|
-
);
|
|
7516
|
+
onProgress(`[diag] node_modules cache MISS (key: ${cacheKey})`);
|
|
7517
|
+
const ok = await runInstall(exec, pm, workDir, onProgress);
|
|
7518
|
+
if (!ok) {
|
|
7471
7519
|
return;
|
|
7472
7520
|
}
|
|
7473
7521
|
console.log(
|
|
@@ -7491,31 +7539,15 @@ async function installDependencies(workDir, onProgress, options = {}) {
|
|
|
7491
7539
|
if (!existsSync(path.join(workDir, "package.json"))) {
|
|
7492
7540
|
return;
|
|
7493
7541
|
}
|
|
7494
|
-
const exec = options.exec ??
|
|
7542
|
+
const exec = options.exec ?? defaultExec;
|
|
7495
7543
|
const cacheBase = options.cacheBase;
|
|
7496
7544
|
onProgress("Installing dependencies...");
|
|
7497
7545
|
const pm = detectPackageManager(workDir);
|
|
7498
7546
|
if (cacheBase) {
|
|
7499
|
-
installWithCache(workDir, exec, cacheBase, pm);
|
|
7547
|
+
await installWithCache(workDir, exec, cacheBase, pm, onProgress);
|
|
7500
7548
|
return;
|
|
7501
7549
|
}
|
|
7502
|
-
|
|
7503
|
-
`[environment] Running ${pm.cmd} ${pm.args.join(" ")} in ${workDir}`
|
|
7504
|
-
);
|
|
7505
|
-
try {
|
|
7506
|
-
exec(pm.cmd, pm.args, {
|
|
7507
|
-
cwd: workDir,
|
|
7508
|
-
stdio: "inherit",
|
|
7509
|
-
timeout: 18e4,
|
|
7510
|
-
env: { ...process.env, NODE_ENV: "development" }
|
|
7511
|
-
});
|
|
7512
|
-
console.log("[environment] Dependency installation complete");
|
|
7513
|
-
} catch (err) {
|
|
7514
|
-
console.error(
|
|
7515
|
-
"[environment] Dependency installation failed:",
|
|
7516
|
-
err instanceof Error ? err.message : String(err)
|
|
7517
|
-
);
|
|
7518
|
-
}
|
|
7550
|
+
await runInstall(exec, pm, workDir, onProgress);
|
|
7519
7551
|
}
|
|
7520
7552
|
|
|
7521
7553
|
// src/run-scenario/environment.ts
|
|
@@ -7591,12 +7623,39 @@ async function fetchAndWriteTemplateFiles(template, workDir, onProgress) {
|
|
|
7591
7623
|
`Template "${template.name}" has no source configured, creating empty directory`
|
|
7592
7624
|
);
|
|
7593
7625
|
}
|
|
7626
|
+
onProgress(`[diag] writing ${sourceFiles.length} source file(s) to workDir`);
|
|
7594
7627
|
await writeFilesToDirectory(workDir, sourceFiles);
|
|
7628
|
+
onProgress(`[diag] ${sourceFiles.length} source file(s) written`);
|
|
7629
|
+
const extraFiles = template.extraFiles ?? [];
|
|
7630
|
+
onProgress(`[diag] resolving ${extraFiles.length} extra file(s)`);
|
|
7595
7631
|
await Promise.all(
|
|
7596
|
-
|
|
7597
|
-
|
|
7598
|
-
|
|
7599
|
-
|
|
7632
|
+
extraFiles.map(async (ef) => {
|
|
7633
|
+
let content;
|
|
7634
|
+
if (ef.gitSource) {
|
|
7635
|
+
const { owner, repo, path: gitPath, ref } = ef.gitSource;
|
|
7636
|
+
const startedAt = Date.now();
|
|
7637
|
+
onProgress(
|
|
7638
|
+
`[diag] extra fetch start: ${ef.path} <- ${owner}/${repo}/${gitPath ?? ""}@${ref ?? "default"}`
|
|
7639
|
+
);
|
|
7640
|
+
try {
|
|
7641
|
+
content = await withTimeout(
|
|
7642
|
+
fetchGitHubFile(ef.gitSource, { userAgent: "EvalForge-Evaluator" }),
|
|
7643
|
+
3e4,
|
|
7644
|
+
`extra file fetch (${ef.path})`
|
|
7645
|
+
);
|
|
7646
|
+
onProgress(
|
|
7647
|
+
`[diag] extra fetch done: ${ef.path} (${content.length} bytes) in ${Date.now() - startedAt}ms`
|
|
7648
|
+
);
|
|
7649
|
+
} catch (err) {
|
|
7650
|
+
onProgress(
|
|
7651
|
+
`[diag] extra fetch FAILED: ${ef.path} after ${Date.now() - startedAt}ms: ${err instanceof Error ? err.message : String(err)}`
|
|
7652
|
+
);
|
|
7653
|
+
throw err;
|
|
7654
|
+
}
|
|
7655
|
+
} else {
|
|
7656
|
+
content = ef.content ?? "";
|
|
7657
|
+
onProgress(`[diag] extra inline: ${ef.path} (${content.length} bytes)`);
|
|
7658
|
+
}
|
|
7600
7659
|
const dest = path2.resolve(workDir, ef.path);
|
|
7601
7660
|
if (!dest.startsWith(workDir + sep2)) {
|
|
7602
7661
|
throw new Error(
|
|
@@ -7607,6 +7666,7 @@ async function fetchAndWriteTemplateFiles(template, workDir, onProgress) {
|
|
|
7607
7666
|
await writeFile2(dest, content, "utf8");
|
|
7608
7667
|
})
|
|
7609
7668
|
);
|
|
7669
|
+
onProgress("[diag] all extra files written");
|
|
7610
7670
|
}
|
|
7611
7671
|
function writeWixEnvFile(workDir) {
|
|
7612
7672
|
const configPath = path2.join(workDir, "wix.config.json");
|
|
@@ -7646,10 +7706,13 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
|
|
|
7646
7706
|
onProgress("Fetching template files...");
|
|
7647
7707
|
await fetchAndWriteTemplateFiles(template, workDir2, onProgress);
|
|
7648
7708
|
console.log(`Template files written to ${workDir2}`);
|
|
7709
|
+
onProgress("[diag] writing wix env file");
|
|
7649
7710
|
writeWixEnvFile(workDir2);
|
|
7711
|
+
onProgress("[diag] entering installDependencies");
|
|
7650
7712
|
await installDependencies(workDir2, onProgress, {
|
|
7651
7713
|
cacheBase: nodeModulesCacheDir
|
|
7652
7714
|
});
|
|
7715
|
+
onProgress("[diag] installDependencies returned");
|
|
7653
7716
|
onProgress("Environment ready");
|
|
7654
7717
|
return workDir2;
|
|
7655
7718
|
}
|
|
@@ -8463,7 +8526,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
8463
8526
|
const SDK_TIMEOUT_MS = resolveTimeoutMs(maxTurns, options.maxDurationMs);
|
|
8464
8527
|
let timeoutHandle;
|
|
8465
8528
|
let timedOut = false;
|
|
8466
|
-
const
|
|
8529
|
+
const HEARTBEAT_INTERVAL_MS2 = 1e4;
|
|
8467
8530
|
let heartbeatHandle;
|
|
8468
8531
|
const executionStartTime = Date.now();
|
|
8469
8532
|
try {
|
|
@@ -8515,7 +8578,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
8515
8578
|
isComplete: false
|
|
8516
8579
|
};
|
|
8517
8580
|
emitTraceEvent(progressEvent, traceContext.pushEvent);
|
|
8518
|
-
},
|
|
8581
|
+
}, HEARTBEAT_INTERVAL_MS2);
|
|
8519
8582
|
}
|
|
8520
8583
|
const sdkPromise = (async () => {
|
|
8521
8584
|
const hasImages = scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0;
|
|
@@ -9281,7 +9344,7 @@ defaultRegistry.register(claudeCodeAdapter);
|
|
|
9281
9344
|
import { AgentRunCommand as AgentRunCommand2, OpenCodeConfigSchema as OpenCodeConfigSchema2 } from "@wix/evalforge-types";
|
|
9282
9345
|
|
|
9283
9346
|
// src/run-scenario/agents/opencode/execute.ts
|
|
9284
|
-
import { spawn } from "child_process";
|
|
9347
|
+
import { spawn as spawn2 } from "child_process";
|
|
9285
9348
|
import {
|
|
9286
9349
|
DEFAULT_EVALUATOR_SYSTEM_PROMPT as DEFAULT_EVALUATOR_SYSTEM_PROMPT2,
|
|
9287
9350
|
LiveTraceEventType as LiveTraceEventType2
|
|
@@ -10088,7 +10151,7 @@ function spawnOpenCodeProcess(opts) {
|
|
|
10088
10151
|
};
|
|
10089
10152
|
let child;
|
|
10090
10153
|
try {
|
|
10091
|
-
child =
|
|
10154
|
+
child = spawn2("opencode", args, {
|
|
10092
10155
|
cwd,
|
|
10093
10156
|
env,
|
|
10094
10157
|
stdio: ["ignore", "pipe", "pipe"],
|
|
@@ -12132,11 +12195,15 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
|
|
|
12132
12195
|
);
|
|
12133
12196
|
let provisionedSite;
|
|
12134
12197
|
if (apiClient && projectId2 && scenario.siteSetup && scenario.siteSetup.mode !== "none") {
|
|
12198
|
+
emitSetupProgress(
|
|
12199
|
+
`[diag] provisioning scenario site (mode: ${scenario.siteSetup.mode})...`
|
|
12200
|
+
);
|
|
12135
12201
|
provisionedSite = await apiClient.provisionScenarioSite(
|
|
12136
12202
|
projectId2,
|
|
12137
12203
|
evalRunId2,
|
|
12138
12204
|
scenario.id
|
|
12139
12205
|
);
|
|
12206
|
+
emitSetupProgress("[diag] scenario site provisioned");
|
|
12140
12207
|
}
|
|
12141
12208
|
const failedStep = provisionedSite?.bootstrapResult?.steps.find((s) => !s.ok);
|
|
12142
12209
|
if (failedStep) {
|
|
@@ -12169,6 +12236,7 @@ Site ID: ${provisionedSite.id}` : scenario.triggerPrompt;
|
|
|
12169
12236
|
emitSetupProgress,
|
|
12170
12237
|
{ template }
|
|
12171
12238
|
);
|
|
12239
|
+
emitSetupProgress("[diag] starting agent run");
|
|
12172
12240
|
const partialResult = await runAgentWithContext(
|
|
12173
12241
|
config,
|
|
12174
12242
|
evalRunId2,
|