@wix/evalforge-evaluator 0.187.0 → 0.188.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/build/index.js +39 -30
- package/build/index.js.map +4 -4
- package/build/index.mjs +38 -30
- package/build/index.mjs.map +3 -3
- package/build/types/run-scenario/environment.d.ts +6 -2
- package/build/types/run-scenario/install-dependencies.d.ts +9 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -50,6 +50,10 @@ Backend calls go through the evalforge ambassador packages (gRPC via
|
|
|
50
50
|
user-editable fields, not the system state transitions
|
|
51
51
|
(`status`/`completedAt`/`jobError`/`jobStatus`) the evaluator writes.
|
|
52
52
|
|
|
53
|
+
## Live trace during environment setup
|
|
54
|
+
|
|
55
|
+
For templated runs, the evaluator emits `PROGRESS` trace events during environment setup — "Setting up environment", "Fetching template files", "Installing dependencies", "Environment ready" — via the shared `emitTraceEvent` helper. Because `emitTraceEvent` writes to stdout (captured by the backend for local runs) and also calls the `pushEvent` callback (used for remote jobs via `tracePushUrl`), these events appear in the live trace in both local and remote runs. Without them, the trace panel stays blank during the often multi-minute setup phase before the agent starts.
|
|
56
|
+
|
|
53
57
|
## Scripts
|
|
54
58
|
|
|
55
59
|
```bash
|
package/build/index.js
CHANGED
|
@@ -7462,10 +7462,13 @@ function installWithCache(workDir, exec, cacheBase, pm) {
|
|
|
7462
7462
|
);
|
|
7463
7463
|
}
|
|
7464
7464
|
}
|
|
7465
|
-
async function installDependencies(workDir,
|
|
7465
|
+
async function installDependencies(workDir, onProgress, options = {}) {
|
|
7466
7466
|
if (!(0, import_fs.existsSync)(import_path2.default.join(workDir, "package.json"))) {
|
|
7467
7467
|
return;
|
|
7468
7468
|
}
|
|
7469
|
+
const exec = options.exec ?? import_child_process.execFileSync;
|
|
7470
|
+
const cacheBase = options.cacheBase;
|
|
7471
|
+
onProgress("Installing dependencies...");
|
|
7469
7472
|
const pm = detectPackageManager(workDir);
|
|
7470
7473
|
if (cacheBase) {
|
|
7471
7474
|
installWithCache(workDir, exec, cacheBase, pm);
|
|
@@ -7541,7 +7544,8 @@ function writeWixEnvFile(workDir) {
|
|
|
7541
7544
|
console.warn("[environment] Failed to read wix.config.json");
|
|
7542
7545
|
}
|
|
7543
7546
|
}
|
|
7544
|
-
async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
|
|
7547
|
+
async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId, onProgress, options = {}) {
|
|
7548
|
+
const template = options.template;
|
|
7545
7549
|
const baseDir = config.evaluationsDir ?? import_path3.default.join((0, import_os.tmpdir)(), "evalforge-evaluations");
|
|
7546
7550
|
const nodeModulesCacheDir = import_path3.default.join(baseDir, "_node_modules_cache");
|
|
7547
7551
|
if (template) {
|
|
@@ -7555,10 +7559,14 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
|
|
|
7555
7559
|
(0, import_fs2.rmSync)(workDir2, { recursive: true });
|
|
7556
7560
|
}
|
|
7557
7561
|
(0, import_fs2.mkdirSync)(workDir2, { recursive: true });
|
|
7562
|
+
onProgress("Fetching template files...");
|
|
7558
7563
|
await fetchAndWriteTemplateFiles(template, workDir2);
|
|
7559
7564
|
console.log(`Template files written to ${workDir2}`);
|
|
7560
7565
|
writeWixEnvFile(workDir2);
|
|
7561
|
-
await installDependencies(workDir2,
|
|
7566
|
+
await installDependencies(workDir2, onProgress, {
|
|
7567
|
+
cacheBase: nodeModulesCacheDir
|
|
7568
|
+
});
|
|
7569
|
+
onProgress("Environment ready");
|
|
7562
7570
|
return workDir2;
|
|
7563
7571
|
}
|
|
7564
7572
|
const workDir = import_path3.default.join(baseDir, `${evalRunId2}_${targetId}_${scenarioId}`);
|
|
@@ -7570,6 +7578,13 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
|
|
|
7570
7578
|
return workDir;
|
|
7571
7579
|
}
|
|
7572
7580
|
|
|
7581
|
+
// src/run-scenario/agents/shared/trace-emit.ts
|
|
7582
|
+
var import_evalforge_types2 = require("@wix/evalforge-types");
|
|
7583
|
+
function emitTraceEvent(event, pushEvent) {
|
|
7584
|
+
console.log(`${import_evalforge_types2.TRACE_EVENT_PREFIX}${JSON.stringify(event)}`);
|
|
7585
|
+
pushEvent?.(event);
|
|
7586
|
+
}
|
|
7587
|
+
|
|
7573
7588
|
// src/run-scenario/run-agent-with-context.ts
|
|
7574
7589
|
var import_crypto5 = require("crypto");
|
|
7575
7590
|
|
|
@@ -7734,7 +7749,7 @@ var import_crypto2 = require("crypto");
|
|
|
7734
7749
|
// src/run-scenario/agents/claude-code/write-mcp.ts
|
|
7735
7750
|
var import_promises5 = require("fs/promises");
|
|
7736
7751
|
var import_path6 = require("path");
|
|
7737
|
-
var import_evalforge_types2 = require("@wix/evalforge-types");
|
|
7752
|
+
var import_evalforge_types3 = require("@wix/evalforge-types");
|
|
7738
7753
|
|
|
7739
7754
|
// src/run-scenario/agents/shared/resolve-mcp-placeholders.ts
|
|
7740
7755
|
var import_promises4 = require("fs/promises");
|
|
@@ -7795,7 +7810,7 @@ async function writeMcpToFilesystem(cwd, mcps) {
|
|
|
7795
7810
|
}
|
|
7796
7811
|
const resolvedServers = await resolveMcpPlaceholders(mcpServers, { cwd });
|
|
7797
7812
|
const content = JSON.stringify(
|
|
7798
|
-
{ [import_evalforge_types2.MCP_SERVERS_JSON_KEY]: resolvedServers },
|
|
7813
|
+
{ [import_evalforge_types3.MCP_SERVERS_JSON_KEY]: resolvedServers },
|
|
7799
7814
|
null,
|
|
7800
7815
|
2
|
|
7801
7816
|
);
|
|
@@ -8021,13 +8036,6 @@ function buildConversation(timestampedMessages) {
|
|
|
8021
8036
|
return messages;
|
|
8022
8037
|
}
|
|
8023
8038
|
|
|
8024
|
-
// src/run-scenario/agents/shared/trace-emit.ts
|
|
8025
|
-
var import_evalforge_types3 = require("@wix/evalforge-types");
|
|
8026
|
-
function emitTraceEvent(event, pushEvent) {
|
|
8027
|
-
console.log(`${import_evalforge_types3.TRACE_EVENT_PREFIX}${JSON.stringify(event)}`);
|
|
8028
|
-
pushEvent?.(event);
|
|
8029
|
-
}
|
|
8030
|
-
|
|
8031
8039
|
// src/run-scenario/agents/claude-code/execute.ts
|
|
8032
8040
|
var DEFAULT_MODEL = import_evalforge_types4.ClaudeModel.CLAUDE_4_5_SONNET_1_0;
|
|
8033
8041
|
async function* buildPromptStream(triggerPrompt, images) {
|
|
@@ -11992,6 +12000,22 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
|
|
|
11992
12000
|
async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions, pushEvent, apiClient, projectId2) {
|
|
11993
12001
|
const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
|
|
11994
12002
|
const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
|
|
12003
|
+
const emitSetupProgress = (outputPreview) => emitTraceEvent(
|
|
12004
|
+
{
|
|
12005
|
+
evalRunId: evalRunId2,
|
|
12006
|
+
scenarioId: scenario.id,
|
|
12007
|
+
scenarioName: scenario.name,
|
|
12008
|
+
targetId,
|
|
12009
|
+
targetName,
|
|
12010
|
+
stepNumber: 0,
|
|
12011
|
+
type: import_evalforge_types13.LiveTraceEventType.PROGRESS,
|
|
12012
|
+
outputPreview,
|
|
12013
|
+
elapsedMs: 0,
|
|
12014
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
12015
|
+
isComplete: false
|
|
12016
|
+
},
|
|
12017
|
+
pushEvent
|
|
12018
|
+
);
|
|
11995
12019
|
let provisionedSite;
|
|
11996
12020
|
if (apiClient && projectId2 && scenario.siteSetup && scenario.siteSetup.mode !== "none") {
|
|
11997
12021
|
provisionedSite = await apiClient.provisionScenarioSite(
|
|
@@ -12022,29 +12046,14 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
|
|
|
12022
12046
|
|
|
12023
12047
|
Site ID: ${provisionedSite.id}` : scenario.triggerPrompt;
|
|
12024
12048
|
try {
|
|
12025
|
-
|
|
12026
|
-
console.log(
|
|
12027
|
-
(0, import_evalforge_types13.formatTraceEventLine)({
|
|
12028
|
-
evalRunId: evalRunId2,
|
|
12029
|
-
scenarioId: scenario.id,
|
|
12030
|
-
scenarioName: scenario.name,
|
|
12031
|
-
targetId,
|
|
12032
|
-
targetName,
|
|
12033
|
-
stepNumber: 0,
|
|
12034
|
-
type: import_evalforge_types13.LiveTraceEventType.PROGRESS,
|
|
12035
|
-
outputPreview: "Setting up environment (installing dependencies)...",
|
|
12036
|
-
elapsedMs: 0,
|
|
12037
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
12038
|
-
isComplete: false
|
|
12039
|
-
})
|
|
12040
|
-
);
|
|
12041
|
-
}
|
|
12049
|
+
emitSetupProgress("Setting up environment...");
|
|
12042
12050
|
const workDir = await prepareWorkingDirectory(
|
|
12043
12051
|
config,
|
|
12044
12052
|
evalRunId2,
|
|
12045
12053
|
targetId,
|
|
12046
12054
|
scenario.id,
|
|
12047
|
-
|
|
12055
|
+
emitSetupProgress,
|
|
12056
|
+
{ template }
|
|
12048
12057
|
);
|
|
12049
12058
|
const partialResult = await runAgentWithContext(
|
|
12050
12059
|
config,
|