@wix/evalforge-evaluator 0.187.0 → 0.189.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -50,6 +50,10 @@ Backend calls go through the evalforge ambassador packages (gRPC via
50
50
  user-editable fields, not the system state transitions
51
51
  (`status`/`completedAt`/`jobError`/`jobStatus`) the evaluator writes.
52
52
 
53
+ ## Live trace during environment setup
54
+
55
+ During environment setup, the evaluator emits `PROGRESS` trace events via the shared `emitTraceEvent` helper. Every run emits "Setting up environment..."; templated runs additionally emit "Fetching template files...", "Installing dependencies..." (only when the working directory contains a `package.json` after the template files are written), and "Environment ready". Because `emitTraceEvent` writes to stdout (captured by the backend for local runs) and also calls the `pushEvent` callback when one is provided (used for remote jobs via `tracePushUrl`), these events appear in the live trace in both local and remote runs. Without them, the trace panel stays blank during the often multi-minute setup phase before the agent starts.
56
+
53
57
  ## Scripts
54
58
 
55
59
  ```bash
package/build/index.js CHANGED
@@ -7462,10 +7462,13 @@ function installWithCache(workDir, exec, cacheBase, pm) {
7462
7462
  );
7463
7463
  }
7464
7464
  }
7465
- async function installDependencies(workDir, exec = import_child_process.execFileSync, cacheBase) {
7465
+ async function installDependencies(workDir, onProgress, options = {}) {
7466
7466
  if (!(0, import_fs.existsSync)(import_path2.default.join(workDir, "package.json"))) {
7467
7467
  return;
7468
7468
  }
7469
+ const exec = options.exec ?? import_child_process.execFileSync;
7470
+ const cacheBase = options.cacheBase;
7471
+ onProgress("Installing dependencies...");
7469
7472
  const pm = detectPackageManager(workDir);
7470
7473
  if (cacheBase) {
7471
7474
  installWithCache(workDir, exec, cacheBase, pm);
@@ -7541,7 +7544,8 @@ function writeWixEnvFile(workDir) {
7541
7544
  console.warn("[environment] Failed to read wix.config.json");
7542
7545
  }
7543
7546
  }
7544
- async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId, template) {
7547
+ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId, onProgress, options = {}) {
7548
+ const template = options.template;
7545
7549
  const baseDir = config.evaluationsDir ?? import_path3.default.join((0, import_os.tmpdir)(), "evalforge-evaluations");
7546
7550
  const nodeModulesCacheDir = import_path3.default.join(baseDir, "_node_modules_cache");
7547
7551
  if (template) {
@@ -7555,10 +7559,14 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
7555
7559
  (0, import_fs2.rmSync)(workDir2, { recursive: true });
7556
7560
  }
7557
7561
  (0, import_fs2.mkdirSync)(workDir2, { recursive: true });
7562
+ onProgress("Fetching template files...");
7558
7563
  await fetchAndWriteTemplateFiles(template, workDir2);
7559
7564
  console.log(`Template files written to ${workDir2}`);
7560
7565
  writeWixEnvFile(workDir2);
7561
- await installDependencies(workDir2, void 0, nodeModulesCacheDir);
7566
+ await installDependencies(workDir2, onProgress, {
7567
+ cacheBase: nodeModulesCacheDir
7568
+ });
7569
+ onProgress("Environment ready");
7562
7570
  return workDir2;
7563
7571
  }
7564
7572
  const workDir = import_path3.default.join(baseDir, `${evalRunId2}_${targetId}_${scenarioId}`);
@@ -7570,6 +7578,13 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
7570
7578
  return workDir;
7571
7579
  }
7572
7580
 
7581
+ // src/run-scenario/agents/shared/trace-emit.ts
7582
+ var import_evalforge_types2 = require("@wix/evalforge-types");
7583
+ function emitTraceEvent(event, pushEvent) {
7584
+ console.log(`${import_evalforge_types2.TRACE_EVENT_PREFIX}${JSON.stringify(event)}`);
7585
+ pushEvent?.(event);
7586
+ }
7587
+
7573
7588
  // src/run-scenario/run-agent-with-context.ts
7574
7589
  var import_crypto5 = require("crypto");
7575
7590
 
@@ -7734,7 +7749,7 @@ var import_crypto2 = require("crypto");
7734
7749
  // src/run-scenario/agents/claude-code/write-mcp.ts
7735
7750
  var import_promises5 = require("fs/promises");
7736
7751
  var import_path6 = require("path");
7737
- var import_evalforge_types2 = require("@wix/evalforge-types");
7752
+ var import_evalforge_types3 = require("@wix/evalforge-types");
7738
7753
 
7739
7754
  // src/run-scenario/agents/shared/resolve-mcp-placeholders.ts
7740
7755
  var import_promises4 = require("fs/promises");
@@ -7795,7 +7810,7 @@ async function writeMcpToFilesystem(cwd, mcps) {
7795
7810
  }
7796
7811
  const resolvedServers = await resolveMcpPlaceholders(mcpServers, { cwd });
7797
7812
  const content = JSON.stringify(
7798
- { [import_evalforge_types2.MCP_SERVERS_JSON_KEY]: resolvedServers },
7813
+ { [import_evalforge_types3.MCP_SERVERS_JSON_KEY]: resolvedServers },
7799
7814
  null,
7800
7815
  2
7801
7816
  );
@@ -8021,13 +8036,6 @@ function buildConversation(timestampedMessages) {
8021
8036
  return messages;
8022
8037
  }
8023
8038
 
8024
- // src/run-scenario/agents/shared/trace-emit.ts
8025
- var import_evalforge_types3 = require("@wix/evalforge-types");
8026
- function emitTraceEvent(event, pushEvent) {
8027
- console.log(`${import_evalforge_types3.TRACE_EVENT_PREFIX}${JSON.stringify(event)}`);
8028
- pushEvent?.(event);
8029
- }
8030
-
8031
8039
  // src/run-scenario/agents/claude-code/execute.ts
8032
8040
  var DEFAULT_MODEL = import_evalforge_types4.ClaudeModel.CLAUDE_4_5_SONNET_1_0;
8033
8041
  async function* buildPromptStream(triggerPrompt, images) {
@@ -11992,6 +12000,22 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
11992
12000
  async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions, pushEvent, apiClient, projectId2) {
11993
12001
  const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
11994
12002
  const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
12003
+ const emitSetupProgress = (outputPreview) => emitTraceEvent(
12004
+ {
12005
+ evalRunId: evalRunId2,
12006
+ scenarioId: scenario.id,
12007
+ scenarioName: scenario.name,
12008
+ targetId,
12009
+ targetName,
12010
+ stepNumber: 0,
12011
+ type: import_evalforge_types13.LiveTraceEventType.PROGRESS,
12012
+ outputPreview,
12013
+ elapsedMs: 0,
12014
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
12015
+ isComplete: false
12016
+ },
12017
+ pushEvent
12018
+ );
11995
12019
  let provisionedSite;
11996
12020
  if (apiClient && projectId2 && scenario.siteSetup && scenario.siteSetup.mode !== "none") {
11997
12021
  provisionedSite = await apiClient.provisionScenarioSite(
@@ -12022,29 +12046,14 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
12022
12046
 
12023
12047
  Site ID: ${provisionedSite.id}` : scenario.triggerPrompt;
12024
12048
  try {
12025
- if (template) {
12026
- console.log(
12027
- (0, import_evalforge_types13.formatTraceEventLine)({
12028
- evalRunId: evalRunId2,
12029
- scenarioId: scenario.id,
12030
- scenarioName: scenario.name,
12031
- targetId,
12032
- targetName,
12033
- stepNumber: 0,
12034
- type: import_evalforge_types13.LiveTraceEventType.PROGRESS,
12035
- outputPreview: "Setting up environment (installing dependencies)...",
12036
- elapsedMs: 0,
12037
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
12038
- isComplete: false
12039
- })
12040
- );
12041
- }
12049
+ emitSetupProgress("Setting up environment...");
12042
12050
  const workDir = await prepareWorkingDirectory(
12043
12051
  config,
12044
12052
  evalRunId2,
12045
12053
  targetId,
12046
12054
  scenario.id,
12047
- template
12055
+ emitSetupProgress,
12056
+ { template }
12048
12057
  );
12049
12058
  const partialResult = await runAgentWithContext(
12050
12059
  config,