@wix/evalforge-evaluator 0.187.0 → 0.188.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/build/index.js +39 -30
- package/build/index.js.map +4 -4
- package/build/index.mjs +38 -30
- package/build/index.mjs.map +3 -3
- package/build/types/run-scenario/environment.d.ts +6 -2
- package/build/types/run-scenario/install-dependencies.d.ts +9 -1
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -7353,8 +7353,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
|
|
|
7353
7353
|
import {
|
|
7354
7354
|
AssertionResultStatus,
|
|
7355
7355
|
DEFAULT_JUDGE_MODEL,
|
|
7356
|
-
LiveTraceEventType as LiveTraceEventType4
|
|
7357
|
-
formatTraceEventLine
|
|
7356
|
+
LiveTraceEventType as LiveTraceEventType4
|
|
7358
7357
|
} from "@wix/evalforge-types";
|
|
7359
7358
|
import {
|
|
7360
7359
|
evaluateAssertions as evaluateAssertionsBase
|
|
@@ -7485,10 +7484,13 @@ function installWithCache(workDir, exec, cacheBase, pm) {
|
|
|
7485
7484
|
);
|
|
7486
7485
|
}
|
|
7487
7486
|
}
|
|
7488
|
-
async function installDependencies(workDir,
|
|
7487
|
+
async function installDependencies(workDir, onProgress, options = {}) {
|
|
7489
7488
|
if (!existsSync(path.join(workDir, "package.json"))) {
|
|
7490
7489
|
return;
|
|
7491
7490
|
}
|
|
7491
|
+
const exec = options.exec ?? execFileSync;
|
|
7492
|
+
const cacheBase = options.cacheBase;
|
|
7493
|
+
onProgress("Installing dependencies...");
|
|
7492
7494
|
const pm = detectPackageManager(workDir);
|
|
7493
7495
|
if (cacheBase) {
|
|
7494
7496
|
installWithCache(workDir, exec, cacheBase, pm);
|
|
@@ -7564,7 +7566,8 @@ function writeWixEnvFile(workDir) {
|
|
|
7564
7566
|
console.warn("[environment] Failed to read wix.config.json");
|
|
7565
7567
|
}
|
|
7566
7568
|
}
|
|
7567
|
-
async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
|
|
7569
|
+
async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId, onProgress, options = {}) {
|
|
7570
|
+
const template = options.template;
|
|
7568
7571
|
const baseDir = config.evaluationsDir ?? path2.join(tmpdir(), "evalforge-evaluations");
|
|
7569
7572
|
const nodeModulesCacheDir = path2.join(baseDir, "_node_modules_cache");
|
|
7570
7573
|
if (template) {
|
|
@@ -7578,10 +7581,14 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
|
|
|
7578
7581
|
rmSync(workDir2, { recursive: true });
|
|
7579
7582
|
}
|
|
7580
7583
|
mkdirSync2(workDir2, { recursive: true });
|
|
7584
|
+
onProgress("Fetching template files...");
|
|
7581
7585
|
await fetchAndWriteTemplateFiles(template, workDir2);
|
|
7582
7586
|
console.log(`Template files written to ${workDir2}`);
|
|
7583
7587
|
writeWixEnvFile(workDir2);
|
|
7584
|
-
await installDependencies(workDir2,
|
|
7588
|
+
await installDependencies(workDir2, onProgress, {
|
|
7589
|
+
cacheBase: nodeModulesCacheDir
|
|
7590
|
+
});
|
|
7591
|
+
onProgress("Environment ready");
|
|
7585
7592
|
return workDir2;
|
|
7586
7593
|
}
|
|
7587
7594
|
const workDir = path2.join(baseDir, `${evalRunId2}_${targetId}_${scenarioId}`);
|
|
@@ -7593,6 +7600,13 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
|
|
|
7593
7600
|
return workDir;
|
|
7594
7601
|
}
|
|
7595
7602
|
|
|
7603
|
+
// src/run-scenario/agents/shared/trace-emit.ts
|
|
7604
|
+
import { TRACE_EVENT_PREFIX } from "@wix/evalforge-types";
|
|
7605
|
+
function emitTraceEvent(event, pushEvent) {
|
|
7606
|
+
console.log(`${TRACE_EVENT_PREFIX}${JSON.stringify(event)}`);
|
|
7607
|
+
pushEvent?.(event);
|
|
7608
|
+
}
|
|
7609
|
+
|
|
7596
7610
|
// src/run-scenario/run-agent-with-context.ts
|
|
7597
7611
|
import { randomUUID as randomUUID4 } from "crypto";
|
|
7598
7612
|
|
|
@@ -8051,13 +8065,6 @@ function buildConversation(timestampedMessages) {
|
|
|
8051
8065
|
return messages;
|
|
8052
8066
|
}
|
|
8053
8067
|
|
|
8054
|
-
// src/run-scenario/agents/shared/trace-emit.ts
|
|
8055
|
-
import { TRACE_EVENT_PREFIX } from "@wix/evalforge-types";
|
|
8056
|
-
function emitTraceEvent(event, pushEvent) {
|
|
8057
|
-
console.log(`${TRACE_EVENT_PREFIX}${JSON.stringify(event)}`);
|
|
8058
|
-
pushEvent?.(event);
|
|
8059
|
-
}
|
|
8060
|
-
|
|
8061
8068
|
// src/run-scenario/agents/claude-code/execute.ts
|
|
8062
8069
|
var DEFAULT_MODEL = ClaudeModel.CLAUDE_4_5_SONNET_1_0;
|
|
8063
8070
|
async function* buildPromptStream(triggerPrompt, images) {
|
|
@@ -12045,6 +12052,22 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
|
|
|
12045
12052
|
async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions, pushEvent, apiClient, projectId2) {
|
|
12046
12053
|
const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
|
|
12047
12054
|
const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
|
|
12055
|
+
const emitSetupProgress = (outputPreview) => emitTraceEvent(
|
|
12056
|
+
{
|
|
12057
|
+
evalRunId: evalRunId2,
|
|
12058
|
+
scenarioId: scenario.id,
|
|
12059
|
+
scenarioName: scenario.name,
|
|
12060
|
+
targetId,
|
|
12061
|
+
targetName,
|
|
12062
|
+
stepNumber: 0,
|
|
12063
|
+
type: LiveTraceEventType4.PROGRESS,
|
|
12064
|
+
outputPreview,
|
|
12065
|
+
elapsedMs: 0,
|
|
12066
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
12067
|
+
isComplete: false
|
|
12068
|
+
},
|
|
12069
|
+
pushEvent
|
|
12070
|
+
);
|
|
12048
12071
|
let provisionedSite;
|
|
12049
12072
|
if (apiClient && projectId2 && scenario.siteSetup && scenario.siteSetup.mode !== "none") {
|
|
12050
12073
|
provisionedSite = await apiClient.provisionScenarioSite(
|
|
@@ -12075,29 +12098,14 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
|
|
|
12075
12098
|
|
|
12076
12099
|
Site ID: ${provisionedSite.id}` : scenario.triggerPrompt;
|
|
12077
12100
|
try {
|
|
12078
|
-
|
|
12079
|
-
console.log(
|
|
12080
|
-
formatTraceEventLine({
|
|
12081
|
-
evalRunId: evalRunId2,
|
|
12082
|
-
scenarioId: scenario.id,
|
|
12083
|
-
scenarioName: scenario.name,
|
|
12084
|
-
targetId,
|
|
12085
|
-
targetName,
|
|
12086
|
-
stepNumber: 0,
|
|
12087
|
-
type: LiveTraceEventType4.PROGRESS,
|
|
12088
|
-
outputPreview: "Setting up environment (installing dependencies)...",
|
|
12089
|
-
elapsedMs: 0,
|
|
12090
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
12091
|
-
isComplete: false
|
|
12092
|
-
})
|
|
12093
|
-
);
|
|
12094
|
-
}
|
|
12101
|
+
emitSetupProgress("Setting up environment...");
|
|
12095
12102
|
const workDir = await prepareWorkingDirectory(
|
|
12096
12103
|
config,
|
|
12097
12104
|
evalRunId2,
|
|
12098
12105
|
targetId,
|
|
12099
12106
|
scenario.id,
|
|
12100
|
-
|
|
12107
|
+
emitSetupProgress,
|
|
12108
|
+
{ template }
|
|
12101
12109
|
);
|
|
12102
12110
|
const partialResult = await runAgentWithContext(
|
|
12103
12111
|
config,
|