@wix/evalforge-evaluator 0.186.0 → 0.188.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -50,6 +50,10 @@ Backend calls go through the evalforge ambassador packages (gRPC via
50
50
  user-editable fields, not the system state transitions
51
51
  (`status`/`completedAt`/`jobError`/`jobStatus`) the evaluator writes.
52
52
 
53
+ ## Live trace during environment setup
54
+
55
+ For templated runs, the evaluator emits `PROGRESS` trace events during environment setup — "Setting up environment", "Fetching template files", "Installing dependencies", "Environment ready" — via the shared `emitTraceEvent` helper. Because `emitTraceEvent` writes to stdout (captured by the backend for local runs) and also calls the `pushEvent` callback (used for remote jobs via `tracePushUrl`), these events appear in the live trace in both local and remote runs. Without them, the trace panel stays blank during the often multi-minute setup phase before the agent starts.
56
+
53
57
  ## Scripts
54
58
 
55
59
  ```bash
package/build/index.js CHANGED
@@ -1277,7 +1277,7 @@ var require_error = __commonJS({
1277
1277
  "toJSON",
1278
1278
  "__CANCEL__"
1279
1279
  ];
1280
- var HttpError = class _HttpError extends Error {
1280
+ var HttpError2 = class _HttpError extends Error {
1281
1281
  constructor(error) {
1282
1282
  var _a;
1283
1283
  super(error.message);
@@ -1294,9 +1294,9 @@ var require_error = __commonJS({
1294
1294
  return (0, headers_1.requestIdOrEmptyString)(this.response);
1295
1295
  }
1296
1296
  };
1297
- exports2.HttpError = HttpError;
1297
+ exports2.HttpError = HttpError2;
1298
1298
  function createHttpError(...args) {
1299
- return new HttpError(...args);
1299
+ return new HttpError2(...args);
1300
1300
  }
1301
1301
  exports2.createHttpError = createHttpError;
1302
1302
  }
@@ -6351,6 +6351,133 @@ function getLatestCapabilityVersion(payload) {
6351
6351
  return __getLatestCapabilityVersion;
6352
6352
  }
6353
6353
 
6354
+ // ../../node_modules/@wix/ambassador-evalforge-v1-site-provisioning/es/build/http.impl.js
6355
+ var _deleteProvisionedSiteRequest = {};
6356
+ var _deleteProvisionedSiteResponse = {};
6357
+ var _provisionScenarioSiteRequest = {};
6358
+ var _provisionScenarioSiteResponse = {};
6359
+ function resolveWixEvalforgeV1SiteProvisioningServiceUrl(opts) {
6360
+ var domainToMappings = {
6361
+ "dev._base_domain_": [
6362
+ {
6363
+ srcPath: "/_api/evalforge-backend",
6364
+ destPath: ""
6365
+ }
6366
+ ],
6367
+ "api._api_base_domain_": [
6368
+ {
6369
+ srcPath: "/evalforge-backend",
6370
+ destPath: ""
6371
+ }
6372
+ ],
6373
+ "bo._base_domain_": [
6374
+ {
6375
+ srcPath: "/_api/evalforge-backend",
6376
+ destPath: ""
6377
+ }
6378
+ ],
6379
+ "wixbo.ai": [
6380
+ {
6381
+ srcPath: "/_api/evalforge-backend",
6382
+ destPath: ""
6383
+ }
6384
+ ],
6385
+ "wix-bo.com": [
6386
+ {
6387
+ srcPath: "/_api/evalforge-backend",
6388
+ destPath: ""
6389
+ }
6390
+ ],
6391
+ "manage._base_domain_": [
6392
+ {
6393
+ srcPath: "/_api/evalforge-backend",
6394
+ destPath: ""
6395
+ }
6396
+ ]
6397
+ };
6398
+ return resolveUrl(Object.assign(opts, { domainToMappings }));
6399
+ }
6400
+ function provisionScenarioSite(payload) {
6401
+ var _a = serializer(_provisionScenarioSiteRequest, {}), toReq = _a.toJSON, fromReq = _a.fromJSON;
6402
+ var fromRes = serializer(_provisionScenarioSiteResponse, {}).fromJSON;
6403
+ function __provisionScenarioSite(_a2) {
6404
+ var host = _a2.host;
6405
+ var serializedData = toReq(payload);
6406
+ var metadata = {
6407
+ entityFqdn: "wix.evalforge.v1.site_provisioning",
6408
+ method: "POST",
6409
+ methodFqn: "wix.evalforge.v1.SiteProvisioningService.ProvisionScenarioSite",
6410
+ migrationOptions: {
6411
+ optInTransformResponse: true
6412
+ },
6413
+ url: resolveWixEvalforgeV1SiteProvisioningServiceUrl({
6414
+ protoPath: "/v1/projects/{projectId}/site-provisioning/provision-site",
6415
+ data: serializedData,
6416
+ host
6417
+ }),
6418
+ data: serializedData,
6419
+ transformResponse: fromRes
6420
+ };
6421
+ return metadata;
6422
+ }
6423
+ __provisionScenarioSite.fromReq = fromReq;
6424
+ __provisionScenarioSite.__isAmbassador = true;
6425
+ return __provisionScenarioSite;
6426
+ }
6427
+ function deleteProvisionedSite(payload) {
6428
+ var _a = serializer(_deleteProvisionedSiteRequest, {}), toReq = _a.toJSON, fromReq = _a.fromJSON;
6429
+ var fromRes = serializer(_deleteProvisionedSiteResponse, {}).fromJSON;
6430
+ function __deleteProvisionedSite(_a2) {
6431
+ var host = _a2.host;
6432
+ var serializedData = toReq(payload);
6433
+ var metadata = {
6434
+ entityFqdn: "wix.evalforge.v1.site_provisioning",
6435
+ method: "POST",
6436
+ methodFqn: "wix.evalforge.v1.SiteProvisioningService.DeleteProvisionedSite",
6437
+ migrationOptions: {
6438
+ optInTransformResponse: true
6439
+ },
6440
+ url: resolveWixEvalforgeV1SiteProvisioningServiceUrl({
6441
+ protoPath: "/v1/projects/{projectId}/site-provisioning/delete-site",
6442
+ data: serializedData,
6443
+ host
6444
+ }),
6445
+ data: serializedData,
6446
+ transformResponse: fromRes
6447
+ };
6448
+ return metadata;
6449
+ }
6450
+ __deleteProvisionedSite.fromReq = fromReq;
6451
+ __deleteProvisionedSite.__isAmbassador = true;
6452
+ return __deleteProvisionedSite;
6453
+ }
6454
+
6455
+ // ../../node_modules/@wix/ambassador-evalforge-v1-test-scenario/es/build/types.impl.js
6456
+ var WebhookIdentityType;
6457
+ (function(WebhookIdentityType2) {
6458
+ WebhookIdentityType2["UNKNOWN"] = "UNKNOWN";
6459
+ WebhookIdentityType2["ANONYMOUS_VISITOR"] = "ANONYMOUS_VISITOR";
6460
+ WebhookIdentityType2["MEMBER"] = "MEMBER";
6461
+ WebhookIdentityType2["WIX_USER"] = "WIX_USER";
6462
+ WebhookIdentityType2["APP"] = "APP";
6463
+ })(WebhookIdentityType || (WebhookIdentityType = {}));
6464
+ var SiteBootstrapHttpMethod;
6465
+ (function(SiteBootstrapHttpMethod2) {
6466
+ SiteBootstrapHttpMethod2["SITE_BOOTSTRAP_HTTP_METHOD_UNSPECIFIED"] = "SITE_BOOTSTRAP_HTTP_METHOD_UNSPECIFIED";
6467
+ SiteBootstrapHttpMethod2["GET"] = "GET";
6468
+ SiteBootstrapHttpMethod2["POST"] = "POST";
6469
+ SiteBootstrapHttpMethod2["PUT"] = "PUT";
6470
+ SiteBootstrapHttpMethod2["PATCH"] = "PATCH";
6471
+ SiteBootstrapHttpMethod2["DELETE"] = "DELETE";
6472
+ })(SiteBootstrapHttpMethod || (SiteBootstrapHttpMethod = {}));
6473
+ var Mode;
6474
+ (function(Mode2) {
6475
+ Mode2["UNKNOWN_MODE"] = "UNKNOWN_MODE";
6476
+ Mode2["NONE"] = "NONE";
6477
+ Mode2["CLONE"] = "CLONE";
6478
+ Mode2["TEMPLATE"] = "TEMPLATE";
6479
+ })(Mode || (Mode = {}));
6480
+
6354
6481
  // src/ambassador-converters.ts
6355
6482
  function toProtoEnum(prefix, value) {
6356
6483
  return `${prefix}${value.toUpperCase()}`;
@@ -6657,9 +6784,37 @@ function testScenarioFromProto(wire) {
6657
6784
  })
6658
6785
  ),
6659
6786
  createdAt: fromProtoDate(wire.createdAt) ?? "",
6660
- updatedAt: fromProtoDate(wire.updatedAt) ?? ""
6787
+ updatedAt: fromProtoDate(wire.updatedAt) ?? "",
6788
+ siteSetup: siteSetupFromAmbassador(wire.siteSetup)
6661
6789
  };
6662
6790
  }
6791
+ function siteSetupFromAmbassador(wire) {
6792
+ if (!wire) return void 0;
6793
+ const steps = (wire.bootstrap?.steps ?? []).filter(
6794
+ (step) => step.method && step.method !== SiteBootstrapHttpMethod.SITE_BOOTSTRAP_HTTP_METHOD_UNSPECIFIED && step.url
6795
+ ).map((step) => ({
6796
+ label: step.label ?? void 0,
6797
+ method: step.method.toLowerCase(),
6798
+ url: step.url ?? "",
6799
+ body: step.body ?? void 0
6800
+ }));
6801
+ const bootstrap = steps.length > 0 ? { steps } : void 0;
6802
+ if (wire.mode === Mode.CLONE) {
6803
+ return {
6804
+ mode: "clone",
6805
+ sourceSiteId: wire.cloneOptions?.sourceSiteId ?? "",
6806
+ bootstrap
6807
+ };
6808
+ }
6809
+ if (wire.mode === Mode.TEMPLATE) {
6810
+ return {
6811
+ mode: "template",
6812
+ templateId: wire.templateOptions?.templateId ?? "",
6813
+ bootstrap
6814
+ };
6815
+ }
6816
+ return void 0;
6817
+ }
6663
6818
  function templateFromProto(wire) {
6664
6819
  return {
6665
6820
  id: wire.id ?? "",
@@ -6713,8 +6868,36 @@ function capabilityVersionFromProto(wire, projectId2) {
6713
6868
  createdAt: fromProtoDate(wire.createdAt) ?? ""
6714
6869
  };
6715
6870
  }
6871
+ function provisionedSiteFromProto(proto) {
6872
+ return {
6873
+ id: proto.id ?? "",
6874
+ url: proto.url ?? void 0,
6875
+ editorUrl: proto.editorUrl ?? void 0
6876
+ };
6877
+ }
6878
+ function siteBootstrapResultFromProto(proto) {
6879
+ if (!proto) return void 0;
6880
+ return {
6881
+ steps: (proto.steps ?? []).map((step) => ({
6882
+ label: step.label ?? void 0,
6883
+ statusCode: step.statusCode ?? 0,
6884
+ ok: step.ok ?? false,
6885
+ error: step.error ?? void 0
6886
+ }))
6887
+ };
6888
+ }
6716
6889
 
6717
6890
  // src/api-client.ts
6891
+ function rethrowWithRequestId(err, action) {
6892
+ if (err instanceof import_http_client.HttpError) {
6893
+ const status = err.response?.status;
6894
+ const requestId = err.requestId;
6895
+ throw new Error(
6896
+ `Failed to ${action}` + (status !== void 0 ? ` (HTTP ${status})` : "") + (requestId ? ` [request id: ${requestId}]` : "") + `: ${err.message}`
6897
+ );
6898
+ }
6899
+ throw err;
6900
+ }
6718
6901
  function resolveAmbassadorBaseUrl(serverUrl) {
6719
6902
  try {
6720
6903
  return new URL(serverUrl).origin;
@@ -6890,6 +7073,29 @@ function createApiClient(serverUrl, options = "") {
6890
7073
  },
6891
7074
  updateEvalRun(projectId2, evalRunId2, update) {
6892
7075
  return putJson(`/projects/${projectId2}/eval-runs/${evalRunId2}`, update);
7076
+ },
7077
+ async provisionScenarioSite(projectId2, evalRunId2, scenarioId) {
7078
+ const res = await httpClient.request(provisionScenarioSite({ projectId: projectId2, evalRunId: evalRunId2, scenarioId })).catch(
7079
+ (err) => rethrowWithRequestId(err, `provision a site for scenario ${scenarioId}`)
7080
+ );
7081
+ const site = res.data.site;
7082
+ if (!site) {
7083
+ throw new Error(
7084
+ `Site provisioning for scenario ${scenarioId} returned no site.`
7085
+ );
7086
+ }
7087
+ return {
7088
+ ...provisionedSiteFromProto(site),
7089
+ bootstrapResult: siteBootstrapResultFromProto(res.data.bootstrapResult)
7090
+ };
7091
+ },
7092
+ async deleteProvisionedSite(projectId2, siteId) {
7093
+ await httpClient.request(deleteProvisionedSite({ projectId: projectId2, siteId })).catch((err) => {
7094
+ console.warn(
7095
+ "[site-provisioning] deleteProvisionedSite failed \u2014 site may remain:",
7096
+ err
7097
+ );
7098
+ });
6893
7099
  }
6894
7100
  };
6895
7101
  }
@@ -7256,10 +7462,13 @@ function installWithCache(workDir, exec, cacheBase, pm) {
7256
7462
  );
7257
7463
  }
7258
7464
  }
7259
- async function installDependencies(workDir, exec = import_child_process.execFileSync, cacheBase) {
7465
+ async function installDependencies(workDir, onProgress, options = {}) {
7260
7466
  if (!(0, import_fs.existsSync)(import_path2.default.join(workDir, "package.json"))) {
7261
7467
  return;
7262
7468
  }
7469
+ const exec = options.exec ?? import_child_process.execFileSync;
7470
+ const cacheBase = options.cacheBase;
7471
+ onProgress("Installing dependencies...");
7263
7472
  const pm = detectPackageManager(workDir);
7264
7473
  if (cacheBase) {
7265
7474
  installWithCache(workDir, exec, cacheBase, pm);
@@ -7335,7 +7544,8 @@ function writeWixEnvFile(workDir) {
7335
7544
  console.warn("[environment] Failed to read wix.config.json");
7336
7545
  }
7337
7546
  }
7338
- async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId, template) {
7547
+ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId, onProgress, options = {}) {
7548
+ const template = options.template;
7339
7549
  const baseDir = config.evaluationsDir ?? import_path3.default.join((0, import_os.tmpdir)(), "evalforge-evaluations");
7340
7550
  const nodeModulesCacheDir = import_path3.default.join(baseDir, "_node_modules_cache");
7341
7551
  if (template) {
@@ -7349,10 +7559,14 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
7349
7559
  (0, import_fs2.rmSync)(workDir2, { recursive: true });
7350
7560
  }
7351
7561
  (0, import_fs2.mkdirSync)(workDir2, { recursive: true });
7562
+ onProgress("Fetching template files...");
7352
7563
  await fetchAndWriteTemplateFiles(template, workDir2);
7353
7564
  console.log(`Template files written to ${workDir2}`);
7354
7565
  writeWixEnvFile(workDir2);
7355
- await installDependencies(workDir2, void 0, nodeModulesCacheDir);
7566
+ await installDependencies(workDir2, onProgress, {
7567
+ cacheBase: nodeModulesCacheDir
7568
+ });
7569
+ onProgress("Environment ready");
7356
7570
  return workDir2;
7357
7571
  }
7358
7572
  const workDir = import_path3.default.join(baseDir, `${evalRunId2}_${targetId}_${scenarioId}`);
@@ -7364,6 +7578,13 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
7364
7578
  return workDir;
7365
7579
  }
7366
7580
 
7581
+ // src/run-scenario/agents/shared/trace-emit.ts
7582
+ var import_evalforge_types2 = require("@wix/evalforge-types");
7583
+ function emitTraceEvent(event, pushEvent) {
7584
+ console.log(`${import_evalforge_types2.TRACE_EVENT_PREFIX}${JSON.stringify(event)}`);
7585
+ pushEvent?.(event);
7586
+ }
7587
+
7367
7588
  // src/run-scenario/run-agent-with-context.ts
7368
7589
  var import_crypto5 = require("crypto");
7369
7590
 
@@ -7528,7 +7749,7 @@ var import_crypto2 = require("crypto");
7528
7749
  // src/run-scenario/agents/claude-code/write-mcp.ts
7529
7750
  var import_promises5 = require("fs/promises");
7530
7751
  var import_path6 = require("path");
7531
- var import_evalforge_types2 = require("@wix/evalforge-types");
7752
+ var import_evalforge_types3 = require("@wix/evalforge-types");
7532
7753
 
7533
7754
  // src/run-scenario/agents/shared/resolve-mcp-placeholders.ts
7534
7755
  var import_promises4 = require("fs/promises");
@@ -7589,7 +7810,7 @@ async function writeMcpToFilesystem(cwd, mcps) {
7589
7810
  }
7590
7811
  const resolvedServers = await resolveMcpPlaceholders(mcpServers, { cwd });
7591
7812
  const content = JSON.stringify(
7592
- { [import_evalforge_types2.MCP_SERVERS_JSON_KEY]: resolvedServers },
7813
+ { [import_evalforge_types3.MCP_SERVERS_JSON_KEY]: resolvedServers },
7593
7814
  null,
7594
7815
  2
7595
7816
  );
@@ -7815,13 +8036,6 @@ function buildConversation(timestampedMessages) {
7815
8036
  return messages;
7816
8037
  }
7817
8038
 
7818
- // src/run-scenario/agents/shared/trace-emit.ts
7819
- var import_evalforge_types3 = require("@wix/evalforge-types");
7820
- function emitTraceEvent(event, pushEvent) {
7821
- console.log(`${import_evalforge_types3.TRACE_EVENT_PREFIX}${JSON.stringify(event)}`);
7822
- pushEvent?.(event);
7823
- }
7824
-
7825
8039
  // src/run-scenario/agents/claude-code/execute.ts
7826
8040
  var DEFAULT_MODEL = import_evalforge_types4.ClaudeModel.CLAUDE_4_5_SONNET_1_0;
7827
8041
  async function* buildPromptStream(triggerPrompt, images) {
@@ -11783,87 +11997,124 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
11783
11997
  }
11784
11998
 
11785
11999
  // src/run-scenario/index.ts
11786
- async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions, pushEvent) {
12000
+ async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions, pushEvent, apiClient, projectId2) {
11787
12001
  const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
11788
12002
  const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
11789
- if (template) {
11790
- console.log(
11791
- (0, import_evalforge_types13.formatTraceEventLine)({
11792
- evalRunId: evalRunId2,
11793
- scenarioId: scenario.id,
11794
- scenarioName: scenario.name,
11795
- targetId,
11796
- targetName,
11797
- stepNumber: 0,
11798
- type: import_evalforge_types13.LiveTraceEventType.PROGRESS,
11799
- outputPreview: "Setting up environment (installing dependencies)...",
11800
- elapsedMs: 0,
11801
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
11802
- isComplete: false
11803
- })
11804
- );
11805
- }
11806
- const workDir = await prepareWorkingDirectory(
11807
- config,
11808
- evalRunId2,
11809
- targetId,
11810
- scenario.id,
11811
- template
11812
- );
11813
- const partialResult = await runAgentWithContext(
11814
- config,
11815
- evalRunId2,
11816
- scenario,
11817
- evalData,
11818
- workDir,
12003
+ const emitSetupProgress = (outputPreview) => emitTraceEvent(
12004
+ {
12005
+ evalRunId: evalRunId2,
12006
+ scenarioId: scenario.id,
12007
+ scenarioName: scenario.name,
12008
+ targetId,
12009
+ targetName,
12010
+ stepNumber: 0,
12011
+ type: import_evalforge_types13.LiveTraceEventType.PROGRESS,
12012
+ outputPreview,
12013
+ elapsedMs: 0,
12014
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
12015
+ isComplete: false
12016
+ },
11819
12017
  pushEvent
11820
12018
  );
11821
- const inlineAssertions = scenario.assertions ?? [];
11822
- const assertions = [
11823
- ...inlineAssertions,
11824
- ...resolvedAssertions ?? []
11825
- ];
11826
- const templateFilesMap = new Map(
11827
- (partialResult.templateFiles ?? []).map((f) => [f.path, f.status])
11828
- );
11829
- const evaluationInput = {
11830
- outputText: partialResult.outputText,
11831
- llmTrace: partialResult.llmTrace,
11832
- fileDiffs: partialResult.fileDiffs?.map((d) => ({
11833
- path: d.path,
11834
- status: templateFilesMap.get(d.path)
11835
- })),
11836
- durationMs: partialResult.duration
11837
- };
11838
- const defaultJudgeModel = import_evalforge_types13.DEFAULT_JUDGE_MODEL;
11839
- const assertionContext = {
11840
- workDir,
11841
- defaultJudgeModel,
11842
- llmConfig: {
11843
- baseUrl: config.aiGatewayUrl,
11844
- headers: config.aiGatewayHeaders
12019
+ let provisionedSite;
12020
+ if (apiClient && projectId2 && scenario.siteSetup && scenario.siteSetup.mode !== "none") {
12021
+ provisionedSite = await apiClient.provisionScenarioSite(
12022
+ projectId2,
12023
+ evalRunId2,
12024
+ scenario.id
12025
+ );
12026
+ }
12027
+ const failedStep = provisionedSite?.bootstrapResult?.steps.find((s) => !s.ok);
12028
+ if (failedStep) {
12029
+ const message = `Site bootstrap step ${failedStep.label ? `"${failedStep.label}" ` : ""}failed (HTTP ${failedStep.statusCode}): ${failedStep.error ?? "unknown error"}`;
12030
+ console.warn(`[run-scenario] ${message}`);
12031
+ pushEvent?.({
12032
+ evalRunId: evalRunId2,
12033
+ scenarioId: scenario.id,
12034
+ scenarioName: scenario.name,
12035
+ targetId,
12036
+ targetName,
12037
+ stepNumber: 0,
12038
+ type: import_evalforge_types13.LiveTraceEventType.PROGRESS,
12039
+ outputPreview: message,
12040
+ elapsedMs: 0,
12041
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
12042
+ isComplete: false
12043
+ });
12044
+ }
12045
+ const effectiveTriggerPrompt = provisionedSite ? `${scenario.triggerPrompt}
12046
+
12047
+ Site ID: ${provisionedSite.id}` : scenario.triggerPrompt;
12048
+ try {
12049
+ emitSetupProgress("Setting up environment...");
12050
+ const workDir = await prepareWorkingDirectory(
12051
+ config,
12052
+ evalRunId2,
12053
+ targetId,
12054
+ scenario.id,
12055
+ emitSetupProgress,
12056
+ { template }
12057
+ );
12058
+ const partialResult = await runAgentWithContext(
12059
+ config,
12060
+ evalRunId2,
12061
+ { ...scenario, triggerPrompt: effectiveTriggerPrompt },
12062
+ evalData,
12063
+ workDir,
12064
+ pushEvent
12065
+ );
12066
+ const inlineAssertions = scenario.assertions ?? [];
12067
+ const assertions = [
12068
+ ...inlineAssertions,
12069
+ ...resolvedAssertions ?? []
12070
+ ];
12071
+ const templateFilesMap = new Map(
12072
+ (partialResult.templateFiles ?? []).map((f) => [f.path, f.status])
12073
+ );
12074
+ const evaluationInput = {
12075
+ outputText: partialResult.outputText,
12076
+ llmTrace: partialResult.llmTrace,
12077
+ fileDiffs: partialResult.fileDiffs?.map((d) => ({
12078
+ path: d.path,
12079
+ status: templateFilesMap.get(d.path)
12080
+ })),
12081
+ durationMs: partialResult.duration
12082
+ };
12083
+ const defaultJudgeModel = import_evalforge_types13.DEFAULT_JUDGE_MODEL;
12084
+ const assertionContext = {
12085
+ workDir,
12086
+ defaultJudgeModel,
12087
+ llmConfig: {
12088
+ baseUrl: config.aiGatewayUrl,
12089
+ headers: config.aiGatewayHeaders
12090
+ }
12091
+ };
12092
+ const assertionResults = assertions.length > 0 ? await (0, import_eval_assertions.evaluateAssertions)(
12093
+ evaluationInput,
12094
+ assertions,
12095
+ assertionContext
12096
+ ) : [];
12097
+ const passed = assertionResults.filter(
12098
+ (r) => r.status === import_evalforge_types13.AssertionResultStatus.PASSED
12099
+ ).length;
12100
+ const failed = assertionResults.filter(
12101
+ (r) => r.status === import_evalforge_types13.AssertionResultStatus.FAILED
12102
+ ).length;
12103
+ const total = assertionResults.length;
12104
+ const passRate = total > 0 ? Math.round(passed / total * 100) : 100;
12105
+ return {
12106
+ ...partialResult,
12107
+ assertionResults,
12108
+ passed,
12109
+ failed,
12110
+ passRate,
12111
+ provisionedSite
12112
+ };
12113
+ } finally {
12114
+ if (provisionedSite && apiClient && projectId2) {
12115
+ await apiClient.deleteProvisionedSite(projectId2, provisionedSite.id);
11845
12116
  }
11846
- };
11847
- const assertionResults = assertions.length > 0 ? await (0, import_eval_assertions.evaluateAssertions)(
11848
- evaluationInput,
11849
- assertions,
11850
- assertionContext
11851
- ) : [];
11852
- const passed = assertionResults.filter(
11853
- (r) => r.status === import_evalforge_types13.AssertionResultStatus.PASSED
11854
- ).length;
11855
- const failed = assertionResults.filter(
11856
- (r) => r.status === import_evalforge_types13.AssertionResultStatus.FAILED
11857
- ).length;
11858
- const total = assertionResults.length;
11859
- const passRate = total > 0 ? Math.round(passed / total * 100) : 100;
11860
- return {
11861
- ...partialResult,
11862
- assertionResults,
11863
- passed,
11864
- failed,
11865
- passRate
11866
- };
12117
+ }
11867
12118
  }
11868
12119
 
11869
12120
  // src/evaluation-loop.ts
@@ -12143,7 +12394,9 @@ async function runEvaluation(projectId2, evalRunId2) {
12143
12394
  evalData,
12144
12395
  template,
12145
12396
  resolvedAssertions,
12146
- pushEvent
12397
+ pushEvent,
12398
+ api,
12399
+ projectId2
12147
12400
  );
12148
12401
  },
12149
12402
  addResult: async (result) => {