@wix/evalforge-evaluator 0.186.0 → 0.187.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -1282,7 +1282,7 @@ var require_error = __commonJS({
1282
1282
  "toJSON",
1283
1283
  "__CANCEL__"
1284
1284
  ];
1285
- var HttpError = class _HttpError extends Error {
1285
+ var HttpError2 = class _HttpError extends Error {
1286
1286
  constructor(error) {
1287
1287
  var _a;
1288
1288
  super(error.message);
@@ -1299,9 +1299,9 @@ var require_error = __commonJS({
1299
1299
  return (0, headers_1.requestIdOrEmptyString)(this.response);
1300
1300
  }
1301
1301
  };
1302
- exports.HttpError = HttpError;
1302
+ exports.HttpError = HttpError2;
/**
 * Factory counterpart of the `HttpError2` constructor: forwards every argument
 * unchanged and returns the newly constructed error.
 */
function createHttpError(...args) {
  return Reflect.construct(HttpError2, args);
}
1306
1306
  exports.createHttpError = createHttpError;
1307
1307
  }
@@ -6356,6 +6356,133 @@ function getLatestCapabilityVersion(payload) {
6356
6356
  return __getLatestCapabilityVersion;
6357
6357
  }
6358
6358
 
6359
+ // ../../node_modules/@wix/ambassador-evalforge-v1-site-provisioning/es/build/http.impl.js
// Serializer schemas for the site-provisioning request/response messages.
// They are empty objects here and are handed to `serializer` by the request builders.
var _deleteProvisionedSiteRequest = {};
var _deleteProvisionedSiteResponse = {};
var _provisionScenarioSiteRequest = {};
var _provisionScenarioSiteResponse = {};

/**
 * Resolves the URL of a SiteProvisioningService endpoint by delegating to
 * `resolveUrl` with this service's domain-to-path mapping table.
 *
 * @param opts - Resolver options (the callers in this file pass `protoPath`,
 *   `data` and `host`). The object is copied, never modified.
 * @returns Whatever `resolveUrl` returns for `opts` plus `domainToMappings`.
 */
function resolveWixEvalforgeV1SiteProvisioningServiceUrl(opts) {
  // Every domain except the public API one exposes the backend under "/_api".
  // A fresh array is built per domain so no mapping object is shared.
  var underApiPrefix = function() {
    return [{ srcPath: "/_api/evalforge-backend", destPath: "" }];
  };
  var domainToMappings = {
    "dev._base_domain_": underApiPrefix(),
    "api._api_base_domain_": [{ srcPath: "/evalforge-backend", destPath: "" }],
    "bo._base_domain_": underApiPrefix(),
    "wixbo.ai": underApiPrefix(),
    "wix-bo.com": underApiPrefix(),
    "manage._base_domain_": underApiPrefix()
  };
  // Copy into a new object: assigning onto `opts` would leak `domainToMappings`
  // into an object owned by the caller.
  return resolveUrl(Object.assign({}, opts, { domainToMappings }));
}
/**
 * Request builder for `SiteProvisioningService.ProvisionScenarioSite`.
 *
 * The serializers are created once per call to this function; the payload is
 * serialized every time the returned factory runs.
 *
 * @param payload - Request message handed to the request serializer.
 * @returns A function that takes `{ host }` and returns the request metadata
 *   (a POST to `/v1/projects/{projectId}/site-provisioning/provision-site`).
 *   The function also carries `fromReq` and the `__isAmbassador` marker.
 */
function provisionScenarioSite(payload) {
  var requestSerializer = serializer(_provisionScenarioSiteRequest, {});
  var toReq = requestSerializer.toJSON;
  var fromReq = requestSerializer.fromJSON;
  var fromRes = serializer(_provisionScenarioSiteResponse, {}).fromJSON;
  function __provisionScenarioSite(context) {
    var host = context.host;
    var serializedData = toReq(payload);
    var url = resolveWixEvalforgeV1SiteProvisioningServiceUrl({
      protoPath: "/v1/projects/{projectId}/site-provisioning/provision-site",
      data: serializedData,
      host
    });
    return {
      entityFqdn: "wix.evalforge.v1.site_provisioning",
      method: "POST",
      methodFqn: "wix.evalforge.v1.SiteProvisioningService.ProvisionScenarioSite",
      migrationOptions: {
        optInTransformResponse: true
      },
      url,
      data: serializedData,
      transformResponse: fromRes
    };
  }
  __provisionScenarioSite.fromReq = fromReq;
  __provisionScenarioSite.__isAmbassador = true;
  return __provisionScenarioSite;
}
/**
 * Request builder for `SiteProvisioningService.DeleteProvisionedSite`.
 *
 * The serializers are created once per call to this function; the payload is
 * serialized every time the returned factory runs.
 *
 * @param payload - Request message handed to the request serializer.
 * @returns A function that takes `{ host }` and returns the request metadata
 *   (a POST to `/v1/projects/{projectId}/site-provisioning/delete-site`).
 *   The function also carries `fromReq` and the `__isAmbassador` marker.
 */
function deleteProvisionedSite(payload) {
  var requestSerializer = serializer(_deleteProvisionedSiteRequest, {});
  var toReq = requestSerializer.toJSON;
  var fromReq = requestSerializer.fromJSON;
  var fromRes = serializer(_deleteProvisionedSiteResponse, {}).fromJSON;
  function __deleteProvisionedSite(context) {
    var host = context.host;
    var serializedData = toReq(payload);
    var url = resolveWixEvalforgeV1SiteProvisioningServiceUrl({
      protoPath: "/v1/projects/{projectId}/site-provisioning/delete-site",
      data: serializedData,
      host
    });
    return {
      entityFqdn: "wix.evalforge.v1.site_provisioning",
      method: "POST",
      methodFqn: "wix.evalforge.v1.SiteProvisioningService.DeleteProvisionedSite",
      migrationOptions: {
        optInTransformResponse: true
      },
      url,
      data: serializedData,
      transformResponse: fromRes
    };
  }
  __deleteProvisionedSite.fromReq = fromReq;
  __deleteProvisionedSite.__isAmbassador = true;
  return __deleteProvisionedSite;
}
6459
+
6460
+ // ../../node_modules/@wix/ambassador-evalforge-v1-test-scenario/es/build/types.impl.js
// String-valued enum: every member's value is identical to its key.
var WebhookIdentityType = WebhookIdentityType || {};
for (const member of ["UNKNOWN", "ANONYMOUS_VISITOR", "MEMBER", "WIX_USER", "APP"]) {
  WebhookIdentityType[member] = member;
}
// String-valued enum of bootstrap-step HTTP methods: every member's value is
// identical to its key, with an explicit "unspecified" member first.
var SiteBootstrapHttpMethod = SiteBootstrapHttpMethod || {};
for (const member of [
  "SITE_BOOTSTRAP_HTTP_METHOD_UNSPECIFIED",
  "GET",
  "POST",
  "PUT",
  "PATCH",
  "DELETE"
]) {
  SiteBootstrapHttpMethod[member] = member;
}
// String-valued enum of site-setup modes: every member's value is identical to its key.
var Mode = Mode || {};
for (const member of ["UNKNOWN_MODE", "NONE", "CLONE", "TEMPLATE"]) {
  Mode[member] = member;
}
6485
+
6359
6486
  // src/ambassador-converters.ts
6360
6487
  function toProtoEnum(prefix, value) {
6361
6488
  return `${prefix}${value.toUpperCase()}`;
@@ -6662,9 +6789,37 @@ function testScenarioFromProto(wire) {
6662
6789
  })
6663
6790
  ),
6664
6791
  createdAt: fromProtoDate(wire.createdAt) ?? "",
6665
- updatedAt: fromProtoDate(wire.updatedAt) ?? ""
6792
+ updatedAt: fromProtoDate(wire.updatedAt) ?? "",
6793
+ siteSetup: siteSetupFromAmbassador(wire.siteSetup)
6666
6794
  };
6667
6795
  }
/**
 * Converts a wire-format `siteSetup` message into the evaluator's shape.
 *
 * - Returns `undefined` when `wire` is missing or its mode is neither
 *   `Mode.CLONE` nor `Mode.TEMPLATE`.
 * - Bootstrap steps without a method, with the UNSPECIFIED method, or without
 *   a URL are dropped; surviving methods are lower-cased.
 * - `bootstrap` is `undefined` when no step survives.
 *
 * @param wire - Wire `siteSetup` value (may be null/undefined).
 * @returns `{ mode: "clone", sourceSiteId, bootstrap }`,
 *   `{ mode: "template", templateId, bootstrap }`, or `undefined`.
 */
function siteSetupFromAmbassador(wire) {
  if (!wire) {
    return void 0;
  }
  const steps = [];
  for (const step of wire.bootstrap?.steps ?? []) {
    const isUsable = step.method && step.method !== SiteBootstrapHttpMethod.SITE_BOOTSTRAP_HTTP_METHOD_UNSPECIFIED && step.url;
    if (!isUsable) {
      continue;
    }
    steps.push({
      label: step.label ?? void 0,
      method: step.method.toLowerCase(),
      url: step.url ?? "",
      body: step.body ?? void 0
    });
  }
  const bootstrap = steps.length > 0 ? { steps } : void 0;
  switch (wire.mode) {
    case Mode.CLONE:
      return {
        mode: "clone",
        sourceSiteId: wire.cloneOptions?.sourceSiteId ?? "",
        bootstrap
      };
    case Mode.TEMPLATE:
      return {
        mode: "template",
        templateId: wire.templateOptions?.templateId ?? "",
        bootstrap
      };
    default:
      return void 0;
  }
}
6668
6823
  function templateFromProto(wire) {
6669
6824
  return {
6670
6825
  id: wire.id ?? "",
@@ -6718,8 +6873,36 @@ function capabilityVersionFromProto(wire, projectId2) {
6718
6873
  createdAt: fromProtoDate(wire.createdAt) ?? ""
6719
6874
  };
6720
6875
  }
/**
 * Maps a provisioned-site message to `{ id, url, editorUrl }`.
 * A missing `id` becomes the empty string; a missing or null `url` /
 * `editorUrl` becomes `undefined`.
 */
function provisionedSiteFromProto(proto) {
  const { id, url, editorUrl } = proto;
  return {
    id: id ?? "",
    url: url ?? void 0,
    editorUrl: editorUrl ?? void 0
  };
}
/**
 * Maps a bootstrap-result message to `{ steps: [...] }`.
 * Returns `undefined` for a missing message. Per step, a missing `statusCode`
 * becomes 0, a missing `ok` becomes false, and a missing `label` / `error`
 * becomes `undefined`.
 */
function siteBootstrapResultFromProto(proto) {
  if (!proto) {
    return void 0;
  }
  const toStepResult = (step) => {
    return {
      label: step.label ?? void 0,
      statusCode: step.statusCode ?? 0,
      ok: step.ok ?? false,
      error: step.error ?? void 0
    };
  };
  const wireSteps = proto.steps ?? [];
  return { steps: wireSteps.map(toStepResult) };
}
6721
6894
 
6722
6895
  // src/api-client.ts
/**
 * Rethrows a failed request with a more descriptive message. Never returns.
 *
 * For an `HttpError`, throws a new `Error` whose message names the attempted
 * action and, when available, the HTTP status and request id. The original
 * error is attached as `cause` so its stack and response stay reachable.
 * Any other value is rethrown untouched.
 *
 * @param err - The value caught from the failed request.
 * @param action - Phrase completing "Failed to ...".
 * @throws {Error} Always.
 */
function rethrowWithRequestId(err, action) {
  if (!(err instanceof import_http_client.HttpError)) {
    throw err;
  }
  const status = err.response?.status;
  const requestId = err.requestId;
  const statusPart = status !== void 0 ? ` (HTTP ${status})` : "";
  const requestIdPart = requestId ? ` [request id: ${requestId}]` : "";
  // Runtimes without Error-cause support ignore the options argument.
  throw new Error(
    `Failed to ${action}${statusPart}${requestIdPart}: ${err.message}`,
    { cause: err }
  );
}
6723
6906
  function resolveAmbassadorBaseUrl(serverUrl) {
6724
6907
  try {
6725
6908
  return new URL(serverUrl).origin;
@@ -6895,6 +7078,29 @@ function createApiClient(serverUrl, options = "") {
6895
7078
  },
6896
7079
  updateEvalRun(projectId2, evalRunId2, update) {
6897
7080
  return putJson(`/projects/${projectId2}/eval-runs/${evalRunId2}`, update);
7081
+ },
7082
+ async provisionScenarioSite(projectId2, evalRunId2, scenarioId) {
7083
+ const res = await httpClient.request(provisionScenarioSite({ projectId: projectId2, evalRunId: evalRunId2, scenarioId })).catch(
7084
+ (err) => rethrowWithRequestId(err, `provision a site for scenario ${scenarioId}`)
7085
+ );
7086
+ const site = res.data.site;
7087
+ if (!site) {
7088
+ throw new Error(
7089
+ `Site provisioning for scenario ${scenarioId} returned no site.`
7090
+ );
7091
+ }
7092
+ return {
7093
+ ...provisionedSiteFromProto(site),
7094
+ bootstrapResult: siteBootstrapResultFromProto(res.data.bootstrapResult)
7095
+ };
7096
+ },
7097
+ async deleteProvisionedSite(projectId2, siteId) {
7098
+ await httpClient.request(deleteProvisionedSite({ projectId: projectId2, siteId })).catch((err) => {
7099
+ console.warn(
7100
+ "[site-provisioning] deleteProvisionedSite failed \u2014 site may remain:",
7101
+ err
7102
+ );
7103
+ });
6898
7104
  }
6899
7105
  };
6900
7106
  }
@@ -11836,87 +12042,123 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
11836
12042
  }
11837
12043
 
11838
12044
  // src/run-scenario/index.ts
/**
 * Runs one scenario end to end and scores it.
 *
 * Steps, in order:
 *  1. When an API client and project id are given and the scenario has a
 *     `siteSetup` whose mode is not "none", provision a site for the scenario.
 *  2. Report the first failed bootstrap step (warning + progress event); the
 *     run continues regardless.
 *  3. Prepare the working directory and run the agent. When a site was
 *     provisioned, its id is appended to the trigger prompt.
 *  4. Evaluate inline and resolved assertions and compute the pass rate
 *     (100 when there are no assertion results).
 *  5. Always delete the provisioned site afterwards, on success or failure.
 *
 * Everything after provisioning runs inside `try/finally`, so an exception
 * from the caller-supplied `pushEvent` cannot leave the site behind.
 *
 * @param config - Evaluator configuration; `aiGatewayUrl` and
 *   `aiGatewayHeaders` are read here, the rest is passed through.
 * @param evalRunId2 - Id of the eval run.
 * @param scenario - Scenario to execute.
 * @param evalData - Eval-run data; used to derive the target id and name.
 * @param template - Optional template; when set, a setup progress line is logged.
 * @param resolvedAssertions - Optional assertions appended to the inline ones.
 * @param pushEvent - Optional sink for live trace events.
 * @param apiClient - Optional API client used to provision and delete the site.
 * @param projectId2 - Optional project id; required for site provisioning.
 * @returns The agent result extended with `assertionResults`, `passed`,
 *   `failed`, `passRate` and `provisionedSite`.
 */
async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions, pushEvent, apiClient, projectId2) {
  const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
  const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
  // Shape shared by the step-0 progress events emitted before the agent starts.
  const progressEvent = (outputPreview) => ({
    evalRunId: evalRunId2,
    scenarioId: scenario.id,
    scenarioName: scenario.name,
    targetId,
    targetName,
    stepNumber: 0,
    type: LiveTraceEventType4.PROGRESS,
    outputPreview,
    elapsedMs: 0,
    timestamp: (/* @__PURE__ */ new Date()).toISOString(),
    isComplete: false
  });
  let provisionedSite;
  if (apiClient && projectId2 && scenario.siteSetup && scenario.siteSetup.mode !== "none") {
    provisionedSite = await apiClient.provisionScenarioSite(
      projectId2,
      evalRunId2,
      scenario.id
    );
  }
  try {
    const failedStep = provisionedSite?.bootstrapResult?.steps.find((s) => !s.ok);
    if (failedStep) {
      const message = `Site bootstrap step ${failedStep.label ? `"${failedStep.label}" ` : ""}failed (HTTP ${failedStep.statusCode}): ${failedStep.error ?? "unknown error"}`;
      console.warn(`[run-scenario] ${message}`);
      pushEvent?.(progressEvent(message));
    }
    // Tell the agent which site to work against.
    const effectiveTriggerPrompt = provisionedSite ? `${scenario.triggerPrompt}\n\nSite ID: ${provisionedSite.id}` : scenario.triggerPrompt;
    if (template) {
      console.log(
        formatTraceEventLine(
          progressEvent("Setting up environment (installing dependencies)...")
        )
      );
    }
    const workDir = await prepareWorkingDirectory(
      config,
      evalRunId2,
      targetId,
      scenario.id,
      template
    );
    const partialResult = await runAgentWithContext(
      config,
      evalRunId2,
      { ...scenario, triggerPrompt: effectiveTriggerPrompt },
      evalData,
      workDir,
      pushEvent
    );
    const inlineAssertions = scenario.assertions ?? [];
    const assertions = [
      ...inlineAssertions,
      ...resolvedAssertions ?? []
    ];
    const templateFilesMap = new Map(
      (partialResult.templateFiles ?? []).map((f) => [f.path, f.status])
    );
    const evaluationInput = {
      outputText: partialResult.outputText,
      llmTrace: partialResult.llmTrace,
      fileDiffs: partialResult.fileDiffs?.map((d) => ({
        path: d.path,
        status: templateFilesMap.get(d.path)
      })),
      durationMs: partialResult.duration
    };
    const defaultJudgeModel = DEFAULT_JUDGE_MODEL;
    const assertionContext = {
      workDir,
      defaultJudgeModel,
      llmConfig: {
        baseUrl: config.aiGatewayUrl,
        headers: config.aiGatewayHeaders
      }
    };
    const assertionResults = assertions.length > 0 ? await evaluateAssertionsBase(
      evaluationInput,
      assertions,
      assertionContext
    ) : [];
    const passed = assertionResults.filter(
      (r) => r.status === AssertionResultStatus.PASSED
    ).length;
    const failed = assertionResults.filter(
      (r) => r.status === AssertionResultStatus.FAILED
    ).length;
    const total = assertionResults.length;
    const passRate = total > 0 ? Math.round(passed / total * 100) : 100;
    return {
      ...partialResult,
      assertionResults,
      passed,
      failed,
      passRate,
      provisionedSite
    };
  } finally {
    if (provisionedSite && apiClient && projectId2) {
      await apiClient.deleteProvisionedSite(projectId2, provisionedSite.id);
    }
  }
}
11921
12163
 
11922
12164
  // src/evaluation-loop.ts
@@ -12196,7 +12438,9 @@ async function runEvaluation(projectId2, evalRunId2) {
12196
12438
  evalData,
12197
12439
  template,
12198
12440
  resolvedAssertions,
12199
- pushEvent
12441
+ pushEvent,
12442
+ api,
12443
+ projectId2
12200
12444
  );
12201
12445
  },
12202
12446
  addResult: async (result) => {