@wix/evalforge-evaluator 0.186.0 → 0.187.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -1277,7 +1277,7 @@ var require_error = __commonJS({
1277
1277
  "toJSON",
1278
1278
  "__CANCEL__"
1279
1279
  ];
1280
- var HttpError = class _HttpError extends Error {
1280
+ var HttpError2 = class _HttpError extends Error {
1281
1281
  constructor(error) {
1282
1282
  var _a;
1283
1283
  super(error.message);
@@ -1294,9 +1294,9 @@ var require_error = __commonJS({
1294
1294
  return (0, headers_1.requestIdOrEmptyString)(this.response);
1295
1295
  }
1296
1296
  };
1297
- exports2.HttpError = HttpError;
1297
+ exports2.HttpError = HttpError2;
1298
1298
  function createHttpError(...args) {
1299
- return new HttpError(...args);
1299
+ return new HttpError2(...args);
1300
1300
  }
1301
1301
  exports2.createHttpError = createHttpError;
1302
1302
  }
@@ -6351,6 +6351,133 @@ function getLatestCapabilityVersion(payload) {
6351
6351
  return __getLatestCapabilityVersion;
6352
6352
  }
6353
6353
 
6354
+ // ../../node_modules/@wix/ambassador-evalforge-v1-site-provisioning/es/build/http.impl.js
6355
+ var _deleteProvisionedSiteRequest = {};
6356
+ var _deleteProvisionedSiteResponse = {};
6357
+ var _provisionScenarioSiteRequest = {};
6358
+ var _provisionScenarioSiteResponse = {};
6359
+ function resolveWixEvalforgeV1SiteProvisioningServiceUrl(opts) {
6360
+ var domainToMappings = {
6361
+ "dev._base_domain_": [
6362
+ {
6363
+ srcPath: "/_api/evalforge-backend",
6364
+ destPath: ""
6365
+ }
6366
+ ],
6367
+ "api._api_base_domain_": [
6368
+ {
6369
+ srcPath: "/evalforge-backend",
6370
+ destPath: ""
6371
+ }
6372
+ ],
6373
+ "bo._base_domain_": [
6374
+ {
6375
+ srcPath: "/_api/evalforge-backend",
6376
+ destPath: ""
6377
+ }
6378
+ ],
6379
+ "wixbo.ai": [
6380
+ {
6381
+ srcPath: "/_api/evalforge-backend",
6382
+ destPath: ""
6383
+ }
6384
+ ],
6385
+ "wix-bo.com": [
6386
+ {
6387
+ srcPath: "/_api/evalforge-backend",
6388
+ destPath: ""
6389
+ }
6390
+ ],
6391
+ "manage._base_domain_": [
6392
+ {
6393
+ srcPath: "/_api/evalforge-backend",
6394
+ destPath: ""
6395
+ }
6396
+ ]
6397
+ };
6398
+ return resolveUrl(Object.assign(opts, { domainToMappings }));
6399
+ }
6400
+ function provisionScenarioSite(payload) {
6401
+ var _a = serializer(_provisionScenarioSiteRequest, {}), toReq = _a.toJSON, fromReq = _a.fromJSON;
6402
+ var fromRes = serializer(_provisionScenarioSiteResponse, {}).fromJSON;
6403
+ function __provisionScenarioSite(_a2) {
6404
+ var host = _a2.host;
6405
+ var serializedData = toReq(payload);
6406
+ var metadata = {
6407
+ entityFqdn: "wix.evalforge.v1.site_provisioning",
6408
+ method: "POST",
6409
+ methodFqn: "wix.evalforge.v1.SiteProvisioningService.ProvisionScenarioSite",
6410
+ migrationOptions: {
6411
+ optInTransformResponse: true
6412
+ },
6413
+ url: resolveWixEvalforgeV1SiteProvisioningServiceUrl({
6414
+ protoPath: "/v1/projects/{projectId}/site-provisioning/provision-site",
6415
+ data: serializedData,
6416
+ host
6417
+ }),
6418
+ data: serializedData,
6419
+ transformResponse: fromRes
6420
+ };
6421
+ return metadata;
6422
+ }
6423
+ __provisionScenarioSite.fromReq = fromReq;
6424
+ __provisionScenarioSite.__isAmbassador = true;
6425
+ return __provisionScenarioSite;
6426
+ }
6427
+ function deleteProvisionedSite(payload) {
6428
+ var _a = serializer(_deleteProvisionedSiteRequest, {}), toReq = _a.toJSON, fromReq = _a.fromJSON;
6429
+ var fromRes = serializer(_deleteProvisionedSiteResponse, {}).fromJSON;
6430
+ function __deleteProvisionedSite(_a2) {
6431
+ var host = _a2.host;
6432
+ var serializedData = toReq(payload);
6433
+ var metadata = {
6434
+ entityFqdn: "wix.evalforge.v1.site_provisioning",
6435
+ method: "POST",
6436
+ methodFqn: "wix.evalforge.v1.SiteProvisioningService.DeleteProvisionedSite",
6437
+ migrationOptions: {
6438
+ optInTransformResponse: true
6439
+ },
6440
+ url: resolveWixEvalforgeV1SiteProvisioningServiceUrl({
6441
+ protoPath: "/v1/projects/{projectId}/site-provisioning/delete-site",
6442
+ data: serializedData,
6443
+ host
6444
+ }),
6445
+ data: serializedData,
6446
+ transformResponse: fromRes
6447
+ };
6448
+ return metadata;
6449
+ }
6450
+ __deleteProvisionedSite.fromReq = fromReq;
6451
+ __deleteProvisionedSite.__isAmbassador = true;
6452
+ return __deleteProvisionedSite;
6453
+ }
6454
+
6455
+ // ../../node_modules/@wix/ambassador-evalforge-v1-test-scenario/es/build/types.impl.js
6456
+ var WebhookIdentityType;
6457
+ (function(WebhookIdentityType2) {
6458
+ WebhookIdentityType2["UNKNOWN"] = "UNKNOWN";
6459
+ WebhookIdentityType2["ANONYMOUS_VISITOR"] = "ANONYMOUS_VISITOR";
6460
+ WebhookIdentityType2["MEMBER"] = "MEMBER";
6461
+ WebhookIdentityType2["WIX_USER"] = "WIX_USER";
6462
+ WebhookIdentityType2["APP"] = "APP";
6463
+ })(WebhookIdentityType || (WebhookIdentityType = {}));
6464
+ var SiteBootstrapHttpMethod;
6465
+ (function(SiteBootstrapHttpMethod2) {
6466
+ SiteBootstrapHttpMethod2["SITE_BOOTSTRAP_HTTP_METHOD_UNSPECIFIED"] = "SITE_BOOTSTRAP_HTTP_METHOD_UNSPECIFIED";
6467
+ SiteBootstrapHttpMethod2["GET"] = "GET";
6468
+ SiteBootstrapHttpMethod2["POST"] = "POST";
6469
+ SiteBootstrapHttpMethod2["PUT"] = "PUT";
6470
+ SiteBootstrapHttpMethod2["PATCH"] = "PATCH";
6471
+ SiteBootstrapHttpMethod2["DELETE"] = "DELETE";
6472
+ })(SiteBootstrapHttpMethod || (SiteBootstrapHttpMethod = {}));
6473
+ var Mode;
6474
+ (function(Mode2) {
6475
+ Mode2["UNKNOWN_MODE"] = "UNKNOWN_MODE";
6476
+ Mode2["NONE"] = "NONE";
6477
+ Mode2["CLONE"] = "CLONE";
6478
+ Mode2["TEMPLATE"] = "TEMPLATE";
6479
+ })(Mode || (Mode = {}));
6480
+
6354
6481
  // src/ambassador-converters.ts
6355
6482
  function toProtoEnum(prefix, value) {
6356
6483
  return `${prefix}${value.toUpperCase()}`;
@@ -6657,9 +6784,37 @@ function testScenarioFromProto(wire) {
6657
6784
  })
6658
6785
  ),
6659
6786
  createdAt: fromProtoDate(wire.createdAt) ?? "",
6660
- updatedAt: fromProtoDate(wire.updatedAt) ?? ""
6787
+ updatedAt: fromProtoDate(wire.updatedAt) ?? "",
6788
+ siteSetup: siteSetupFromAmbassador(wire.siteSetup)
6661
6789
  };
6662
6790
  }
6791
+ function siteSetupFromAmbassador(wire) {
6792
+ if (!wire) return void 0;
6793
+ const steps = (wire.bootstrap?.steps ?? []).filter(
6794
+ (step) => step.method && step.method !== SiteBootstrapHttpMethod.SITE_BOOTSTRAP_HTTP_METHOD_UNSPECIFIED && step.url
6795
+ ).map((step) => ({
6796
+ label: step.label ?? void 0,
6797
+ method: step.method.toLowerCase(),
6798
+ url: step.url ?? "",
6799
+ body: step.body ?? void 0
6800
+ }));
6801
+ const bootstrap = steps.length > 0 ? { steps } : void 0;
6802
+ if (wire.mode === Mode.CLONE) {
6803
+ return {
6804
+ mode: "clone",
6805
+ sourceSiteId: wire.cloneOptions?.sourceSiteId ?? "",
6806
+ bootstrap
6807
+ };
6808
+ }
6809
+ if (wire.mode === Mode.TEMPLATE) {
6810
+ return {
6811
+ mode: "template",
6812
+ templateId: wire.templateOptions?.templateId ?? "",
6813
+ bootstrap
6814
+ };
6815
+ }
6816
+ return void 0;
6817
+ }
6663
6818
  function templateFromProto(wire) {
6664
6819
  return {
6665
6820
  id: wire.id ?? "",
@@ -6713,8 +6868,36 @@ function capabilityVersionFromProto(wire, projectId2) {
6713
6868
  createdAt: fromProtoDate(wire.createdAt) ?? ""
6714
6869
  };
6715
6870
  }
6871
+ function provisionedSiteFromProto(proto) {
6872
+ return {
6873
+ id: proto.id ?? "",
6874
+ url: proto.url ?? void 0,
6875
+ editorUrl: proto.editorUrl ?? void 0
6876
+ };
6877
+ }
6878
+ function siteBootstrapResultFromProto(proto) {
6879
+ if (!proto) return void 0;
6880
+ return {
6881
+ steps: (proto.steps ?? []).map((step) => ({
6882
+ label: step.label ?? void 0,
6883
+ statusCode: step.statusCode ?? 0,
6884
+ ok: step.ok ?? false,
6885
+ error: step.error ?? void 0
6886
+ }))
6887
+ };
6888
+ }
6716
6889
 
6717
6890
  // src/api-client.ts
6891
+ function rethrowWithRequestId(err, action) {
6892
+ if (err instanceof import_http_client.HttpError) {
6893
+ const status = err.response?.status;
6894
+ const requestId = err.requestId;
6895
+ throw new Error(
6896
+ `Failed to ${action}` + (status !== void 0 ? ` (HTTP ${status})` : "") + (requestId ? ` [request id: ${requestId}]` : "") + `: ${err.message}`
6897
+ );
6898
+ }
6899
+ throw err;
6900
+ }
6718
6901
  function resolveAmbassadorBaseUrl(serverUrl) {
6719
6902
  try {
6720
6903
  return new URL(serverUrl).origin;
@@ -6890,6 +7073,29 @@ function createApiClient(serverUrl, options = "") {
6890
7073
  },
6891
7074
  updateEvalRun(projectId2, evalRunId2, update) {
6892
7075
  return putJson(`/projects/${projectId2}/eval-runs/${evalRunId2}`, update);
7076
+ },
7077
+ async provisionScenarioSite(projectId2, evalRunId2, scenarioId) {
7078
+ const res = await httpClient.request(provisionScenarioSite({ projectId: projectId2, evalRunId: evalRunId2, scenarioId })).catch(
7079
+ (err) => rethrowWithRequestId(err, `provision a site for scenario ${scenarioId}`)
7080
+ );
7081
+ const site = res.data.site;
7082
+ if (!site) {
7083
+ throw new Error(
7084
+ `Site provisioning for scenario ${scenarioId} returned no site.`
7085
+ );
7086
+ }
7087
+ return {
7088
+ ...provisionedSiteFromProto(site),
7089
+ bootstrapResult: siteBootstrapResultFromProto(res.data.bootstrapResult)
7090
+ };
7091
+ },
7092
+ async deleteProvisionedSite(projectId2, siteId) {
7093
+ await httpClient.request(deleteProvisionedSite({ projectId: projectId2, siteId })).catch((err) => {
7094
+ console.warn(
7095
+ "[site-provisioning] deleteProvisionedSite failed \u2014 site may remain:",
7096
+ err
7097
+ );
7098
+ });
6893
7099
  }
6894
7100
  };
6895
7101
  }
@@ -11783,87 +11989,123 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
11783
11989
  }
11784
11990
 
11785
11991
  // src/run-scenario/index.ts
11786
- async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions, pushEvent) {
11992
+ async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions, pushEvent, apiClient, projectId2) {
11787
11993
  const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
11788
11994
  const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
11789
- if (template) {
11790
- console.log(
11791
- (0, import_evalforge_types13.formatTraceEventLine)({
11792
- evalRunId: evalRunId2,
11793
- scenarioId: scenario.id,
11794
- scenarioName: scenario.name,
11795
- targetId,
11796
- targetName,
11797
- stepNumber: 0,
11798
- type: import_evalforge_types13.LiveTraceEventType.PROGRESS,
11799
- outputPreview: "Setting up environment (installing dependencies)...",
11800
- elapsedMs: 0,
11801
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
11802
- isComplete: false
11803
- })
11995
+ let provisionedSite;
11996
+ if (apiClient && projectId2 && scenario.siteSetup && scenario.siteSetup.mode !== "none") {
11997
+ provisionedSite = await apiClient.provisionScenarioSite(
11998
+ projectId2,
11999
+ evalRunId2,
12000
+ scenario.id
11804
12001
  );
11805
12002
  }
11806
- const workDir = await prepareWorkingDirectory(
11807
- config,
11808
- evalRunId2,
11809
- targetId,
11810
- scenario.id,
11811
- template
11812
- );
11813
- const partialResult = await runAgentWithContext(
11814
- config,
11815
- evalRunId2,
11816
- scenario,
11817
- evalData,
11818
- workDir,
11819
- pushEvent
11820
- );
11821
- const inlineAssertions = scenario.assertions ?? [];
11822
- const assertions = [
11823
- ...inlineAssertions,
11824
- ...resolvedAssertions ?? []
11825
- ];
11826
- const templateFilesMap = new Map(
11827
- (partialResult.templateFiles ?? []).map((f) => [f.path, f.status])
11828
- );
11829
- const evaluationInput = {
11830
- outputText: partialResult.outputText,
11831
- llmTrace: partialResult.llmTrace,
11832
- fileDiffs: partialResult.fileDiffs?.map((d) => ({
11833
- path: d.path,
11834
- status: templateFilesMap.get(d.path)
11835
- })),
11836
- durationMs: partialResult.duration
11837
- };
11838
- const defaultJudgeModel = import_evalforge_types13.DEFAULT_JUDGE_MODEL;
11839
- const assertionContext = {
11840
- workDir,
11841
- defaultJudgeModel,
11842
- llmConfig: {
11843
- baseUrl: config.aiGatewayUrl,
11844
- headers: config.aiGatewayHeaders
12003
+ const failedStep = provisionedSite?.bootstrapResult?.steps.find((s) => !s.ok);
12004
+ if (failedStep) {
12005
+ const message = `Site bootstrap step ${failedStep.label ? `"${failedStep.label}" ` : ""}failed (HTTP ${failedStep.statusCode}): ${failedStep.error ?? "unknown error"}`;
12006
+ console.warn(`[run-scenario] ${message}`);
12007
+ pushEvent?.({
12008
+ evalRunId: evalRunId2,
12009
+ scenarioId: scenario.id,
12010
+ scenarioName: scenario.name,
12011
+ targetId,
12012
+ targetName,
12013
+ stepNumber: 0,
12014
+ type: import_evalforge_types13.LiveTraceEventType.PROGRESS,
12015
+ outputPreview: message,
12016
+ elapsedMs: 0,
12017
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
12018
+ isComplete: false
12019
+ });
12020
+ }
12021
+ const effectiveTriggerPrompt = provisionedSite ? `${scenario.triggerPrompt}
12022
+
12023
+ Site ID: ${provisionedSite.id}` : scenario.triggerPrompt;
12024
+ try {
12025
+ if (template) {
12026
+ console.log(
12027
+ (0, import_evalforge_types13.formatTraceEventLine)({
12028
+ evalRunId: evalRunId2,
12029
+ scenarioId: scenario.id,
12030
+ scenarioName: scenario.name,
12031
+ targetId,
12032
+ targetName,
12033
+ stepNumber: 0,
12034
+ type: import_evalforge_types13.LiveTraceEventType.PROGRESS,
12035
+ outputPreview: "Setting up environment (installing dependencies)...",
12036
+ elapsedMs: 0,
12037
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
12038
+ isComplete: false
12039
+ })
12040
+ );
11845
12041
  }
11846
- };
11847
- const assertionResults = assertions.length > 0 ? await (0, import_eval_assertions.evaluateAssertions)(
11848
- evaluationInput,
11849
- assertions,
11850
- assertionContext
11851
- ) : [];
11852
- const passed = assertionResults.filter(
11853
- (r) => r.status === import_evalforge_types13.AssertionResultStatus.PASSED
11854
- ).length;
11855
- const failed = assertionResults.filter(
11856
- (r) => r.status === import_evalforge_types13.AssertionResultStatus.FAILED
11857
- ).length;
11858
- const total = assertionResults.length;
11859
- const passRate = total > 0 ? Math.round(passed / total * 100) : 100;
11860
- return {
11861
- ...partialResult,
11862
- assertionResults,
11863
- passed,
11864
- failed,
11865
- passRate
11866
- };
12042
+ const workDir = await prepareWorkingDirectory(
12043
+ config,
12044
+ evalRunId2,
12045
+ targetId,
12046
+ scenario.id,
12047
+ template
12048
+ );
12049
+ const partialResult = await runAgentWithContext(
12050
+ config,
12051
+ evalRunId2,
12052
+ { ...scenario, triggerPrompt: effectiveTriggerPrompt },
12053
+ evalData,
12054
+ workDir,
12055
+ pushEvent
12056
+ );
12057
+ const inlineAssertions = scenario.assertions ?? [];
12058
+ const assertions = [
12059
+ ...inlineAssertions,
12060
+ ...resolvedAssertions ?? []
12061
+ ];
12062
+ const templateFilesMap = new Map(
12063
+ (partialResult.templateFiles ?? []).map((f) => [f.path, f.status])
12064
+ );
12065
+ const evaluationInput = {
12066
+ outputText: partialResult.outputText,
12067
+ llmTrace: partialResult.llmTrace,
12068
+ fileDiffs: partialResult.fileDiffs?.map((d) => ({
12069
+ path: d.path,
12070
+ status: templateFilesMap.get(d.path)
12071
+ })),
12072
+ durationMs: partialResult.duration
12073
+ };
12074
+ const defaultJudgeModel = import_evalforge_types13.DEFAULT_JUDGE_MODEL;
12075
+ const assertionContext = {
12076
+ workDir,
12077
+ defaultJudgeModel,
12078
+ llmConfig: {
12079
+ baseUrl: config.aiGatewayUrl,
12080
+ headers: config.aiGatewayHeaders
12081
+ }
12082
+ };
12083
+ const assertionResults = assertions.length > 0 ? await (0, import_eval_assertions.evaluateAssertions)(
12084
+ evaluationInput,
12085
+ assertions,
12086
+ assertionContext
12087
+ ) : [];
12088
+ const passed = assertionResults.filter(
12089
+ (r) => r.status === import_evalforge_types13.AssertionResultStatus.PASSED
12090
+ ).length;
12091
+ const failed = assertionResults.filter(
12092
+ (r) => r.status === import_evalforge_types13.AssertionResultStatus.FAILED
12093
+ ).length;
12094
+ const total = assertionResults.length;
12095
+ const passRate = total > 0 ? Math.round(passed / total * 100) : 100;
12096
+ return {
12097
+ ...partialResult,
12098
+ assertionResults,
12099
+ passed,
12100
+ failed,
12101
+ passRate,
12102
+ provisionedSite
12103
+ };
12104
+ } finally {
12105
+ if (provisionedSite && apiClient && projectId2) {
12106
+ await apiClient.deleteProvisionedSite(projectId2, provisionedSite.id);
12107
+ }
12108
+ }
11867
12109
  }
11868
12110
 
11869
12111
  // src/evaluation-loop.ts
@@ -12143,7 +12385,9 @@ async function runEvaluation(projectId2, evalRunId2) {
12143
12385
  evalData,
12144
12386
  template,
12145
12387
  resolvedAssertions,
12146
- pushEvent
12388
+ pushEvent,
12389
+ api,
12390
+ projectId2
12147
12391
  );
12148
12392
  },
12149
12393
  addResult: async (result) => {