@wix/evalforge-evaluator 0.185.0 → 0.187.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +347 -87
- package/build/index.js.map +4 -4
- package/build/index.mjs +347 -87
- package/build/index.mjs.map +4 -4
- package/build/types/ambassador-converters.d.ts +8 -1
- package/build/types/api-client.d.ts +3 -1
- package/build/types/run-scenario/index.d.ts +2 -1
- package/package.json +7 -6
package/build/index.js
CHANGED
|
@@ -1277,7 +1277,7 @@ var require_error = __commonJS({
|
|
|
1277
1277
|
"toJSON",
|
|
1278
1278
|
"__CANCEL__"
|
|
1279
1279
|
];
|
|
1280
|
-
var
|
|
1280
|
+
var HttpError2 = class _HttpError extends Error {
|
|
1281
1281
|
constructor(error) {
|
|
1282
1282
|
var _a;
|
|
1283
1283
|
super(error.message);
|
|
@@ -1294,9 +1294,9 @@ var require_error = __commonJS({
|
|
|
1294
1294
|
return (0, headers_1.requestIdOrEmptyString)(this.response);
|
|
1295
1295
|
}
|
|
1296
1296
|
};
|
|
1297
|
-
exports2.HttpError =
|
|
1297
|
+
exports2.HttpError = HttpError2;
|
|
1298
1298
|
/**
 * Factory counterpart of the `HttpError2` constructor.
 *
 * @param {...*} args - Forwarded unchanged to `new HttpError2(...)`.
 * @returns {HttpError2} The freshly constructed error instance.
 */
function createHttpError(...args) {
  const httpError = new HttpError2(...args);
  return httpError;
}
|
|
1301
1301
|
exports2.createHttpError = createHttpError;
|
|
1302
1302
|
}
|
|
@@ -6351,6 +6351,133 @@ function getLatestCapabilityVersion(payload) {
|
|
|
6351
6351
|
return __getLatestCapabilityVersion;
|
|
6352
6352
|
}
|
|
6353
6353
|
|
|
6354
|
+
// ../../node_modules/@wix/ambassador-evalforge-v1-site-provisioning/es/build/http.impl.js
|
|
6355
|
+
// Empty placeholder objects handed as the first argument to `serializer(...)`
// by the site-provisioning request builders in this module. Kept as `var`
// so hoisting inside the bundle is unchanged.
var _provisionScenarioSiteRequest = {};
var _provisionScenarioSiteResponse = {};
var _deleteProvisionedSiteRequest = {};
var _deleteProvisionedSiteResponse = {};
|
|
6359
|
+
/**
 * Resolves the URL of a SiteProvisioningService endpoint.
 *
 * Adds a `domainToMappings` table to `opts` and delegates to `resolveUrl`.
 * Note that `opts` itself is extended in place (via `Object.assign`), exactly
 * as the generated code has always done.
 *
 * @param {object} opts - URL resolution options; receives `domainToMappings`.
 * @returns {*} Whatever `resolveUrl` returns for the extended options.
 */
function resolveWixEvalforgeV1SiteProvisioningServiceUrl(opts) {
  // Each domain has exactly one mapping whose destination prefix is empty;
  // a fresh array/object is created per domain so nothing is shared.
  const singleMapping = (srcPath) => [{ srcPath, destPath: "" }];
  const domainToMappings = {
    "dev._base_domain_": singleMapping("/_api/evalforge-backend"),
    "api._api_base_domain_": singleMapping("/evalforge-backend"),
    "bo._base_domain_": singleMapping("/_api/evalforge-backend"),
    "wixbo.ai": singleMapping("/_api/evalforge-backend"),
    "wix-bo.com": singleMapping("/_api/evalforge-backend"),
    "manage._base_domain_": singleMapping("/_api/evalforge-backend")
  };
  const extendedOpts = Object.assign(opts, { domainToMappings });
  return resolveUrl(extendedOpts);
}
|
|
6400
|
+
/**
 * Creates the request factory for
 * `wix.evalforge.v1.SiteProvisioningService.ProvisionScenarioSite`.
 *
 * @param {object} payload - Request payload; serialized when the factory runs.
 * @returns {Function} A factory taking `{ host }` and returning the request
 *   metadata (method, url, data, response transform). The factory also
 *   exposes `fromReq` and is tagged with `__isAmbassador = true`.
 */
function provisionScenarioSite(payload) {
  const requestSerializer = serializer(_provisionScenarioSiteRequest, {});
  const responseSerializer = serializer(_provisionScenarioSiteResponse, {});
  const toReq = requestSerializer.toJSON;
  const fromReq = requestSerializer.fromJSON;
  const fromRes = responseSerializer.fromJSON;

  function __provisionScenarioSite(context) {
    const { host } = context;
    // Serialize once; the same value feeds both URL resolution and the body.
    const serializedData = toReq(payload);
    return {
      entityFqdn: "wix.evalforge.v1.site_provisioning",
      method: "POST",
      methodFqn: "wix.evalforge.v1.SiteProvisioningService.ProvisionScenarioSite",
      migrationOptions: {
        optInTransformResponse: true
      },
      url: resolveWixEvalforgeV1SiteProvisioningServiceUrl({
        protoPath: "/v1/projects/{projectId}/site-provisioning/provision-site",
        data: serializedData,
        host
      }),
      data: serializedData,
      transformResponse: fromRes
    };
  }

  __provisionScenarioSite.fromReq = fromReq;
  __provisionScenarioSite.__isAmbassador = true;
  return __provisionScenarioSite;
}
|
|
6427
|
+
/**
 * Creates the request factory for
 * `wix.evalforge.v1.SiteProvisioningService.DeleteProvisionedSite`.
 *
 * @param {object} payload - Request payload; serialized when the factory runs.
 * @returns {Function} A factory taking `{ host }` and returning the request
 *   metadata (method, url, data, response transform). The factory also
 *   exposes `fromReq` and is tagged with `__isAmbassador = true`.
 */
function deleteProvisionedSite(payload) {
  const requestSerializer = serializer(_deleteProvisionedSiteRequest, {});
  const responseSerializer = serializer(_deleteProvisionedSiteResponse, {});
  const toReq = requestSerializer.toJSON;
  const fromReq = requestSerializer.fromJSON;
  const fromRes = responseSerializer.fromJSON;

  function __deleteProvisionedSite(context) {
    const { host } = context;
    // Serialize once; the same value feeds both URL resolution and the body.
    const serializedData = toReq(payload);
    return {
      entityFqdn: "wix.evalforge.v1.site_provisioning",
      method: "POST",
      methodFqn: "wix.evalforge.v1.SiteProvisioningService.DeleteProvisionedSite",
      migrationOptions: {
        optInTransformResponse: true
      },
      url: resolveWixEvalforgeV1SiteProvisioningServiceUrl({
        protoPath: "/v1/projects/{projectId}/site-provisioning/delete-site",
        data: serializedData,
        host
      }),
      data: serializedData,
      transformResponse: fromRes
    };
  }

  __deleteProvisionedSite.fromReq = fromReq;
  __deleteProvisionedSite.__isAmbassador = true;
  return __deleteProvisionedSite;
}
|
|
6454
|
+
|
|
6455
|
+
// ../../node_modules/@wix/ambassador-evalforge-v1-test-scenario/es/build/types.impl.js
|
|
6456
|
+
// String enums: every member's value is its own name. Each enum object is
// created on first use and filled in declaration order.
var WebhookIdentityType;
(function(target) {
  for (const member of ["UNKNOWN", "ANONYMOUS_VISITOR", "MEMBER", "WIX_USER", "APP"]) {
    target[member] = member;
  }
})(WebhookIdentityType || (WebhookIdentityType = {}));

var SiteBootstrapHttpMethod;
(function(target) {
  for (const member of [
    "SITE_BOOTSTRAP_HTTP_METHOD_UNSPECIFIED",
    "GET",
    "POST",
    "PUT",
    "PATCH",
    "DELETE"
  ]) {
    target[member] = member;
  }
})(SiteBootstrapHttpMethod || (SiteBootstrapHttpMethod = {}));

var Mode;
(function(target) {
  for (const member of ["UNKNOWN_MODE", "NONE", "CLONE", "TEMPLATE"]) {
    target[member] = member;
  }
})(Mode || (Mode = {}));
|
|
6480
|
+
|
|
6354
6481
|
// src/ambassador-converters.ts
|
|
6355
6482
|
function toProtoEnum(prefix, value) {
|
|
6356
6483
|
return `${prefix}${value.toUpperCase()}`;
|
|
@@ -6657,9 +6784,37 @@ function testScenarioFromProto(wire) {
|
|
|
6657
6784
|
})
|
|
6658
6785
|
),
|
|
6659
6786
|
createdAt: fromProtoDate(wire.createdAt) ?? "",
|
|
6660
|
-
updatedAt: fromProtoDate(wire.updatedAt) ?? ""
|
|
6787
|
+
updatedAt: fromProtoDate(wire.updatedAt) ?? "",
|
|
6788
|
+
siteSetup: siteSetupFromAmbassador(wire.siteSetup)
|
|
6661
6789
|
};
|
|
6662
6790
|
}
|
|
6791
|
+
/**
 * Converts the wire-format site setup into the evaluator's internal shape.
 *
 * Bootstrap steps are kept only when they have a method other than
 * `SITE_BOOTSTRAP_HTTP_METHOD_UNSPECIFIED` and a truthy url; the method is
 * lower-cased. `bootstrap` is `undefined` when no step survives.
 *
 * @param {object|null|undefined} wire - Wire-format site setup.
 * @returns {object|undefined} `{ mode: "clone", sourceSiteId, bootstrap }`,
 *   `{ mode: "template", templateId, bootstrap }`, or `undefined` when `wire`
 *   is falsy or its mode is neither CLONE nor TEMPLATE.
 */
function siteSetupFromAmbassador(wire) {
  if (!wire) {
    return void 0;
  }

  const isUsableStep = (step) => step.method && step.method !== SiteBootstrapHttpMethod.SITE_BOOTSTRAP_HTTP_METHOD_UNSPECIFIED && step.url;
  const toInternalStep = (step) => ({
    label: step.label ?? void 0,
    method: step.method.toLowerCase(),
    url: step.url ?? "",
    body: step.body ?? void 0
  });

  const wireSteps = wire.bootstrap?.steps ?? [];
  const steps = wireSteps.filter((step) => isUsableStep(step)).map((step) => toInternalStep(step));
  const bootstrap = steps.length > 0 ? { steps } : void 0;

  switch (wire.mode) {
    case Mode.CLONE:
      return {
        mode: "clone",
        sourceSiteId: wire.cloneOptions?.sourceSiteId ?? "",
        bootstrap
      };
    case Mode.TEMPLATE:
      return {
        mode: "template",
        templateId: wire.templateOptions?.templateId ?? "",
        bootstrap
      };
    default:
      return void 0;
  }
}
|
|
6663
6818
|
function templateFromProto(wire) {
|
|
6664
6819
|
return {
|
|
6665
6820
|
id: wire.id ?? "",
|
|
@@ -6713,8 +6868,36 @@ function capabilityVersionFromProto(wire, projectId2) {
|
|
|
6713
6868
|
createdAt: fromProtoDate(wire.createdAt) ?? ""
|
|
6714
6869
|
};
|
|
6715
6870
|
}
|
|
6871
|
+
/**
 * Maps a wire-format provisioned site onto the internal shape.
 *
 * @param {object} proto - Wire-format site.
 * @returns {{id: string, url: (string|undefined), editorUrl: (string|undefined)}}
 *   `id` falls back to "" and the two URLs to `undefined` when nullish.
 */
function provisionedSiteFromProto(proto) {
  const { id, url, editorUrl } = proto;
  return {
    id: id ?? "",
    url: url ?? void 0,
    editorUrl: editorUrl ?? void 0
  };
}
|
|
6878
|
+
/**
 * Maps a wire-format bootstrap result onto the internal shape.
 *
 * @param {object|null|undefined} proto - Wire-format bootstrap result.
 * @returns {{steps: Array<object>}|undefined} `undefined` when `proto` is
 *   falsy; otherwise one entry per wire step, with `statusCode` defaulting
 *   to 0, `ok` to false, and `label`/`error` to `undefined`.
 */
function siteBootstrapResultFromProto(proto) {
  if (!proto) {
    return void 0;
  }
  const toStepResult = ({ label, statusCode, ok, error }) => ({
    label: label ?? void 0,
    statusCode: statusCode ?? 0,
    ok: ok ?? false,
    error: error ?? void 0
  });
  const wireSteps = proto.steps ?? [];
  return { steps: wireSteps.map((step) => toStepResult(step)) };
}
|
|
6716
6889
|
|
|
6717
6890
|
// src/api-client.ts
|
|
6891
|
+
/**
 * Rethrows `err`, enriching HTTP client errors with request context.
 *
 * Errors that are not `import_http_client.HttpError` instances are rethrown
 * untouched. HttpErrors are wrapped in a new `Error` whose message names the
 * attempted action, the HTTP status (when a response is present) and the
 * request id (when non-empty). The original HttpError is attached as `cause`
 * so its stack and response stay available to callers and loggers.
 *
 * @param {*} err - The caught error.
 * @param {string} action - Human-readable description used after "Failed to".
 * @returns {never} This function always throws.
 * @throws {Error} The wrapped HttpError, or `err` itself for anything else.
 */
function rethrowWithRequestId(err, action) {
  if (!(err instanceof import_http_client.HttpError)) {
    throw err;
  }
  const status = err.response?.status;
  const requestId = err.requestId;
  const statusPart = status !== void 0 ? ` (HTTP ${status})` : "";
  const requestIdPart = requestId ? ` [request id: ${requestId}]` : "";
  throw new Error(
    `Failed to ${action}${statusPart}${requestIdPart}: ${err.message}`,
    // Keep the original error reachable instead of discarding it.
    { cause: err }
  );
}
|
|
6718
6901
|
function resolveAmbassadorBaseUrl(serverUrl) {
|
|
6719
6902
|
try {
|
|
6720
6903
|
return new URL(serverUrl).origin;
|
|
@@ -6816,21 +6999,37 @@ function createApiClient(serverUrl, options = "") {
|
|
|
6816
6999
|
// The legacy REST endpoint enriched the capability with its latest version
|
|
6817
7000
|
// server-side; ambassador's GetCapability returns the bare entity, so we
|
|
6818
7001
|
// compose it with GetLatestCapabilityVersion in parallel here.
|
|
7002
|
+
//
|
|
7003
|
+
// The latest-version fetch is BEST-EFFORT: a failure must not drop the whole
|
|
7004
|
+
// capability. Otherwise one broken snapshot fetch makes the capability (e.g.
|
|
7005
|
+
// an MCP) silently vanish from the run. Runs that pin a version still resolve
|
|
7006
|
+
// their content via getCapabilityVersion downstream.
|
|
6819
7007
|
async getCapability(projectId2, id) {
|
|
6820
|
-
const [
|
|
7008
|
+
const [capResult, versionResult] = await Promise.allSettled([
|
|
6821
7009
|
httpClient.request(getCapability({ projectId: projectId2, capabilityId: id })),
|
|
6822
7010
|
httpClient.request(
|
|
6823
7011
|
getLatestCapabilityVersion({ projectId: projectId2, capabilityId: id })
|
|
6824
7012
|
)
|
|
6825
7013
|
]);
|
|
6826
|
-
|
|
7014
|
+
if (capResult.status === "rejected") {
|
|
7015
|
+
throw capResult.reason;
|
|
7016
|
+
}
|
|
7017
|
+
const capability = capResult.value.data.capability;
|
|
6827
7018
|
if (!capability) {
|
|
6828
7019
|
throw new Error(`Capability ${id} not found in project ${projectId2}`);
|
|
6829
7020
|
}
|
|
6830
|
-
|
|
6831
|
-
|
|
6832
|
-
|
|
6833
|
-
|
|
7021
|
+
let latestVersion;
|
|
7022
|
+
if (versionResult.status === "fulfilled" && versionResult.value.data.capabilityVersion) {
|
|
7023
|
+
latestVersion = capabilityVersionFromProto(
|
|
7024
|
+
versionResult.value.data.capabilityVersion,
|
|
7025
|
+
projectId2
|
|
7026
|
+
);
|
|
7027
|
+
} else if (versionResult.status === "rejected") {
|
|
7028
|
+
const reason = versionResult.reason instanceof Error ? versionResult.reason.message : String(versionResult.reason);
|
|
7029
|
+
console.warn(
|
|
7030
|
+
`[Capabilities] getLatestCapabilityVersion(${id}) failed; loading capability without a snapshot (pinned versions still resolve): ${reason}`
|
|
7031
|
+
);
|
|
7032
|
+
}
|
|
6834
7033
|
return { ...capabilityFromProto(capability), latestVersion };
|
|
6835
7034
|
},
|
|
6836
7035
|
async getCapabilityVersion(projectId2, capabilityId, versionId) {
|
|
@@ -6874,6 +7073,29 @@ function createApiClient(serverUrl, options = "") {
|
|
|
6874
7073
|
},
|
|
6875
7074
|
updateEvalRun(projectId2, evalRunId2, update) {
|
|
6876
7075
|
return putJson(`/projects/${projectId2}/eval-runs/${evalRunId2}`, update);
|
|
7076
|
+
},
|
|
7077
|
+
async provisionScenarioSite(projectId2, evalRunId2, scenarioId) {
|
|
7078
|
+
const res = await httpClient.request(provisionScenarioSite({ projectId: projectId2, evalRunId: evalRunId2, scenarioId })).catch(
|
|
7079
|
+
(err) => rethrowWithRequestId(err, `provision a site for scenario ${scenarioId}`)
|
|
7080
|
+
);
|
|
7081
|
+
const site = res.data.site;
|
|
7082
|
+
if (!site) {
|
|
7083
|
+
throw new Error(
|
|
7084
|
+
`Site provisioning for scenario ${scenarioId} returned no site.`
|
|
7085
|
+
);
|
|
7086
|
+
}
|
|
7087
|
+
return {
|
|
7088
|
+
...provisionedSiteFromProto(site),
|
|
7089
|
+
bootstrapResult: siteBootstrapResultFromProto(res.data.bootstrapResult)
|
|
7090
|
+
};
|
|
7091
|
+
},
|
|
7092
|
+
async deleteProvisionedSite(projectId2, siteId) {
|
|
7093
|
+
await httpClient.request(deleteProvisionedSite({ projectId: projectId2, siteId })).catch((err) => {
|
|
7094
|
+
console.warn(
|
|
7095
|
+
"[site-provisioning] deleteProvisionedSite failed \u2014 site may remain:",
|
|
7096
|
+
err
|
|
7097
|
+
);
|
|
7098
|
+
});
|
|
6877
7099
|
}
|
|
6878
7100
|
};
|
|
6879
7101
|
}
|
|
@@ -11767,87 +11989,123 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
|
|
|
11767
11989
|
}
|
|
11768
11990
|
|
|
11769
11991
|
// src/run-scenario/index.ts
|
|
11770
|
-
async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions, pushEvent) {
|
|
11992
|
+
/**
 * Runs one scenario end to end: optionally provisions a site, prepares the
 * working directory, runs the agent, evaluates assertions, and finally
 * deletes the provisioned site.
 *
 * Everything that happens after a site has been provisioned runs inside the
 * `try` block, so the site is deleted even if reporting a bootstrap failure
 * (e.g. a throwing `pushEvent`) fails before the agent starts.
 *
 * @param {object} config - Evaluator config; `aiGatewayUrl` and
 *   `aiGatewayHeaders` are read here for the assertion LLM config.
 * @param {string} evalRunId2 - Eval run id.
 * @param {object} scenario - Scenario (`id`, `name`, `triggerPrompt`,
 *   optional `assertions` and `siteSetup`).
 * @param {object} evalData - Eval data; used to derive target id and name.
 * @param {object} [template] - Template passed to `prepareWorkingDirectory`.
 * @param {Array} [resolvedAssertions] - Assertions appended after the
 *   scenario's inline ones.
 * @param {Function} [pushEvent] - Optional live-trace event sink.
 * @param {object} [apiClient] - API client; site provisioning is skipped
 *   when it or `projectId2` is missing.
 * @param {string} [projectId2] - Owning project id.
 * @returns {Promise<object>} The agent's partial result extended with
 *   `assertionResults`, `passed`, `failed`, `passRate` and `provisionedSite`.
 */
async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions, pushEvent, apiClient, projectId2) {
  const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
  const targetName = evalData.presetName ?? evalData.agent?.name ?? "";

  // Builds a step-0 PROGRESS trace event carrying the given preview text.
  const progressEvent = (outputPreview) => ({
    evalRunId: evalRunId2,
    scenarioId: scenario.id,
    scenarioName: scenario.name,
    targetId,
    targetName,
    stepNumber: 0,
    type: import_evalforge_types13.LiveTraceEventType.PROGRESS,
    outputPreview,
    elapsedMs: 0,
    timestamp: (/* @__PURE__ */ new Date()).toISOString(),
    isComplete: false
  });

  let provisionedSite;
  if (apiClient && projectId2 && scenario.siteSetup && scenario.siteSetup.mode !== "none") {
    provisionedSite = await apiClient.provisionScenarioSite(
      projectId2,
      evalRunId2,
      scenario.id
    );
  }

  try {
    // A failed bootstrap step is reported but does not abort the scenario.
    const failedStep = provisionedSite?.bootstrapResult?.steps.find((s) => !s.ok);
    if (failedStep) {
      const message = `Site bootstrap step ${failedStep.label ? `"${failedStep.label}" ` : ""}failed (HTTP ${failedStep.statusCode}): ${failedStep.error ?? "unknown error"}`;
      console.warn(`[run-scenario] ${message}`);
      pushEvent?.(progressEvent(message));
    }

    // Append the provisioned site's id to the prompt given to the agent.
    const effectiveTriggerPrompt = provisionedSite ? `${scenario.triggerPrompt}\n\nSite ID: ${provisionedSite.id}` : scenario.triggerPrompt;

    if (template) {
      console.log(
        (0, import_evalforge_types13.formatTraceEventLine)(
          progressEvent("Setting up environment (installing dependencies)...")
        )
      );
    }

    const workDir = await prepareWorkingDirectory(
      config,
      evalRunId2,
      targetId,
      scenario.id,
      template
    );
    const partialResult = await runAgentWithContext(
      config,
      evalRunId2,
      { ...scenario, triggerPrompt: effectiveTriggerPrompt },
      evalData,
      workDir,
      pushEvent
    );

    const inlineAssertions = scenario.assertions ?? [];
    const assertions = [
      ...inlineAssertions,
      ...resolvedAssertions ?? []
    ];

    // Look up each diffed file's status among the reported template files.
    const templateFilesMap = new Map(
      (partialResult.templateFiles ?? []).map((f) => [f.path, f.status])
    );
    const evaluationInput = {
      outputText: partialResult.outputText,
      llmTrace: partialResult.llmTrace,
      fileDiffs: partialResult.fileDiffs?.map((d) => ({
        path: d.path,
        status: templateFilesMap.get(d.path)
      })),
      durationMs: partialResult.duration
    };
    const defaultJudgeModel = import_evalforge_types13.DEFAULT_JUDGE_MODEL;
    const assertionContext = {
      workDir,
      defaultJudgeModel,
      llmConfig: {
        baseUrl: config.aiGatewayUrl,
        headers: config.aiGatewayHeaders
      }
    };
    const assertionResults = assertions.length > 0 ? await (0, import_eval_assertions.evaluateAssertions)(
      evaluationInput,
      assertions,
      assertionContext
    ) : [];

    const passed = assertionResults.filter(
      (r) => r.status === import_evalforge_types13.AssertionResultStatus.PASSED
    ).length;
    const failed = assertionResults.filter(
      (r) => r.status === import_evalforge_types13.AssertionResultStatus.FAILED
    ).length;
    const total = assertionResults.length;
    // With no assertions at all the scenario counts as fully passing.
    const passRate = total > 0 ? Math.round(passed / total * 100) : 100;

    return {
      ...partialResult,
      assertionResults,
      passed,
      failed,
      passRate,
      provisionedSite
    };
  } finally {
    if (provisionedSite && apiClient && projectId2) {
      await apiClient.deleteProvisionedSite(projectId2, provisionedSite.id);
    }
  }
}
|
|
11852
12110
|
|
|
11853
12111
|
// src/evaluation-loop.ts
|
|
@@ -12127,7 +12385,9 @@ async function runEvaluation(projectId2, evalRunId2) {
|
|
|
12127
12385
|
evalData,
|
|
12128
12386
|
template,
|
|
12129
12387
|
resolvedAssertions,
|
|
12130
|
-
pushEvent
|
|
12388
|
+
pushEvent,
|
|
12389
|
+
api,
|
|
12390
|
+
projectId2
|
|
12131
12391
|
);
|
|
12132
12392
|
},
|
|
12133
12393
|
addResult: async (result) => {
|