@wix/evalforge-evaluator 0.185.0 → 0.187.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +347 -87
- package/build/index.js.map +4 -4
- package/build/index.mjs +347 -87
- package/build/index.mjs.map +4 -4
- package/build/types/ambassador-converters.d.ts +8 -1
- package/build/types/api-client.d.ts +3 -1
- package/build/types/run-scenario/index.d.ts +2 -1
- package/package.json +7 -6
package/build/index.mjs
CHANGED
|
@@ -1282,7 +1282,7 @@ var require_error = __commonJS({
|
|
|
1282
1282
|
"toJSON",
|
|
1283
1283
|
"__CANCEL__"
|
|
1284
1284
|
];
|
|
1285
|
-
var
|
|
1285
|
+
var HttpError2 = class _HttpError extends Error {
|
|
1286
1286
|
constructor(error) {
|
|
1287
1287
|
var _a;
|
|
1288
1288
|
super(error.message);
|
|
@@ -1299,9 +1299,9 @@ var require_error = __commonJS({
|
|
|
1299
1299
|
return (0, headers_1.requestIdOrEmptyString)(this.response);
|
|
1300
1300
|
}
|
|
1301
1301
|
};
|
|
1302
|
-
exports.HttpError =
|
|
1302
|
+
exports.HttpError = HttpError2;
|
|
1303
1303
|
function createHttpError(...args) {
|
|
1304
|
-
return new
|
|
1304
|
+
return new HttpError2(...args);
|
|
1305
1305
|
}
|
|
1306
1306
|
exports.createHttpError = createHttpError;
|
|
1307
1307
|
}
|
|
@@ -6356,6 +6356,133 @@ function getLatestCapabilityVersion(payload) {
|
|
|
6356
6356
|
return __getLatestCapabilityVersion;
|
|
6357
6357
|
}
|
|
6358
6358
|
|
|
6359
|
+
// ../../node_modules/@wix/ambassador-evalforge-v1-site-provisioning/es/build/http.impl.js
|
|
6360
|
+
var _deleteProvisionedSiteRequest = {};
|
|
6361
|
+
var _deleteProvisionedSiteResponse = {};
|
|
6362
|
+
var _provisionScenarioSiteRequest = {};
|
|
6363
|
+
var _provisionScenarioSiteResponse = {};
|
|
6364
|
+
function resolveWixEvalforgeV1SiteProvisioningServiceUrl(opts) {
|
|
6365
|
+
var domainToMappings = {
|
|
6366
|
+
"dev._base_domain_": [
|
|
6367
|
+
{
|
|
6368
|
+
srcPath: "/_api/evalforge-backend",
|
|
6369
|
+
destPath: ""
|
|
6370
|
+
}
|
|
6371
|
+
],
|
|
6372
|
+
"api._api_base_domain_": [
|
|
6373
|
+
{
|
|
6374
|
+
srcPath: "/evalforge-backend",
|
|
6375
|
+
destPath: ""
|
|
6376
|
+
}
|
|
6377
|
+
],
|
|
6378
|
+
"bo._base_domain_": [
|
|
6379
|
+
{
|
|
6380
|
+
srcPath: "/_api/evalforge-backend",
|
|
6381
|
+
destPath: ""
|
|
6382
|
+
}
|
|
6383
|
+
],
|
|
6384
|
+
"wixbo.ai": [
|
|
6385
|
+
{
|
|
6386
|
+
srcPath: "/_api/evalforge-backend",
|
|
6387
|
+
destPath: ""
|
|
6388
|
+
}
|
|
6389
|
+
],
|
|
6390
|
+
"wix-bo.com": [
|
|
6391
|
+
{
|
|
6392
|
+
srcPath: "/_api/evalforge-backend",
|
|
6393
|
+
destPath: ""
|
|
6394
|
+
}
|
|
6395
|
+
],
|
|
6396
|
+
"manage._base_domain_": [
|
|
6397
|
+
{
|
|
6398
|
+
srcPath: "/_api/evalforge-backend",
|
|
6399
|
+
destPath: ""
|
|
6400
|
+
}
|
|
6401
|
+
]
|
|
6402
|
+
};
|
|
6403
|
+
return resolveUrl(Object.assign(opts, { domainToMappings }));
|
|
6404
|
+
}
|
|
6405
|
+
function provisionScenarioSite(payload) {
|
|
6406
|
+
var _a = serializer(_provisionScenarioSiteRequest, {}), toReq = _a.toJSON, fromReq = _a.fromJSON;
|
|
6407
|
+
var fromRes = serializer(_provisionScenarioSiteResponse, {}).fromJSON;
|
|
6408
|
+
function __provisionScenarioSite(_a2) {
|
|
6409
|
+
var host = _a2.host;
|
|
6410
|
+
var serializedData = toReq(payload);
|
|
6411
|
+
var metadata = {
|
|
6412
|
+
entityFqdn: "wix.evalforge.v1.site_provisioning",
|
|
6413
|
+
method: "POST",
|
|
6414
|
+
methodFqn: "wix.evalforge.v1.SiteProvisioningService.ProvisionScenarioSite",
|
|
6415
|
+
migrationOptions: {
|
|
6416
|
+
optInTransformResponse: true
|
|
6417
|
+
},
|
|
6418
|
+
url: resolveWixEvalforgeV1SiteProvisioningServiceUrl({
|
|
6419
|
+
protoPath: "/v1/projects/{projectId}/site-provisioning/provision-site",
|
|
6420
|
+
data: serializedData,
|
|
6421
|
+
host
|
|
6422
|
+
}),
|
|
6423
|
+
data: serializedData,
|
|
6424
|
+
transformResponse: fromRes
|
|
6425
|
+
};
|
|
6426
|
+
return metadata;
|
|
6427
|
+
}
|
|
6428
|
+
__provisionScenarioSite.fromReq = fromReq;
|
|
6429
|
+
__provisionScenarioSite.__isAmbassador = true;
|
|
6430
|
+
return __provisionScenarioSite;
|
|
6431
|
+
}
|
|
6432
|
+
function deleteProvisionedSite(payload) {
|
|
6433
|
+
var _a = serializer(_deleteProvisionedSiteRequest, {}), toReq = _a.toJSON, fromReq = _a.fromJSON;
|
|
6434
|
+
var fromRes = serializer(_deleteProvisionedSiteResponse, {}).fromJSON;
|
|
6435
|
+
function __deleteProvisionedSite(_a2) {
|
|
6436
|
+
var host = _a2.host;
|
|
6437
|
+
var serializedData = toReq(payload);
|
|
6438
|
+
var metadata = {
|
|
6439
|
+
entityFqdn: "wix.evalforge.v1.site_provisioning",
|
|
6440
|
+
method: "POST",
|
|
6441
|
+
methodFqn: "wix.evalforge.v1.SiteProvisioningService.DeleteProvisionedSite",
|
|
6442
|
+
migrationOptions: {
|
|
6443
|
+
optInTransformResponse: true
|
|
6444
|
+
},
|
|
6445
|
+
url: resolveWixEvalforgeV1SiteProvisioningServiceUrl({
|
|
6446
|
+
protoPath: "/v1/projects/{projectId}/site-provisioning/delete-site",
|
|
6447
|
+
data: serializedData,
|
|
6448
|
+
host
|
|
6449
|
+
}),
|
|
6450
|
+
data: serializedData,
|
|
6451
|
+
transformResponse: fromRes
|
|
6452
|
+
};
|
|
6453
|
+
return metadata;
|
|
6454
|
+
}
|
|
6455
|
+
__deleteProvisionedSite.fromReq = fromReq;
|
|
6456
|
+
__deleteProvisionedSite.__isAmbassador = true;
|
|
6457
|
+
return __deleteProvisionedSite;
|
|
6458
|
+
}
|
|
6459
|
+
|
|
6460
|
+
// ../../node_modules/@wix/ambassador-evalforge-v1-test-scenario/es/build/types.impl.js
|
|
6461
|
+
var WebhookIdentityType;
|
|
6462
|
+
(function(WebhookIdentityType2) {
|
|
6463
|
+
WebhookIdentityType2["UNKNOWN"] = "UNKNOWN";
|
|
6464
|
+
WebhookIdentityType2["ANONYMOUS_VISITOR"] = "ANONYMOUS_VISITOR";
|
|
6465
|
+
WebhookIdentityType2["MEMBER"] = "MEMBER";
|
|
6466
|
+
WebhookIdentityType2["WIX_USER"] = "WIX_USER";
|
|
6467
|
+
WebhookIdentityType2["APP"] = "APP";
|
|
6468
|
+
})(WebhookIdentityType || (WebhookIdentityType = {}));
|
|
6469
|
+
var SiteBootstrapHttpMethod;
|
|
6470
|
+
(function(SiteBootstrapHttpMethod2) {
|
|
6471
|
+
SiteBootstrapHttpMethod2["SITE_BOOTSTRAP_HTTP_METHOD_UNSPECIFIED"] = "SITE_BOOTSTRAP_HTTP_METHOD_UNSPECIFIED";
|
|
6472
|
+
SiteBootstrapHttpMethod2["GET"] = "GET";
|
|
6473
|
+
SiteBootstrapHttpMethod2["POST"] = "POST";
|
|
6474
|
+
SiteBootstrapHttpMethod2["PUT"] = "PUT";
|
|
6475
|
+
SiteBootstrapHttpMethod2["PATCH"] = "PATCH";
|
|
6476
|
+
SiteBootstrapHttpMethod2["DELETE"] = "DELETE";
|
|
6477
|
+
})(SiteBootstrapHttpMethod || (SiteBootstrapHttpMethod = {}));
|
|
6478
|
+
var Mode;
|
|
6479
|
+
(function(Mode2) {
|
|
6480
|
+
Mode2["UNKNOWN_MODE"] = "UNKNOWN_MODE";
|
|
6481
|
+
Mode2["NONE"] = "NONE";
|
|
6482
|
+
Mode2["CLONE"] = "CLONE";
|
|
6483
|
+
Mode2["TEMPLATE"] = "TEMPLATE";
|
|
6484
|
+
})(Mode || (Mode = {}));
|
|
6485
|
+
|
|
6359
6486
|
// src/ambassador-converters.ts
|
|
6360
6487
|
function toProtoEnum(prefix, value) {
|
|
6361
6488
|
return `${prefix}${value.toUpperCase()}`;
|
|
@@ -6662,9 +6789,37 @@ function testScenarioFromProto(wire) {
|
|
|
6662
6789
|
})
|
|
6663
6790
|
),
|
|
6664
6791
|
createdAt: fromProtoDate(wire.createdAt) ?? "",
|
|
6665
|
-
updatedAt: fromProtoDate(wire.updatedAt) ?? ""
|
|
6792
|
+
updatedAt: fromProtoDate(wire.updatedAt) ?? "",
|
|
6793
|
+
siteSetup: siteSetupFromAmbassador(wire.siteSetup)
|
|
6666
6794
|
};
|
|
6667
6795
|
}
|
|
6796
|
+
function siteSetupFromAmbassador(wire) {
|
|
6797
|
+
if (!wire) return void 0;
|
|
6798
|
+
const steps = (wire.bootstrap?.steps ?? []).filter(
|
|
6799
|
+
(step) => step.method && step.method !== SiteBootstrapHttpMethod.SITE_BOOTSTRAP_HTTP_METHOD_UNSPECIFIED && step.url
|
|
6800
|
+
).map((step) => ({
|
|
6801
|
+
label: step.label ?? void 0,
|
|
6802
|
+
method: step.method.toLowerCase(),
|
|
6803
|
+
url: step.url ?? "",
|
|
6804
|
+
body: step.body ?? void 0
|
|
6805
|
+
}));
|
|
6806
|
+
const bootstrap = steps.length > 0 ? { steps } : void 0;
|
|
6807
|
+
if (wire.mode === Mode.CLONE) {
|
|
6808
|
+
return {
|
|
6809
|
+
mode: "clone",
|
|
6810
|
+
sourceSiteId: wire.cloneOptions?.sourceSiteId ?? "",
|
|
6811
|
+
bootstrap
|
|
6812
|
+
};
|
|
6813
|
+
}
|
|
6814
|
+
if (wire.mode === Mode.TEMPLATE) {
|
|
6815
|
+
return {
|
|
6816
|
+
mode: "template",
|
|
6817
|
+
templateId: wire.templateOptions?.templateId ?? "",
|
|
6818
|
+
bootstrap
|
|
6819
|
+
};
|
|
6820
|
+
}
|
|
6821
|
+
return void 0;
|
|
6822
|
+
}
|
|
6668
6823
|
function templateFromProto(wire) {
|
|
6669
6824
|
return {
|
|
6670
6825
|
id: wire.id ?? "",
|
|
@@ -6718,8 +6873,36 @@ function capabilityVersionFromProto(wire, projectId2) {
|
|
|
6718
6873
|
createdAt: fromProtoDate(wire.createdAt) ?? ""
|
|
6719
6874
|
};
|
|
6720
6875
|
}
|
|
6876
|
+
function provisionedSiteFromProto(proto) {
|
|
6877
|
+
return {
|
|
6878
|
+
id: proto.id ?? "",
|
|
6879
|
+
url: proto.url ?? void 0,
|
|
6880
|
+
editorUrl: proto.editorUrl ?? void 0
|
|
6881
|
+
};
|
|
6882
|
+
}
|
|
6883
|
+
function siteBootstrapResultFromProto(proto) {
|
|
6884
|
+
if (!proto) return void 0;
|
|
6885
|
+
return {
|
|
6886
|
+
steps: (proto.steps ?? []).map((step) => ({
|
|
6887
|
+
label: step.label ?? void 0,
|
|
6888
|
+
statusCode: step.statusCode ?? 0,
|
|
6889
|
+
ok: step.ok ?? false,
|
|
6890
|
+
error: step.error ?? void 0
|
|
6891
|
+
}))
|
|
6892
|
+
};
|
|
6893
|
+
}
|
|
6721
6894
|
|
|
6722
6895
|
// src/api-client.ts
|
|
6896
|
+
function rethrowWithRequestId(err, action) {
|
|
6897
|
+
if (err instanceof import_http_client.HttpError) {
|
|
6898
|
+
const status = err.response?.status;
|
|
6899
|
+
const requestId = err.requestId;
|
|
6900
|
+
throw new Error(
|
|
6901
|
+
`Failed to ${action}` + (status !== void 0 ? ` (HTTP ${status})` : "") + (requestId ? ` [request id: ${requestId}]` : "") + `: ${err.message}`
|
|
6902
|
+
);
|
|
6903
|
+
}
|
|
6904
|
+
throw err;
|
|
6905
|
+
}
|
|
6723
6906
|
function resolveAmbassadorBaseUrl(serverUrl) {
|
|
6724
6907
|
try {
|
|
6725
6908
|
return new URL(serverUrl).origin;
|
|
@@ -6821,21 +7004,37 @@ function createApiClient(serverUrl, options = "") {
|
|
|
6821
7004
|
// The legacy REST endpoint enriched the capability with its latest version
|
|
6822
7005
|
// server-side; ambassador's GetCapability returns the bare entity, so we
|
|
6823
7006
|
// compose it with GetLatestCapabilityVersion in parallel here.
|
|
7007
|
+
//
|
|
7008
|
+
// The latest-version fetch is BEST-EFFORT: a failure must not drop the whole
|
|
7009
|
+
// capability. Otherwise one broken snapshot fetch makes the capability (e.g.
|
|
7010
|
+
// an MCP) silently vanish from the run. Runs that pin a version still resolve
|
|
7011
|
+
// their content via getCapabilityVersion downstream.
|
|
6824
7012
|
async getCapability(projectId2, id) {
|
|
6825
|
-
const [
|
|
7013
|
+
const [capResult, versionResult] = await Promise.allSettled([
|
|
6826
7014
|
httpClient.request(getCapability({ projectId: projectId2, capabilityId: id })),
|
|
6827
7015
|
httpClient.request(
|
|
6828
7016
|
getLatestCapabilityVersion({ projectId: projectId2, capabilityId: id })
|
|
6829
7017
|
)
|
|
6830
7018
|
]);
|
|
6831
|
-
|
|
7019
|
+
if (capResult.status === "rejected") {
|
|
7020
|
+
throw capResult.reason;
|
|
7021
|
+
}
|
|
7022
|
+
const capability = capResult.value.data.capability;
|
|
6832
7023
|
if (!capability) {
|
|
6833
7024
|
throw new Error(`Capability ${id} not found in project ${projectId2}`);
|
|
6834
7025
|
}
|
|
6835
|
-
|
|
6836
|
-
|
|
6837
|
-
|
|
6838
|
-
|
|
7026
|
+
let latestVersion;
|
|
7027
|
+
if (versionResult.status === "fulfilled" && versionResult.value.data.capabilityVersion) {
|
|
7028
|
+
latestVersion = capabilityVersionFromProto(
|
|
7029
|
+
versionResult.value.data.capabilityVersion,
|
|
7030
|
+
projectId2
|
|
7031
|
+
);
|
|
7032
|
+
} else if (versionResult.status === "rejected") {
|
|
7033
|
+
const reason = versionResult.reason instanceof Error ? versionResult.reason.message : String(versionResult.reason);
|
|
7034
|
+
console.warn(
|
|
7035
|
+
`[Capabilities] getLatestCapabilityVersion(${id}) failed; loading capability without a snapshot (pinned versions still resolve): ${reason}`
|
|
7036
|
+
);
|
|
7037
|
+
}
|
|
6839
7038
|
return { ...capabilityFromProto(capability), latestVersion };
|
|
6840
7039
|
},
|
|
6841
7040
|
async getCapabilityVersion(projectId2, capabilityId, versionId) {
|
|
@@ -6879,6 +7078,29 @@ function createApiClient(serverUrl, options = "") {
|
|
|
6879
7078
|
},
|
|
6880
7079
|
updateEvalRun(projectId2, evalRunId2, update) {
|
|
6881
7080
|
return putJson(`/projects/${projectId2}/eval-runs/${evalRunId2}`, update);
|
|
7081
|
+
},
|
|
7082
|
+
async provisionScenarioSite(projectId2, evalRunId2, scenarioId) {
|
|
7083
|
+
const res = await httpClient.request(provisionScenarioSite({ projectId: projectId2, evalRunId: evalRunId2, scenarioId })).catch(
|
|
7084
|
+
(err) => rethrowWithRequestId(err, `provision a site for scenario ${scenarioId}`)
|
|
7085
|
+
);
|
|
7086
|
+
const site = res.data.site;
|
|
7087
|
+
if (!site) {
|
|
7088
|
+
throw new Error(
|
|
7089
|
+
`Site provisioning for scenario ${scenarioId} returned no site.`
|
|
7090
|
+
);
|
|
7091
|
+
}
|
|
7092
|
+
return {
|
|
7093
|
+
...provisionedSiteFromProto(site),
|
|
7094
|
+
bootstrapResult: siteBootstrapResultFromProto(res.data.bootstrapResult)
|
|
7095
|
+
};
|
|
7096
|
+
},
|
|
7097
|
+
async deleteProvisionedSite(projectId2, siteId) {
|
|
7098
|
+
await httpClient.request(deleteProvisionedSite({ projectId: projectId2, siteId })).catch((err) => {
|
|
7099
|
+
console.warn(
|
|
7100
|
+
"[site-provisioning] deleteProvisionedSite failed \u2014 site may remain:",
|
|
7101
|
+
err
|
|
7102
|
+
);
|
|
7103
|
+
});
|
|
6882
7104
|
}
|
|
6883
7105
|
};
|
|
6884
7106
|
}
|
|
@@ -11820,87 +12042,123 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
|
|
|
11820
12042
|
}
|
|
11821
12043
|
|
|
11822
12044
|
// src/run-scenario/index.ts
|
|
11823
|
-
async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions, pushEvent) {
|
|
12045
|
+
async function runScenario(config, evalRunId2, scenario, evalData, template, resolvedAssertions, pushEvent, apiClient, projectId2) {
|
|
11824
12046
|
const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
|
|
11825
12047
|
const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
|
|
11826
|
-
|
|
11827
|
-
|
|
11828
|
-
|
|
11829
|
-
|
|
11830
|
-
|
|
11831
|
-
|
|
11832
|
-
targetId,
|
|
11833
|
-
targetName,
|
|
11834
|
-
stepNumber: 0,
|
|
11835
|
-
type: LiveTraceEventType4.PROGRESS,
|
|
11836
|
-
outputPreview: "Setting up environment (installing dependencies)...",
|
|
11837
|
-
elapsedMs: 0,
|
|
11838
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
11839
|
-
isComplete: false
|
|
11840
|
-
})
|
|
12048
|
+
let provisionedSite;
|
|
12049
|
+
if (apiClient && projectId2 && scenario.siteSetup && scenario.siteSetup.mode !== "none") {
|
|
12050
|
+
provisionedSite = await apiClient.provisionScenarioSite(
|
|
12051
|
+
projectId2,
|
|
12052
|
+
evalRunId2,
|
|
12053
|
+
scenario.id
|
|
11841
12054
|
);
|
|
11842
12055
|
}
|
|
11843
|
-
const
|
|
11844
|
-
|
|
11845
|
-
|
|
11846
|
-
|
|
11847
|
-
|
|
11848
|
-
|
|
11849
|
-
|
|
11850
|
-
|
|
11851
|
-
|
|
11852
|
-
|
|
11853
|
-
|
|
11854
|
-
|
|
11855
|
-
|
|
11856
|
-
|
|
11857
|
-
|
|
11858
|
-
|
|
11859
|
-
|
|
11860
|
-
|
|
11861
|
-
|
|
11862
|
-
|
|
11863
|
-
|
|
11864
|
-
|
|
11865
|
-
|
|
11866
|
-
|
|
11867
|
-
|
|
11868
|
-
|
|
11869
|
-
|
|
11870
|
-
|
|
11871
|
-
|
|
11872
|
-
|
|
11873
|
-
|
|
11874
|
-
|
|
11875
|
-
|
|
11876
|
-
|
|
11877
|
-
|
|
11878
|
-
|
|
11879
|
-
|
|
11880
|
-
|
|
11881
|
-
headers: config.aiGatewayHeaders
|
|
12056
|
+
const failedStep = provisionedSite?.bootstrapResult?.steps.find((s) => !s.ok);
|
|
12057
|
+
if (failedStep) {
|
|
12058
|
+
const message = `Site bootstrap step ${failedStep.label ? `"${failedStep.label}" ` : ""}failed (HTTP ${failedStep.statusCode}): ${failedStep.error ?? "unknown error"}`;
|
|
12059
|
+
console.warn(`[run-scenario] ${message}`);
|
|
12060
|
+
pushEvent?.({
|
|
12061
|
+
evalRunId: evalRunId2,
|
|
12062
|
+
scenarioId: scenario.id,
|
|
12063
|
+
scenarioName: scenario.name,
|
|
12064
|
+
targetId,
|
|
12065
|
+
targetName,
|
|
12066
|
+
stepNumber: 0,
|
|
12067
|
+
type: LiveTraceEventType4.PROGRESS,
|
|
12068
|
+
outputPreview: message,
|
|
12069
|
+
elapsedMs: 0,
|
|
12070
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
12071
|
+
isComplete: false
|
|
12072
|
+
});
|
|
12073
|
+
}
|
|
12074
|
+
const effectiveTriggerPrompt = provisionedSite ? `${scenario.triggerPrompt}
|
|
12075
|
+
|
|
12076
|
+
Site ID: ${provisionedSite.id}` : scenario.triggerPrompt;
|
|
12077
|
+
try {
|
|
12078
|
+
if (template) {
|
|
12079
|
+
console.log(
|
|
12080
|
+
formatTraceEventLine({
|
|
12081
|
+
evalRunId: evalRunId2,
|
|
12082
|
+
scenarioId: scenario.id,
|
|
12083
|
+
scenarioName: scenario.name,
|
|
12084
|
+
targetId,
|
|
12085
|
+
targetName,
|
|
12086
|
+
stepNumber: 0,
|
|
12087
|
+
type: LiveTraceEventType4.PROGRESS,
|
|
12088
|
+
outputPreview: "Setting up environment (installing dependencies)...",
|
|
12089
|
+
elapsedMs: 0,
|
|
12090
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
12091
|
+
isComplete: false
|
|
12092
|
+
})
|
|
12093
|
+
);
|
|
11882
12094
|
}
|
|
11883
|
-
|
|
11884
|
-
|
|
11885
|
-
|
|
11886
|
-
|
|
11887
|
-
|
|
11888
|
-
|
|
11889
|
-
|
|
11890
|
-
|
|
11891
|
-
|
|
11892
|
-
|
|
11893
|
-
|
|
11894
|
-
|
|
11895
|
-
|
|
11896
|
-
|
|
11897
|
-
|
|
11898
|
-
|
|
11899
|
-
|
|
11900
|
-
|
|
11901
|
-
|
|
11902
|
-
|
|
11903
|
-
|
|
12095
|
+
const workDir = await prepareWorkingDirectory(
|
|
12096
|
+
config,
|
|
12097
|
+
evalRunId2,
|
|
12098
|
+
targetId,
|
|
12099
|
+
scenario.id,
|
|
12100
|
+
template
|
|
12101
|
+
);
|
|
12102
|
+
const partialResult = await runAgentWithContext(
|
|
12103
|
+
config,
|
|
12104
|
+
evalRunId2,
|
|
12105
|
+
{ ...scenario, triggerPrompt: effectiveTriggerPrompt },
|
|
12106
|
+
evalData,
|
|
12107
|
+
workDir,
|
|
12108
|
+
pushEvent
|
|
12109
|
+
);
|
|
12110
|
+
const inlineAssertions = scenario.assertions ?? [];
|
|
12111
|
+
const assertions = [
|
|
12112
|
+
...inlineAssertions,
|
|
12113
|
+
...resolvedAssertions ?? []
|
|
12114
|
+
];
|
|
12115
|
+
const templateFilesMap = new Map(
|
|
12116
|
+
(partialResult.templateFiles ?? []).map((f) => [f.path, f.status])
|
|
12117
|
+
);
|
|
12118
|
+
const evaluationInput = {
|
|
12119
|
+
outputText: partialResult.outputText,
|
|
12120
|
+
llmTrace: partialResult.llmTrace,
|
|
12121
|
+
fileDiffs: partialResult.fileDiffs?.map((d) => ({
|
|
12122
|
+
path: d.path,
|
|
12123
|
+
status: templateFilesMap.get(d.path)
|
|
12124
|
+
})),
|
|
12125
|
+
durationMs: partialResult.duration
|
|
12126
|
+
};
|
|
12127
|
+
const defaultJudgeModel = DEFAULT_JUDGE_MODEL;
|
|
12128
|
+
const assertionContext = {
|
|
12129
|
+
workDir,
|
|
12130
|
+
defaultJudgeModel,
|
|
12131
|
+
llmConfig: {
|
|
12132
|
+
baseUrl: config.aiGatewayUrl,
|
|
12133
|
+
headers: config.aiGatewayHeaders
|
|
12134
|
+
}
|
|
12135
|
+
};
|
|
12136
|
+
const assertionResults = assertions.length > 0 ? await evaluateAssertionsBase(
|
|
12137
|
+
evaluationInput,
|
|
12138
|
+
assertions,
|
|
12139
|
+
assertionContext
|
|
12140
|
+
) : [];
|
|
12141
|
+
const passed = assertionResults.filter(
|
|
12142
|
+
(r) => r.status === AssertionResultStatus.PASSED
|
|
12143
|
+
).length;
|
|
12144
|
+
const failed = assertionResults.filter(
|
|
12145
|
+
(r) => r.status === AssertionResultStatus.FAILED
|
|
12146
|
+
).length;
|
|
12147
|
+
const total = assertionResults.length;
|
|
12148
|
+
const passRate = total > 0 ? Math.round(passed / total * 100) : 100;
|
|
12149
|
+
return {
|
|
12150
|
+
...partialResult,
|
|
12151
|
+
assertionResults,
|
|
12152
|
+
passed,
|
|
12153
|
+
failed,
|
|
12154
|
+
passRate,
|
|
12155
|
+
provisionedSite
|
|
12156
|
+
};
|
|
12157
|
+
} finally {
|
|
12158
|
+
if (provisionedSite && apiClient && projectId2) {
|
|
12159
|
+
await apiClient.deleteProvisionedSite(projectId2, provisionedSite.id);
|
|
12160
|
+
}
|
|
12161
|
+
}
|
|
11904
12162
|
}
|
|
11905
12163
|
|
|
11906
12164
|
// src/evaluation-loop.ts
|
|
@@ -12180,7 +12438,9 @@ async function runEvaluation(projectId2, evalRunId2) {
|
|
|
12180
12438
|
evalData,
|
|
12181
12439
|
template,
|
|
12182
12440
|
resolvedAssertions,
|
|
12183
|
-
pushEvent
|
|
12441
|
+
pushEvent,
|
|
12442
|
+
api,
|
|
12443
|
+
projectId2
|
|
12184
12444
|
);
|
|
12185
12445
|
},
|
|
12186
12446
|
addResult: async (result) => {
|