@wix/evalforge-evaluator 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +90 -24
- package/build/index.js.map +2 -2
- package/build/index.mjs +90 -24
- package/build/index.mjs.map +2 -2
- package/build/types/api-client.d.ts +15 -3
- package/build/types/config.d.ts +8 -0
- package/build/types/run-scenario/claude-code/types.d.ts +4 -0
- package/package.json +3 -3
package/build/index.js
CHANGED
|
@@ -55,21 +55,38 @@ function loadConfig() {
|
|
|
55
55
|
aiGatewayHeaders[key] = value;
|
|
56
56
|
}
|
|
57
57
|
}
|
|
58
|
+
const tracePushUrl = process.env.TRACE_PUSH_URL;
|
|
59
|
+
const routeHeader = process.env.EVAL_ROUTE_HEADER;
|
|
58
60
|
return {
|
|
59
61
|
serverUrl,
|
|
60
62
|
apiPrefix,
|
|
61
63
|
aiGatewayUrl,
|
|
62
64
|
aiGatewayHeaders,
|
|
63
|
-
evaluationsDir
|
|
65
|
+
evaluationsDir,
|
|
66
|
+
tracePushUrl,
|
|
67
|
+
routeHeader
|
|
64
68
|
};
|
|
65
69
|
}
|
|
66
70
|
|
|
67
71
|
// src/api-client.ts
|
|
68
|
-
function createApiClient(serverUrl, apiPrefix = "") {
|
|
72
|
+
function createApiClient(serverUrl, options = "") {
|
|
73
|
+
const opts = typeof options === "string" ? { apiPrefix: options } : options;
|
|
74
|
+
const apiPrefix = opts.apiPrefix ?? "";
|
|
75
|
+
const routeHeader = opts.routeHeader;
|
|
76
|
+
function buildHeaders(additionalHeaders) {
|
|
77
|
+
const headers = { ...additionalHeaders };
|
|
78
|
+
if (routeHeader) {
|
|
79
|
+
headers["x-wix-route"] = routeHeader;
|
|
80
|
+
}
|
|
81
|
+
return headers;
|
|
82
|
+
}
|
|
69
83
|
async function fetchJson(path9) {
|
|
70
84
|
const url = `${serverUrl}${apiPrefix}${path9}`;
|
|
71
85
|
console.error(`[API] GET ${url}`);
|
|
72
|
-
const response = await fetch(url);
|
|
86
|
+
const headers = buildHeaders();
|
|
87
|
+
const response = await fetch(url, {
|
|
88
|
+
headers: Object.keys(headers).length > 0 ? headers : void 0
|
|
89
|
+
});
|
|
73
90
|
if (!response.ok) {
|
|
74
91
|
const errorText = await response.text();
|
|
75
92
|
throw new Error(
|
|
@@ -83,7 +100,7 @@ function createApiClient(serverUrl, apiPrefix = "") {
|
|
|
83
100
|
console.error(`[API] POST ${url}`);
|
|
84
101
|
const response = await fetch(url, {
|
|
85
102
|
method: "POST",
|
|
86
|
-
headers: { "Content-Type": "application/json" },
|
|
103
|
+
headers: buildHeaders({ "Content-Type": "application/json" }),
|
|
87
104
|
body: JSON.stringify(body)
|
|
88
105
|
});
|
|
89
106
|
if (!response.ok) {
|
|
@@ -96,7 +113,11 @@ function createApiClient(serverUrl, apiPrefix = "") {
|
|
|
96
113
|
async function deleteRequest(path9) {
|
|
97
114
|
const url = `${serverUrl}${apiPrefix}${path9}`;
|
|
98
115
|
console.error(`[API] DELETE ${url}`);
|
|
99
|
-
const response = await fetch(url, { method: "DELETE" });
|
|
116
|
+
const headers = buildHeaders();
|
|
117
|
+
const response = await fetch(url, {
|
|
118
|
+
method: "DELETE",
|
|
119
|
+
headers: Object.keys(headers).length > 0 ? headers : void 0
|
|
120
|
+
});
|
|
100
121
|
if (!response.ok) {
|
|
101
122
|
const errorText = await response.text();
|
|
102
123
|
throw new Error(
|
|
@@ -109,7 +130,7 @@ function createApiClient(serverUrl, apiPrefix = "") {
|
|
|
109
130
|
console.error(`[API] PUT ${url}`);
|
|
110
131
|
const response = await fetch(url, {
|
|
111
132
|
method: "PUT",
|
|
112
|
-
headers: { "Content-Type": "application/json" },
|
|
133
|
+
headers: buildHeaders({ "Content-Type": "application/json" }),
|
|
113
134
|
body: JSON.stringify(body)
|
|
114
135
|
});
|
|
115
136
|
if (!response.ok) {
|
|
@@ -6076,8 +6097,36 @@ var import_crypto = require("crypto");
|
|
|
6076
6097
|
var import_promises3 = require("fs/promises");
|
|
6077
6098
|
var import_path5 = require("path");
|
|
6078
6099
|
var DEFAULT_MODEL = "claude-sonnet-4-20250514";
|
|
6079
|
-
function emitTraceEvent(event) {
|
|
6100
|
+
function emitTraceEvent(event, tracePushUrl, routeHeader) {
|
|
6080
6101
|
console.log(`${import_evalforge_types.TRACE_EVENT_PREFIX}${JSON.stringify(event)}`);
|
|
6102
|
+
if (tracePushUrl) {
|
|
6103
|
+
pushTraceEvent(tracePushUrl, event, routeHeader).catch((err) => {
|
|
6104
|
+
console.error("[Trace Push] Failed to push trace event:", err);
|
|
6105
|
+
});
|
|
6106
|
+
}
|
|
6107
|
+
}
|
|
6108
|
+
async function pushTraceEvent(url, event, routeHeader) {
|
|
6109
|
+
try {
|
|
6110
|
+
const headers = {
|
|
6111
|
+
"Content-Type": "application/json"
|
|
6112
|
+
};
|
|
6113
|
+
if (routeHeader) {
|
|
6114
|
+
headers["x-wix-route"] = routeHeader;
|
|
6115
|
+
}
|
|
6116
|
+
const response = await fetch(url, {
|
|
6117
|
+
method: "POST",
|
|
6118
|
+
headers,
|
|
6119
|
+
body: JSON.stringify(event)
|
|
6120
|
+
});
|
|
6121
|
+
if (!response.ok) {
|
|
6122
|
+
const errorText = await response.text();
|
|
6123
|
+
console.error(
|
|
6124
|
+
`[Trace Push] HTTP ${response.status}: ${errorText.slice(0, 200)}`
|
|
6125
|
+
);
|
|
6126
|
+
}
|
|
6127
|
+
} catch (err) {
|
|
6128
|
+
console.error("[Trace Push] Network error:", err);
|
|
6129
|
+
}
|
|
6081
6130
|
}
|
|
6082
6131
|
function createTraceEventFromMessage(message, context, stepNumber, isComplete) {
|
|
6083
6132
|
let type = import_evalforge_types.LiveTraceEventType.COMPLETION;
|
|
@@ -6235,7 +6284,11 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6235
6284
|
false
|
|
6236
6285
|
// Not complete yet
|
|
6237
6286
|
);
|
|
6238
|
-
emitTraceEvent(traceEvent);
|
|
6287
|
+
emitTraceEvent(
|
|
6288
|
+
traceEvent,
|
|
6289
|
+
traceContext.tracePushUrl,
|
|
6290
|
+
traceContext.routeHeader
|
|
6291
|
+
);
|
|
6239
6292
|
}
|
|
6240
6293
|
}
|
|
6241
6294
|
console.log(
|
|
@@ -6244,18 +6297,22 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6244
6297
|
"messages"
|
|
6245
6298
|
);
|
|
6246
6299
|
if (traceContext) {
|
|
6247
|
-
emitTraceEvent(
|
|
6248
|
-
|
|
6249
|
-
|
|
6250
|
-
|
|
6251
|
-
|
|
6252
|
-
|
|
6253
|
-
|
|
6254
|
-
|
|
6255
|
-
|
|
6256
|
-
|
|
6257
|
-
|
|
6258
|
-
|
|
6300
|
+
emitTraceEvent(
|
|
6301
|
+
{
|
|
6302
|
+
evalRunId: traceContext.evalRunId,
|
|
6303
|
+
scenarioId: traceContext.scenarioId,
|
|
6304
|
+
scenarioName: traceContext.scenarioName,
|
|
6305
|
+
targetId: traceContext.targetId,
|
|
6306
|
+
targetName: traceContext.targetName,
|
|
6307
|
+
stepNumber: traceStepNumber + 1,
|
|
6308
|
+
type: import_evalforge_types.LiveTraceEventType.COMPLETION,
|
|
6309
|
+
outputPreview: "Scenario execution completed",
|
|
6310
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6311
|
+
isComplete: true
|
|
6312
|
+
},
|
|
6313
|
+
traceContext.tracePushUrl,
|
|
6314
|
+
traceContext.routeHeader
|
|
6315
|
+
);
|
|
6259
6316
|
}
|
|
6260
6317
|
const endTime = /* @__PURE__ */ new Date();
|
|
6261
6318
|
const totalDurationMs = endTime.getTime() - startTime.getTime();
|
|
@@ -6473,7 +6530,9 @@ async function callSkill(config, evalRunId2, scenario, skill, agent, workDir) {
|
|
|
6473
6530
|
scenarioId: scenario.id,
|
|
6474
6531
|
scenarioName: scenario.name,
|
|
6475
6532
|
targetId: skill.id,
|
|
6476
|
-
targetName: skill.name
|
|
6533
|
+
targetName: skill.name,
|
|
6534
|
+
tracePushUrl: config.tracePushUrl,
|
|
6535
|
+
routeHeader: config.routeHeader
|
|
6477
6536
|
}
|
|
6478
6537
|
});
|
|
6479
6538
|
const completedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
@@ -6586,9 +6645,13 @@ async function runEvaluation(projectId2, evalRunId2) {
|
|
|
6586
6645
|
serverUrl: config.serverUrl,
|
|
6587
6646
|
aiGatewayUrl: config.aiGatewayUrl,
|
|
6588
6647
|
evaluationsDir: config.evaluationsDir,
|
|
6589
|
-
hasAiGatewayHeaders: Object.keys(config.aiGatewayHeaders).length > 0
|
|
6648
|
+
hasAiGatewayHeaders: Object.keys(config.aiGatewayHeaders).length > 0,
|
|
6649
|
+
hasRouteHeader: !!config.routeHeader
|
|
6650
|
+
});
|
|
6651
|
+
const api = createApiClient(config.serverUrl, {
|
|
6652
|
+
apiPrefix: config.apiPrefix,
|
|
6653
|
+
routeHeader: config.routeHeader
|
|
6590
6654
|
});
|
|
6591
|
-
const api = createApiClient(config.serverUrl, config.apiPrefix);
|
|
6592
6655
|
console.error(
|
|
6593
6656
|
"[DEBUG-H2] fetchEvaluationData START",
|
|
6594
6657
|
JSON.stringify({ serverUrl: config.serverUrl, timestamp: Date.now() })
|
|
@@ -6672,7 +6735,10 @@ runEvaluation(projectId, evalRunId).then(() => {
|
|
|
6672
6735
|
console.error("[EVALUATOR-BOOT] runEvaluation FAILED:", err);
|
|
6673
6736
|
try {
|
|
6674
6737
|
const config = loadConfig();
|
|
6675
|
-
const api = createApiClient(config.serverUrl, config.apiPrefix);
|
|
6738
|
+
const api = createApiClient(config.serverUrl, {
|
|
6739
|
+
apiPrefix: config.apiPrefix,
|
|
6740
|
+
routeHeader: config.routeHeader
|
|
6741
|
+
});
|
|
6676
6742
|
await api.updateEvalRun(projectId, evalRunId, {
|
|
6677
6743
|
status: import_evalforge_types3.EvalStatus.FAILED,
|
|
6678
6744
|
completedAt: (/* @__PURE__ */ new Date()).toISOString()
|