@wix/evalforge-evaluator 0.53.0 → 0.55.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/build/index.js +13 -9
- package/build/index.js.map +4 -4
- package/build/index.mjs +5 -1
- package/build/index.mjs.map +3 -3
- package/package.json +3 -3
package/build/index.js
CHANGED
|
@@ -24,7 +24,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
24
24
|
));
|
|
25
25
|
|
|
26
26
|
// src/index.ts
|
|
27
|
-
var
|
|
27
|
+
var import_evalforge_types6 = require("@wix/evalforge-types");
|
|
28
28
|
|
|
29
29
|
// src/config.ts
|
|
30
30
|
function loadConfig() {
|
|
@@ -349,7 +349,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
|
|
|
349
349
|
}
|
|
350
350
|
|
|
351
351
|
// src/run-scenario/index.ts
|
|
352
|
-
var
|
|
352
|
+
var import_evalforge_types4 = require("@wix/evalforge-types");
|
|
353
353
|
var import_eval_assertions = require("@wix/eval-assertions");
|
|
354
354
|
|
|
355
355
|
// src/run-scenario/environment.ts
|
|
@@ -6337,6 +6337,9 @@ function getAdapter(runCommand) {
|
|
|
6337
6337
|
return adapter;
|
|
6338
6338
|
}
|
|
6339
6339
|
|
|
6340
|
+
// src/run-scenario/agents/claude-code/claude-code-adapter.ts
|
|
6341
|
+
var import_evalforge_types3 = require("@wix/evalforge-types");
|
|
6342
|
+
|
|
6340
6343
|
// src/run-scenario/agents/claude-code/execute.ts
|
|
6341
6344
|
var import_evalforge_types2 = require("@wix/evalforge-types");
|
|
6342
6345
|
var import_crypto = require("crypto");
|
|
@@ -7310,10 +7313,11 @@ var ClaudeCodeAdapter = class {
|
|
|
7310
7313
|
aiGatewayHeaders,
|
|
7311
7314
|
traceContext
|
|
7312
7315
|
} = context;
|
|
7316
|
+
const modelForSdk = modelConfig?.model ? import_evalforge_types3.AVAILABLE_MODELS_MAP[modelConfig.model]?.providerModelId ?? modelConfig.model : void 0;
|
|
7313
7317
|
const options = {
|
|
7314
7318
|
cwd,
|
|
7315
7319
|
systemPrompt: skill.skillMd,
|
|
7316
|
-
model:
|
|
7320
|
+
model: modelForSdk,
|
|
7317
7321
|
temperature: modelConfig?.temperature,
|
|
7318
7322
|
maxTokens: modelConfig?.maxTokens,
|
|
7319
7323
|
aiGatewayUrl,
|
|
@@ -8185,10 +8189,10 @@ async function runScenario(config, evalRunId2, scenario, target, template, resol
|
|
|
8185
8189
|
assertionContext
|
|
8186
8190
|
) : [];
|
|
8187
8191
|
const passed = assertionResults.filter(
|
|
8188
|
-
(r) => r.status ===
|
|
8192
|
+
(r) => r.status === import_evalforge_types4.AssertionResultStatus.PASSED
|
|
8189
8193
|
).length;
|
|
8190
8194
|
const failed = assertionResults.filter(
|
|
8191
|
-
(r) => r.status ===
|
|
8195
|
+
(r) => r.status === import_evalforge_types4.AssertionResultStatus.FAILED
|
|
8192
8196
|
).length;
|
|
8193
8197
|
const total = assertionResults.length;
|
|
8194
8198
|
const passRate = total > 0 ? Math.round(passed / total * 100) : 100;
|
|
@@ -8202,7 +8206,7 @@ async function runScenario(config, evalRunId2, scenario, target, template, resol
|
|
|
8202
8206
|
}
|
|
8203
8207
|
|
|
8204
8208
|
// src/error-reporter.ts
|
|
8205
|
-
var
|
|
8209
|
+
var import_evalforge_types5 = require("@wix/evalforge-types");
|
|
8206
8210
|
function formatError(error, phase, context) {
|
|
8207
8211
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString();
|
|
8208
8212
|
if (error instanceof Error) {
|
|
@@ -8451,7 +8455,7 @@ async function runEvaluation(projectId2, evalRunId2) {
|
|
|
8451
8455
|
};
|
|
8452
8456
|
try {
|
|
8453
8457
|
await api.updateEvalRun(projectId2, evalRunId2, {
|
|
8454
|
-
status:
|
|
8458
|
+
status: import_evalforge_types6.EvalStatus.COMPLETED,
|
|
8455
8459
|
completedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
8456
8460
|
});
|
|
8457
8461
|
} catch (updateErr) {
|
|
@@ -8492,7 +8496,7 @@ runEvaluation(projectId, evalRunId).then(() => {
|
|
|
8492
8496
|
authToken: config.authToken
|
|
8493
8497
|
});
|
|
8494
8498
|
await api.updateEvalRun(projectId, evalRunId, {
|
|
8495
|
-
status:
|
|
8499
|
+
status: import_evalforge_types6.EvalStatus.FAILED,
|
|
8496
8500
|
completedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
8497
8501
|
jobError,
|
|
8498
8502
|
jobStatus: "FAILED"
|
|
@@ -8515,7 +8519,7 @@ runEvaluation(projectId, evalRunId).then(() => {
|
|
|
8515
8519
|
authToken
|
|
8516
8520
|
});
|
|
8517
8521
|
await api.updateEvalRun(projectId, evalRunId, {
|
|
8518
|
-
status:
|
|
8522
|
+
status: import_evalforge_types6.EvalStatus.FAILED,
|
|
8519
8523
|
completedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
8520
8524
|
jobError: `Config load failed, then: ${jobError}`,
|
|
8521
8525
|
jobStatus: "FAILED"
|