@wix/evalforge-evaluator 0.146.0 → 0.147.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +58 -7
- package/build/index.js.map +3 -3
- package/build/index.mjs +62 -8
- package/build/index.mjs.map +3 -3
- package/build/types/run-scenario/agents/simple-agent/execute.d.ts +1 -0
- package/build/types/run-scenario/agents/simple-agent/mcp-tools.d.ts +0 -2
- package/package.json +8 -7
package/build/index.js
CHANGED
|
@@ -2225,6 +2225,10 @@ function parseModel(model) {
|
|
|
2225
2225
|
const isOpenAI = import_evalforge_types6.AVAILABLE_OPENAI_MODEL_IDS.includes(
|
|
2226
2226
|
model
|
|
2227
2227
|
);
|
|
2228
|
+
const isGemini = import_evalforge_types6.AVAILABLE_GEMINI_MODEL_IDS.includes(
|
|
2229
|
+
model
|
|
2230
|
+
);
|
|
2231
|
+
if (isGemini) return { providerID: "google", modelID: model };
|
|
2228
2232
|
return { providerID: isOpenAI ? "openai" : "anthropic", modelID: model };
|
|
2229
2233
|
}
|
|
2230
2234
|
function toOpenCodeMcpConfig(servers) {
|
|
@@ -2276,8 +2280,9 @@ async function buildOpenCodeEnv(options) {
|
|
|
2276
2280
|
const { providerID, modelID } = parseModel(modelStr);
|
|
2277
2281
|
const provider = {};
|
|
2278
2282
|
if (options.aiGatewayUrl) {
|
|
2283
|
+
const proxyPath = providerID === "google" ? "gemini" : providerID;
|
|
2279
2284
|
const providerOptions = {
|
|
2280
|
-
baseURL: `${options.aiGatewayUrl}/proxy/${
|
|
2285
|
+
baseURL: `${options.aiGatewayUrl}/proxy/${proxyPath}`,
|
|
2281
2286
|
apiKey: "sk-placeholder-auth-handled-by-gateway"
|
|
2282
2287
|
};
|
|
2283
2288
|
if (options.aiGatewayHeaders) {
|
|
@@ -3338,6 +3343,7 @@ defaultRegistry.register(openCodeAdapter);
|
|
|
3338
3343
|
// src/run-scenario/agents/simple-agent/execute.ts
|
|
3339
3344
|
var import_ai = require("ai");
|
|
3340
3345
|
var import_anthropic = require("@ai-sdk/anthropic");
|
|
3346
|
+
var import_google = require("@ai-sdk/google");
|
|
3341
3347
|
var import_openai = require("@ai-sdk/openai");
|
|
3342
3348
|
var import_evalforge_types11 = require("@wix/evalforge-types");
|
|
3343
3349
|
var import_crypto3 = require("crypto");
|
|
@@ -3360,8 +3366,9 @@ async function buildMcpTools(mcps, cwd) {
|
|
|
3360
3366
|
const client = await (0, import_mcp.createMCPClient)({ transport });
|
|
3361
3367
|
clients.push(client);
|
|
3362
3368
|
const tools = await client.tools();
|
|
3369
|
+
const safePrefix = serverName.replace(/[^a-zA-Z0-9]/g, "_");
|
|
3363
3370
|
for (const [toolName, tool] of Object.entries(tools)) {
|
|
3364
|
-
allTools[`${
|
|
3371
|
+
allTools[`${safePrefix}_${toolName}`] = tool;
|
|
3365
3372
|
}
|
|
3366
3373
|
}
|
|
3367
3374
|
}
|
|
@@ -3437,6 +3444,7 @@ function extractErrorText(content) {
|
|
|
3437
3444
|
// src/run-scenario/agents/simple-agent/cost-calculation.ts
|
|
3438
3445
|
var import_evalforge_types10 = require("@wix/evalforge-types");
|
|
3439
3446
|
var PROVIDER_ANTHROPIC = "anthropic";
|
|
3447
|
+
var PROVIDER_GEMINI = "gemini";
|
|
3440
3448
|
var MODEL_PRICING = {
|
|
3441
3449
|
// Anthropic — Claude 4.6
|
|
3442
3450
|
"claude-sonnet-4-6": { input: 3, output: 15 },
|
|
@@ -3463,7 +3471,18 @@ var MODEL_PRICING = {
|
|
|
3463
3471
|
o3: { input: 2, output: 8 },
|
|
3464
3472
|
"o4-mini": { input: 1.1, output: 4.4 },
|
|
3465
3473
|
"o3-mini": { input: 1.1, output: 4.4 },
|
|
3466
|
-
o1: { input: 15, output: 60 }
|
|
3474
|
+
o1: { input: 15, output: 60 },
|
|
3475
|
+
// Google Gemini 2.0
|
|
3476
|
+
"gemini-2.0-flash": { input: 0.1, output: 0.4 },
|
|
3477
|
+
"gemini-2.0-flash-lite": { input: 0.075, output: 0.3 },
|
|
3478
|
+
// Google Gemini 2.5
|
|
3479
|
+
"gemini-2.5-pro": { input: 1.25, output: 10 },
|
|
3480
|
+
"gemini-2.5-flash": { input: 0.15, output: 0.6 },
|
|
3481
|
+
"gemini-2.5-flash-lite": { input: 0.075, output: 0.3 },
|
|
3482
|
+
// Google Gemini 3.x — standard pricing up to 200K context tokens
|
|
3483
|
+
"gemini-3-pro-preview": { input: 2, output: 12 },
|
|
3484
|
+
"gemini-3-flash-preview": { input: 0.5, output: 3 },
|
|
3485
|
+
"gemini-3.1-pro-preview": { input: 2, output: 12 }
|
|
3467
3486
|
};
|
|
3468
3487
|
function extractGatewayCost(step, provider) {
|
|
3469
3488
|
try {
|
|
@@ -3474,6 +3493,15 @@ function extractGatewayCost(step, provider) {
|
|
|
3474
3493
|
const cost2 = usage?.total_cost_usd;
|
|
3475
3494
|
return typeof cost2 === "number" && cost2 > 0 ? cost2 : void 0;
|
|
3476
3495
|
}
|
|
3496
|
+
if (provider === PROVIDER_GEMINI) {
|
|
3497
|
+
const meta = step.providerMetadata;
|
|
3498
|
+
const google = meta?.google;
|
|
3499
|
+
const cost2 = google?.total_cost_usd;
|
|
3500
|
+
if (typeof cost2 === "number" && cost2 > 0) return cost2;
|
|
3501
|
+
const body2 = step.response?.body;
|
|
3502
|
+
const bodyCost = body2?.total_cost_usd;
|
|
3503
|
+
return typeof bodyCost === "number" && bodyCost > 0 ? bodyCost : void 0;
|
|
3504
|
+
}
|
|
3477
3505
|
const body = step.response?.body;
|
|
3478
3506
|
const cost = body?.total_cost_usd;
|
|
3479
3507
|
return typeof cost === "number" && cost > 0 ? cost : void 0;
|
|
@@ -3551,10 +3579,10 @@ function buildConversation3(triggerPrompt, steps, executionStartMs, stepTimestam
|
|
|
3551
3579
|
// src/run-scenario/agents/simple-agent/execute.ts
|
|
3552
3580
|
var PROVIDER_ANTHROPIC2 = "anthropic";
|
|
3553
3581
|
var PROVIDER_OPENAI = "openai";
|
|
3582
|
+
var PROVIDER_GEMINI2 = "gemini";
|
|
3554
3583
|
var DEFAULT_MAX_TOOL_STEPS = 25;
|
|
3555
3584
|
function createModel(modelId, baseUrl, headers) {
|
|
3556
|
-
|
|
3557
|
-
if (isClaudeModel) {
|
|
3585
|
+
if (isClaudeModelId(modelId)) {
|
|
3558
3586
|
const anthropic = (0, import_anthropic.createAnthropic)({
|
|
3559
3587
|
baseURL: `${baseUrl}/proxy/anthropic`,
|
|
3560
3588
|
apiKey: "proxy-auth",
|
|
@@ -3562,6 +3590,14 @@ function createModel(modelId, baseUrl, headers) {
|
|
|
3562
3590
|
});
|
|
3563
3591
|
return anthropic(modelId);
|
|
3564
3592
|
}
|
|
3593
|
+
if (isGeminiModelId(modelId)) {
|
|
3594
|
+
const google = (0, import_google.createGoogleGenerativeAI)({
|
|
3595
|
+
baseURL: `${baseUrl}/proxy/gemini`,
|
|
3596
|
+
apiKey: "proxy-auth",
|
|
3597
|
+
headers
|
|
3598
|
+
});
|
|
3599
|
+
return google(modelId);
|
|
3600
|
+
}
|
|
3565
3601
|
const openai = (0, import_openai.createOpenAI)({
|
|
3566
3602
|
baseURL: `${baseUrl}/proxy/openai`,
|
|
3567
3603
|
apiKey: "proxy-auth",
|
|
@@ -3579,6 +3615,11 @@ function isClaudeModelId(modelId) {
|
|
|
3579
3615
|
(id) => modelId === id || modelId.startsWith(id)
|
|
3580
3616
|
);
|
|
3581
3617
|
}
|
|
3618
|
+
function isGeminiModelId(modelId) {
|
|
3619
|
+
return import_evalforge_types11.AVAILABLE_GEMINI_MODEL_IDS.some(
|
|
3620
|
+
(id) => modelId === id || modelId.startsWith(id)
|
|
3621
|
+
);
|
|
3622
|
+
}
|
|
3582
3623
|
function extractSkillContent(files) {
|
|
3583
3624
|
if (!files || files.length === 0) return void 0;
|
|
3584
3625
|
const skillMd = files.find((f) => f.path === "SKILL.md");
|
|
@@ -3602,7 +3643,7 @@ async function executeWithAiSdk(context) {
|
|
|
3602
3643
|
throw new Error("Simple Agent requires a model in modelConfig");
|
|
3603
3644
|
}
|
|
3604
3645
|
const model = createModel(modelConfig.model, aiGatewayUrl, aiGatewayHeaders);
|
|
3605
|
-
const provider = isClaudeModelId(modelConfig.model) ? PROVIDER_ANTHROPIC2 : PROVIDER_OPENAI;
|
|
3646
|
+
const provider = isClaudeModelId(modelConfig.model) ? PROVIDER_ANTHROPIC2 : isGeminiModelId(modelConfig.model) ? PROVIDER_GEMINI2 : PROVIDER_OPENAI;
|
|
3606
3647
|
const systemPrompt = composeSystemPrompt(context);
|
|
3607
3648
|
const { tools: mcpTools, clients } = mcps && mcps.length > 0 ? await buildMcpTools(mcps, cwd) : { tools: void 0, clients: [] };
|
|
3608
3649
|
const startTime = Date.now();
|
|
@@ -3614,7 +3655,9 @@ async function executeWithAiSdk(context) {
|
|
|
3614
3655
|
const isResponsesAPI = [...import_evalforge_types11.OPENAI_RESPONSES_MODEL_IDS].some(
|
|
3615
3656
|
(id) => modelConfig.model === id || modelConfig.model.startsWith(id)
|
|
3616
3657
|
);
|
|
3617
|
-
const
|
|
3658
|
+
const isGemini = provider === PROVIDER_GEMINI2;
|
|
3659
|
+
const isGeminiThinking = isGemini && import_evalforge_types11.GEMINI_THINKING_MODEL_IDS.has(modelConfig.model);
|
|
3660
|
+
const supportsThinking = isAnthropic || isResponsesAPI || isGeminiThinking;
|
|
3618
3661
|
const providerOpts = {
|
|
3619
3662
|
...isAnthropic && {
|
|
3620
3663
|
anthropic: {
|
|
@@ -3629,6 +3672,14 @@ async function executeWithAiSdk(context) {
|
|
|
3629
3672
|
reasoningEffort: "high",
|
|
3630
3673
|
reasoningSummary: "detailed"
|
|
3631
3674
|
}
|
|
3675
|
+
},
|
|
3676
|
+
...isGeminiThinking && {
|
|
3677
|
+
google: {
|
|
3678
|
+
thinkingConfig: {
|
|
3679
|
+
includeThoughts: true,
|
|
3680
|
+
thinkingBudget: 1e4
|
|
3681
|
+
}
|
|
3682
|
+
}
|
|
3632
3683
|
}
|
|
3633
3684
|
};
|
|
3634
3685
|
const stepTimestamps = [];
|