@wix/evalforge-evaluator 0.145.0 → 0.147.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions exactly as they appear in the public registry.
package/build/index.mjs CHANGED
@@ -2216,7 +2216,8 @@ async function writeSubAgentsToFilesystem2(cwd, subAgents, fetchFn = fetchGitHub
2216
2216
  import { homedir as homedir2 } from "os";
2217
2217
  import {
2218
2218
  ClaudeModel as ClaudeModel2,
2219
- AVAILABLE_OPENAI_MODEL_IDS
2219
+ AVAILABLE_OPENAI_MODEL_IDS,
2220
+ AVAILABLE_GEMINI_MODEL_IDS
2220
2221
  } from "@wix/evalforge-types";
2221
2222
  var DEFAULT_MODEL2 = `${ClaudeModel2.CLAUDE_4_5_SONNET_1_0}`;
2222
2223
  function parseModel(model) {
@@ -2230,6 +2231,10 @@ function parseModel(model) {
2230
2231
  const isOpenAI = AVAILABLE_OPENAI_MODEL_IDS.includes(
2231
2232
  model
2232
2233
  );
2234
+ const isGemini = AVAILABLE_GEMINI_MODEL_IDS.includes(
2235
+ model
2236
+ );
2237
+ if (isGemini) return { providerID: "google", modelID: model };
2233
2238
  return { providerID: isOpenAI ? "openai" : "anthropic", modelID: model };
2234
2239
  }
2235
2240
  function toOpenCodeMcpConfig(servers) {
@@ -2281,8 +2286,9 @@ async function buildOpenCodeEnv(options) {
2281
2286
  const { providerID, modelID } = parseModel(modelStr);
2282
2287
  const provider = {};
2283
2288
  if (options.aiGatewayUrl) {
2289
+ const proxyPath = providerID === "google" ? "gemini" : providerID;
2284
2290
  const providerOptions = {
2285
- baseURL: `${options.aiGatewayUrl}/proxy/${providerID}`,
2291
+ baseURL: `${options.aiGatewayUrl}/proxy/${proxyPath}`,
2286
2292
  apiKey: "sk-placeholder-auth-handled-by-gateway"
2287
2293
  };
2288
2294
  if (options.aiGatewayHeaders) {
@@ -3346,9 +3352,12 @@ import {
3346
3352
  stepCountIs
3347
3353
  } from "ai";
3348
3354
  import { createAnthropic } from "@ai-sdk/anthropic";
3355
+ import { createGoogleGenerativeAI } from "@ai-sdk/google";
3349
3356
  import { createOpenAI } from "@ai-sdk/openai";
3350
3357
  import {
3351
3358
  AVAILABLE_CLAUDE_MODEL_IDS,
3359
+ AVAILABLE_GEMINI_MODEL_IDS as AVAILABLE_GEMINI_MODEL_IDS2,
3360
+ GEMINI_THINKING_MODEL_IDS,
3352
3361
  OPENAI_RESPONSES_MODEL_IDS,
3353
3362
  LLMStepType as LLMStepType3,
3354
3363
  LiveTraceEventType as LiveTraceEventType3
@@ -3373,8 +3382,9 @@ async function buildMcpTools(mcps, cwd) {
3373
3382
  const client = await createMCPClient({ transport });
3374
3383
  clients.push(client);
3375
3384
  const tools = await client.tools();
3385
+ const safePrefix = serverName.replace(/[^a-zA-Z0-9]/g, "_");
3376
3386
  for (const [toolName, tool] of Object.entries(tools)) {
3377
- allTools[`${serverName}__${toolName}`] = tool;
3387
+ allTools[`${safePrefix}_${toolName}`] = tool;
3378
3388
  }
3379
3389
  }
3380
3390
  }
@@ -3450,6 +3460,7 @@ function extractErrorText(content) {
3450
3460
  // src/run-scenario/agents/simple-agent/cost-calculation.ts
3451
3461
  import { normalizeModelId } from "@wix/evalforge-types";
3452
3462
  var PROVIDER_ANTHROPIC = "anthropic";
3463
+ var PROVIDER_GEMINI = "gemini";
3453
3464
  var MODEL_PRICING = {
3454
3465
  // Anthropic — Claude 4.6
3455
3466
  "claude-sonnet-4-6": { input: 3, output: 15 },
@@ -3476,7 +3487,18 @@ var MODEL_PRICING = {
3476
3487
  o3: { input: 2, output: 8 },
3477
3488
  "o4-mini": { input: 1.1, output: 4.4 },
3478
3489
  "o3-mini": { input: 1.1, output: 4.4 },
3479
- o1: { input: 15, output: 60 }
3490
+ o1: { input: 15, output: 60 },
3491
+ // Google Gemini 2.0
3492
+ "gemini-2.0-flash": { input: 0.1, output: 0.4 },
3493
+ "gemini-2.0-flash-lite": { input: 0.075, output: 0.3 },
3494
+ // Google Gemini 2.5
3495
+ "gemini-2.5-pro": { input: 1.25, output: 10 },
3496
+ "gemini-2.5-flash": { input: 0.15, output: 0.6 },
3497
+ "gemini-2.5-flash-lite": { input: 0.075, output: 0.3 },
3498
+ // Google Gemini 3.x — standard pricing up to 200K context tokens
3499
+ "gemini-3-pro-preview": { input: 2, output: 12 },
3500
+ "gemini-3-flash-preview": { input: 0.5, output: 3 },
3501
+ "gemini-3.1-pro-preview": { input: 2, output: 12 }
3480
3502
  };
3481
3503
  function extractGatewayCost(step, provider) {
3482
3504
  try {
@@ -3487,6 +3509,15 @@ function extractGatewayCost(step, provider) {
3487
3509
  const cost2 = usage?.total_cost_usd;
3488
3510
  return typeof cost2 === "number" && cost2 > 0 ? cost2 : void 0;
3489
3511
  }
3512
+ if (provider === PROVIDER_GEMINI) {
3513
+ const meta = step.providerMetadata;
3514
+ const google = meta?.google;
3515
+ const cost2 = google?.total_cost_usd;
3516
+ if (typeof cost2 === "number" && cost2 > 0) return cost2;
3517
+ const body2 = step.response?.body;
3518
+ const bodyCost = body2?.total_cost_usd;
3519
+ return typeof bodyCost === "number" && bodyCost > 0 ? bodyCost : void 0;
3520
+ }
3490
3521
  const body = step.response?.body;
3491
3522
  const cost = body?.total_cost_usd;
3492
3523
  return typeof cost === "number" && cost > 0 ? cost : void 0;
@@ -3564,10 +3595,10 @@ function buildConversation3(triggerPrompt, steps, executionStartMs, stepTimestam
3564
3595
  // src/run-scenario/agents/simple-agent/execute.ts
3565
3596
  var PROVIDER_ANTHROPIC2 = "anthropic";
3566
3597
  var PROVIDER_OPENAI = "openai";
3598
+ var PROVIDER_GEMINI2 = "gemini";
3567
3599
  var DEFAULT_MAX_TOOL_STEPS = 25;
3568
3600
  function createModel(modelId, baseUrl, headers) {
3569
- const isClaudeModel = isClaudeModelId(modelId);
3570
- if (isClaudeModel) {
3601
+ if (isClaudeModelId(modelId)) {
3571
3602
  const anthropic = createAnthropic({
3572
3603
  baseURL: `${baseUrl}/proxy/anthropic`,
3573
3604
  apiKey: "proxy-auth",
@@ -3575,6 +3606,14 @@ function createModel(modelId, baseUrl, headers) {
3575
3606
  });
3576
3607
  return anthropic(modelId);
3577
3608
  }
3609
+ if (isGeminiModelId(modelId)) {
3610
+ const google = createGoogleGenerativeAI({
3611
+ baseURL: `${baseUrl}/proxy/gemini`,
3612
+ apiKey: "proxy-auth",
3613
+ headers
3614
+ });
3615
+ return google(modelId);
3616
+ }
3578
3617
  const openai = createOpenAI({
3579
3618
  baseURL: `${baseUrl}/proxy/openai`,
3580
3619
  apiKey: "proxy-auth",
@@ -3592,6 +3631,11 @@ function isClaudeModelId(modelId) {
3592
3631
  (id) => modelId === id || modelId.startsWith(id)
3593
3632
  );
3594
3633
  }
3634
+ function isGeminiModelId(modelId) {
3635
+ return AVAILABLE_GEMINI_MODEL_IDS2.some(
3636
+ (id) => modelId === id || modelId.startsWith(id)
3637
+ );
3638
+ }
3595
3639
  function extractSkillContent(files) {
3596
3640
  if (!files || files.length === 0) return void 0;
3597
3641
  const skillMd = files.find((f) => f.path === "SKILL.md");
@@ -3615,7 +3659,7 @@ async function executeWithAiSdk(context) {
3615
3659
  throw new Error("Simple Agent requires a model in modelConfig");
3616
3660
  }
3617
3661
  const model = createModel(modelConfig.model, aiGatewayUrl, aiGatewayHeaders);
3618
- const provider = isClaudeModelId(modelConfig.model) ? PROVIDER_ANTHROPIC2 : PROVIDER_OPENAI;
3662
+ const provider = isClaudeModelId(modelConfig.model) ? PROVIDER_ANTHROPIC2 : isGeminiModelId(modelConfig.model) ? PROVIDER_GEMINI2 : PROVIDER_OPENAI;
3619
3663
  const systemPrompt = composeSystemPrompt(context);
3620
3664
  const { tools: mcpTools, clients } = mcps && mcps.length > 0 ? await buildMcpTools(mcps, cwd) : { tools: void 0, clients: [] };
3621
3665
  const startTime = Date.now();
@@ -3627,7 +3671,9 @@ async function executeWithAiSdk(context) {
3627
3671
  const isResponsesAPI = [...OPENAI_RESPONSES_MODEL_IDS].some(
3628
3672
  (id) => modelConfig.model === id || modelConfig.model.startsWith(id)
3629
3673
  );
3630
- const supportsThinking = isAnthropic || isResponsesAPI;
3674
+ const isGemini = provider === PROVIDER_GEMINI2;
3675
+ const isGeminiThinking = isGemini && GEMINI_THINKING_MODEL_IDS.has(modelConfig.model);
3676
+ const supportsThinking = isAnthropic || isResponsesAPI || isGeminiThinking;
3631
3677
  const providerOpts = {
3632
3678
  ...isAnthropic && {
3633
3679
  anthropic: {
@@ -3642,6 +3688,14 @@ async function executeWithAiSdk(context) {
3642
3688
  reasoningEffort: "high",
3643
3689
  reasoningSummary: "detailed"
3644
3690
  }
3691
+ },
3692
+ ...isGeminiThinking && {
3693
+ google: {
3694
+ thinkingConfig: {
3695
+ includeThoughts: true,
3696
+ thinkingBudget: 1e4
3697
+ }
3698
+ }
3645
3699
  }
3646
3700
  };
3647
3701
  const stepTimestamps = [];