npm - @wix/evalforge-evaluator - Versions diffs - 0.145.0 → 0.147.0 - Mend

@wix/evalforge-evaluator 0.145.0 → 0.147.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/build/index.js +58 -7
package/build/index.js.map +3 -3
package/build/index.mjs +62 -8
package/build/index.mjs.map +3 -3
package/build/types/run-scenario/agents/simple-agent/execute.d.ts +1 -0
package/build/types/run-scenario/agents/simple-agent/mcp-tools.d.ts +0 -2
package/package.json +8 -7

package/build/index.js CHANGED

@@ -2225,6 +2225,10 @@ function parseModel(model) {
   const isOpenAI = import_evalforge_types6.AVAILABLE_OPENAI_MODEL_IDS.includes(
     model
   );
+  const isGemini = import_evalforge_types6.AVAILABLE_GEMINI_MODEL_IDS.includes(
+    model
+  );
+  if (isGemini) return { providerID: "google", modelID: model };
   return { providerID: isOpenAI ? "openai" : "anthropic", modelID: model };
 }
 function toOpenCodeMcpConfig(servers) {
@@ -2276,8 +2280,9 @@ async function buildOpenCodeEnv(options) {
   const { providerID, modelID } = parseModel(modelStr);
   const provider = {};
   if (options.aiGatewayUrl) {
+    const proxyPath = providerID === "google" ? "gemini" : providerID;
     const providerOptions = {
-      baseURL: `${options.aiGatewayUrl}/proxy/${providerID}`,
+      baseURL: `${options.aiGatewayUrl}/proxy/${proxyPath}`,
       apiKey: "sk-placeholder-auth-handled-by-gateway"
     };
     if (options.aiGatewayHeaders) {
@@ -3338,6 +3343,7 @@ defaultRegistry.register(openCodeAdapter);
 // src/run-scenario/agents/simple-agent/execute.ts
 var import_ai = require("ai");
 var import_anthropic = require("@ai-sdk/anthropic");
+var import_google = require("@ai-sdk/google");
 var import_openai = require("@ai-sdk/openai");
 var import_evalforge_types11 = require("@wix/evalforge-types");
 var import_crypto3 = require("crypto");
@@ -3360,8 +3366,9 @@ async function buildMcpTools(mcps, cwd) {
         const client = await (0, import_mcp.createMCPClient)({ transport });
         clients.push(client);
         const tools = await client.tools();
+        const safePrefix = serverName.replace(/[^a-zA-Z0-9]/g, "_");
         for (const [toolName, tool] of Object.entries(tools)) {
-          allTools[`${serverName}__${toolName}`] = tool;
+          allTools[`${safePrefix}_${toolName}`] = tool;
         }
       }
     }
@@ -3437,6 +3444,7 @@ function extractErrorText(content) {
 // src/run-scenario/agents/simple-agent/cost-calculation.ts
 var import_evalforge_types10 = require("@wix/evalforge-types");
 var PROVIDER_ANTHROPIC = "anthropic";
+var PROVIDER_GEMINI = "gemini";
 var MODEL_PRICING = {
   // Anthropic — Claude 4.6
   "claude-sonnet-4-6": { input: 3, output: 15 },
@@ -3463,7 +3471,18 @@ var MODEL_PRICING = {
   o3: { input: 2, output: 8 },
   "o4-mini": { input: 1.1, output: 4.4 },
   "o3-mini": { input: 1.1, output: 4.4 },
-  o1: { input: 15, output: 60 }
+  o1: { input: 15, output: 60 },
+  // Google Gemini 2.0
+  "gemini-2.0-flash": { input: 0.1, output: 0.4 },
+  "gemini-2.0-flash-lite": { input: 0.075, output: 0.3 },
+  // Google Gemini 2.5
+  "gemini-2.5-pro": { input: 1.25, output: 10 },
+  "gemini-2.5-flash": { input: 0.15, output: 0.6 },
+  "gemini-2.5-flash-lite": { input: 0.075, output: 0.3 },
+  // Google Gemini 3.x — standard pricing up to 200K context tokens
+  "gemini-3-pro-preview": { input: 2, output: 12 },
+  "gemini-3-flash-preview": { input: 0.5, output: 3 },
+  "gemini-3.1-pro-preview": { input: 2, output: 12 }
 };
 function extractGatewayCost(step, provider) {
   try {
@@ -3474,6 +3493,15 @@ function extractGatewayCost(step, provider) {
       const cost2 = usage?.total_cost_usd;
       return typeof cost2 === "number" && cost2 > 0 ? cost2 : void 0;
     }
+    if (provider === PROVIDER_GEMINI) {
+      const meta = step.providerMetadata;
+      const google = meta?.google;
+      const cost2 = google?.total_cost_usd;
+      if (typeof cost2 === "number" && cost2 > 0) return cost2;
+      const body2 = step.response?.body;
+      const bodyCost = body2?.total_cost_usd;
+      return typeof bodyCost === "number" && bodyCost > 0 ? bodyCost : void 0;
+    }
     const body = step.response?.body;
     const cost = body?.total_cost_usd;
     return typeof cost === "number" && cost > 0 ? cost : void 0;
@@ -3551,10 +3579,10 @@ function buildConversation3(triggerPrompt, steps, executionStartMs, stepTimestam
 // src/run-scenario/agents/simple-agent/execute.ts
 var PROVIDER_ANTHROPIC2 = "anthropic";
 var PROVIDER_OPENAI = "openai";
+var PROVIDER_GEMINI2 = "gemini";
 var DEFAULT_MAX_TOOL_STEPS = 25;
 function createModel(modelId, baseUrl, headers) {
-  const isClaudeModel = isClaudeModelId(modelId);
-  if (isClaudeModel) {
+  if (isClaudeModelId(modelId)) {
     const anthropic = (0, import_anthropic.createAnthropic)({
       baseURL: `${baseUrl}/proxy/anthropic`,
       apiKey: "proxy-auth",
@@ -3562,6 +3590,14 @@ function createModel(modelId, baseUrl, headers) {
     });
     return anthropic(modelId);
   }
+  if (isGeminiModelId(modelId)) {
+    const google = (0, import_google.createGoogleGenerativeAI)({
+      baseURL: `${baseUrl}/proxy/gemini`,
+      apiKey: "proxy-auth",
+      headers
+    });
+    return google(modelId);
+  }
   const openai = (0, import_openai.createOpenAI)({
     baseURL: `${baseUrl}/proxy/openai`,
     apiKey: "proxy-auth",
@@ -3579,6 +3615,11 @@ function isClaudeModelId(modelId) {
     (id) => modelId === id || modelId.startsWith(id)
   );
 }
+function isGeminiModelId(modelId) {
+  return import_evalforge_types11.AVAILABLE_GEMINI_MODEL_IDS.some(
+    (id) => modelId === id || modelId.startsWith(id)
+  );
+}
 function extractSkillContent(files) {
   if (!files || files.length === 0) return void 0;
   const skillMd = files.find((f) => f.path === "SKILL.md");
@@ -3602,7 +3643,7 @@ async function executeWithAiSdk(context) {
     throw new Error("Simple Agent requires a model in modelConfig");
   }
   const model = createModel(modelConfig.model, aiGatewayUrl, aiGatewayHeaders);
-  const provider = isClaudeModelId(modelConfig.model) ? PROVIDER_ANTHROPIC2 : PROVIDER_OPENAI;
+  const provider = isClaudeModelId(modelConfig.model) ? PROVIDER_ANTHROPIC2 : isGeminiModelId(modelConfig.model) ? PROVIDER_GEMINI2 : PROVIDER_OPENAI;
   const systemPrompt = composeSystemPrompt(context);
   const { tools: mcpTools, clients } = mcps && mcps.length > 0 ? await buildMcpTools(mcps, cwd) : { tools: void 0, clients: [] };
   const startTime = Date.now();
@@ -3614,7 +3655,9 @@ async function executeWithAiSdk(context) {
     const isResponsesAPI = [...import_evalforge_types11.OPENAI_RESPONSES_MODEL_IDS].some(
       (id) => modelConfig.model === id || modelConfig.model.startsWith(id)
     );
-    const supportsThinking = isAnthropic || isResponsesAPI;
+    const isGemini = provider === PROVIDER_GEMINI2;
+    const isGeminiThinking = isGemini && import_evalforge_types11.GEMINI_THINKING_MODEL_IDS.has(modelConfig.model);
+    const supportsThinking = isAnthropic || isResponsesAPI || isGeminiThinking;
     const providerOpts = {
       ...isAnthropic && {
         anthropic: {
@@ -3629,6 +3672,14 @@ async function executeWithAiSdk(context) {
           reasoningEffort: "high",
           reasoningSummary: "detailed"
         }
+      },
+      ...isGeminiThinking && {
+        google: {
+          thinkingConfig: {
+            includeThoughts: true,
+            thinkingBudget: 1e4
+          }
+        }
       }
     };
     const stepTimestamps = [];