npm - compute-cfo - Versions diffs - 0.1.0 → 0.3.0 - Mend

compute-cfo 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/pricing.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Model pricing database for OpenAI and Anthropic.
+ * Model pricing database for OpenAI, Anthropic, Google Gemini, and Mistral.
  * Prices are in USD per 1 million tokens. Updated March 2026.
  */
 export interface ModelPrice {

package/dist/pricing.js CHANGED Viewed

@@ -1,6 +1,6 @@
 "use strict";
 /**
- * Model pricing database for OpenAI and Anthropic.
+ * Model pricing database for OpenAI, Anthropic, Google Gemini, and Mistral.
  * Prices are in USD per 1 million tokens. Updated March 2026.
  */
 Object.defineProperty(exports, "__esModule", { value: true });
@@ -52,6 +52,36 @@ const MODEL_PRICES = {
     'claude-3-opus': { inputPerMillion: 15.0, outputPerMillion: 75.0 },
     'claude-3-sonnet': { inputPerMillion: 3.0, outputPerMillion: 15.0 },
     'claude-3-haiku': { inputPerMillion: 0.25, outputPerMillion: 1.25 },
+    // ── Google Gemini ─────────────────────────────────────────
+    'gemini-2.5-pro': { inputPerMillion: 1.25, outputPerMillion: 10.0 },
+    'gemini-2.5-flash': { inputPerMillion: 0.3, outputPerMillion: 2.5 },
+    'gemini-2.5-flash-lite': { inputPerMillion: 0.1, outputPerMillion: 0.4 },
+    'gemini-2.0-flash': { inputPerMillion: 0.1, outputPerMillion: 0.4 },
+    'gemini-1.5-pro': { inputPerMillion: 1.25, outputPerMillion: 5.0 },
+    'gemini-1.5-flash': { inputPerMillion: 0.075, outputPerMillion: 0.3 },
+    'gemini-1.5-flash-8b': { inputPerMillion: 0.0375, outputPerMillion: 0.15 },
+    'gemini-embedding': { inputPerMillion: 0.15, outputPerMillion: 0 },
+    // ── Mistral ───────────────────────────────────────────────
+    'mistral-large-latest': { inputPerMillion: 0.5, outputPerMillion: 1.5 },
+    'mistral-medium-latest': { inputPerMillion: 0.4, outputPerMillion: 2.0 },
+    'mistral-small-latest': { inputPerMillion: 0.03, outputPerMillion: 0.11 },
+    'codestral-latest': { inputPerMillion: 0.3, outputPerMillion: 0.9 },
+    'pixtral-large-latest': { inputPerMillion: 2.0, outputPerMillion: 6.0 },
+    'mistral-nemo': { inputPerMillion: 0.02, outputPerMillion: 0.05 },
+    'pixtral-12b': { inputPerMillion: 0.15, outputPerMillion: 0.15 },
+    // ── Groq ──────────────────────────────────────────────────
+    'llama-3.3-70b-versatile': { inputPerMillion: 0.59, outputPerMillion: 0.79 },
+    'llama-3.1-8b-instant': { inputPerMillion: 0.05, outputPerMillion: 0.08 },
+    'llama-3.1-70b-versatile': { inputPerMillion: 0.59, outputPerMillion: 0.79 },
+    'gemma2-9b-it': { inputPerMillion: 0.2, outputPerMillion: 0.2 },
+    'mixtral-8x7b-32768': { inputPerMillion: 0.24, outputPerMillion: 0.24 },
+    // ── Together AI ───────────────────────────────────────────
+    'meta-llama/Llama-3.3-70B-Instruct-Turbo': { inputPerMillion: 0.88, outputPerMillion: 0.88 },
+    'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo': { inputPerMillion: 0.18, outputPerMillion: 0.18 },
+    'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo': { inputPerMillion: 0.88, outputPerMillion: 0.88 },
+    'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo': { inputPerMillion: 3.5, outputPerMillion: 3.5 },
+    'mistralai/Mixtral-8x7B-Instruct-v0.1': { inputPerMillion: 0.6, outputPerMillion: 0.6 },
+    'Qwen/Qwen2.5-72B-Instruct-Turbo': { inputPerMillion: 1.2, outputPerMillion: 1.2 },
 };
 const ALIASES = {
     'gpt-4o-2024-11-20': 'gpt-4o',
@@ -66,6 +96,19 @@ const ALIASES = {
     'gpt-3.5-turbo-1106': 'gpt-3.5-turbo',
     'o3-2025-04-16': 'o3',
     'o4-mini-2025-04-16': 'o4-mini',
+    // Gemini aliases
+    'models/gemini-2.5-pro': 'gemini-2.5-pro',
+    'models/gemini-2.5-flash': 'gemini-2.5-flash',
+    'models/gemini-2.5-flash-lite': 'gemini-2.5-flash-lite',
+    'models/gemini-2.0-flash': 'gemini-2.0-flash',
+    'models/gemini-1.5-pro': 'gemini-1.5-pro',
+    'models/gemini-1.5-flash': 'gemini-1.5-flash',
+    'models/gemini-1.5-flash-8b': 'gemini-1.5-flash-8b',
+    // Mistral aliases
+    'mistral-large-2501': 'mistral-large-latest',
+    'mistral-medium-2505': 'mistral-medium-latest',
+    'mistral-small-2503': 'mistral-small-latest',
+    'codestral-2501': 'codestral-latest',
 };
 function resolveModel(model) {
     return ALIASES[model] ?? model;

package/dist/wrapper.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Drop-in wrapper for OpenAI and Anthropic SDK clients.
+ * Drop-in wrapper for OpenAI, Anthropic, Google Gemini, and Mistral SDK clients.
  */
 import { CostTracker } from './tracker';
 export interface WrapOptions {

package/dist/wrapper.js CHANGED Viewed

@@ -1,6 +1,6 @@
 "use strict";
 /**
- * Drop-in wrapper for OpenAI and Anthropic SDK clients.
+ * Drop-in wrapper for OpenAI, Anthropic, Google Gemini, and Mistral SDK clients.
  */
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.wrap = wrap;
@@ -25,7 +25,16 @@ function wrap(client, options) {
     if ('messages' in client && typeof client.messages?.create === 'function') {
         return wrapAnthropic(client, tracker);
     }
-    throw new TypeError(`Unsupported client type. Supported: OpenAI, Anthropic`);
+    if ('models' in client && typeof client.models?.generateContent === 'function') {
+        return wrapGemini(client, tracker);
+    }
+    if ('chat' in client && typeof client.chat?.complete === 'function') {
+        return wrapMistral(client, tracker);
+    }
+    throw new TypeError(`Unsupported client type. Supported: OpenAI, Anthropic, Google Gemini, Mistral`);
+    // Note: Groq and Together AI use OpenAI-compatible APIs, so they match the
+    // first condition (chat.completions.create) and are wrapped as OpenAI clients.
+    // The provider will be detected as "openai" in cost events.
 }
 function wrapOpenAI(client, tracker) {
     const originalCreate = client.chat.completions.create.bind(client.chat.completions);
@@ -35,6 +44,35 @@ function wrapOpenAI(client, tracker) {
         const model = rest.model ?? 'unknown';
         const start = performance.now();
         const response = await originalCreate(rest);
+        // Handle streaming responses
+        if (rest.stream && response && typeof response[Symbol.asyncIterator] === 'function') {
+            const originalIterator = response[Symbol.asyncIterator].bind(response);
+            let usage = null;
+            response[Symbol.asyncIterator] = async function* () {
+                for await (const chunk of originalIterator()) {
+                    if (chunk?.usage)
+                        usage = chunk.usage;
+                    yield chunk;
+                }
+                const latencyMs = Math.round((performance.now() - start) * 10) / 10;
+                const inputTokens = usage?.prompt_tokens ?? 0;
+                const outputTokens = usage?.completion_tokens ?? 0;
+                const costUsd = (0, pricing_1.getCost)(model, inputTokens, outputTokens);
+                const event = {
+                    timestamp: new Date().toISOString(),
+                    provider: 'openai',
+                    model,
+                    operation: 'chat.completions',
+                    inputTokens,
+                    outputTokens,
+                    costUsd,
+                    latencyMs,
+                    tags,
+                };
+                tracker.record(event);
+            };
+            return response;
+        }
         const latencyMs = Math.round((performance.now() - start) * 10) / 10;
         const usage = response?.usage;
         const inputTokens = usage?.prompt_tokens ?? 0;
@@ -78,6 +116,94 @@ function wrapOpenAI(client, tracker) {
         },
     });
 }
+/**
+ * Wraps a Google Gemini SDK client so that each `client.models.generateContent()`
+ * call is timed, priced and recorded on `tracker`.
+ *
+ * The result is a Proxy over `client`: only `models.generateContent` is replaced;
+ * every other property is read straight from the original objects.
+ *
+ * @param {object} client - Client exposing `models.generateContent(params)`.
+ * @param {object} tracker - Receives one event per successful call via `tracker.record(event)`.
+ * @returns {object} Proxy over `client` whose `models.generateContent` is tracked.
+ */
+function wrapGemini(client, tracker) {
+    const originalGenerateContent = client.models.generateContent.bind(client.models);
+    const trackedGenerateContent = async (params) => {
+        // `compute_cfo_tags` is consumed here and never forwarded to the SDK.
+        const { compute_cfo_tags, ...rest } = params ?? {};
+        const tags = compute_cfo_tags && typeof compute_cfo_tags === 'object' ? { ...compute_cfo_tags } : {};
+        // Record the bare model id: strip the "models/" resource prefix if present.
+        let model = rest.model ?? 'unknown';
+        if (typeof model === 'string' && model.startsWith('models/')) {
+            model = model.slice('models/'.length);
+        }
+        const start = performance.now();
+        const response = await originalGenerateContent(rest);
+        // Latency in milliseconds, rounded to one decimal place.
+        const latencyMs = Math.round((performance.now() - start) * 10) / 10;
+        const usage = response?.usageMetadata;
+        const inputTokens = usage?.promptTokenCount ?? 0;
+        // Thinking tokens are reported separately in `thoughtsTokenCount` and are
+        // billed at the output rate, so they must be added to the candidate tokens;
+        // otherwise cost is under-reported for thinking-enabled models.
+        const outputTokens = (usage?.candidatesTokenCount ?? 0) + (usage?.thoughtsTokenCount ?? 0);
+        const costUsd = (0, pricing_1.getCost)(model, inputTokens, outputTokens);
+        const event = {
+            timestamp: new Date().toISOString(),
+            provider: 'google',
+            model,
+            operation: 'generate_content',
+            inputTokens,
+            outputTokens,
+            costUsd,
+            latencyMs,
+            tags,
+        };
+        tracker.record(event);
+        return response;
+    };
+    return new Proxy(client, {
+        get(target, prop) {
+            if (prop === 'models') {
+                // Nested proxy: swap in the tracked method, pass everything else through.
+                return new Proxy(target.models, {
+                    get(modelsTarget, modelsProp) {
+                        if (modelsProp === 'generateContent')
+                            return trackedGenerateContent;
+                        return modelsTarget[modelsProp];
+                    },
+                });
+            }
+            return target[prop];
+        },
+    });
+}
+/**
+ * Wraps a Mistral SDK client so that each `client.chat.complete()` call is
+ * timed, priced and recorded on `tracker`.
+ *
+ * The result is a Proxy over `client`: only `chat.complete` is replaced;
+ * every other property is read straight from the original objects.
+ *
+ * @param {object} client - Client exposing `chat.complete(params, ...)`.
+ * @param {object} tracker - Receives one event per successful call via `tracker.record(event)`.
+ * @returns {object} Proxy over `client` whose `chat.complete` is tracked.
+ */
+function wrapMistral(client, tracker) {
+    const originalComplete = client.chat.complete.bind(client.chat);
+    // Extra positional arguments (e.g. per-request SDK options) are forwarded untouched.
+    const trackedComplete = async (params, ...extraArgs) => {
+        // `compute_cfo_tags` is consumed here and never forwarded to the SDK.
+        const { compute_cfo_tags, ...rest } = params ?? {};
+        const tags = compute_cfo_tags && typeof compute_cfo_tags === 'object' ? { ...compute_cfo_tags } : {};
+        const model = rest.model ?? 'unknown';
+        const start = performance.now();
+        const response = await originalComplete(rest, ...extraArgs);
+        // Latency in milliseconds, rounded to one decimal place.
+        const latencyMs = Math.round((performance.now() - start) * 10) / 10;
+        const usage = response?.usage;
+        // The TypeScript SDK exposes usage in camelCase (`promptTokens`,
+        // `completionTokens`); the snake_case names are kept as a fallback for
+        // responses shaped like the raw REST payload.
+        const inputTokens = usage?.promptTokens ?? usage?.prompt_tokens ?? 0;
+        const outputTokens = usage?.completionTokens ?? usage?.completion_tokens ?? 0;
+        // Prefer the model name echoed by the API over the requested one.
+        const actualModel = response?.model ?? model;
+        const costUsd = (0, pricing_1.getCost)(actualModel, inputTokens, outputTokens);
+        const event = {
+            timestamp: new Date().toISOString(),
+            provider: 'mistral',
+            model: actualModel,
+            operation: 'chat.complete',
+            inputTokens,
+            outputTokens,
+            costUsd,
+            latencyMs,
+            tags,
+        };
+        tracker.record(event);
+        return response;
+    };
+    return new Proxy(client, {
+        get(target, prop) {
+            if (prop === 'chat') {
+                // Nested proxy: swap in the tracked method, pass everything else through.
+                return new Proxy(target.chat, {
+                    get(chatTarget, chatProp) {
+                        if (chatProp === 'complete')
+                            return trackedComplete;
+                        return chatTarget[chatProp];
+                    },
+                });
+            }
+            return target[prop];
+        },
+    });
+}
 function wrapAnthropic(client, tracker) {
     const originalCreate = client.messages.create.bind(client.messages);
     const trackedCreate = async (params) => {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "compute-cfo",
-  "version": "0.1.0",
+  "version": "0.3.0",
   "description": "Cost tracking, attribution, and budget enforcement for AI inference APIs",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",