compute-cfo 0.2.0 → 0.3.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only, and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/pricing.js +13 -0
- package/dist/wrapper.js +32 -0
- package/package.json +1 -1
package/dist/pricing.js
CHANGED
|
@@ -69,6 +69,19 @@ const MODEL_PRICES = {
|
|
|
69
69
|
'pixtral-large-latest': { inputPerMillion: 2.0, outputPerMillion: 6.0 },
|
|
70
70
|
'mistral-nemo': { inputPerMillion: 0.02, outputPerMillion: 0.05 },
|
|
71
71
|
'pixtral-12b': { inputPerMillion: 0.15, outputPerMillion: 0.15 },
|
|
72
|
+
// ── Groq ──────────────────────────────────────────────────
|
|
73
|
+
'llama-3.3-70b-versatile': { inputPerMillion: 0.59, outputPerMillion: 0.79 },
|
|
74
|
+
'llama-3.1-8b-instant': { inputPerMillion: 0.05, outputPerMillion: 0.08 },
|
|
75
|
+
'llama-3.1-70b-versatile': { inputPerMillion: 0.59, outputPerMillion: 0.79 },
|
|
76
|
+
'gemma2-9b-it': { inputPerMillion: 0.2, outputPerMillion: 0.2 },
|
|
77
|
+
'mixtral-8x7b-32768': { inputPerMillion: 0.24, outputPerMillion: 0.24 },
|
|
78
|
+
// ── Together AI ───────────────────────────────────────────
|
|
79
|
+
'meta-llama/Llama-3.3-70B-Instruct-Turbo': { inputPerMillion: 0.88, outputPerMillion: 0.88 },
|
|
80
|
+
'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo': { inputPerMillion: 0.18, outputPerMillion: 0.18 },
|
|
81
|
+
'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo': { inputPerMillion: 0.88, outputPerMillion: 0.88 },
|
|
82
|
+
'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo': { inputPerMillion: 3.5, outputPerMillion: 3.5 },
|
|
83
|
+
'mistralai/Mixtral-8x7B-Instruct-v0.1': { inputPerMillion: 0.6, outputPerMillion: 0.6 },
|
|
84
|
+
'Qwen/Qwen2.5-72B-Instruct-Turbo': { inputPerMillion: 1.2, outputPerMillion: 1.2 },
|
|
72
85
|
};
|
|
73
86
|
const ALIASES = {
|
|
74
87
|
'gpt-4o-2024-11-20': 'gpt-4o',
|
package/dist/wrapper.js
CHANGED
|
@@ -32,6 +32,9 @@ function wrap(client, options) {
|
|
|
32
32
|
return wrapMistral(client, tracker);
|
|
33
33
|
}
|
|
34
34
|
throw new TypeError(`Unsupported client type. Supported: OpenAI, Anthropic, Google Gemini, Mistral`);
|
|
35
|
+
// Note: Groq and Together AI use OpenAI-compatible APIs, so they match the
|
|
36
|
+
// first condition (chat.completions.create) and are wrapped as OpenAI clients.
|
|
37
|
+
// The provider will be detected as "openai" in cost events.
|
|
35
38
|
}
|
|
36
39
|
function wrapOpenAI(client, tracker) {
|
|
37
40
|
const originalCreate = client.chat.completions.create.bind(client.chat.completions);
|
|
@@ -41,6 +44,35 @@ function wrapOpenAI(client, tracker) {
|
|
|
41
44
|
const model = rest.model ?? 'unknown';
|
|
42
45
|
const start = performance.now();
|
|
43
46
|
const response = await originalCreate(rest);
|
|
47
|
+
// Handle streaming responses
|
|
48
|
+
if (rest.stream && response && typeof response[Symbol.asyncIterator] === 'function') {
|
|
49
|
+
const originalIterator = response[Symbol.asyncIterator].bind(response);
|
|
50
|
+
let usage = null;
|
|
51
|
+
response[Symbol.asyncIterator] = async function* () {
|
|
52
|
+
for await (const chunk of originalIterator()) {
|
|
53
|
+
if (chunk?.usage)
|
|
54
|
+
usage = chunk.usage;
|
|
55
|
+
yield chunk;
|
|
56
|
+
}
|
|
57
|
+
const latencyMs = Math.round((performance.now() - start) * 10) / 10;
|
|
58
|
+
const inputTokens = usage?.prompt_tokens ?? 0;
|
|
59
|
+
const outputTokens = usage?.completion_tokens ?? 0;
|
|
60
|
+
const costUsd = (0, pricing_1.getCost)(model, inputTokens, outputTokens);
|
|
61
|
+
const event = {
|
|
62
|
+
timestamp: new Date().toISOString(),
|
|
63
|
+
provider: 'openai',
|
|
64
|
+
model,
|
|
65
|
+
operation: 'chat.completions',
|
|
66
|
+
inputTokens,
|
|
67
|
+
outputTokens,
|
|
68
|
+
costUsd,
|
|
69
|
+
latencyMs,
|
|
70
|
+
tags,
|
|
71
|
+
};
|
|
72
|
+
tracker.record(event);
|
|
73
|
+
};
|
|
74
|
+
return response;
|
|
75
|
+
}
|
|
44
76
|
const latencyMs = Math.round((performance.now() - start) * 10) / 10;
|
|
45
77
|
const usage = response?.usage;
|
|
46
78
|
const inputTokens = usage?.prompt_tokens ?? 0;
|