compute-cfo 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/pricing.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Model pricing database for OpenAI and Anthropic.
2
+ * Model pricing database for OpenAI, Anthropic, Google Gemini, Mistral, Groq, and Together AI.
3
3
  * Prices are in USD per 1 million tokens. Updated March 2026.
4
4
  */
5
5
  export interface ModelPrice {
package/dist/pricing.js CHANGED
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  /**
3
- * Model pricing database for OpenAI and Anthropic.
3
+ * Model pricing database for OpenAI, Anthropic, Google Gemini, Mistral, Groq, and Together AI.
4
4
  * Prices are in USD per 1 million tokens. Updated March 2026.
5
5
  */
6
6
  Object.defineProperty(exports, "__esModule", { value: true });
@@ -52,6 +52,36 @@ const MODEL_PRICES = {
52
52
  'claude-3-opus': { inputPerMillion: 15.0, outputPerMillion: 75.0 },
53
53
  'claude-3-sonnet': { inputPerMillion: 3.0, outputPerMillion: 15.0 },
54
54
  'claude-3-haiku': { inputPerMillion: 0.25, outputPerMillion: 1.25 },
55
+ // ── Google Gemini ─────────────────────────────────────────
56
+ 'gemini-2.5-pro': { inputPerMillion: 1.25, outputPerMillion: 10.0 },
57
+ 'gemini-2.5-flash': { inputPerMillion: 0.3, outputPerMillion: 2.5 },
58
+ 'gemini-2.5-flash-lite': { inputPerMillion: 0.1, outputPerMillion: 0.4 },
59
+ 'gemini-2.0-flash': { inputPerMillion: 0.1, outputPerMillion: 0.4 },
60
+ 'gemini-1.5-pro': { inputPerMillion: 1.25, outputPerMillion: 5.0 },
61
+ 'gemini-1.5-flash': { inputPerMillion: 0.075, outputPerMillion: 0.3 },
62
+ 'gemini-1.5-flash-8b': { inputPerMillion: 0.0375, outputPerMillion: 0.15 },
63
+ 'gemini-embedding': { inputPerMillion: 0.15, outputPerMillion: 0 },
64
+ // ── Mistral ───────────────────────────────────────────────
65
+ 'mistral-large-latest': { inputPerMillion: 0.5, outputPerMillion: 1.5 },
66
+ 'mistral-medium-latest': { inputPerMillion: 0.4, outputPerMillion: 2.0 },
67
+ 'mistral-small-latest': { inputPerMillion: 0.03, outputPerMillion: 0.11 },
68
+ 'codestral-latest': { inputPerMillion: 0.3, outputPerMillion: 0.9 },
69
+ 'pixtral-large-latest': { inputPerMillion: 2.0, outputPerMillion: 6.0 },
70
+ 'mistral-nemo': { inputPerMillion: 0.02, outputPerMillion: 0.05 },
71
+ 'pixtral-12b': { inputPerMillion: 0.15, outputPerMillion: 0.15 },
72
+ // ── Groq ──────────────────────────────────────────────────
73
+ 'llama-3.3-70b-versatile': { inputPerMillion: 0.59, outputPerMillion: 0.79 },
74
+ 'llama-3.1-8b-instant': { inputPerMillion: 0.05, outputPerMillion: 0.08 },
75
+ 'llama-3.1-70b-versatile': { inputPerMillion: 0.59, outputPerMillion: 0.79 },
76
+ 'gemma2-9b-it': { inputPerMillion: 0.2, outputPerMillion: 0.2 },
77
+ 'mixtral-8x7b-32768': { inputPerMillion: 0.24, outputPerMillion: 0.24 },
78
+ // ── Together AI ───────────────────────────────────────────
79
+ 'meta-llama/Llama-3.3-70B-Instruct-Turbo': { inputPerMillion: 0.88, outputPerMillion: 0.88 },
80
+ 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo': { inputPerMillion: 0.18, outputPerMillion: 0.18 },
81
+ 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo': { inputPerMillion: 0.88, outputPerMillion: 0.88 },
82
+ 'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo': { inputPerMillion: 3.5, outputPerMillion: 3.5 },
83
+ 'mistralai/Mixtral-8x7B-Instruct-v0.1': { inputPerMillion: 0.6, outputPerMillion: 0.6 },
84
+ 'Qwen/Qwen2.5-72B-Instruct-Turbo': { inputPerMillion: 1.2, outputPerMillion: 1.2 },
55
85
  };
56
86
  const ALIASES = {
57
87
  'gpt-4o-2024-11-20': 'gpt-4o',
@@ -66,6 +96,19 @@ const ALIASES = {
66
96
  'gpt-3.5-turbo-1106': 'gpt-3.5-turbo',
67
97
  'o3-2025-04-16': 'o3',
68
98
  'o4-mini-2025-04-16': 'o4-mini',
99
+ // Gemini aliases
100
+ 'models/gemini-2.5-pro': 'gemini-2.5-pro',
101
+ 'models/gemini-2.5-flash': 'gemini-2.5-flash',
102
+ 'models/gemini-2.5-flash-lite': 'gemini-2.5-flash-lite',
103
+ 'models/gemini-2.0-flash': 'gemini-2.0-flash',
104
+ 'models/gemini-1.5-pro': 'gemini-1.5-pro',
105
+ 'models/gemini-1.5-flash': 'gemini-1.5-flash',
106
+ 'models/gemini-1.5-flash-8b': 'gemini-1.5-flash-8b',
107
+ // Mistral aliases
108
+ 'mistral-large-2501': 'mistral-large-latest',
109
+ 'mistral-medium-2505': 'mistral-medium-latest',
110
+ 'mistral-small-2503': 'mistral-small-latest',
111
+ 'codestral-2501': 'codestral-latest',
69
112
  };
70
113
  function resolveModel(model) {
71
114
  return ALIASES[model] ?? model;
package/dist/wrapper.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Drop-in wrapper for OpenAI and Anthropic SDK clients.
2
+ * Drop-in wrapper for OpenAI, Anthropic, Google Gemini, and Mistral SDK clients.
3
3
  */
4
4
  import { CostTracker } from './tracker';
5
5
  export interface WrapOptions {
package/dist/wrapper.js CHANGED
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  /**
3
- * Drop-in wrapper for OpenAI and Anthropic SDK clients.
3
+ * Drop-in wrapper for OpenAI, Anthropic, Google Gemini, and Mistral SDK clients.
4
4
  */
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.wrap = wrap;
@@ -25,7 +25,16 @@ function wrap(client, options) {
25
25
  if ('messages' in client && typeof client.messages?.create === 'function') {
26
26
  return wrapAnthropic(client, tracker);
27
27
  }
28
- throw new TypeError(`Unsupported client type. Supported: OpenAI, Anthropic`);
28
+ if ('models' in client && typeof client.models?.generateContent === 'function') {
29
+ return wrapGemini(client, tracker);
30
+ }
31
+ if ('chat' in client && typeof client.chat?.complete === 'function') {
32
+ return wrapMistral(client, tracker);
33
+ }
34
+ throw new TypeError(`Unsupported client type. Supported: OpenAI, Anthropic, Google Gemini, Mistral`);
35
+ // Note: Groq and Together AI use OpenAI-compatible APIs, so they match the
36
+ // first condition (chat.completions.create) and are wrapped as OpenAI clients.
37
+ // The provider will be detected as "openai" in cost events.
29
38
  }
30
39
  function wrapOpenAI(client, tracker) {
31
40
  const originalCreate = client.chat.completions.create.bind(client.chat.completions);
@@ -35,6 +44,35 @@ function wrapOpenAI(client, tracker) {
35
44
  const model = rest.model ?? 'unknown';
36
45
  const start = performance.now();
37
46
  const response = await originalCreate(rest);
47
+ // Handle streaming responses
48
+ if (rest.stream && response && typeof response[Symbol.asyncIterator] === 'function') {
49
+ const originalIterator = response[Symbol.asyncIterator].bind(response);
50
+ let usage = null;
51
+ response[Symbol.asyncIterator] = async function* () {
52
+ for await (const chunk of originalIterator()) {
53
+ if (chunk?.usage)
54
+ usage = chunk.usage;
55
+ yield chunk;
56
+ }
57
+ const latencyMs = Math.round((performance.now() - start) * 10) / 10;
58
+ const inputTokens = usage?.prompt_tokens ?? 0;
59
+ const outputTokens = usage?.completion_tokens ?? 0;
60
+ const costUsd = (0, pricing_1.getCost)(model, inputTokens, outputTokens);
61
+ const event = {
62
+ timestamp: new Date().toISOString(),
63
+ provider: 'openai',
64
+ model,
65
+ operation: 'chat.completions',
66
+ inputTokens,
67
+ outputTokens,
68
+ costUsd,
69
+ latencyMs,
70
+ tags,
71
+ };
72
+ tracker.record(event);
73
+ };
74
+ return response;
75
+ }
38
76
  const latencyMs = Math.round((performance.now() - start) * 10) / 10;
39
77
  const usage = response?.usage;
40
78
  const inputTokens = usage?.prompt_tokens ?? 0;
@@ -78,6 +116,94 @@ function wrapOpenAI(client, tracker) {
78
116
  },
79
117
  });
80
118
  }
119
/**
 * Wrap a Google Gemini SDK client so that every `models.generateContent`
 * call is recorded on the given cost tracker.
 *
 * The returned value is a Proxy over `client`: `client.models.generateContent`
 * is replaced by a tracked version, every other property is passed through
 * untouched.
 *
 * @param {object} client - Client exposing `models.generateContent(params)`.
 * @param {object} tracker - Object exposing `record(event)`.
 * @returns {object} Proxy around `client` with cost tracking enabled.
 */
function wrapGemini(client, tracker) {
    const originalGenerateContent = client.models.generateContent.bind(client.models);
    const trackedGenerateContent = async (params) => {
        // `compute_cfo_tags` is a wrapper-only field; strip it before calling the SDK.
        const { compute_cfo_tags, ...rest } = params ?? {};
        const tags = compute_cfo_tags && typeof compute_cfo_tags === 'object' ? { ...compute_cfo_tags } : {};
        // Normalise "models/<name>" to "<name>" for pricing lookup and reporting;
        // the request itself is forwarded with the caller's original model string.
        let model = rest.model ?? 'unknown';
        if (typeof model === 'string' && model.startsWith('models/')) {
            model = model.slice('models/'.length);
        }
        const start = performance.now();
        const response = await originalGenerateContent(rest);
        // Latency in milliseconds, rounded to one decimal place.
        const latencyMs = Math.round((performance.now() - start) * 10) / 10;
        const usage = response?.usageMetadata;
        const inputTokens = usage?.promptTokenCount ?? 0;
        // Thinking models report reasoning tokens separately in
        // `thoughtsTokenCount`; per the Gemini API docs they are billed as
        // output, so they must be added or cost is under-reported.
        const outputTokens = (usage?.candidatesTokenCount ?? 0) + (usage?.thoughtsTokenCount ?? 0);
        const costUsd = (0, pricing_1.getCost)(model, inputTokens, outputTokens);
        const event = {
            timestamp: new Date().toISOString(),
            provider: 'google',
            model,
            operation: 'generate_content',
            inputTokens,
            outputTokens,
            costUsd,
            latencyMs,
            tags,
        };
        tracker.record(event);
        return response;
    };
    return new Proxy(client, {
        get(target, prop) {
            if (prop === 'models') {
                return new Proxy(target.models, {
                    get(modelsTarget, modelsProp) {
                        if (modelsProp === 'generateContent') {
                            return trackedGenerateContent;
                        }
                        return modelsTarget[modelsProp];
                    },
                });
            }
            return target[prop];
        },
    });
}
164
/**
 * Wrap a Mistral SDK client so that every `chat.complete` call is recorded
 * on the given cost tracker.
 *
 * The returned value is a Proxy over `client`: `client.chat.complete` is
 * replaced by a tracked version, every other property is passed through
 * untouched.
 *
 * @param {object} client - Client exposing `chat.complete(request, options?)`.
 * @param {object} tracker - Object exposing `record(event)`.
 * @returns {object} Proxy around `client` with cost tracking enabled.
 */
function wrapMistral(client, tracker) {
    const originalComplete = client.chat.complete.bind(client.chat);
    // Extra positional arguments (e.g. per-request options) are forwarded
    // unchanged so wrapping does not silently drop them.
    const trackedComplete = async (params, ...extraArgs) => {
        // `compute_cfo_tags` is a wrapper-only field; strip it before calling the SDK.
        const { compute_cfo_tags, ...rest } = params ?? {};
        const tags = compute_cfo_tags && typeof compute_cfo_tags === 'object' ? { ...compute_cfo_tags } : {};
        const model = rest.model ?? 'unknown';
        const start = performance.now();
        const response = await originalComplete(rest, ...extraArgs);
        // Latency in milliseconds, rounded to one decimal place.
        const latencyMs = Math.round((performance.now() - start) * 10) / 10;
        const usage = response?.usage;
        // The Mistral TypeScript SDK exposes camelCase usage fields; the
        // snake_case names are kept as a fallback for raw/compatible payloads.
        const inputTokens = usage?.promptTokens ?? usage?.prompt_tokens ?? 0;
        const outputTokens = usage?.completionTokens ?? usage?.completion_tokens ?? 0;
        // Prefer the model name echoed by the response over the requested one.
        const actualModel = response?.model ?? model;
        const costUsd = (0, pricing_1.getCost)(actualModel, inputTokens, outputTokens);
        const event = {
            timestamp: new Date().toISOString(),
            provider: 'mistral',
            model: actualModel,
            operation: 'chat.complete',
            inputTokens,
            outputTokens,
            costUsd,
            latencyMs,
            tags,
        };
        tracker.record(event);
        return response;
    };
    return new Proxy(client, {
        get(target, prop) {
            if (prop === 'chat') {
                return new Proxy(target.chat, {
                    get(chatTarget, chatProp) {
                        if (chatProp === 'complete') {
                            return trackedComplete;
                        }
                        return chatTarget[chatProp];
                    },
                });
            }
            return target[prop];
        },
    });
}
81
207
  function wrapAnthropic(client, tracker) {
82
208
  const originalCreate = client.messages.create.bind(client.messages);
83
209
  const trackedCreate = async (params) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "compute-cfo",
3
- "version": "0.1.0",
3
+ "version": "0.3.0",
4
4
  "description": "Cost tracking, attribution, and budget enforcement for AI inference APIs",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",