compute-cfo 0.2.0 → 0.3.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only, and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/pricing.js +13 -0
- package/dist/wrapper.js +32 -0
- package/package.json +1 -1
package/dist/pricing.js
CHANGED
|
@@ -69,6 +69,19 @@ const MODEL_PRICES = {
|
|
|
69
69
|
'pixtral-large-latest': { inputPerMillion: 2.0, outputPerMillion: 6.0 },
|
|
70
70
|
'mistral-nemo': { inputPerMillion: 0.02, outputPerMillion: 0.05 },
|
|
71
71
|
'pixtral-12b': { inputPerMillion: 0.15, outputPerMillion: 0.15 },
|
|
72
|
+
// ── Groq ──────────────────────────────────────────────────
|
|
73
|
+
'llama-3.3-70b-versatile': { inputPerMillion: 0.59, outputPerMillion: 0.79 },
|
|
74
|
+
'llama-3.1-8b-instant': { inputPerMillion: 0.05, outputPerMillion: 0.08 },
|
|
75
|
+
'llama-3.1-70b-versatile': { inputPerMillion: 0.59, outputPerMillion: 0.79 },
|
|
76
|
+
'gemma2-9b-it': { inputPerMillion: 0.2, outputPerMillion: 0.2 },
|
|
77
|
+
'mixtral-8x7b-32768': { inputPerMillion: 0.24, outputPerMillion: 0.24 },
|
|
78
|
+
// ── Together AI ───────────────────────────────────────────
|
|
79
|
+
'meta-llama/Llama-3.3-70B-Instruct-Turbo': { inputPerMillion: 0.88, outputPerMillion: 0.88 },
|
|
80
|
+
'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo': { inputPerMillion: 0.18, outputPerMillion: 0.18 },
|
|
81
|
+
'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo': { inputPerMillion: 0.88, outputPerMillion: 0.88 },
|
|
82
|
+
'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo': { inputPerMillion: 3.5, outputPerMillion: 3.5 },
|
|
83
|
+
'mistralai/Mixtral-8x7B-Instruct-v0.1': { inputPerMillion: 0.6, outputPerMillion: 0.6 },
|
|
84
|
+
'Qwen/Qwen2.5-72B-Instruct-Turbo': { inputPerMillion: 1.2, outputPerMillion: 1.2 },
|
|
72
85
|
};
|
|
73
86
|
const ALIASES = {
|
|
74
87
|
'gpt-4o-2024-11-20': 'gpt-4o',
|
package/dist/wrapper.js
CHANGED
|
@@ -32,6 +32,9 @@ function wrap(client, options) {
|
|
|
32
32
|
return wrapMistral(client, tracker);
|
|
33
33
|
}
|
|
34
34
|
throw new TypeError(`Unsupported client type. Supported: OpenAI, Anthropic, Google Gemini, Mistral`);
|
|
35
|
+
// Note: Groq and Together AI use OpenAI-compatible APIs, so they match the
|
|
36
|
+
// first condition (chat.completions.create) and are wrapped as OpenAI clients.
|
|
37
|
+
// The provider will be detected as "openai" in cost events.
|
|
35
38
|
}
|
|
36
39
|
function wrapOpenAI(client, tracker) {
|
|
37
40
|
const originalCreate = client.chat.completions.create.bind(client.chat.completions);
|
|
@@ -41,6 +44,35 @@ function wrapOpenAI(client, tracker) {
|
|
|
41
44
|
const model = rest.model ?? 'unknown';
|
|
42
45
|
const start = performance.now();
|
|
43
46
|
const response = await originalCreate(rest);
|
|
47
|
+
// Handle streaming responses
|
|
48
|
+
if (rest.stream && response && typeof response[Symbol.asyncIterator] === 'function') {
|
|
49
|
+
const originalIterator = response[Symbol.asyncIterator].bind(response);
|
|
50
|
+
let usage = null;
|
|
51
|
+
response[Symbol.asyncIterator] = async function* () {
|
|
52
|
+
for await (const chunk of originalIterator()) {
|
|
53
|
+
if (chunk?.usage)
|
|
54
|
+
usage = chunk.usage;
|
|
55
|
+
yield chunk;
|
|
56
|
+
}
|
|
57
|
+
const latencyMs = Math.round((performance.now() - start) * 10) / 10;
|
|
58
|
+
const inputTokens = usage?.prompt_tokens ?? 0;
|
|
59
|
+
const outputTokens = usage?.completion_tokens ?? 0;
|
|
60
|
+
const costUsd = (0, pricing_1.getCost)(model, inputTokens, outputTokens);
|
|
61
|
+
const event = {
|
|
62
|
+
timestamp: new Date().toISOString(),
|
|
63
|
+
provider: 'openai',
|
|
64
|
+
model,
|
|
65
|
+
operation: 'chat.completions',
|
|
66
|
+
inputTokens,
|
|
67
|
+
outputTokens,
|
|
68
|
+
costUsd,
|
|
69
|
+
latencyMs,
|
|
70
|
+
tags,
|
|
71
|
+
};
|
|
72
|
+
tracker.record(event);
|
|
73
|
+
};
|
|
74
|
+
return response;
|
|
75
|
+
}
|
|
44
76
|
const latencyMs = Math.round((performance.now() - start) * 10) / 10;
|
|
45
77
|
const usage = response?.usage;
|
|
46
78
|
const inputTokens = usage?.prompt_tokens ?? 0;
|