mohdel 0.103.0 → 0.104.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/js/core/events.js
CHANGED
|
@@ -55,8 +55,19 @@
|
|
|
55
55
|
* @property {(string|null)} output
|
|
56
56
|
* Final text (null when `status === 'tool_use'` with no text).
|
|
57
57
|
* @property {number} inputTokens
|
|
58
|
+
* Regular (non-cached) input tokens. For OpenAI/cerebras/fireworks where
|
|
59
|
+
* `cached_tokens` is reported as a SUBSET of `prompt_tokens`, adapters
|
|
60
|
+
* subtract the cached portion before exposing it here so all providers
|
|
61
|
+
* produce additive token shapes.
|
|
58
62
|
* @property {number} outputTokens
|
|
59
63
|
* @property {number} thinkingTokens
|
|
64
|
+
* @property {number} [cacheWriteInputTokens]
|
|
65
|
+
* Input tokens written to a fresh prompt cache breakpoint, billed at
|
|
66
|
+
* `cacheWritePrice`. Absent when the provider has no separate
|
|
67
|
+
* cache-write counter.
|
|
68
|
+
* @property {number} [cacheReadInputTokens]
|
|
69
|
+
* Input tokens served from prompt cache, billed at `cacheReadPrice`.
|
|
70
|
+
* Absent when the provider has no prompt caching.
|
|
60
71
|
* @property {number} cost
|
|
61
72
|
* USD, computed from curated pricing. Single number (not a breakdown).
|
|
62
73
|
* @property {Timestamps} timestamps
|
|
@@ -258,10 +258,15 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
|
|
|
258
258
|
*/
|
|
259
259
|
function finalize ({ envelope, content, toolCalls, usage, finishReason, start, first, reasoning = null }) {
|
|
260
260
|
const end = String(process.hrtime.bigint())
|
|
261
|
-
const inputTokens = usage.prompt_tokens || 0
|
|
261
|
+
const totalInputTokens = usage.prompt_tokens || 0
|
|
262
262
|
const totalOutputTokens = usage.completion_tokens || 0
|
|
263
263
|
const thinkingTokens = usage.completion_tokens_details?.reasoning_tokens || 0
|
|
264
|
+
const cachedInputTokens = usage.prompt_tokens_details?.cached_tokens || 0
|
|
264
265
|
const visibleOutputTokens = Math.max(0, totalOutputTokens - thinkingTokens)
|
|
266
|
+
// OpenAI-shape APIs (cerebras/fireworks/...) report cached_tokens as a
|
|
267
|
+
// SUBSET of prompt_tokens. Convert to mohdel's additive convention so
|
|
268
|
+
// computeCost prices the cached portion at cacheReadPrice.
|
|
269
|
+
const inputTokens = Math.max(0, totalInputTokens - cachedInputTokens)
|
|
265
270
|
|
|
266
271
|
const truncated = finishReason === 'length'
|
|
267
272
|
let status = truncated ? STATUS_INCOMPLETE : STATUS_COMPLETED
|
|
@@ -276,9 +281,15 @@ function finalize ({ envelope, content, toolCalls, usage, finishReason, start, f
|
|
|
276
281
|
inputTokens,
|
|
277
282
|
outputTokens: visibleOutputTokens,
|
|
278
283
|
thinkingTokens,
|
|
284
|
+
...(cachedInputTokens > 0 && { cacheReadInputTokens: cachedInputTokens }),
|
|
279
285
|
cost: costFor(
|
|
280
286
|
catalogKey(envelope.model),
|
|
281
|
-
{
|
|
287
|
+
{
|
|
288
|
+
inputTokens,
|
|
289
|
+
outputTokens: visibleOutputTokens,
|
|
290
|
+
thinkingTokens,
|
|
291
|
+
cacheReadInputTokens: cachedInputTokens
|
|
292
|
+
}
|
|
282
293
|
),
|
|
283
294
|
timestamps: { start, first: first ?? end, end }
|
|
284
295
|
}
|
|
@@ -14,8 +14,8 @@ import { getSpec, setCatalog } from './_catalog.js'
|
|
|
14
14
|
/**
|
|
15
15
|
* Pure cost computation from spec + usage.
|
|
16
16
|
*
|
|
17
|
-
* Each price field (`inputPrice` / `outputPrice` / `thinkingPrice`) is
|
|
18
|
-
* one of:
|
|
17
|
+
* Each price field (`inputPrice` / `outputPrice` / `thinkingPrice` /
|
|
18
|
+
* `cacheWritePrice` / `cacheReadPrice`) is one of:
|
|
19
19
|
*
|
|
20
20
|
* - a `number` — flat per-million rate; or
|
|
21
21
|
* - an object `{">N": number, ..., "default": number}` — tiered.
|
|
@@ -24,12 +24,19 @@ import { getSpec, setCatalog } from './_catalog.js'
|
|
|
24
24
|
* nothing matches. Keys that aren't `">N"` or `"default"` are
|
|
25
25
|
* ignored. `>` is strict — at exactly N, the default is used.
|
|
26
26
|
*
|
|
27
|
-
*
|
|
28
|
-
* `
|
|
27
|
+
* Optional fields fall back to other prices when absent:
|
|
28
|
+
* - `thinkingPrice` → `outputPrice`
|
|
29
|
+
* - `cacheWritePrice` → `inputPrice` (graceful for non-caching providers)
|
|
30
|
+
* - `cacheReadPrice` → `inputPrice`
|
|
29
31
|
*
|
|
30
|
-
*
|
|
31
|
-
*
|
|
32
|
-
*
|
|
32
|
+
* Token-counting convention: this function is purely additive across
|
|
33
|
+
* `inputTokens`, `cacheWriteInputTokens`, `cacheReadInputTokens`,
|
|
34
|
+
* `outputTokens`, and `thinkingTokens`. Adapters normalize provider-specific
|
|
35
|
+
* shapes (e.g. subset-of-input vs. additional-to-input) before calling here.
|
|
36
|
+
*
|
|
37
|
+
* @param {any} spec Catalog entry, or `undefined`.
|
|
38
|
+
* @param {{inputTokens?: number, outputTokens?: number, thinkingTokens?: number,
|
|
39
|
+
* cacheWriteInputTokens?: number, cacheReadInputTokens?: number}} usage
|
|
33
40
|
* @returns {number}
|
|
34
41
|
*/
|
|
35
42
|
export function computeCost (spec, usage) {
|
|
@@ -37,12 +44,18 @@ export function computeCost (spec, usage) {
|
|
|
37
44
|
const i = usage.inputTokens ?? 0
|
|
38
45
|
const o = usage.outputTokens ?? 0
|
|
39
46
|
const t = usage.thinkingTokens ?? 0
|
|
47
|
+
const cw = usage.cacheWriteInputTokens ?? 0
|
|
48
|
+
const cr = usage.cacheReadInputTokens ?? 0
|
|
40
49
|
const ip = resolveTier(spec.inputPrice, i)
|
|
41
50
|
const op = resolveTier(spec.outputPrice, i)
|
|
42
51
|
if (typeof ip !== 'number' || typeof op !== 'number') return 0
|
|
43
52
|
const tp = resolveTier(spec.thinkingPrice, i)
|
|
44
53
|
const tpFinal = typeof tp === 'number' ? tp : op
|
|
45
|
-
const total = (i * ip + o * op + t * tpFinal) / 1_000_000
|
|
54
|
+
const cwp = resolveTier(spec.cacheWritePrice, i)
|
|
55
|
+
const cwpFinal = typeof cwp === 'number' ? cwp : ip
|
|
56
|
+
const crp = resolveTier(spec.cacheReadPrice, i)
|
|
57
|
+
const crpFinal = typeof crp === 'number' ? crp : ip
|
|
58
|
+
const total = (i * ip + cw * cwpFinal + cr * crpFinal + o * op + t * tpFinal) / 1_000_000
|
|
46
59
|
return round(total)
|
|
47
60
|
}
|
|
48
61
|
|
|
@@ -113,6 +113,8 @@ export async function * anthropic (envelope, deps = {}) {
|
|
|
113
113
|
const currentOutput = () => outputParts.join('')
|
|
114
114
|
let inputTokens = 0
|
|
115
115
|
let outputTokens = 0
|
|
116
|
+
let cacheWriteTokens = 0
|
|
117
|
+
let cacheReadTokens = 0
|
|
116
118
|
let thinkingChars = 0
|
|
117
119
|
let status = STATUS_COMPLETED
|
|
118
120
|
/** @type {string | undefined} */
|
|
@@ -135,6 +137,12 @@ export async function * anthropic (envelope, deps = {}) {
|
|
|
135
137
|
if (event.message?.usage?.input_tokens) {
|
|
136
138
|
inputTokens = event.message.usage.input_tokens
|
|
137
139
|
}
|
|
140
|
+
if (event.message?.usage?.cache_creation_input_tokens) {
|
|
141
|
+
cacheWriteTokens = event.message.usage.cache_creation_input_tokens
|
|
142
|
+
}
|
|
143
|
+
if (event.message?.usage?.cache_read_input_tokens) {
|
|
144
|
+
cacheReadTokens = event.message.usage.cache_read_input_tokens
|
|
145
|
+
}
|
|
138
146
|
break
|
|
139
147
|
|
|
140
148
|
case 'content_block_start':
|
|
@@ -226,9 +234,17 @@ export async function * anthropic (envelope, deps = {}) {
|
|
|
226
234
|
inputTokens,
|
|
227
235
|
outputTokens: messageOutputTokens,
|
|
228
236
|
thinkingTokens: estimatedThinkingTokens,
|
|
237
|
+
...(cacheWriteTokens > 0 && { cacheWriteInputTokens: cacheWriteTokens }),
|
|
238
|
+
...(cacheReadTokens > 0 && { cacheReadInputTokens: cacheReadTokens }),
|
|
229
239
|
cost: costFor(
|
|
230
240
|
catalogKey(envelope.model),
|
|
231
|
-
{
|
|
241
|
+
{
|
|
242
|
+
inputTokens,
|
|
243
|
+
outputTokens: messageOutputTokens,
|
|
244
|
+
thinkingTokens: estimatedThinkingTokens,
|
|
245
|
+
cacheWriteInputTokens: cacheWriteTokens,
|
|
246
|
+
cacheReadInputTokens: cacheReadTokens
|
|
247
|
+
}
|
|
232
248
|
),
|
|
233
249
|
timestamps: { start, first: first ?? end, end }
|
|
234
250
|
}
|
|
@@ -261,7 +277,9 @@ function safeParseJson (s) {
|
|
|
261
277
|
/**
|
|
262
278
|
* @param {import('#core/envelope.js').CallEnvelope} envelope
|
|
263
279
|
* @param {Array<{role: string, content: any}>} conversation
|
|
264
|
-
* @param {string} system
|
|
280
|
+
* @param {string | Array<{type: string, text: string, cache_control?: object}>} system
|
|
281
|
+
* Either a flat string (legacy) or an array of typed blocks with optional
|
|
282
|
+
* `cache_control` for prompt caching. The Anthropic SDK accepts both shapes.
|
|
265
283
|
*/
|
|
266
284
|
function buildRequest (envelope, conversation, system) {
|
|
267
285
|
const spec = getSpec(catalogKey(envelope.model))
|
|
@@ -273,7 +291,9 @@ function buildRequest (envelope, conversation, system) {
|
|
|
273
291
|
max_tokens: envelope.outputBudget ?? outputTokenLimit ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
|
|
274
292
|
messages: conversation
|
|
275
293
|
}
|
|
276
|
-
if (system
|
|
294
|
+
if (typeof system === 'string' ? system : Array.isArray(system) && system.length) {
|
|
295
|
+
request.system = system
|
|
296
|
+
}
|
|
277
297
|
if (envelope.tools?.length) {
|
|
278
298
|
request.tools = toAnthropicTools(envelope.tools)
|
|
279
299
|
}
|
|
@@ -314,11 +334,39 @@ function splitPrompt (prompt) {
|
|
|
314
334
|
}
|
|
315
335
|
/** @type {string[]} */
|
|
316
336
|
const systemParts = []
|
|
337
|
+
/** @type {Array<{type: string, text: string, cache_control?: object}>} */
|
|
338
|
+
const systemBlocks = []
|
|
339
|
+
let hasCacheMarkers = false
|
|
317
340
|
/** @type {Array<{role: string, content: any}>} */
|
|
318
341
|
const conversation = []
|
|
319
342
|
for (const m of prompt) {
|
|
320
343
|
if (m.role === 'system') {
|
|
321
|
-
|
|
344
|
+
// Translate spore-style cache markers ({text, cache: '5m'|'1h'}) into
|
|
345
|
+
// Anthropic's cache_control. Preserves the block boundary that spore
|
|
346
|
+
// chose; collapsing into a single string would silently disable
|
|
347
|
+
// caching even when the upstream tier composed the prompt with
|
|
348
|
+
// explicit breakpoints.
|
|
349
|
+
if (Array.isArray(m.content)) {
|
|
350
|
+
for (const p of m.content) {
|
|
351
|
+
if (!p?.text) continue
|
|
352
|
+
const block = { type: 'text', text: p.text }
|
|
353
|
+
if (p.cache === '5m') {
|
|
354
|
+
block.cache_control = { type: 'ephemeral' }
|
|
355
|
+
hasCacheMarkers = true
|
|
356
|
+
} else if (p.cache === '1h') {
|
|
357
|
+
block.cache_control = { type: 'ephemeral', ttl: '1h' }
|
|
358
|
+
hasCacheMarkers = true
|
|
359
|
+
}
|
|
360
|
+
systemBlocks.push(block)
|
|
361
|
+
systemParts.push(p.text)
|
|
362
|
+
}
|
|
363
|
+
} else {
|
|
364
|
+
const text = flattenText(m.content)
|
|
365
|
+
if (text) {
|
|
366
|
+
systemBlocks.push({ type: 'text', text })
|
|
367
|
+
systemParts.push(text)
|
|
368
|
+
}
|
|
369
|
+
}
|
|
322
370
|
} else if (m.role === 'tool') {
|
|
323
371
|
// Tool results go in a user-role message with tool_result blocks.
|
|
324
372
|
conversation.push({
|
|
@@ -351,7 +399,14 @@ function splitPrompt (prompt) {
|
|
|
351
399
|
})
|
|
352
400
|
}
|
|
353
401
|
}
|
|
354
|
-
|
|
402
|
+
// If cache markers are present, return the structured block array so
|
|
403
|
+
// buildRequest can pass it through to Anthropic's typed system field.
|
|
404
|
+
// Otherwise fall back to the legacy string concatenation path so
|
|
405
|
+
// non-cached calls don't change request shape.
|
|
406
|
+
const system = hasCacheMarkers
|
|
407
|
+
? systemBlocks
|
|
408
|
+
: systemParts.filter(Boolean).join('\n\n')
|
|
409
|
+
return { system, conversation }
|
|
355
410
|
}
|
|
356
411
|
|
|
357
412
|
/** @param {string | import('#core/envelope.js').MessagePart[]} content */
|
|
@@ -76,6 +76,7 @@ export async function * openai (envelope, deps = {}) {
|
|
|
76
76
|
let inputTokens = 0
|
|
77
77
|
let outputTokens = 0
|
|
78
78
|
let thinkingTokens = 0
|
|
79
|
+
let cachedInputTokens = 0
|
|
79
80
|
let status = STATUS_COMPLETED
|
|
80
81
|
/** @type {string | undefined} */
|
|
81
82
|
let warning
|
|
@@ -130,6 +131,7 @@ export async function * openai (envelope, deps = {}) {
|
|
|
130
131
|
inputTokens = event.response.usage.input_tokens ?? 0
|
|
131
132
|
outputTokens = event.response.usage.output_tokens ?? 0
|
|
132
133
|
thinkingTokens = event.response.usage.output_tokens_details?.reasoning_tokens ?? 0
|
|
134
|
+
cachedInputTokens = event.response.usage.input_tokens_details?.cached_tokens ?? 0
|
|
133
135
|
}
|
|
134
136
|
if (toolItems.size > 0) {
|
|
135
137
|
status = STATUS_TOOL_USE
|
|
@@ -145,6 +147,7 @@ export async function * openai (envelope, deps = {}) {
|
|
|
145
147
|
inputTokens = event.response.usage.input_tokens ?? 0
|
|
146
148
|
outputTokens = event.response.usage.output_tokens ?? 0
|
|
147
149
|
thinkingTokens = event.response.usage.output_tokens_details?.reasoning_tokens ?? 0
|
|
150
|
+
cachedInputTokens = event.response.usage.input_tokens_details?.cached_tokens ?? 0
|
|
148
151
|
}
|
|
149
152
|
break
|
|
150
153
|
|
|
@@ -174,18 +177,30 @@ export async function * openai (envelope, deps = {}) {
|
|
|
174
177
|
// one from the other for the message-only count.
|
|
175
178
|
const messageOutputTokens = Math.max(0, outputTokens - thinkingTokens)
|
|
176
179
|
|
|
180
|
+
// OpenAI counts cached_tokens as a SUBSET of input_tokens. Convert to
|
|
181
|
+
// mohdel's additive convention (cacheReadInputTokens is separate from
|
|
182
|
+
// inputTokens) by subtracting the cached portion before pricing. Both
|
|
183
|
+
// adapters and computeCost stay simpler with the additive shape.
|
|
184
|
+
const regularInputTokens = Math.max(0, inputTokens - cachedInputTokens)
|
|
185
|
+
|
|
177
186
|
/** @type {import('#core/events.js').DoneEvent} */
|
|
178
187
|
const done = {
|
|
179
188
|
type: 'done',
|
|
180
189
|
result: {
|
|
181
190
|
status,
|
|
182
191
|
output: currentOutput() || null,
|
|
183
|
-
inputTokens,
|
|
192
|
+
inputTokens: regularInputTokens,
|
|
184
193
|
outputTokens: messageOutputTokens,
|
|
185
194
|
thinkingTokens,
|
|
195
|
+
...(cachedInputTokens > 0 && { cacheReadInputTokens: cachedInputTokens }),
|
|
186
196
|
cost: costFor(
|
|
187
197
|
catalogKey(envelope.model),
|
|
188
|
-
{
|
|
198
|
+
{
|
|
199
|
+
inputTokens: regularInputTokens,
|
|
200
|
+
outputTokens: messageOutputTokens,
|
|
201
|
+
thinkingTokens,
|
|
202
|
+
cacheReadInputTokens: cachedInputTokens
|
|
203
|
+
}
|
|
189
204
|
),
|
|
190
205
|
timestamps: { start, first: first ?? end, end }
|
|
191
206
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mohdel",
|
|
3
|
-
"version": "0.103.0",
|
|
3
|
+
"version": "0.104.1",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Christophe Le Bars",
|
|
@@ -87,12 +87,12 @@
|
|
|
87
87
|
"@opentelemetry/exporter-trace-otlp-grpc": "^0.217.0",
|
|
88
88
|
"@opentelemetry/sdk-node": "^0.217.0",
|
|
89
89
|
"chalk": "^5.4.0",
|
|
90
|
-
"mohdel-thin-gate-linux-x64-gnu": "0.
|
|
90
|
+
"mohdel-thin-gate-linux-x64-gnu": "0.104.1"
|
|
91
91
|
},
|
|
92
92
|
"dependencies": {
|
|
93
93
|
"@anthropic-ai/sdk": "^0.95.1",
|
|
94
94
|
"@cerebras/cerebras_cloud_sdk": "^1.61.1",
|
|
95
|
-
"@google/genai": "^2.0.0",
|
|
95
|
+
"@google/genai": "^2.0.1",
|
|
96
96
|
"@opentelemetry/api": "^1.9.1",
|
|
97
97
|
"env-paths": "^4.0.0",
|
|
98
98
|
"groq-sdk": "^1.1.2",
|