mohdel 0.104.0 → 0.104.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/js/core/events.js CHANGED
@@ -62,14 +62,12 @@
62
62
  * @property {number} outputTokens
63
63
  * @property {number} thinkingTokens
64
64
  * @property {number} [cacheWriteInputTokens]
65
- * Tokens written to a fresh prompt cache breakpoint, billed at
66
- * `cacheWritePrice` (typically 1.25× input on Anthropic). Absent on
67
- * providers that don't surface this counter (OpenAI doesn't separately
68
- * bill cache writes).
65
+ * Input tokens written to a fresh prompt cache breakpoint, billed at
66
+ * `cacheWritePrice`. Absent when the provider has no separate
67
+ * cache-write counter.
69
68
  * @property {number} [cacheReadInputTokens]
70
- * Tokens served from prompt cache, billed at `cacheReadPrice` (typically
71
- * 0.1× input). Set by Anthropic directly and by OpenAI-shape adapters
72
- * after subset→additive normalization of `prompt_tokens_details.cached_tokens`.
69
+ * Input tokens served from prompt cache, billed at `cacheReadPrice`.
70
+ * Absent when the provider has no prompt caching.
73
71
  * @property {number} cost
74
72
  * USD, computed from curated pricing. Single number (not a breakdown).
75
73
  * @property {Timestamps} timestamps
@@ -22,10 +22,16 @@ import { catalogKey } from '#core/model-id.js'
22
22
  * @param {string} output
23
23
  * @param {number} inputTokens
24
24
  * @param {number} outputTokens
25
+ * @param {{cacheWriteInputTokens?: number, cacheReadInputTokens?: number}} [extra]
26
+ * Optional cache token counts captured before cancellation. Threaded through
27
+ * so the cancellation-cost calculation prices any cache writes/reads that
28
+ * already happened before the abort.
25
29
  * @returns {import('#core/events.js').DoneEvent}
26
30
  */
27
- export function cancelledDone (start, first, envelope, output, inputTokens, outputTokens) {
31
+ export function cancelledDone (start, first, envelope, output, inputTokens, outputTokens, extra = {}) {
28
32
  const end = String(process.hrtime.bigint())
33
+ const cacheWriteInputTokens = extra.cacheWriteInputTokens || 0
34
+ const cacheReadInputTokens = extra.cacheReadInputTokens || 0
29
35
  return {
30
36
  type: 'done',
31
37
  result: {
@@ -34,9 +40,11 @@ export function cancelledDone (start, first, envelope, output, inputTokens, outp
34
40
  inputTokens,
35
41
  outputTokens,
36
42
  thinkingTokens: 0,
43
+ ...(cacheWriteInputTokens > 0 && { cacheWriteInputTokens }),
44
+ ...(cacheReadInputTokens > 0 && { cacheReadInputTokens }),
37
45
  cost: costFor(
38
46
  catalogKey(envelope.model),
39
- { inputTokens, outputTokens, thinkingTokens: 0 }
47
+ { inputTokens, outputTokens, thinkingTokens: 0, cacheWriteInputTokens, cacheReadInputTokens }
40
48
  ),
41
49
  timestamps: { start, first: first ?? end, end },
42
50
  warning: WARNING_CANCELLED
@@ -29,14 +29,10 @@ import { getSpec, setCatalog } from './_catalog.js'
29
29
  * - `cacheWritePrice` → `inputPrice` (graceful for non-caching providers)
30
30
  * - `cacheReadPrice` → `inputPrice`
31
31
  *
32
- * Token-counting conventions:
33
- * - Anthropic reports `cache_creation_input_tokens` and `cache_read_input_tokens`
34
- * as ADDITIONAL to `input_tokens` (separately billable). The adapter
35
- * surfaces them as `cacheWriteInputTokens` / `cacheReadInputTokens`
36
- * (write/read pair, matching catalog `cacheWritePrice`/`cacheReadPrice`).
37
- * - OpenAI reports `prompt_tokens_details.cached_tokens` as a SUBSET of
38
- * `prompt_tokens` (already counted). Adapters subtract before passing
39
- * `inputTokens` to keep this function additive across providers.
32
+ * Token-counting convention: this function is purely additive across
33
+ * `inputTokens`, `cacheWriteInputTokens`, `cacheReadInputTokens`,
34
+ * `outputTokens`, and `thinkingTokens`. Adapters normalize provider-specific
35
+ * shapes (e.g. subset-of-input vs. additional-to-input) before calling here.
40
36
  *
41
37
  * @param {any} spec Catalog entry, or `undefined`.
42
38
  * @param {{inputTokens?: number, outputTokens?: number, thinkingTokens?: number,
@@ -129,7 +129,7 @@ export async function * anthropic (envelope, deps = {}) {
129
129
 
130
130
  for await (const event of stream) {
131
131
  if (signal?.aborted) {
132
- yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens)
132
+ yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens, { cacheWriteInputTokens: cacheWriteTokens, cacheReadInputTokens: cacheReadTokens })
133
133
  return
134
134
  }
135
135
  switch (event.type) {
@@ -197,7 +197,7 @@ export async function * anthropic (envelope, deps = {}) {
197
197
  }
198
198
  } catch (e) {
199
199
  if (signal?.aborted) {
200
- yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens)
200
+ yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens, { cacheWriteInputTokens: cacheWriteTokens, cacheReadInputTokens: cacheReadTokens })
201
201
  return
202
202
  }
203
203
  log?.warn({ err: e }, '[mohdel:anthropic] stream failed')
@@ -206,7 +206,7 @@ export async function * anthropic (envelope, deps = {}) {
206
206
  }
207
207
 
208
208
  if (signal?.aborted) {
209
- yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens)
209
+ yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens, { cacheWriteInputTokens: cacheWriteTokens, cacheReadInputTokens: cacheReadTokens })
210
210
  return
211
211
  }
212
212
 
package/js/session/run.js CHANGED
@@ -376,16 +376,19 @@ function finalizeSpanOk (span, result, sawDelta = false, maxInterFrameMs = 0) {
376
376
  * @param {number} startedAt
377
377
  */
378
378
  function summarizeDone (result, startedAt) {
379
- return {
379
+ const summary = {
380
380
  status: result?.status,
381
381
  in: result?.inputTokens || 0,
382
382
  out: result?.outputTokens || 0,
383
- think: result?.thinkingTokens || 0,
384
- cost: result?.cost,
385
- warning: result?.warning,
386
- totalMs: Date.now() - startedAt,
387
- maxInterFrameMs: result?.maxInterFrameMs
383
+ think: result?.thinkingTokens || 0
388
384
  }
385
+ if (result?.cacheWriteInputTokens) summary.cacheW = result.cacheWriteInputTokens
386
+ if (result?.cacheReadInputTokens) summary.cacheR = result.cacheReadInputTokens
387
+ summary.cost = result?.cost
388
+ if (result?.warning) summary.warning = result.warning
389
+ summary.totalMs = Date.now() - startedAt
390
+ if (result?.maxInterFrameMs != null) summary.maxInterFrameMs = result.maxInterFrameMs
391
+ return summary
389
392
  }
390
393
 
391
394
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mohdel",
3
- "version": "0.104.0",
3
+ "version": "0.104.2",
4
4
  "license": "MIT",
5
5
  "author": {
6
6
  "name": "Christophe Le Bars",
@@ -87,7 +87,7 @@
87
87
  "@opentelemetry/exporter-trace-otlp-grpc": "^0.217.0",
88
88
  "@opentelemetry/sdk-node": "^0.217.0",
89
89
  "chalk": "^5.4.0",
90
- "mohdel-thin-gate-linux-x64-gnu": "0.104.0"
90
+ "mohdel-thin-gate-linux-x64-gnu": "0.104.2"
91
91
  },
92
92
  "dependencies": {
93
93
  "@anthropic-ai/sdk": "^0.95.1",