npm - mohdel - Versions diffs - 0.104.0 → 0.104.2 - Mend

mohdel 0.104.0 → 0.104.2

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (6)

package/js/core/events.js +5 -7
package/js/session/adapters/_cancelled.js +10 -2
package/js/session/adapters/_pricing.js +4 -8
package/js/session/adapters/anthropic.js +3 -3
package/js/session/run.js +9 -6
package/package.json +2 -2

package/js/core/events.js CHANGED Viewed

@@ -62,14 +62,12 @@
  * @property {number} outputTokens
  * @property {number} thinkingTokens
  * @property {number} [cacheWriteInputTokens]
- *   Tokens written to a fresh prompt cache breakpoint, billed at
- *   `cacheWritePrice` (typically 1.25× input on Anthropic). Absent on
- *   providers that don't surface this counter (OpenAI doesn't separately
- *   bill cache writes).
+ *   Input tokens written to a fresh prompt cache breakpoint, billed at
+ *   `cacheWritePrice`. Absent when the provider has no separate
+ *   cache-write counter.
  * @property {number} [cacheReadInputTokens]
- *   Tokens served from prompt cache, billed at `cacheReadPrice` (typically
- *   0.1× input). Set by Anthropic directly and by OpenAI-shape adapters
- *   after subset→additive normalization of `prompt_tokens_details.cached_tokens`.
+ *   Input tokens served from prompt cache, billed at `cacheReadPrice`.
+ *   Absent when the provider has no prompt caching.
  * @property {number} cost
  *   USD, computed from curated pricing. Single number (not a breakdown).
  * @property {Timestamps} timestamps

package/js/session/adapters/_cancelled.js CHANGED Viewed

@@ -22,10 +22,16 @@ import { catalogKey } from '#core/model-id.js'
  * @param {string} output
  * @param {number} inputTokens
  * @param {number} outputTokens
+ * @param {{cacheWriteInputTokens?: number, cacheReadInputTokens?: number}} [extra]
+ *   Optional cache token counts captured before cancellation. Threaded through
+ *   so the cancellation-cost calculation prices any cache writes/reads that
+ *   already happened before the abort.
  * @returns {import('#core/events.js').DoneEvent}
  */
-export function cancelledDone (start, first, envelope, output, inputTokens, outputTokens) {
+export function cancelledDone (start, first, envelope, output, inputTokens, outputTokens, extra = {}) {
   const end = String(process.hrtime.bigint())
+  const cacheWriteInputTokens = extra.cacheWriteInputTokens || 0
+  const cacheReadInputTokens = extra.cacheReadInputTokens || 0
   return {
     type: 'done',
     result: {
@@ -34,9 +40,11 @@ export function cancelledDone (start, first, envelope, output, inputTokens, outp
       inputTokens,
       outputTokens,
       thinkingTokens: 0,
+      ...(cacheWriteInputTokens > 0 && { cacheWriteInputTokens }),
+      ...(cacheReadInputTokens > 0 && { cacheReadInputTokens }),
       cost: costFor(
         catalogKey(envelope.model),
-        { inputTokens, outputTokens, thinkingTokens: 0 }
+        { inputTokens, outputTokens, thinkingTokens: 0, cacheWriteInputTokens, cacheReadInputTokens }
       ),
       timestamps: { start, first: first ?? end, end },
       warning: WARNING_CANCELLED

package/js/session/adapters/_pricing.js CHANGED Viewed

@@ -29,14 +29,10 @@ import { getSpec, setCatalog } from './_catalog.js'
  *   - `cacheWritePrice` → `inputPrice` (graceful for non-caching providers)
  *   - `cacheReadPrice` → `inputPrice`
  *
- * Token-counting conventions:
- *   - Anthropic reports `cache_creation_input_tokens` and `cache_read_input_tokens`
- *     as ADDITIONAL to `input_tokens` (separately billable). The adapter
- *     surfaces them as `cacheWriteInputTokens` / `cacheReadInputTokens`
- *     (write/read pair, matching catalog `cacheWritePrice`/`cacheReadPrice`).
- *   - OpenAI reports `prompt_tokens_details.cached_tokens` as a SUBSET of
- *     `prompt_tokens` (already counted). Adapters subtract before passing
- *     `inputTokens` to keep this function additive across providers.
+ * Token-counting convention: this function is purely additive across
+ * `inputTokens`, `cacheWriteInputTokens`, `cacheReadInputTokens`,
+ * `outputTokens`, and `thinkingTokens`. Adapters normalize provider-specific
+ * shapes (e.g. subset-of-input vs. additional-to-input) before calling here.
  *
  * @param {any} spec  Catalog entry, or `undefined`.
  * @param {{inputTokens?: number, outputTokens?: number, thinkingTokens?: number,

package/js/session/adapters/anthropic.js CHANGED Viewed

@@ -129,7 +129,7 @@ export async function * anthropic (envelope, deps = {}) {
     for await (const event of stream) {
       if (signal?.aborted) {
-        yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens)
+        yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens, { cacheWriteInputTokens: cacheWriteTokens, cacheReadInputTokens: cacheReadTokens })
         return
       }
       switch (event.type) {
@@ -197,7 +197,7 @@ export async function * anthropic (envelope, deps = {}) {
     }
   } catch (e) {
     if (signal?.aborted) {
-      yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens)
+      yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens, { cacheWriteInputTokens: cacheWriteTokens, cacheReadInputTokens: cacheReadTokens })
       return
     }
     log?.warn({ err: e }, '[mohdel:anthropic] stream failed')
@@ -206,7 +206,7 @@ export async function * anthropic (envelope, deps = {}) {
   }
   if (signal?.aborted) {
-    yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens)
+    yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens, { cacheWriteInputTokens: cacheWriteTokens, cacheReadInputTokens: cacheReadTokens })
     return
   }

package/js/session/run.js CHANGED Viewed

@@ -376,16 +376,19 @@ function finalizeSpanOk (span, result, sawDelta = false, maxInterFrameMs = 0) {
  * @param {number} startedAt
  */
 function summarizeDone (result, startedAt) {
-  return {
+  const summary = {
     status: result?.status,
     in: result?.inputTokens || 0,
     out: result?.outputTokens || 0,
-    think: result?.thinkingTokens || 0,
-    cost: result?.cost,
-    warning: result?.warning,
-    totalMs: Date.now() - startedAt,
-    maxInterFrameMs: result?.maxInterFrameMs
+    think: result?.thinkingTokens || 0
   }
+  if (result?.cacheWriteInputTokens) summary.cacheW = result.cacheWriteInputTokens
+  if (result?.cacheReadInputTokens) summary.cacheR = result.cacheReadInputTokens
+  summary.cost = result?.cost
+  if (result?.warning) summary.warning = result.warning
+  summary.totalMs = Date.now() - startedAt
+  if (result?.maxInterFrameMs != null) summary.maxInterFrameMs = result.maxInterFrameMs
+  return summary
 }
 /**

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "mohdel",
-  "version": "0.104.0",
+  "version": "0.104.2",
   "license": "MIT",
   "author": {
     "name": "Christophe Le Bars",
@@ -87,7 +87,7 @@
     "@opentelemetry/exporter-trace-otlp-grpc": "^0.217.0",
     "@opentelemetry/sdk-node": "^0.217.0",
     "chalk": "^5.4.0",
-    "mohdel-thin-gate-linux-x64-gnu": "0.104.0"
+    "mohdel-thin-gate-linux-x64-gnu": "0.104.2"
   },
   "dependencies": {
     "@anthropic-ai/sdk": "^0.95.1",