mohdel 0.97.1 → 0.98.1

@@ -71,7 +71,7 @@ import { createRealtimeDeltaBuffer } from '../../src/lib/utils.js'
  * @returns {Promise<any>} AnswerResult (matches the factory's return shape).
  */
 export async function runAnswer ({ provider, model, modelKey, configuration, prompt, options = {} }, deps = {}) {
-  const envelope = toEnvelope({ provider, model, configuration, prompt, options })
+  const envelope = toEnvelope({ modelKey, configuration, prompt, options })
 
   // If the caller passed a `realtimeHandler`, feed every `delta`
   // event into a buffer that invokes the handler on batches matching
@@ -150,20 +150,23 @@ export async function runAnswerImage ({ provider, model, configuration, prompt,
 
 /**
  * @param {object} args
- * @param {string} args.provider
- * @param {string} args.model
+ * @param {string} args.modelKey Mohdel catalog key `<provider>/<bare>`. The
+ *                               envelope carries the mohdel key, not the
+ *                               upstream provider model name — adapters
+ *                               look up `spec.model` via `catalogKey()`
+ *                               and use that for the actual API call.
  * @param {any} args.configuration
  * @param {string | any[] | {system?: any, messages: any[]}} args.prompt
  * @param {any} args.options
 * @returns {import('#core/envelope.js').CallEnvelope}
 */
-function toEnvelope ({ provider, model, configuration, prompt, options }) {
+function toEnvelope ({ modelKey, configuration, prompt, options }) {
   /** @type {import('#core/envelope.js').CallEnvelope} */
   const envelope = {
     callId: options.callId || newCallId(),
     authId: options.authId || 'local',
     auth: configToAuth(configuration),
-    model: /** @type {import('#core/model-id.js').ModelId} */ (`${provider}/${model}`),
+    model: /** @type {import('#core/model-id.js').ModelId} */ (modelKey),
     prompt: toEnvelopePrompt(prompt)
   }
 
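For orientation, a minimal sketch of the indirection the new JSDoc describes. The inline catalog and `upstreamModelFor` are hypothetical stand-ins; only the key shape and the `spec.model` lookup (via `catalogKey()` in the real code) come from the comment above.

```js
// Hypothetical sketch — not mohdel's actual catalog API. The envelope's
// `model` field is the mohdel catalog key; the adapter resolves the
// upstream model name from spec.model just before the API call.
const catalog = {
  'fireworks/kimi-k2-instruct': {
    model: 'accounts/fireworks/models/kimi-k2-instruct' // full upstream id
  }
}

function upstreamModelFor (envelope) {
  const spec = catalog[envelope.model] // stand-in for the catalogKey() lookup
  if (!spec) throw new Error(`unknown model key: ${envelope.model}`)
  return spec.model // what actually goes in the request body
}
```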
@@ -107,6 +107,7 @@ export async function * runChatCompletions (envelope, client, config, deps = {})
 
   let content = message.content || ''
   let toolCalls = message.tool_calls
+  const reasoning = message.reasoning_content || null
 
   if (config.parseDsml && content && (!toolCalls || !toolCalls.length)) {
     const dsml = parseDsmlToolCalls(content)
@@ -121,7 +122,7 @@ export async function * runChatCompletions (envelope, client, config, deps = {})
   }
 
   yield finalize({
-    envelope, content, toolCalls, usage, finishReason, start, first
+    envelope, content, toolCalls, usage, finishReason, start, first, reasoning
   })
 }
 
@@ -140,6 +141,7 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
   // F53: accumulate via array + join to avoid per-delta V8 cons-string
   // churn on long streams.
   const contentParts = []
+  const reasoningParts = []
  let first = null
  let finishReason = null
  let usage = {}
@@ -163,6 +165,15 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
   try {
     for await (const chunk of stream) {
       const choice = chunk.choices?.[0]
+      // DeepSeek V4 / deepseek-reasoner / Cerebras reasoning models emit
+      // `delta.reasoning_content` chunks before visible content. Capture
+      // them so multi-turn callers can roundtrip reasoning back to the
+      // API (DeepSeek V4 hard-rejects assistant messages without it).
+      // Token count comes from `usage.completion_tokens_details.reasoning_tokens`.
+      if (choice?.delta?.reasoning_content) {
+        if (first === null) first = String(process.hrtime.bigint())
+        reasoningParts.push(choice.delta.reasoning_content)
+      }
       if (choice?.delta?.content) {
         if (first === null) first = String(process.hrtime.bigint())
         contentParts.push(choice.delta.content)
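A hedged sketch of the multi-turn roundtrip the comment above motivates, written against the plain OpenAI-compatible SDK surface. Echoing `reasoning_content` back is the DeepSeek-specific requirement the comment describes, not a general rule for every provider.

```js
// Sketch: carry reasoning_content forward so DeepSeek-style providers
// accept the assistant message on the next turn (per the comment above).
// `client` is any OpenAI-compatible SDK instance; `model` is the upstream id.
async function askAndExtend (client, model, messages, userText) {
  const turn = [...messages, { role: 'user', content: userText }]
  const res = await client.chat.completions.create({ model, messages: turn })
  const msg = res.choices[0].message
  const assistant = { role: 'assistant', content: msg.content || '' }
  if (msg.reasoning_content) assistant.reasoning_content = msg.reasoning_content
  return [...turn, assistant] // history ready for the next request
}
```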
@@ -227,7 +238,8 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
     usage,
     finishReason,
     start,
-    first
+    first,
+    reasoning: reasoningParts.length ? reasoningParts.join('') : null
   })
 }
 
@@ -239,11 +251,12 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
  * usage: any,
  * finishReason: string | null,
  * start: string,
- * first: string | null
+ * first: string | null,
+ * reasoning?: string | null
  * }} p
  * @returns {import('#core/events.js').DoneEvent}
  */
-function finalize ({ envelope, content, toolCalls, usage, finishReason, start, first }) {
+function finalize ({ envelope, content, toolCalls, usage, finishReason, start, first, reasoning = null }) {
   const end = String(process.hrtime.bigint())
   const inputTokens = usage.prompt_tokens || 0
   const totalOutputTokens = usage.completion_tokens || 0
@@ -274,6 +287,7 @@ function finalize ({ envelope, content, toolCalls, usage, finishReason, start, f
   if (toolCalls && toolCalls.length > 0) {
     done.result.toolCalls = fromCerebrasToolCalls(toolCalls)
   }
+  if (reasoning) done.result.reasoning = reasoning
   return done
 }
 
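What a consumer of the generator might do with the new field. Only `result.reasoning` (and `result.toolCalls`) are confirmed by this diff; the surrounding iteration shape is assumed from the generator signature above.

```js
import { runChatCompletions } from './_chat_completions.js'

// Assumed consumer shape: scan the event stream for the done event's
// result.reasoning. Event discriminants aren't shown in this diff, so
// optional chaining stands in for a proper type check.
async function collectReasoning (envelope, client, config, deps) {
  let reasoning = null
  for await (const ev of runChatCompletions(envelope, client, config, deps)) {
    if (ev.result?.reasoning) reasoning = ev.result.reasoning
  }
  return reasoning
}
```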
@@ -349,11 +363,12 @@ function toChatMessages (prompt) {
         content: flattenText(m.content)
       }
     }
+    const reasoning = m.role === 'assistant' ? extractReasoning(m.content) : null
     if (m.role === 'assistant' && m.toolCalls?.length) {
       // Chat Completions assistant turn: optional `content` + the
       // `tool_calls` array. `arguments` must be a JSON string on
       // the wire.
-      return {
+      const msg = {
        role: 'assistant',
        content: flattenText(m.content) || '',
        tool_calls: m.toolCalls.map(tc => ({
@@ -365,8 +380,12 @@ function toChatMessages (prompt) {
        }
      }))
    }
+      if (reasoning) msg.reasoning_content = reasoning
+      return msg
     }
-    return { role: m.role, content: flattenText(m.content) }
+    const msg = { role: m.role, content: flattenText(m.content) }
+    if (reasoning) msg.reasoning_content = reasoning
+    return msg
   })
 }
 
@@ -383,6 +402,13 @@ function flattenText (content) {
   return content.filter(p => p.type === 'text' && p.text).map(p => p.text).join('\n')
 }
 
+/** @param {string | import('#core/envelope.js').MessagePart[]} content */
+function extractReasoning (content) {
+  if (typeof content === 'string' || !Array.isArray(content)) return null
+  const parts = content.filter(p => p.type === 'reasoning' && p.text).map(p => p.text)
+  return parts.length ? parts.join('\n') : null
+}
+
 /**
  * @param {any} args
  * @param {import('#core/envelope.js').MediaRef[]} images
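A quick usage check of the new helper. The part objects mirror the `type`/`text` filters above; the sample strings are invented.

```js
// Invented sample content; shapes match the p.type / p.text filters above.
const content = [
  { type: 'reasoning', text: 'User asked for JSON; validate keys first.' },
  { type: 'text', text: '{"ok": true}' }
]
extractReasoning(content)  // 'User asked for JSON; validate keys first.'
extractReasoning('plain')  // null (string content carries no reasoning part)
// toChatMessages() then reattaches the extracted text as `reasoning_content`.
```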
@@ -2,9 +2,9 @@
  * Fireworks adapter — OpenAI-compatible chat completions with
  * streaming, over api.fireworks.ai/inference/v1.
  *
- * Fireworks model IDs carry an `accounts/fireworks/models/` prefix;
- * envelopes can supply either form. The `mutateArgs` hook normalizes
- * `args.model` before the request leaves the adapter.
+ * Catalog `spec.model` carries the full upstream id (with the
+ * `accounts/fireworks/models/` prefix). The adapter forwards it
+ * verbatim; no normalization needed.
  *
  * Implementation uses the OpenAI SDK with a custom baseURL — the
  * wire shape is identical and the SDK's streaming iterator matches
@@ -18,7 +18,6 @@ import OpenAI from 'openai'
 import { runChatCompletions } from './_chat_completions.js'
 
 const BASE_URL = 'https://api.fireworks.ai/inference/v1'
-const MODEL_PREFIX = 'accounts/fireworks/models/'
 
 /**
  * @param {import('#core/envelope.js').CallEnvelope} envelope
@@ -32,12 +31,7 @@ export async function * fireworks (envelope, deps = {}) {
   })
   yield * runChatCompletions(envelope, client, {
     provider: 'fireworks',
-    stream: true,
-    mutateArgs: (env, args) => {
-      if (!args.model.includes('/')) {
-        args.model = `${MODEL_PREFIX}${args.model}`
-      }
-    }
+    stream: true
   }, {
     signal: deps.signal,
     log: deps.log,
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "mohdel",
-  "version": "0.97.1",
+  "version": "0.98.1",
   "license": "MIT",
   "author": {
     "name": "Christophe Le Bars",
@@ -87,7 +87,7 @@
     "@opentelemetry/exporter-trace-otlp-grpc": "^0.215.0",
     "@opentelemetry/sdk-node": "^0.215.0",
     "chalk": "^5.4.0",
-    "mohdel-thin-gate-linux-x64-gnu": "0.97.1"
+    "mohdel-thin-gate-linux-x64-gnu": "0.98.1"
   },
   "dependencies": {
     "@anthropic-ai/sdk": "^0.91.1",
@@ -96,12 +96,13 @@
     "@opentelemetry/api": "^1.9.1",
     "env-paths": "^4.0.0",
     "groq-sdk": "^1.1.2",
-    "openai": "^6.34.0"
+    "openai": "^6.35.0"
   },
   "lint-staged": {
     "*.{js,cjs}": "standard"
   },
   "devDependencies": {
+    "gpt-tokenizer": "^3.4.0",
     "lint-staged": "^16.4.0",
     "release-it": "^20.0.1",
     "standard": "^17.1.2",
@@ -1,7 +1,7 @@
 // Fireworks' /inference/v1/models is openai-compatible for listing.
-// Upstream IDs carry an `accounts/fireworks/models/` prefix that the
-// runtime adapter adds back automatically; strip it here so curated
-// entries stay short.
+// Upstream IDs carry an `accounts/fireworks/models/` prefix. The mohdel
+// catalog key uses the bare suffix (`fireworks/<bare>`), but `spec.model`
+// preserves the full upstream id so the adapter sends it verbatim.
 const DEFAULT_BASE_URL = 'https://api.fireworks.ai/inference/v1'
 const FW_PREFIX = 'accounts/fireworks/models/'
 
@@ -29,7 +29,7 @@ export default (sdkConfig) => {
   getModelInfo: async (id) => {
     const m = (await load()).find(x => shortId(x.id) === id)
     if (!m) return null
-    const info = { model: shortId(m.id) }
+    const info = { model: m.id }
     if (typeof m.context_length === 'number') info.contextTokenLimit = m.context_length
     if (typeof m.created === 'number') info.created = m.created
     return info
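To make the key/spec split concrete: `shortId`'s body isn't shown in this diff, so the prefix-strip below is an inference from its call sites, and the model id is illustrative.

```js
// Inferred shortId — consistent with its use above, not copied from source.
const FW_PREFIX = 'accounts/fireworks/models/'
const shortId = id => id.startsWith(FW_PREFIX) ? id.slice(FW_PREFIX.length) : id

const upstream = 'accounts/fireworks/models/llama-v3p1-8b-instruct' // illustrative
shortId(upstream) // 'llama-v3p1-8b-instruct', i.e. catalog key 'fireworks/llama-v3p1-8b-instruct'
// getModelInfo now reports { model: upstream }, so the adapter sends the
// full id verbatim, replacing the old MODEL_PREFIX re-add in the adapter.
```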
package/src/lib/index.js CHANGED
@@ -723,4 +723,5 @@ const createModelProxy = (resolvedModelId, modelSpec, handlers, aliasOutputEffor
 
 export { silent } from './logger.js'
 export { loadCuratedCache, getCuratedCacheSnapshot } from './curated-cache.js'
+export { effectiveContextLimit } from './utils.js'
 export default mohdel
@@ -3,26 +3,34 @@ const providers = {
     sdk: 'anthropic',
     apiKeyEnv: 'ANTHROPIC_API_SK',
     createConfiguration: apiKey => ({ apiKey }),
-    creators: ['anthropic']
+    creators: ['anthropic'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'error'
   },
   cerebras: {
     sdk: 'cerebras',
     apiKeyEnv: 'CEREBRAS_API_SK',
     createConfiguration: apiKey => ({ apiKey }),
-    creators: ['openai', 'zai']
+    creators: ['openai', 'zai'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'accept'
   },
   deepseek: {
     sdk: 'openai',
     api: 'chatCompletions',
     apiKeyEnv: 'DEEPSEEK_API_SK',
     createConfiguration: apiKey => ({ baseURL: 'https://api.deepseek.com', apiKey }),
-    creators: ['deepseek']
+    creators: ['deepseek'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'accept'
   },
   gemini: {
     sdk: 'gemini',
     apiKeyEnv: 'GEMINI_API_SK',
     createConfiguration: apiKey => ({ apiKey }),
-    creators: ['google']
+    creators: ['google'],
+    contextSemantics: 'separate',
+    outputCapStrategy: 'accept'
   },
   groq: {
     sdk: 'groq',
@@ -41,7 +49,9 @@ const providers = {
     sdk: 'fireworks',
     apiKeyEnv: 'FIREWORKS_API_SK',
     createConfiguration: apiKey => ({ apiKey, baseURL: 'https://api.fireworks.ai/inference/v1' }),
-    creators: ['meta', 'alibaba']
+    creators: ['meta', 'alibaba'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'accept'
   },
   novita: {
     sdk: 'openai',
@@ -49,13 +59,17 @@
     imageHandler: 'novita',
     apiKeyEnv: 'NOVITA_API_SK',
     createConfiguration: apiKey => ({ apiKey, baseURL: 'https://api.novita.ai/openai' }),
-    creators: ['deepseek', 'openai', 'bfl']
+    creators: ['deepseek', 'openai', 'bfl'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'error'
   },
   openai: {
     sdk: 'openai',
     apiKeyEnv: 'OPENAI_API_SK',
     createConfiguration: apiKey => ({ apiKey }),
-    creators: ['openai']
+    creators: ['openai'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'accept'
   },
   openrouter: {
     sdk: 'openrouter',
@@ -78,7 +92,9 @@ const providers = {
     sdk: 'openai',
     apiKeyEnv: 'XAI_API_SK',
     createConfiguration: apiKey => ({ baseURL: 'https://api.x.ai/v1', apiKey }),
-    creators: ['xai']
+    creators: ['xai'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'accept'
   }
 }
 
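The diff adds `contextSemantics` and `outputCapStrategy` without showing their consumer, so the reading below is an assumption: 'shared' taken to mean input and output draw from one context window ('separate' for Gemini's split limits), and `outputCapStrategy` taken to describe whether a provider errors or silently accepts an over-cap max-output request.

```js
// Assumption-only sketch of how a budget planner might branch on the new
// fields; the field semantics are inferred, not documented in this diff.
// effectiveContextLimit is the helper added in the utils.js hunk below.
function inputTokenBudget (provider, spec, plannedOutputTokens) {
  const ceiling = effectiveContextLimit(spec)
  return provider.contextSemantics === 'shared'
    ? Math.max(0, ceiling - plannedOutputTokens) // output shares the window
    : ceiling                                    // 'separate': input window is its own
}
```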
package/src/lib/utils.js CHANGED
@@ -3,6 +3,17 @@ export const sanitizeOutput = str => {
   return str.replace(/\0/g, '\uFFFD').trim()
 }
 
+// Practical input ceiling for a model spec: catalog `contextTokenLimit`
+// minus any empirically-derived `inputCeilingMargin` reserve the API
+// silently steals (reasoning floor, structural overhead, pricing-tier
+// cliffs). Reduces to `contextTokenLimit` when the margin field is
+// unset. Consumers computing safe input budgets should call this in
+// place of `spec.contextTokenLimit`.
+export const effectiveContextLimit = spec => {
+  if (!spec || spec.contextTokenLimit == null) return 0
+  return Math.max(0, spec.contextTokenLimit - (spec.inputCeilingMargin ?? 0))
+}
+
 export const translateModelInfo = (model, infoTranslate = {}) => {
   if (!model || typeof model !== 'object') return model
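A quick usage check of the new helper, which this release also re-exports from the package root (see the index.js hunk above). The numbers are invented.

```js
// Invented values, just exercising the branches documented above.
effectiveContextLimit({ contextTokenLimit: 128000, inputCeilingMargin: 4096 }) // 123904
effectiveContextLimit({ contextTokenLimit: 128000 })                           // 128000 (no margin set)
effectiveContextLimit(null)                                                    // 0
```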