mohdel 0.97.1 → 0.98.1

@@ -71,7 +71,7 @@ import { createRealtimeDeltaBuffer } from '../../src/lib/utils.js'
  * @returns {Promise<any>} AnswerResult (matches the factory's return shape).
  */
 export async function runAnswer ({ provider, model, modelKey, configuration, prompt, options = {} }, deps = {}) {
-  const envelope = toEnvelope({ provider, model, configuration, prompt, options })
+  const envelope = toEnvelope({ modelKey, configuration, prompt, options })
 
   // If the caller passed a `realtimeHandler`, feed every `delta`
   // event into a buffer that invokes the handler on batches matching
@@ -150,20 +150,23 @@ export async function runAnswerImage ({ provider, model, configuration, prompt,
 
 /**
  * @param {object} args
- * @param {string} args.provider
- * @param {string} args.model
+ * @param {string} args.modelKey Mohdel catalog key `<provider>/<bare>`. The
+ *                               envelope carries the mohdel key, not the
+ *                               upstream provider model name — adapters
+ *                               look up `spec.model` via `catalogKey()`
+ *                               and use that for the actual API call.
  * @param {any} args.configuration
  * @param {string | any[] | {system?: any, messages: any[]}} args.prompt
  * @param {any} args.options
 * @returns {import('#core/envelope.js').CallEnvelope}
 */
-function toEnvelope ({ provider, model, configuration, prompt, options }) {
+function toEnvelope ({ modelKey, configuration, prompt, options }) {
   /** @type {import('#core/envelope.js').CallEnvelope} */
   const envelope = {
     callId: options.callId || newCallId(),
     authId: options.authId || 'local',
     auth: configToAuth(configuration),
-    model: /** @type {import('#core/model-id.js').ModelId} */ (`${provider}/${model}`),
+    model: /** @type {import('#core/model-id.js').ModelId} */ (modelKey),
     prompt: toEnvelopePrompt(prompt)
   }
 
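For orientation, a minimal sketch of the indirection the new JSDoc describes. The inline catalog and `upstreamModelFor` are hypothetical stand-ins; only the key shape and the `spec.model` lookup (via `catalogKey()` in the real code) come from the comment above.

```js
// Hypothetical sketch — not mohdel's actual catalog API. The envelope's
// `model` field is the mohdel catalog key; the adapter resolves the
// upstream model name from spec.model just before the API call.
const catalog = {
  'fireworks/kimi-k2-instruct': {
    model: 'accounts/fireworks/models/kimi-k2-instruct' // full upstream id
  }
}

function upstreamModelFor (envelope) {
  const spec = catalog[envelope.model] // stand-in for the catalogKey() lookup
  if (!spec) throw new Error(`unknown model key: ${envelope.model}`)
  return spec.model // what actually goes in the request body
}
```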
@@ -107,6 +107,7 @@ export async function * runChatCompletions (envelope, client, config, deps = {})
 
   let content = message.content || ''
   let toolCalls = message.tool_calls
+  const reasoning = message.reasoning_content || null
 
   if (config.parseDsml && content && (!toolCalls || !toolCalls.length)) {
     const dsml = parseDsmlToolCalls(content)
@@ -121,7 +122,7 @@ export async function * runChatCompletions (envelope, client, config, deps = {})
   }
 
   yield finalize({
-    envelope, content, toolCalls, usage, finishReason, start, first
+    envelope, content, toolCalls, usage, finishReason, start, first, reasoning
   })
 }
 
@@ -140,6 +141,7 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
   // F53: accumulate via array + join to avoid per-delta V8 cons-string
   // churn on long streams.
   const contentParts = []
+  const reasoningParts = []
  let first = null
  let finishReason = null
  let usage = {}
@@ -163,6 +165,15 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
   try {
     for await (const chunk of stream) {
       const choice = chunk.choices?.[0]
+      // DeepSeek V4 / deepseek-reasoner / Cerebras reasoning models emit
+      // `delta.reasoning_content` chunks before visible content. Capture
+      // them so multi-turn callers can roundtrip reasoning back to the
+      // API (DeepSeek V4 hard-rejects assistant messages without it).
+      // Token count comes from `usage.completion_tokens_details.reasoning_tokens`.
+      if (choice?.delta?.reasoning_content) {
+        if (first === null) first = String(process.hrtime.bigint())
+        reasoningParts.push(choice.delta.reasoning_content)
+      }
       if (choice?.delta?.content) {
         if (first === null) first = String(process.hrtime.bigint())
         contentParts.push(choice.delta.content)
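A hedged sketch of the multi-turn roundtrip the comment above motivates, written against the plain OpenAI-compatible SDK surface. Echoing `reasoning_content` back is the DeepSeek-specific requirement the comment describes, not a general rule for every provider.

```js
// Sketch: carry reasoning_content forward so DeepSeek-style providers
// accept the assistant message on the next turn (per the comment above).
// `client` is any OpenAI-compatible SDK instance; `model` is the upstream id.
async function askAndExtend (client, model, messages, userText) {
  const turn = [...messages, { role: 'user', content: userText }]
  const res = await client.chat.completions.create({ model, messages: turn })
  const msg = res.choices[0].message
  const assistant = { role: 'assistant', content: msg.content || '' }
  if (msg.reasoning_content) assistant.reasoning_content = msg.reasoning_content
  return [...turn, assistant] // history ready for the next request
}
```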
@@ -227,7 +238,8 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
     usage,
     finishReason,
     start,
-    first
+    first,
+    reasoning: reasoningParts.length ? reasoningParts.join('') : null
   })
 }
 
@@ -239,11 +251,12 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
  * usage: any,
  * finishReason: string | null,
  * start: string,
- * first: string | null
+ * first: string | null,
+ * reasoning?: string | null
  * }} p
  * @returns {import('#core/events.js').DoneEvent}
  */
-function finalize ({ envelope, content, toolCalls, usage, finishReason, start, first }) {
+function finalize ({ envelope, content, toolCalls, usage, finishReason, start, first, reasoning = null }) {
   const end = String(process.hrtime.bigint())
   const inputTokens = usage.prompt_tokens || 0
   const totalOutputTokens = usage.completion_tokens || 0
@@ -274,6 +287,7 @@ function finalize ({ envelope, content, toolCalls, usage, finishReason, start, f
   if (toolCalls && toolCalls.length > 0) {
     done.result.toolCalls = fromCerebrasToolCalls(toolCalls)
   }
+  if (reasoning) done.result.reasoning = reasoning
   return done
 }
 
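What a consumer of the generator might do with the new field. Only `result.reasoning` (and `result.toolCalls`) are confirmed by this diff; the surrounding iteration shape is assumed from the generator signature above.

```js
import { runChatCompletions } from './_chat_completions.js'

// Assumed consumer shape: scan the event stream for the done event's
// result.reasoning. Event discriminants aren't shown in this diff, so
// optional chaining stands in for a proper type check.
async function collectReasoning (envelope, client, config, deps) {
  let reasoning = null
  for await (const ev of runChatCompletions(envelope, client, config, deps)) {
    if (ev.result?.reasoning) reasoning = ev.result.reasoning
  }
  return reasoning
}
```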
@@ -349,11 +363,12 @@ function toChatMessages (prompt) {
         content: flattenText(m.content)
       }
     }
+    const reasoning = m.role === 'assistant' ? extractReasoning(m.content) : null
     if (m.role === 'assistant' && m.toolCalls?.length) {
       // Chat Completions assistant turn: optional `content` + the
       // `tool_calls` array. `arguments` must be a JSON string on
       // the wire.
-      return {
+      const msg = {
        role: 'assistant',
        content: flattenText(m.content) || '',
        tool_calls: m.toolCalls.map(tc => ({
@@ -365,8 +380,12 @@ function toChatMessages (prompt) {
        }
      }))
    }
+      if (reasoning) msg.reasoning_content = reasoning
+      return msg
     }
-    return { role: m.role, content: flattenText(m.content) }
+    const msg = { role: m.role, content: flattenText(m.content) }
+    if (reasoning) msg.reasoning_content = reasoning
+    return msg
   })
 }
 
@@ -383,6 +402,13 @@ function flattenText (content) {
   return content.filter(p => p.type === 'text' && p.text).map(p => p.text).join('\n')
 }
 
+/** @param {string | import('#core/envelope.js').MessagePart[]} content */
+function extractReasoning (content) {
+  if (typeof content === 'string' || !Array.isArray(content)) return null
+  const parts = content.filter(p => p.type === 'reasoning' && p.text).map(p => p.text)
+  return parts.length ? parts.join('\n') : null
+}
+
 /**
  * @param {any} args
  * @param {import('#core/envelope.js').MediaRef[]} images
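A quick usage check of the new helper. The part objects mirror the `type`/`text` filters above; the sample strings are invented.

```js
// Invented sample content; shapes match the p.type / p.text filters above.
const content = [
  { type: 'reasoning', text: 'User asked for JSON; validate keys first.' },
  { type: 'text', text: '{"ok": true}' }
]
extractReasoning(content)  // 'User asked for JSON; validate keys first.'
extractReasoning('plain')  // null (string content carries no reasoning part)
// toChatMessages() then reattaches the extracted text as `reasoning_content`.
```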
@@ -2,9 +2,9 @@
  * Fireworks adapter — OpenAI-compatible chat completions with
  * streaming, over api.fireworks.ai/inference/v1.
  *
- * Fireworks model IDs carry an `accounts/fireworks/models/` prefix;
- * envelopes can supply either form. The `mutateArgs` hook normalizes
- * `args.model` before the request leaves the adapter.
+ * Catalog `spec.model` carries the full upstream id (with the
+ * `accounts/fireworks/models/` prefix). The adapter forwards it
+ * verbatim; no normalization needed.
  *
  * Implementation uses the OpenAI SDK with a custom baseURL — the
  * wire shape is identical and the SDK's streaming iterator matches
@@ -18,7 +18,6 @@ import OpenAI from 'openai'
 import { runChatCompletions } from './_chat_completions.js'
 
 const BASE_URL = 'https://api.fireworks.ai/inference/v1'
-const MODEL_PREFIX = 'accounts/fireworks/models/'
 
 /**
  * @param {import('#core/envelope.js').CallEnvelope} envelope
@@ -32,12 +31,7 @@ export async function * fireworks (envelope, deps = {}) {
   })
   yield * runChatCompletions(envelope, client, {
     provider: 'fireworks',
-    stream: true,
-    mutateArgs: (env, args) => {
-      if (!args.model.includes('/')) {
-        args.model = `${MODEL_PREFIX}${args.model}`
-      }
-    }
+    stream: true
   }, {
     signal: deps.signal,
     log: deps.log,
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "mohdel",
-  "version": "0.97.1",
+  "version": "0.98.1",
   "license": "MIT",
   "author": {
     "name": "Christophe Le Bars",
@@ -87,7 +87,7 @@
     "@opentelemetry/exporter-trace-otlp-grpc": "^0.215.0",
     "@opentelemetry/sdk-node": "^0.215.0",
     "chalk": "^5.4.0",
-    "mohdel-thin-gate-linux-x64-gnu": "0.97.1"
+    "mohdel-thin-gate-linux-x64-gnu": "0.98.1"
   },
   "dependencies": {
     "@anthropic-ai/sdk": "^0.91.1",
@@ -96,12 +96,13 @@
     "@opentelemetry/api": "^1.9.1",
     "env-paths": "^4.0.0",
     "groq-sdk": "^1.1.2",
-    "openai": "^6.34.0"
+    "openai": "^6.35.0"
   },
   "lint-staged": {
     "*.{js,cjs}": "standard"
   },
   "devDependencies": {
+    "gpt-tokenizer": "^3.4.0",
     "lint-staged": "^16.4.0",
     "release-it": "^20.0.1",
     "standard": "^17.1.2",
@@ -1,7 +1,7 @@
 // Fireworks' /inference/v1/models is openai-compatible for listing.
-// Upstream IDs carry an `accounts/fireworks/models/` prefix that the
-// runtime adapter adds back automatically; strip it here so curated
-// entries stay short.
+// Upstream IDs carry an `accounts/fireworks/models/` prefix. The mohdel
+// catalog key uses the bare suffix (`fireworks/<bare>`), but `spec.model`
+// preserves the full upstream id so the adapter sends it verbatim.
 const DEFAULT_BASE_URL = 'https://api.fireworks.ai/inference/v1'
 const FW_PREFIX = 'accounts/fireworks/models/'
 
@@ -29,7 +29,7 @@ export default (sdkConfig) => {
   getModelInfo: async (id) => {
     const m = (await load()).find(x => shortId(x.id) === id)
     if (!m) return null
-    const info = { model: shortId(m.id) }
+    const info = { model: m.id }
     if (typeof m.context_length === 'number') info.contextTokenLimit = m.context_length
     if (typeof m.created === 'number') info.created = m.created
     return info
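To make the key/spec split concrete: `shortId`'s body isn't shown in this diff, so the prefix-strip below is an inference from its call sites, and the model id is illustrative.

```js
// Inferred shortId — consistent with its use above, not copied from source.
const FW_PREFIX = 'accounts/fireworks/models/'
const shortId = id => id.startsWith(FW_PREFIX) ? id.slice(FW_PREFIX.length) : id

const upstream = 'accounts/fireworks/models/llama-v3p1-8b-instruct' // illustrative
shortId(upstream) // 'llama-v3p1-8b-instruct', i.e. catalog key 'fireworks/llama-v3p1-8b-instruct'
// getModelInfo now reports { model: upstream }, so the adapter sends the
// full id verbatim, replacing the old MODEL_PREFIX re-add in the adapter.
```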
package/src/lib/index.js CHANGED
@@ -723,4 +723,5 @@ const createModelProxy = (resolvedModelId, modelSpec, handlers, aliasOutputEffor
 
 export { silent } from './logger.js'
 export { loadCuratedCache, getCuratedCacheSnapshot } from './curated-cache.js'
+export { effectiveContextLimit } from './utils.js'
 export default mohdel
@@ -3,26 +3,34 @@ const providers = {
     sdk: 'anthropic',
     apiKeyEnv: 'ANTHROPIC_API_SK',
     createConfiguration: apiKey => ({ apiKey }),
-    creators: ['anthropic']
+    creators: ['anthropic'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'error'
   },
   cerebras: {
     sdk: 'cerebras',
     apiKeyEnv: 'CEREBRAS_API_SK',
     createConfiguration: apiKey => ({ apiKey }),
-    creators: ['openai', 'zai']
+    creators: ['openai', 'zai'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'accept'
   },
   deepseek: {
     sdk: 'openai',
     api: 'chatCompletions',
     apiKeyEnv: 'DEEPSEEK_API_SK',
     createConfiguration: apiKey => ({ baseURL: 'https://api.deepseek.com', apiKey }),
-    creators: ['deepseek']
+    creators: ['deepseek'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'accept'
   },
   gemini: {
     sdk: 'gemini',
     apiKeyEnv: 'GEMINI_API_SK',
     createConfiguration: apiKey => ({ apiKey }),
-    creators: ['google']
+    creators: ['google'],
+    contextSemantics: 'separate',
+    outputCapStrategy: 'accept'
   },
   groq: {
     sdk: 'groq',
@@ -41,7 +49,9 @@ const providers = {
     sdk: 'fireworks',
     apiKeyEnv: 'FIREWORKS_API_SK',
     createConfiguration: apiKey => ({ apiKey, baseURL: 'https://api.fireworks.ai/inference/v1' }),
-    creators: ['meta', 'alibaba']
+    creators: ['meta', 'alibaba'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'accept'
   },
   novita: {
     sdk: 'openai',
@@ -49,13 +59,17 @@
     imageHandler: 'novita',
     apiKeyEnv: 'NOVITA_API_SK',
     createConfiguration: apiKey => ({ apiKey, baseURL: 'https://api.novita.ai/openai' }),
-    creators: ['deepseek', 'openai', 'bfl']
+    creators: ['deepseek', 'openai', 'bfl'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'error'
   },
   openai: {
     sdk: 'openai',
     apiKeyEnv: 'OPENAI_API_SK',
     createConfiguration: apiKey => ({ apiKey }),
-    creators: ['openai']
+    creators: ['openai'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'accept'
   },
   openrouter: {
     sdk: 'openrouter',
@@ -78,7 +92,9 @@ const providers = {
     sdk: 'openai',
     apiKeyEnv: 'XAI_API_SK',
     createConfiguration: apiKey => ({ baseURL: 'https://api.x.ai/v1', apiKey }),
-    creators: ['xai']
+    creators: ['xai'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'accept'
   }
 }
 
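The diff adds `contextSemantics` and `outputCapStrategy` without showing their consumer, so the reading below is an assumption: 'shared' taken to mean input and output draw from one context window ('separate' for Gemini's split limits), and `outputCapStrategy` taken to describe whether a provider errors or silently accepts an over-cap max-output request.

```js
// Assumption-only sketch of how a budget planner might branch on the new
// fields; the field semantics are inferred, not documented in this diff.
// effectiveContextLimit is the helper added in the utils.js hunk below.
function inputTokenBudget (provider, spec, plannedOutputTokens) {
  const ceiling = effectiveContextLimit(spec)
  return provider.contextSemantics === 'shared'
    ? Math.max(0, ceiling - plannedOutputTokens) // output shares the window
    : ceiling                                    // 'separate': input window is its own
}
```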
package/src/lib/utils.js CHANGED
@@ -3,6 +3,17 @@ export const sanitizeOutput = str => {
   return str.replace(/\0/g, '\uFFFD').trim()
 }
 
+// Practical input ceiling for a model spec: catalog `contextTokenLimit`
+// minus any empirically-derived `inputCeilingMargin` reserve the API
+// silently steals (reasoning floor, structural overhead, pricing-tier
+// cliffs). Reduces to `contextTokenLimit` when the margin field is
+// unset. Consumers computing safe input budgets should call this in
+// place of `spec.contextTokenLimit`.
+export const effectiveContextLimit = spec => {
+  if (!spec || spec.contextTokenLimit == null) return 0
+  return Math.max(0, spec.contextTokenLimit - (spec.inputCeilingMargin ?? 0))
+}
+
 export const translateModelInfo = (model, infoTranslate = {}) => {
   if (!model || typeof model !== 'object') return model
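A quick usage check of the new helper, which this release also re-exports from the package root (see the index.js hunk above). The numbers are invented.

```js
// Invented values, just exercising the branches documented above.
effectiveContextLimit({ contextTokenLimit: 128000, inputCeilingMargin: 4096 }) // 123904
effectiveContextLimit({ contextTokenLimit: 128000 })                           // 128000 (no margin set)
effectiveContextLimit(null)                                                    // 0
```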