mohdel 0.97.1 → 0.98.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/js/factory/bridge.js +8 -5
- package/js/session/adapters/_chat_completions.js +32 -6
- package/js/session/adapters/fireworks.js +4 -10
- package/package.json +4 -3
- package/src/lib/catalog/fireworks.js +4 -4
- package/src/lib/index.js +1 -0
- package/src/lib/providers.js +24 -8
- package/src/lib/utils.js +11 -0
package/js/factory/bridge.js
CHANGED

@@ -71,7 +71,7 @@ import { createRealtimeDeltaBuffer } from '../../src/lib/utils.js'
  * @returns {Promise<any>} AnswerResult (matches the factory's return shape).
  */
 export async function runAnswer ({ provider, model, modelKey, configuration, prompt, options = {} }, deps = {}) {
-  const envelope = toEnvelope({
+  const envelope = toEnvelope({ modelKey, configuration, prompt, options })

   // If the caller passed a `realtimeHandler`, feed every `delta`
   // event into a buffer that invokes the handler on batches matching
@@ -150,20 +150,23 @@ export async function runAnswerImage ({ provider, model, configuration, prompt,

 /**
  * @param {object} args
- * @param {string} args.provider
- *
+ * @param {string} args.modelKey Mohdel catalog key `<provider>/<bare>`. The
+ *                               envelope carries the mohdel key, not the
+ *                               upstream provider model name — adapters
+ *                               look up `spec.model` via `catalogKey()`
+ *                               and use that for the actual API call.
  * @param {any} args.configuration
  * @param {string | any[] | {system?: any, messages: any[]}} args.prompt
  * @param {any} args.options
  * @returns {import('#core/envelope.js').CallEnvelope}
  */
-function toEnvelope ({
+function toEnvelope ({ modelKey, configuration, prompt, options }) {
   /** @type {import('#core/envelope.js').CallEnvelope} */
   const envelope = {
     callId: options.callId || newCallId(),
     authId: options.authId || 'local',
     auth: configToAuth(configuration),
-    model: /** @type {import('#core/model-id.js').ModelId} */ (
+    model: /** @type {import('#core/model-id.js').ModelId} */ (modelKey),
     prompt: toEnvelopePrompt(prompt)
   }

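For orientation, a minimal sketch of the flow the new JSDoc describes: the envelope's `model` field now carries the mohdel catalog key, and the adapter resolves the upstream model name before the API call. The import path and spec shape below are assumptions drawn from the doc comment, not mohdel's actual internals.

// Hypothetical sketch; import path and spec shape are assumed.
import { catalogKey } from '#core/model-id.js'

async function upstreamModelFor (envelope) {
  // envelope.model is the mohdel key, e.g. 'fireworks/llama-v3p1-8b-instruct'
  const spec = await catalogKey(envelope.model)
  // spec.model is the upstream id sent on the wire, e.g.
  // 'accounts/fireworks/models/llama-v3p1-8b-instruct'
  return spec.model
}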
package/js/session/adapters/_chat_completions.js
CHANGED

@@ -107,6 +107,7 @@ export async function * runChatCompletions (envelope, client, config, deps = {})

   let content = message.content || ''
   let toolCalls = message.tool_calls
+  const reasoning = message.reasoning_content || null

   if (config.parseDsml && content && (!toolCalls || !toolCalls.length)) {
     const dsml = parseDsmlToolCalls(content)
@@ -121,7 +122,7 @@ export async function * runChatCompletions (envelope, client, config, deps = {})
   }

   yield finalize({
-    envelope, content, toolCalls, usage, finishReason, start, first
+    envelope, content, toolCalls, usage, finishReason, start, first, reasoning
   })
 }

@@ -140,6 +141,7 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
   // F53: accumulate via array + join to avoid per-delta V8 cons-string
   // churn on long streams.
   const contentParts = []
+  const reasoningParts = []
   let first = null
   let finishReason = null
   let usage = {}
@@ -163,6 +165,15 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
   try {
     for await (const chunk of stream) {
       const choice = chunk.choices?.[0]
+      // DeepSeek V4 / deepseek-reasoner / Cerebras reasoning models emit
+      // `delta.reasoning_content` chunks before visible content. Capture
+      // them so multi-turn callers can roundtrip reasoning back to the
+      // API (DeepSeek V4 hard-rejects assistant messages without it).
+      // Token count comes from `usage.completion_tokens_details.reasoning_tokens`.
+      if (choice?.delta?.reasoning_content) {
+        if (first === null) first = String(process.hrtime.bigint())
+        reasoningParts.push(choice.delta.reasoning_content)
+      }
       if (choice?.delta?.content) {
         if (first === null) first = String(process.hrtime.bigint())
         contentParts.push(choice.delta.content)
@@ -227,7 +238,8 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
     usage,
     finishReason,
     start,
-    first
+    first,
+    reasoning: reasoningParts.length ? reasoningParts.join('') : null
   })
 }

@@ -239,11 +251,12 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
  * usage: any,
  * finishReason: string | null,
  * start: string,
- * first: string | null
+ * first: string | null,
+ * reasoning?: string | null
  * }} p
  * @returns {import('#core/events.js').DoneEvent}
  */
-function finalize ({ envelope, content, toolCalls, usage, finishReason, start, first }) {
+function finalize ({ envelope, content, toolCalls, usage, finishReason, start, first, reasoning = null }) {
   const end = String(process.hrtime.bigint())
   const inputTokens = usage.prompt_tokens || 0
   const totalOutputTokens = usage.completion_tokens || 0
@@ -274,6 +287,7 @@ function finalize ({ envelope, content, toolCalls, usage, finishReason, start, f
   if (toolCalls && toolCalls.length > 0) {
     done.result.toolCalls = fromCerebrasToolCalls(toolCalls)
   }
+  if (reasoning) done.result.reasoning = reasoning
   return done
 }

@@ -349,11 +363,12 @@ function toChatMessages (prompt) {
         content: flattenText(m.content)
       }
     }
+    const reasoning = m.role === 'assistant' ? extractReasoning(m.content) : null
     if (m.role === 'assistant' && m.toolCalls?.length) {
       // Chat Completions assistant turn: optional `content` + the
      // `tool_calls` array. `arguments` must be a JSON string on
       // the wire.
-
+      const msg = {
         role: 'assistant',
         content: flattenText(m.content) || '',
         tool_calls: m.toolCalls.map(tc => ({
@@ -365,8 +380,12 @@ function toChatMessages (prompt) {
           }
         }))
       }
+      if (reasoning) msg.reasoning_content = reasoning
+      return msg
     }
-
+    const msg = { role: m.role, content: flattenText(m.content) }
+    if (reasoning) msg.reasoning_content = reasoning
+    return msg
   })
 }

@@ -383,6 +402,13 @@ function flattenText (content) {
   return content.filter(p => p.type === 'text' && p.text).map(p => p.text).join('\n')
 }

+/** @param {string | import('#core/envelope.js').MessagePart[]} content */
+function extractReasoning (content) {
+  if (typeof content === 'string' || !Array.isArray(content)) return null
+  const parts = content.filter(p => p.type === 'reasoning' && p.text).map(p => p.text)
+  return parts.length ? parts.join('\n') : null
+}
+
 /**
  * @param {any} args
  * @param {import('#core/envelope.js').MediaRef[]} images
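From the caller's side, the roundtrip these comments describe looks roughly like this; the message-part shapes mirror `extractReasoning` above, and all literal values are made up.

// Turn 1: the DoneEvent's result may now carry `reasoning`.
const done = { result: { content: '4', reasoning: 'User asked 2 + 2; compute the sum.' } }

// Store it as a `reasoning` content part so extractReasoning()
// recovers it when the history is serialized again.
const history = [
  { role: 'user', content: 'What is 2 + 2?' },
  {
    role: 'assistant',
    content: [
      { type: 'reasoning', text: done.result.reasoning },
      { type: 'text', text: done.result.content }
    ]
  }
]

// Turn 2: toChatMessages(history) emits the assistant turn with both
// `content` and `reasoning_content`, which DeepSeek V4 requires.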
package/js/session/adapters/fireworks.js
CHANGED

@@ -2,9 +2,9 @@
  * Fireworks adapter — OpenAI-compatible chat completions with
  * streaming, over api.fireworks.ai/inference/v1.
  *
- *
- *
- *
+ * Catalog `spec.model` carries the full upstream id (with the
+ * `accounts/fireworks/models/` prefix). The adapter forwards it
+ * verbatim — no normalization needed.
  *
  * Implementation uses the OpenAI SDK with a custom baseURL — the
  * wire shape is identical and the SDK's streaming iterator matches
@@ -18,7 +18,6 @@ import OpenAI from 'openai'
 import { runChatCompletions } from './_chat_completions.js'

 const BASE_URL = 'https://api.fireworks.ai/inference/v1'
-const MODEL_PREFIX = 'accounts/fireworks/models/'

 /**
  * @param {import('#core/envelope.js').CallEnvelope} envelope
@@ -32,12 +31,7 @@ export async function * fireworks (envelope, deps = {}) {
   })
   yield * runChatCompletions(envelope, client, {
     provider: 'fireworks',
-    stream: true,
-    mutateArgs: (env, args) => {
-      if (!args.model.includes('/')) {
-        args.model = `${MODEL_PREFIX}${args.model}`
-      }
-    }
+    stream: true
   }, {
     signal: deps.signal,
     log: deps.log,
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "mohdel",
-  "version": "0.97.1",
+  "version": "0.98.1",
   "license": "MIT",
   "author": {
     "name": "Christophe Le Bars",
@@ -87,7 +87,7 @@
     "@opentelemetry/exporter-trace-otlp-grpc": "^0.215.0",
     "@opentelemetry/sdk-node": "^0.215.0",
     "chalk": "^5.4.0",
-    "mohdel-thin-gate-linux-x64-gnu": "0.97.1"
+    "mohdel-thin-gate-linux-x64-gnu": "0.98.1"
   },
   "dependencies": {
     "@anthropic-ai/sdk": "^0.91.1",
@@ -96,12 +96,13 @@
     "@opentelemetry/api": "^1.9.1",
     "env-paths": "^4.0.0",
     "groq-sdk": "^1.1.2",
-    "openai": "^6.
+    "openai": "^6.35.0"
   },
   "lint-staged": {
     "*.{js,cjs}": "standard"
   },
   "devDependencies": {
+    "gpt-tokenizer": "^3.4.0",
     "lint-staged": "^16.4.0",
     "release-it": "^20.0.1",
     "standard": "^17.1.2",
package/src/lib/catalog/fireworks.js
CHANGED

@@ -1,7 +1,7 @@
 // Fireworks' /inference/v1/models is openai-compatible for listing.
-// Upstream IDs carry
-//
-//
+// Upstream IDs carry an `accounts/fireworks/models/` prefix. The mohdel
+// catalog key uses the bare suffix (`fireworks/<bare>`), but `spec.model`
+// preserves the full upstream id so the adapter sends it verbatim.
 const DEFAULT_BASE_URL = 'https://api.fireworks.ai/inference/v1'
 const FW_PREFIX = 'accounts/fireworks/models/'

@@ -29,7 +29,7 @@ export default (sdkConfig) => {
     getModelInfo: async (id) => {
       const m = (await load()).find(x => shortId(x.id) === id)
       if (!m) return null
-      const info = { model:
+      const info = { model: m.id }
       if (typeof m.context_length === 'number') info.contextTokenLimit = m.context_length
       if (typeof m.created === 'number') info.created = m.created
       return info
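A minimal sketch of the prefix stripping the header comment describes; this `shortId` body is an assumption, and the package's actual helper may differ.

const FW_PREFIX = 'accounts/fireworks/models/'

// Assumed implementation: catalog keys use the bare suffix.
const shortId = id => id.startsWith(FW_PREFIX) ? id.slice(FW_PREFIX.length) : id

shortId('accounts/fireworks/models/llama-v3p1-8b-instruct')
// -> 'llama-v3p1-8b-instruct' (catalog key: 'fireworks/llama-v3p1-8b-instruct')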
package/src/lib/index.js
CHANGED

@@ -723,4 +723,5 @@ const createModelProxy = (resolvedModelId, modelSpec, handlers, aliasOutputEffor

 export { silent } from './logger.js'
 export { loadCuratedCache, getCuratedCacheSnapshot } from './curated-cache.js'
+export { effectiveContextLimit } from './utils.js'
 export default mohdel
package/src/lib/providers.js
CHANGED

@@ -3,26 +3,34 @@ const providers = {
     sdk: 'anthropic',
     apiKeyEnv: 'ANTHROPIC_API_SK',
     createConfiguration: apiKey => ({ apiKey }),
-    creators: ['anthropic']
+    creators: ['anthropic'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'error'
   },
   cerebras: {
     sdk: 'cerebras',
     apiKeyEnv: 'CEREBRAS_API_SK',
     createConfiguration: apiKey => ({ apiKey }),
-    creators: ['openai', 'zai']
+    creators: ['openai', 'zai'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'accept'
   },
   deepseek: {
     sdk: 'openai',
     api: 'chatCompletions',
     apiKeyEnv: 'DEEPSEEK_API_SK',
     createConfiguration: apiKey => ({ baseURL: 'https://api.deepseek.com', apiKey }),
-    creators: ['deepseek']
+    creators: ['deepseek'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'accept'
   },
   gemini: {
     sdk: 'gemini',
     apiKeyEnv: 'GEMINI_API_SK',
     createConfiguration: apiKey => ({ apiKey }),
-    creators: ['google']
+    creators: ['google'],
+    contextSemantics: 'separate',
+    outputCapStrategy: 'accept'
   },
   groq: {
     sdk: 'groq',
@@ -41,7 +49,9 @@ const providers = {
     sdk: 'fireworks',
     apiKeyEnv: 'FIREWORKS_API_SK',
     createConfiguration: apiKey => ({ apiKey, baseURL: 'https://api.fireworks.ai/inference/v1' }),
-    creators: ['meta', 'alibaba']
+    creators: ['meta', 'alibaba'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'accept'
   },
   novita: {
     sdk: 'openai',
@@ -49,13 +59,17 @@ const providers = {
     imageHandler: 'novita',
     apiKeyEnv: 'NOVITA_API_SK',
     createConfiguration: apiKey => ({ apiKey, baseURL: 'https://api.novita.ai/openai' }),
-    creators: ['deepseek', 'openai', 'bfl']
+    creators: ['deepseek', 'openai', 'bfl'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'error'
   },
   openai: {
     sdk: 'openai',
     apiKeyEnv: 'OPENAI_API_SK',
     createConfiguration: apiKey => ({ apiKey }),
-    creators: ['openai']
+    creators: ['openai'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'accept'
   },
   openrouter: {
     sdk: 'openrouter',
@@ -78,7 +92,9 @@ const providers = {
     sdk: 'openai',
     apiKeyEnv: 'XAI_API_SK',
     createConfiguration: apiKey => ({ baseURL: 'https://api.x.ai/v1', apiKey }),
-    creators: ['xai']
+    creators: ['xai'],
+    contextSemantics: 'shared',
+    outputCapStrategy: 'accept'
   }
 }

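The diff does not show where the two new fields are consumed, so the following reading is an assumption: `contextSemantics: 'shared'` means input and output tokens draw from one context window, `'separate'` (Gemini) means the output limit is budgeted apart from it, and `outputCapStrategy` names whether an over-limit output request errors or is accepted upstream. Under that reading, a consumer might budget input like this; everything below except `effectiveContextLimit` (defined in the utils.js hunk) is hypothetical.

import { effectiveContextLimit } from 'mohdel'

// Hypothetical consumer; field semantics are inferred, not shown in this diff.
function inputTokenBudget (providerEntry, spec, plannedOutputTokens) {
  const limit = effectiveContextLimit(spec)
  if (providerEntry.contextSemantics === 'shared') {
    // Planned output eats into the same window as the prompt.
    return Math.max(0, limit - plannedOutputTokens)
  }
  // 'separate': output is accounted outside the context window.
  return limit
}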
package/src/lib/utils.js
CHANGED

@@ -3,6 +3,17 @@ export const sanitizeOutput = str => {
   return str.replace(/\0/g, '\uFFFD').trim()
 }

+// Practical input ceiling for a model spec: catalog `contextTokenLimit`
+// minus any empirically-derived `inputCeilingMargin` reserve the API
+// silently steals (reasoning floor, structural overhead, pricing-tier
+// cliffs). Reduces to `contextTokenLimit` when the margin field is
+// unset. Consumers computing safe input budgets should call this in
+// place of `spec.contextTokenLimit`.
+export const effectiveContextLimit = spec => {
+  if (!spec || spec.contextTokenLimit == null) return 0
+  return Math.max(0, spec.contextTokenLimit - (spec.inputCeilingMargin ?? 0))
+}
+
 export const translateModelInfo = (model, infoTranslate = {}) => {
   if (!model || typeof model !== 'object') return model

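A quick usage sketch of the new helper (re-exported from the package root per the index.js hunk); the spec values are illustrative.

import { effectiveContextLimit } from 'mohdel'

const spec = { contextTokenLimit: 131072, inputCeilingMargin: 4096 } // made-up numbers

effectiveContextLimit(spec)                         // 126976
effectiveContextLimit({ contextTokenLimit: 8192 })  // 8192 (no margin set)
effectiveContextLimit(null)                         // 0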