mohdel 0.99.0 → 0.101.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/js/session/adapters/_chat_completions.js +18 -4
- package/js/session/adapters/_dispatcher.js +29 -0
- package/js/session/adapters/anthropic.js +5 -1
- package/js/session/adapters/deepseek.js +6 -1
- package/js/session/adapters/fireworks.js +3 -1
- package/js/session/adapters/groq.js +5 -1
- package/js/session/adapters/mistral.js +6 -1
- package/js/session/adapters/openai.js +3 -1
- package/js/session/adapters/openrouter.js +3 -1
- package/js/session/adapters/xai.js +3 -1
- package/package.json +4 -3
package/js/session/adapters/_chat_completions.js
CHANGED

@@ -301,7 +301,9 @@ function buildRequest (envelope, spec, config) {
   const args = {
     model: spec?.model ?? bareOf(envelope.model),
     temperature: 0,
-    messages: toChatMessages(envelope.prompt)
+    messages: toChatMessages(envelope.prompt, {
+      reasoningPad: typeof spec?.reasoningContentPlaceholder === 'string' ? spec.reasoningContentPlaceholder : null
+    })
   }

   if (envelope.outputBudget !== undefined) {
@@ -351,9 +353,20 @@ function buildRequest (envelope, spec, config) {

 /**
  * @param {string | import('#core/envelope.js').Message[]} prompt
+ * @param {{ reasoningPad?: string | null }} [opts]
+ * `reasoningPad`: when a string (including `''`), assistant messages without
+ * extractable reasoning get `reasoning_content: <pad>` so providers that
+ * require the field on every assistant turn (e.g. deepseek-v4-pro) accept
+ * the resumed transcript. `null` / unset disables padding (default — most
+ * chat-completions providers don't require it).
+ *
+ * Scope: this is a chat-completions wire-format quirk. Gemini's
+ * `thoughtSignature` and Anthropic's `thinking` blocks live in their
+ * own adapters and have their own roundtrip rules.
  * @returns {Array<any>}
  */
-function toChatMessages (prompt) {
+function toChatMessages (prompt, opts = {}) {
+  const pad = typeof opts.reasoningPad === 'string' ? opts.reasoningPad : null
   if (typeof prompt === 'string') return [{ role: 'user', content: prompt }]
   return prompt.map(m => {
     if (m.role === 'tool') {
@@ -364,6 +377,7 @@ function toChatMessages (prompt) {
       }
     }
     const reasoning = m.role === 'assistant' ? extractReasoning(m.content) : null
+    const reasoningField = reasoning ?? (pad !== null && m.role === 'assistant' ? pad : null)
     if (m.role === 'assistant' && m.toolCalls?.length) {
       // Chat Completions assistant turn: optional `content` + the
       // `tool_calls` array. `arguments` must be a JSON string on
@@ -380,11 +394,11 @@ function toChatMessages (prompt) {
         }
       }))
     }
-      if (reasoning !== null) msg.reasoning_content = reasoning
+      if (reasoningField !== null) msg.reasoning_content = reasoningField
       return msg
     }
     const msg = { role: m.role, content: flattenText(m.content) }
-    if (reasoning !== null) msg.reasoning_content = reasoning
+    if (reasoningField !== null) msg.reasoning_content = reasoningField
     return msg
   })
 }
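For illustration, here is how the new `reasoningPad` option changes the wire messages on a resumed transcript. A minimal sketch, assuming message shapes inferred from the hunks above; `toChatMessages` is module-internal, so this shows behavior rather than a public API:

// Hypothetical resumed transcript: the assistant turn carries no
// extractable reasoning (extractReasoning returns null for it).
const prompt = [
  { role: 'user', content: 'List two primes.' },
  { role: 'assistant', content: '2 and 3' },
  { role: 'user', content: 'One more?' }
]

// Default (reasoningPad unset): the assistant turn is sent bare,
// e.g. { role: 'assistant', content: '2 and 3' }
toChatMessages(prompt)

// A spec with reasoningContentPlaceholder: '' makes buildRequest pass
// { reasoningPad: '' }, so the same turn becomes
// { role: 'assistant', content: '2 and 3', reasoning_content: '' },
// satisfying providers that require the field on every assistant turn.
toChatMessages(prompt, { reasoningPad: '' })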
package/js/session/adapters/_dispatcher.js
ADDED

@@ -0,0 +1,29 @@
+/**
+ * Shared undici dispatcher for streaming-LLM adapters.
+ *
+ * Default `globalThis.fetch` (undici on Node 18+) closes a stream when
+ * no body chunk has arrived for `bodyTimeout` ms — 300 000 ms (5 min)
+ * by default. Reasoning models stream zero bytes during their thinking
+ * phase, so any non-trivial task on a thinking-capable provider can
+ * blow that limit and surface as a `NET_ERROR / "terminated"` mid-run.
+ *
+ * We disable the inter-chunk idle timeout. Cancellation comes from
+ * three layers above us:
+ *   1. caller's `AbortSignal` (per-run timeout, user cancel)
+ *   2. SDK request-level timeout (OpenAI/Anthropic/Groq default 600 s)
+ *   3. provider-side stream limits
+ *
+ * Headers timeout stays bounded — connect + first response must be
+ * fast even when the body afterwards may be slow.
+ *
+ * Singleton: undici Agents own a connection pool, so we keep one per
+ * process and let it manage per-origin keep-alive across all adapters.
+ */
+import { Agent } from 'undici'
+
+let _agent = null
+
+export function streamingDispatcher () {
+  if (!_agent) _agent = new Agent({ bodyTimeout: 0, headersTimeout: 60_000 })
+  return _agent
+}
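As a usage note: the adapter diffs below hand this agent to each SDK through `fetchOptions`, but the same object also works with undici's `fetch` directly, which accepts a `dispatcher` in its init. A minimal sketch with a placeholder endpoint and payload:

import { streamingDispatcher } from './_dispatcher.js'

// Headers must still arrive within 60 s; after that, body chunks may
// be arbitrarily far apart without the agent terminating the stream.
const res = await fetch('https://api.example.com/v1/stream', {
  method: 'POST',
  body: JSON.stringify({ stream: true }),
  dispatcher: streamingDispatcher()
})
for await (const chunk of res.body) {
  // zero bytes can flow here for minutes during a thinking phase
  process.stdout.write(chunk)
}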
package/js/session/adapters/anthropic.js
CHANGED

@@ -34,6 +34,7 @@ import {
   fromAnthropicToolCalls,
   toToolChoice
 } from './_tools.js'
+import { streamingDispatcher } from './_dispatcher.js'

 /**
  * Approximate chars-per-token used to estimate Anthropic thinking
@@ -80,7 +81,10 @@ const ANTHROPIC_DEFAULT_MAX_TOKENS = 4096
  * @returns {AsyncGenerator<import('#core/events.js').Event>}
  */
 export async function * anthropic (envelope, deps = {}) {
-  const client = deps.client ?? new Anthropic({ apiKey: envelope.auth.key })
+  const client = deps.client ?? new Anthropic({
+    apiKey: envelope.auth.key,
+    fetchOptions: { dispatcher: streamingDispatcher() }
+  })
   const signal = deps.signal
   const log = deps.log
   const start = String(process.hrtime.bigint())
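One side effect of the shared `deps.client ?? new ...` pattern, visible here and in every adapter below: the dispatcher wiring only applies to the client the adapter constructs itself, so tests and callers with custom transport needs can still inject their own. A hypothetical sketch (the stub shape and `envelope` are placeholders, not the package's test API):

// The adapter never builds its own client when one is injected, so the
// shared agent is bypassed entirely.
const stubClient = { /* stub of the SDK surface the adapter calls */ }
for await (const event of anthropic(envelope, { client: stubClient })) {
  // assert on the emitted events
}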
package/js/session/adapters/deepseek.js
CHANGED

@@ -11,6 +11,7 @@
 import OpenAI from 'openai'

 import { runChatCompletions } from './_chat_completions.js'
+import { streamingDispatcher } from './_dispatcher.js'

 const BASE_URL = 'https://api.deepseek.com'

@@ -20,7 +21,11 @@ const BASE_URL = 'https://api.deepseek.com'
  * @returns {AsyncGenerator<import('#core/events.js').Event>}
  */
 export async function * deepseek (envelope, deps = {}) {
-  const client = deps.client ?? new OpenAI({ apiKey: envelope.auth.key, baseURL: envelope.auth.baseURL || BASE_URL })
+  const client = deps.client ?? new OpenAI({
+    apiKey: envelope.auth.key,
+    baseURL: envelope.auth.baseURL || BASE_URL,
+    fetchOptions: { dispatcher: streamingDispatcher() }
+  })
   yield * runChatCompletions(envelope, client, {
     provider: 'deepseek',
     parseDsml: true
package/js/session/adapters/fireworks.js
CHANGED

@@ -16,6 +16,7 @@
 import OpenAI from 'openai'

 import { runChatCompletions } from './_chat_completions.js'
+import { streamingDispatcher } from './_dispatcher.js'

 const BASE_URL = 'https://api.fireworks.ai/inference/v1'

@@ -27,7 +28,8 @@ const BASE_URL = 'https://api.fireworks.ai/inference/v1'
 export async function * fireworks (envelope, deps = {}) {
   const client = deps.client ?? new OpenAI({
     apiKey: envelope.auth.key,
-    baseURL: envelope.auth.baseURL || BASE_URL
+    baseURL: envelope.auth.baseURL || BASE_URL,
+    fetchOptions: { dispatcher: streamingDispatcher() }
   })
   yield * runChatCompletions(envelope, client, {
     provider: 'fireworks',
package/js/session/adapters/groq.js
CHANGED

@@ -7,6 +7,7 @@
 import Groq from 'groq-sdk'

 import { runChatCompletions } from './_chat_completions.js'
+import { streamingDispatcher } from './_dispatcher.js'

 /**
  * @param {import('#core/envelope.js').CallEnvelope} envelope
@@ -14,7 +15,10 @@ import { runChatCompletions } from './_chat_completions.js'
  * @returns {AsyncGenerator<import('#core/events.js').Event>}
  */
 export async function * groq (envelope, deps = {}) {
-  const client = deps.client ?? new Groq({ apiKey: envelope.auth.key })
+  const client = deps.client ?? new Groq({
+    apiKey: envelope.auth.key,
+    fetchOptions: { dispatcher: streamingDispatcher() }
+  })
   yield * runChatCompletions(envelope, client, { provider: 'groq' }, {
     signal: deps.signal,
     log: deps.log,
package/js/session/adapters/mistral.js
CHANGED

@@ -10,6 +10,7 @@
 import OpenAI from 'openai'

 import { runChatCompletions } from './_chat_completions.js'
+import { streamingDispatcher } from './_dispatcher.js'

 const BASE_URL = 'https://api.mistral.ai/v1'

@@ -19,7 +20,11 @@ const BASE_URL = 'https://api.mistral.ai/v1'
  * @returns {AsyncGenerator<import('#core/events.js').Event>}
  */
 export async function * mistral (envelope, deps = {}) {
-  const client = deps.client ?? new OpenAI({ apiKey: envelope.auth.key, baseURL: envelope.auth.baseURL || BASE_URL })
+  const client = deps.client ?? new OpenAI({
+    apiKey: envelope.auth.key,
+    baseURL: envelope.auth.baseURL || BASE_URL,
+    fetchOptions: { dispatcher: streamingDispatcher() }
+  })
   yield * runChatCompletions(envelope, client, {
     provider: 'mistral',
     toolChoiceFlavor: 'mistral'
package/js/session/adapters/openai.js
CHANGED

@@ -35,6 +35,7 @@ import {
   fromOpenAIToolCalls,
   toToolChoice
 } from './_tools.js'
+import { streamingDispatcher } from './_dispatcher.js'

 /**
  * @param {import('#core/envelope.js').CallEnvelope} envelope
@@ -44,7 +45,8 @@ import {
 export async function * openai (envelope, deps = {}) {
   const client = deps.client ?? new OpenAI({
     apiKey: envelope.auth.key,
-    ...(envelope.auth.baseURL ? { baseURL: envelope.auth.baseURL } : {})
+    ...(envelope.auth.baseURL ? { baseURL: envelope.auth.baseURL } : {}),
+    fetchOptions: { dispatcher: streamingDispatcher() }
   })
   const signal = deps.signal
   const log = deps.log
package/js/session/adapters/openrouter.js
CHANGED

@@ -12,6 +12,7 @@
 import OpenAI from 'openai'

 import { runChatCompletions } from './_chat_completions.js'
+import { streamingDispatcher } from './_dispatcher.js'

 const BASE_URL = 'https://openrouter.ai/api/v1'

@@ -47,7 +48,8 @@ export async function * openrouter (envelope, deps = {}) {
   const client = deps.client ?? new OpenAI({
     apiKey: envelope.auth.key,
     baseURL: envelope.auth.baseURL || BASE_URL,
-    defaultHeaders
+    defaultHeaders,
+    fetchOptions: { dispatcher: streamingDispatcher() }
   })

   yield * runChatCompletions(envelope, client, {
package/js/session/adapters/xai.js
CHANGED

@@ -10,6 +10,7 @@
 import OpenAI from 'openai'

 import { openai } from './openai.js'
+import { streamingDispatcher } from './_dispatcher.js'

 const BASE_URL = 'https://api.x.ai/v1'

@@ -21,7 +22,8 @@ const BASE_URL = 'https://api.x.ai/v1'
 export async function * xai (envelope, deps = {}) {
   const client = deps.client ?? new OpenAI({
     apiKey: envelope.auth.key,
-    baseURL: envelope.auth.baseURL || BASE_URL
+    baseURL: envelope.auth.baseURL || BASE_URL,
+    fetchOptions: { dispatcher: streamingDispatcher() }
   })
   yield * openai(envelope, { ...deps, client })
 }
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "mohdel",
-  "version": "0.99.0",
+  "version": "0.101.0",
   "license": "MIT",
   "author": {
     "name": "Christophe Le Bars",
@@ -87,7 +87,7 @@
     "@opentelemetry/exporter-trace-otlp-grpc": "^0.216.0",
     "@opentelemetry/sdk-node": "^0.216.0",
     "chalk": "^5.4.0",
-    "mohdel-thin-gate-linux-x64-gnu": "0.99.0"
+    "mohdel-thin-gate-linux-x64-gnu": "0.101.0"
   },
   "dependencies": {
     "@anthropic-ai/sdk": "^0.91.1",
@@ -96,7 +96,8 @@
     "@opentelemetry/api": "^1.9.1",
     "env-paths": "^4.0.0",
     "groq-sdk": "^1.1.2",
-    "openai": "^6.35.0"
+    "openai": "^6.35.0",
+    "undici": "^7.24.5"
   },
   "lint-staged": {
     "*.{js,cjs}": "standard"