mohdel 0.100.0 → 0.102.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/js/session/adapters/_chat_completions.js +5 -3
- package/js/session/adapters/_dispatcher.js +29 -0
- package/js/session/adapters/anthropic.js +5 -1
- package/js/session/adapters/deepseek.js +6 -1
- package/js/session/adapters/fireworks.js +3 -1
- package/js/session/adapters/groq.js +5 -1
- package/js/session/adapters/mistral.js +6 -1
- package/js/session/adapters/openai.js +9 -8
- package/js/session/adapters/openrouter.js +3 -1
- package/js/session/adapters/xai.js +3 -1
- package/package.json +9 -8
package/js/session/adapters/_chat_completions.js
CHANGED

@@ -330,14 +330,16 @@ function buildRequest (envelope, spec, config) {
 
   if (spec.thinkingEffortLevels) {
     const effort = envelope.outputEffort ?? spec.defaultThinkingEffort ?? 'low'
-    if (effort && effort
+    if (effort && spec.thinkingEffortLevels[effort] != null) {
       const headroom = spec.thinkingEffortLevels[effort]
       if (args.max_tokens && typeof headroom === 'number') {
         args.max_tokens += headroom
       }
-
+      // When reasoning is disabled ('none') the model accepts
+      // temperature again — only strip it when reasoning is on.
+      if (effort !== 'none') delete args.temperature
       if (config.reasoningField === 'cerebras_zai' && /zai/i.test(bareOf(envelope.model))) {
-        args.disable_reasoning =
+        args.disable_reasoning = (effort === 'none')
       } else {
         args.reasoning_effort = effort
       }
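Note: a sketch of what this hunk does to a request, using a hypothetical spec. The level names and headroom values below are invented for illustration; only the field names (`thinkingEffortLevels`, `defaultThinkingEffort`, `args.max_tokens`, `args.reasoning_effort`) come from the diff.

// Hypothetical spec — levels and values invented for illustration.
const spec = {
  thinkingEffortLevels: { none: 0, low: 2048, medium: 8192, high: 24576 },
  defaultThinkingEffort: 'low'
}
// With envelope.outputEffort = 'medium' and args.max_tokens = 4096,
// the patched logic produces:
//   args.max_tokens       → 4096 + 8192 = 12288
//   args.reasoning_effort → 'medium'
//   args.temperature      → deleted (effort !== 'none')
// With outputEffort = 'none', temperature survives and, on cerebras_zai
// ZAI models, args.disable_reasoning → true.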
package/js/session/adapters/_dispatcher.js
ADDED

@@ -0,0 +1,29 @@
+/**
+ * Shared undici dispatcher for streaming-LLM adapters.
+ *
+ * Default `globalThis.fetch` (undici on Node 18+) closes a stream when
+ * no body chunk has arrived for `bodyTimeout` ms — 300 000 ms (5 min)
+ * by default. Reasoning models stream zero bytes during their thinking
+ * phase, so any non-trivial task on a thinking-capable provider can
+ * blow that limit and surface as a `NET_ERROR / "terminated"` mid-run.
+ *
+ * We disable the inter-chunk idle timeout. Cancellation comes from
+ * three layers above us:
+ *   1. caller's `AbortSignal` (per-run timeout, user cancel)
+ *   2. SDK request-level timeout (OpenAI/Anthropic/Groq default 600 s)
+ *   3. provider-side stream limits
+ *
+ * Headers timeout stays bounded — connect + first response must be
+ * fast even when the body afterwards may be slow.
+ *
+ * Singleton: undici Agents own a connection pool, so we keep one per
+ * process and let it manage per-origin keep-alive across all adapters.
+ */
+import { Agent } from 'undici'
+
+let _agent = null
+
+export function streamingDispatcher () {
+  if (!_agent) _agent = new Agent({ bodyTimeout: 0, headersTimeout: 60_000 })
+  return _agent
+}
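Note: the agent above is handed to each SDK through `fetchOptions` in the hunks below, but the same option also works with Node's global fetch, which accepts undici's per-request `dispatcher` extension on Node 18+. A minimal sketch (the endpoint is hypothetical):

import { streamingDispatcher } from './_dispatcher.js'

// Only calls that opt in lose the 5-minute inter-chunk timeout.
const res = await fetch('https://api.example.com/v1/stream', { // hypothetical endpoint
  method: 'POST',
  dispatcher: streamingDispatcher()
})
for await (const chunk of res.body) {
  process.stdout.write(chunk) // chunks may be minutes apart while the model thinks
}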
package/js/session/adapters/anthropic.js
CHANGED

@@ -34,6 +34,7 @@ import {
   fromAnthropicToolCalls,
   toToolChoice
 } from './_tools.js'
+import { streamingDispatcher } from './_dispatcher.js'
 
 /**
  * Approximate chars-per-token used to estimate Anthropic thinking

@@ -80,7 +81,10 @@ const ANTHROPIC_DEFAULT_MAX_TOKENS = 4096
  * @returns {AsyncGenerator<import('#core/events.js').Event>}
  */
 export async function * anthropic (envelope, deps = {}) {
-  const client = deps.client ?? new Anthropic({
+  const client = deps.client ?? new Anthropic({
+    apiKey: envelope.auth.key,
+    fetchOptions: { dispatcher: streamingDispatcher() }
+  })
   const signal = deps.signal
   const log = deps.log
   const start = String(process.hrtime.bigint())
package/js/session/adapters/deepseek.js
CHANGED

@@ -11,6 +11,7 @@
 import OpenAI from 'openai'
 
 import { runChatCompletions } from './_chat_completions.js'
+import { streamingDispatcher } from './_dispatcher.js'
 
 const BASE_URL = 'https://api.deepseek.com'
 

@@ -20,7 +21,11 @@ const BASE_URL = 'https://api.deepseek.com'
  * @returns {AsyncGenerator<import('#core/events.js').Event>}
  */
 export async function * deepseek (envelope, deps = {}) {
-  const client = deps.client ?? new OpenAI({
+  const client = deps.client ?? new OpenAI({
+    apiKey: envelope.auth.key,
+    baseURL: envelope.auth.baseURL || BASE_URL,
+    fetchOptions: { dispatcher: streamingDispatcher() }
+  })
   yield * runChatCompletions(envelope, client, {
     provider: 'deepseek',
     parseDsml: true
package/js/session/adapters/fireworks.js
CHANGED

@@ -16,6 +16,7 @@
 import OpenAI from 'openai'
 
 import { runChatCompletions } from './_chat_completions.js'
+import { streamingDispatcher } from './_dispatcher.js'
 
 const BASE_URL = 'https://api.fireworks.ai/inference/v1'
 

@@ -27,7 +28,8 @@ const BASE_URL = 'https://api.fireworks.ai/inference/v1'
 export async function * fireworks (envelope, deps = {}) {
   const client = deps.client ?? new OpenAI({
     apiKey: envelope.auth.key,
-    baseURL: envelope.auth.baseURL || BASE_URL
+    baseURL: envelope.auth.baseURL || BASE_URL,
+    fetchOptions: { dispatcher: streamingDispatcher() }
   })
   yield * runChatCompletions(envelope, client, {
     provider: 'fireworks',
package/js/session/adapters/groq.js
CHANGED

@@ -7,6 +7,7 @@
 import Groq from 'groq-sdk'
 
 import { runChatCompletions } from './_chat_completions.js'
+import { streamingDispatcher } from './_dispatcher.js'
 
 /**
  * @param {import('#core/envelope.js').CallEnvelope} envelope

@@ -14,7 +15,10 @@ import { runChatCompletions } from './_chat_completions.js'
  * @returns {AsyncGenerator<import('#core/events.js').Event>}
  */
 export async function * groq (envelope, deps = {}) {
-  const client = deps.client ?? new Groq({
+  const client = deps.client ?? new Groq({
+    apiKey: envelope.auth.key,
+    fetchOptions: { dispatcher: streamingDispatcher() }
+  })
   yield * runChatCompletions(envelope, client, { provider: 'groq' }, {
     signal: deps.signal,
     log: deps.log,
package/js/session/adapters/mistral.js
CHANGED

@@ -10,6 +10,7 @@
 import OpenAI from 'openai'
 
 import { runChatCompletions } from './_chat_completions.js'
+import { streamingDispatcher } from './_dispatcher.js'
 
 const BASE_URL = 'https://api.mistral.ai/v1'
 

@@ -19,7 +20,11 @@ const BASE_URL = 'https://api.mistral.ai/v1'
  * @returns {AsyncGenerator<import('#core/events.js').Event>}
  */
 export async function * mistral (envelope, deps = {}) {
-  const client = deps.client ?? new OpenAI({
+  const client = deps.client ?? new OpenAI({
+    apiKey: envelope.auth.key,
+    baseURL: envelope.auth.baseURL || BASE_URL,
+    fetchOptions: { dispatcher: streamingDispatcher() }
+  })
   yield * runChatCompletions(envelope, client, {
     provider: 'mistral',
     toolChoiceFlavor: 'mistral'
package/js/session/adapters/openai.js
CHANGED

@@ -35,6 +35,7 @@ import {
   fromOpenAIToolCalls,
   toToolChoice
 } from './_tools.js'
+import { streamingDispatcher } from './_dispatcher.js'
 
 /**
  * @param {import('#core/envelope.js').CallEnvelope} envelope

@@ -44,7 +45,8 @@ import {
 export async function * openai (envelope, deps = {}) {
   const client = deps.client ?? new OpenAI({
     apiKey: envelope.auth.key,
-    ...(envelope.auth.baseURL ? { baseURL: envelope.auth.baseURL } : {})
+    ...(envelope.auth.baseURL ? { baseURL: envelope.auth.baseURL } : {}),
+    fetchOptions: { dispatcher: streamingDispatcher() }
   })
   const signal = deps.signal
   const log = deps.log

@@ -223,19 +225,18 @@ function buildRequest (envelope, input, instructions) {
 
   // Thinking: when the spec has `thinkingEffortLevels`, set
   // `reasoning.effort` and add the thinking-budget headroom on top
-  // of the user's `outputBudget`.
-  //
-  //
+  // of the user's `outputBudget`. Both OpenAI (gpt-5.x) and xAI
+  // (grok-4.3+) accept the same `reasoning: { effort }` shape on
+  // the Responses API, including the literal value 'none' to
+  // disable reasoning entirely.
   if (spec?.thinkingEffortLevels) {
     const effort = envelope.outputEffort ?? spec.defaultThinkingEffort ?? 'low'
-    if (effort && effort
+    if (effort && spec.thinkingEffortLevels[effort] != null) {
       const headroom = spec.thinkingEffortLevels[effort]
       if (request.max_output_tokens && typeof headroom === 'number') {
         request.max_output_tokens += headroom
       }
-
-      request.reasoning = { effort }
-    }
+      request.reasoning = { effort }
     }
   }
 
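Note: a hypothetical Responses API request that this hunk could build. The model id and token numbers are invented; only the field names and the `reasoning: { effort }` shape come from the diff.

const request = {
  model: 'grok-4.3',                            // hypothetical model id
  input: [{ role: 'user', content: 'Hello' }],
  max_output_tokens: 4096 + 8192,               // outputBudget + thinking headroom
  reasoning: { effort: 'medium' }               // or 'none' to disable reasoning
}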
package/js/session/adapters/openrouter.js
CHANGED

@@ -12,6 +12,7 @@
 import OpenAI from 'openai'
 
 import { runChatCompletions } from './_chat_completions.js'
+import { streamingDispatcher } from './_dispatcher.js'
 
 const BASE_URL = 'https://openrouter.ai/api/v1'
 

@@ -47,7 +48,8 @@ export async function * openrouter (envelope, deps = {}) {
   const client = deps.client ?? new OpenAI({
     apiKey: envelope.auth.key,
     baseURL: envelope.auth.baseURL || BASE_URL,
-    defaultHeaders
+    defaultHeaders,
+    fetchOptions: { dispatcher: streamingDispatcher() }
   })
 
   yield * runChatCompletions(envelope, client, {
package/js/session/adapters/xai.js
CHANGED

@@ -10,6 +10,7 @@
 import OpenAI from 'openai'
 
 import { openai } from './openai.js'
+import { streamingDispatcher } from './_dispatcher.js'
 
 const BASE_URL = 'https://api.x.ai/v1'
 

@@ -21,7 +22,8 @@ const BASE_URL = 'https://api.x.ai/v1'
 export async function * xai (envelope, deps = {}) {
   const client = deps.client ?? new OpenAI({
     apiKey: envelope.auth.key,
-    baseURL: envelope.auth.baseURL || BASE_URL
+    baseURL: envelope.auth.baseURL || BASE_URL,
+    fetchOptions: { dispatcher: streamingDispatcher() }
   })
   yield * openai(envelope, { ...deps, client })
 }
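Note: xai.js also shows the injection seam every adapter shares, since a caller-supplied `deps.client` wins over the dispatcher-wired default. A sketch of overriding it with a custom request-level timeout (the envelope shape is abbreviated and hypothetical; see the CallEnvelope typedef referenced above):

import OpenAI from 'openai'
import { xai } from './xai.js'

// A caller-built client skips the shared dispatcher entirely; `timeout`
// is the SDK request-level timeout, layer 2 in _dispatcher.js's list.
const client = new OpenAI({
  apiKey: process.env.XAI_API_KEY,
  baseURL: 'https://api.x.ai/v1',
  timeout: 600_000
})
const envelope = { model: 'grok-4.3', auth: { key: process.env.XAI_API_KEY } } // abbreviated
for await (const event of xai(envelope, { client })) {
  console.log(event)
}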
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "mohdel",
-  "version": "0.
+  "version": "0.102.0",
   "license": "MIT",
   "author": {
     "name": "Christophe Le Bars",

@@ -84,26 +84,27 @@
   },
   "optionalDependencies": {
     "@clack/prompts": "^1.3.0",
-    "@opentelemetry/exporter-trace-otlp-grpc": "^0.
-    "@opentelemetry/sdk-node": "^0.
+    "@opentelemetry/exporter-trace-otlp-grpc": "^0.217.0",
+    "@opentelemetry/sdk-node": "^0.217.0",
     "chalk": "^5.4.0",
-    "mohdel-thin-gate-linux-x64-gnu": "0.
+    "mohdel-thin-gate-linux-x64-gnu": "0.102.0"
   },
   "dependencies": {
-    "@anthropic-ai/sdk": "^0.
+    "@anthropic-ai/sdk": "^0.95.1",
     "@cerebras/cerebras_cloud_sdk": "^1.61.1",
-    "@google/genai": "^
+    "@google/genai": "^2.0.0",
     "@opentelemetry/api": "^1.9.1",
     "env-paths": "^4.0.0",
     "groq-sdk": "^1.1.2",
-    "openai": "^6.
+    "openai": "^6.37.0",
+    "undici": "^7.24.5"
   },
   "lint-staged": {
     "*.{js,cjs}": "standard"
   },
   "devDependencies": {
     "gpt-tokenizer": "^3.4.0",
-    "lint-staged": "^
+    "lint-staged": "^17.0.3",
     "release-it": "^20.0.1",
     "standard": "^17.1.2",
     "vitest": "^4.1.5"