mohdel 0.105.1 → 0.106.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/js/core/envelope.js +9 -0
- package/js/core/events.js +25 -3
- package/js/factory/bridge.js +2 -0
- package/js/session/_idle_heartbeat.js +77 -0
- package/js/session/adapters/_errors.js +4 -0
- package/js/session/adapters/anthropic.js +17 -3
- package/js/session/adapters/index.js +3 -1
- package/js/session/adapters/xiaomi.js +35 -0
- package/js/session/run.js +13 -1
- package/package.json +2 -2
- package/src/lib/creators.js +5 -0
- package/src/lib/providers.js +9 -0
package/js/core/envelope.js
CHANGED
|
@@ -57,6 +57,14 @@
|
|
|
57
57
|
* Opaque per-user ID passed to the provider for tracking / abuse
|
|
58
58
|
* monitoring.
|
|
59
59
|
*
|
|
60
|
+
* @property {number} [idleHeartbeatMs]
|
|
61
|
+
* When set, the session emits a synthetic `{type:'idle', sinceMs}`
|
|
62
|
+
* event whenever the adapter has been silent for at least this
|
|
63
|
+
* many milliseconds, and re-emits every `idleHeartbeatMs` while
|
|
64
|
+
* the gap persists. The consumer decides whether to act (log,
|
|
65
|
+
* bump a watchdog, abort via its own AbortSignal). Mohdel never
|
|
66
|
+
* aborts on its own. Omitting the field disables the heartbeat.
|
|
67
|
+
*
|
|
60
68
|
* @property {Object<string, object>} [providerOptions]
|
|
61
69
|
* Namespaced bag of provider-specific knobs that don't fit the
|
|
62
70
|
* shared envelope. Keys are provider names; values are arbitrary
|
|
@@ -139,5 +147,6 @@ export const ENVELOPE_FIELDS = Object.freeze([
|
|
|
139
147
|
'toolChoice',
|
|
140
148
|
'parallelToolCalls',
|
|
141
149
|
'identifier',
|
|
150
|
+
'idleHeartbeatMs',
|
|
142
151
|
'providerOptions'
|
|
143
152
|
])
|
package/js/core/events.js
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Event union for the session → thin-gate → client stream.
|
|
3
3
|
*
|
|
4
|
-
* Three events:
|
|
4
|
+
* Four events:
|
|
5
5
|
* - `delta` — streaming chunk (message text or function-call args).
|
|
6
|
+
* - `idle` — synthetic event emitted when the adapter has been
|
|
7
|
+
* silent for at least `idleHeartbeatMs` (opt-in via
|
|
8
|
+
* `CallEnvelope.idleHeartbeatMs`). Carries the elapsed
|
|
9
|
+
* gap so the consumer can drive its own stall policy.
|
|
10
|
+
* Never terminal; further events may follow.
|
|
6
11
|
* - `done` — terminal with the full `AnswerResult`.
|
|
7
12
|
* - `error` — wire-format error (serializable `TypedError`).
|
|
8
13
|
*
|
|
@@ -12,7 +17,7 @@
|
|
|
12
17
|
*/
|
|
13
18
|
|
|
14
19
|
/**
|
|
15
|
-
* @typedef {(DeltaEvent | DoneEvent | ErrorEvent)} Event
|
|
20
|
+
* @typedef {(DeltaEvent | IdleEvent | DoneEvent | ErrorEvent)} Event
|
|
16
21
|
*/
|
|
17
22
|
|
|
18
23
|
/**
|
|
@@ -23,6 +28,23 @@
|
|
|
23
28
|
* @property {DeltaChunk} delta
|
|
24
29
|
*/
|
|
25
30
|
|
|
31
|
+
/**
|
|
32
|
+
* Synthetic heartbeat emitted by `session/run.js` while the adapter
|
|
33
|
+
* is silent — i.e. no real event (delta / done / error / tool call)
|
|
34
|
+
* for at least `idleHeartbeatMs`. The consumer decides what to do
|
|
35
|
+
* (log, bump a watchdog, abort via its own AbortSignal). Mohdel
|
|
36
|
+
* never aborts on its own.
|
|
37
|
+
*
|
|
38
|
+
* Re-emitted every `idleHeartbeatMs` while the gap persists; the
|
|
39
|
+
* timer resets on the next real event.
|
|
40
|
+
*
|
|
41
|
+
* @typedef {object} IdleEvent
|
|
42
|
+
* @property {'idle'} type
|
|
43
|
+
* @property {number} sinceMs
|
|
44
|
+
* Milliseconds since the last real event (or since the call
|
|
45
|
+
* started, when no event has arrived yet).
|
|
46
|
+
*/
|
|
47
|
+
|
|
26
48
|
/**
|
|
27
49
|
* @typedef {object} DeltaChunk
|
|
28
50
|
* @property {('message'|'function_call')} type
|
|
@@ -109,7 +131,7 @@
|
|
|
109
131
|
* ToolCall back unchanged so the adapter can re-attach it.
|
|
110
132
|
*/
|
|
111
133
|
|
|
112
|
-
export const EVENT_TYPES = Object.freeze(['delta', 'done', 'error'])
|
|
134
|
+
export const EVENT_TYPES = Object.freeze(['delta', 'idle', 'done', 'error'])
|
|
113
135
|
|
|
114
136
|
/**
|
|
115
137
|
* @param {unknown} x
|
package/js/factory/bridge.js
CHANGED
|
@@ -51,6 +51,7 @@ import { createRealtimeDeltaBuffer } from '../../src/lib/utils.js'
|
|
|
51
51
|
* | `images` / `videos` / `cache` | flat fields on envelope |
|
|
52
52
|
* | `tools` / `toolChoice` / `parallelToolCalls` | flat fields on envelope |
|
|
53
53
|
* | `identifier` | envelope.identifier (adapter maps to provider field) |
|
|
54
|
+
* | `idleHeartbeatMs` | envelope.idleHeartbeatMs (session emits idle events) |
|
|
54
55
|
* | `realtimeHandler` / `bufferOpts` | drained via createRealtimeDeltaBuffer |
|
|
55
56
|
* | `providerOrder` / `providerAllow` / `providerDeny`| envelope.providerOptions.openrouter |
|
|
56
57
|
* | `traceparent` / `baggage` | envelope transport metadata |
|
|
@@ -183,6 +184,7 @@ function toEnvelope ({ modelKey, configuration, prompt, options }) {
|
|
|
183
184
|
if (options.toolChoice) envelope.toolChoice = options.toolChoice
|
|
184
185
|
if (options.parallelToolCalls === false) envelope.parallelToolCalls = false
|
|
185
186
|
if (options.identifier) envelope.identifier = options.identifier
|
|
187
|
+
if (options.idleHeartbeatMs !== undefined) envelope.idleHeartbeatMs = options.idleHeartbeatMs
|
|
186
188
|
|
|
187
189
|
// OpenRouter routing prefs ride in their own bag to keep the flat
|
|
188
190
|
// envelope clean. The openrouter adapter reads this via
|
package/js/session/_idle_heartbeat.js
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Idle-heartbeat wrapper for an adapter's event stream.
|
|
3
|
+
*
|
|
4
|
+
* `withIdleHeartbeat(source, idleMs)` consumes `source` and re-emits
|
|
5
|
+
* every event it yields. When `source` is silent for at least
|
|
6
|
+
* `idleMs`, an `{type:'idle', sinceMs}` event is yielded; while the
|
|
7
|
+
* silence persists, further idle events are yielded every `idleMs`.
|
|
8
|
+
* The timer resets on every real event.
|
|
9
|
+
*
|
|
10
|
+
* Idle events are advisory — mohdel does not abort the call on its
|
|
11
|
+
* own. Consumers decide whether to log, bump a watchdog, or trigger
|
|
12
|
+
* an external AbortSignal.
|
|
13
|
+
*
|
|
14
|
+
* The in-flight `iterator.next()` is reused across timer firings so
|
|
15
|
+
* no real event is dropped: when the timer wins the race, the
|
|
16
|
+
* underlying promise stays pending and the next loop iteration
|
|
17
|
+
* attaches a fresh race to the same promise.
|
|
18
|
+
*
|
|
19
|
+
* @module session/_idle_heartbeat
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* @template T
|
|
24
|
+
* @param {AsyncIterable<T>} source
|
|
25
|
+
* @param {number | undefined | null} idleMs
|
|
26
|
+
* When falsy or non-positive, the source is yielded through
|
|
27
|
+
* unchanged (no timer is set up).
|
|
28
|
+
* @returns {AsyncGenerator<T | import('#core/events.js').IdleEvent>}
|
|
29
|
+
*/
|
|
30
|
+
export async function * withIdleHeartbeat (source, idleMs) {
|
|
31
|
+
if (!idleMs || idleMs <= 0) {
|
|
32
|
+
yield * source
|
|
33
|
+
return
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
const iter = source[Symbol.asyncIterator]()
|
|
37
|
+
let lastAt = Date.now()
|
|
38
|
+
/** @type {Promise<IteratorResult<T>> | null} */
|
|
39
|
+
let pending = null
|
|
40
|
+
|
|
41
|
+
try {
|
|
42
|
+
while (true) {
|
|
43
|
+
if (!pending) pending = iter.next()
|
|
44
|
+
|
|
45
|
+
/** @type {NodeJS.Timeout | undefined} */
|
|
46
|
+
let timer
|
|
47
|
+
/** @type {{idle: true} | {real: IteratorResult<T>} | {err: unknown}} */
|
|
48
|
+
const winner = await new Promise(resolve => {
|
|
49
|
+
timer = setTimeout(() => resolve({ idle: true }), idleMs)
|
|
50
|
+
pending.then(
|
|
51
|
+
r => resolve({ real: r }),
|
|
52
|
+
e => resolve({ err: e })
|
|
53
|
+
)
|
|
54
|
+
})
|
|
55
|
+
clearTimeout(timer)
|
|
56
|
+
|
|
57
|
+
if ('idle' in winner) {
|
|
58
|
+
yield /** @type {import('#core/events.js').IdleEvent} */ ({
|
|
59
|
+
type: 'idle',
|
|
60
|
+
sinceMs: Date.now() - lastAt
|
|
61
|
+
})
|
|
62
|
+
continue
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
pending = null
|
|
66
|
+
if ('err' in winner) throw winner.err
|
|
67
|
+
if (winner.real.done) return
|
|
68
|
+
lastAt = Date.now()
|
|
69
|
+
yield winner.real.value
|
|
70
|
+
}
|
|
71
|
+
} finally {
|
|
72
|
+
// Best-effort cleanup if the consumer abandons us mid-stream.
|
|
73
|
+
if (typeof iter.return === 'function') {
|
|
74
|
+
try { await iter.return() } catch { /* ignore */ }
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
}
|
package/js/session/adapters/_errors.js
CHANGED
|
@@ -239,6 +239,10 @@ const providerOverrides = {
|
|
|
239
239
|
if (code === 'rate_limit_exceeded') return tierResult(detail)
|
|
240
240
|
return undefined
|
|
241
241
|
},
|
|
242
|
+
xiaomi (_err, code, detail) {
|
|
243
|
+
if (code === 'rate_limit_exceeded') return tierResult(detail)
|
|
244
|
+
return undefined
|
|
245
|
+
},
|
|
242
246
|
novita (_err, code, detail) {
|
|
243
247
|
if (code === 'rate_limit_exceeded') return tierResult(detail)
|
|
244
248
|
return undefined
|
package/js/session/adapters/anthropic.js
CHANGED
|
@@ -216,13 +216,27 @@ export async function * anthropic (envelope, deps = {}) {
|
|
|
216
216
|
// output_tokens and what actually streamed as visible output (text +
|
|
217
217
|
// tool input JSON) — catches redacted_thinking blocks (opus 4.7 default)
|
|
218
218
|
// that consume output tokens but emit no streaming deltas.
|
|
219
|
+
//
|
|
220
|
+
// When the caller explicitly disabled thinking via `outputEffort: 'none'`,
|
|
221
|
+
// we did NOT send `request.thinking` (see buildRequest) — Anthropic
|
|
222
|
+
// won't emit thinking content, redacted or otherwise — so the fallback
|
|
223
|
+
// heuristic would only attribute the natural chars/4 estimation gap
|
|
224
|
+
// (Anthropic packs denser than 4 chars/token on most content) to a
|
|
225
|
+
// non-existent thinking budget. Trust the explicit opt-out and report
|
|
226
|
+
// zero.
|
|
219
227
|
const streamedOutput = currentOutput()
|
|
220
228
|
const streamedOutputChars = streamedOutput.length +
|
|
221
229
|
[...toolBlocks.values()].reduce((s, b) => s + b.inputJson.length, 0)
|
|
222
230
|
const streamedOutputTokens = Math.ceil(streamedOutputChars / ANTHROPIC_THINKING_CHARS_PER_TOKEN)
|
|
223
|
-
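const estimatedThinkingTokens = thinkingChars > 0
|
|
224
|
-
? Math.min(Math.ceil(thinkingChars / ANTHROPIC_THINKING_CHARS_PER_TOKEN), outputTokens)
|
|
225
|
-
: Math.max(0, outputTokens - streamedOutputTokens)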
|
|
231
|
+
const thinkingDisabled = envelope.outputEffort === 'none'
|
|
232
|
+
let estimatedThinkingTokens
|
|
233
|
+
if (thinkingDisabled) {
|
|
234
|
+
estimatedThinkingTokens = 0
|
|
235
|
+
} else if (thinkingChars > 0) {
|
|
236
|
+
estimatedThinkingTokens = Math.min(Math.ceil(thinkingChars / ANTHROPIC_THINKING_CHARS_PER_TOKEN), outputTokens)
|
|
237
|
+
} else {
|
|
238
|
+
estimatedThinkingTokens = Math.max(0, outputTokens - streamedOutputTokens)
|
|
239
|
+
}
|
|
226
240
|
const messageOutputTokens = Math.max(0, outputTokens - estimatedThinkingTokens)
|
|
227
241
|
|
|
228
242
|
/** @type {import('#core/events.js').DoneEvent} */
|
package/js/session/adapters/index.js
CHANGED
|
@@ -24,6 +24,7 @@ import { novita } from './novita.js'
|
|
|
24
24
|
import { openai } from './openai.js'
|
|
25
25
|
import { openrouter } from './openrouter.js'
|
|
26
26
|
import { xai } from './xai.js'
|
|
27
|
+
import { xiaomi } from './xiaomi.js'
|
|
27
28
|
|
|
28
29
|
export const adapters = Object.freeze({
|
|
29
30
|
anthropic,
|
|
@@ -38,7 +39,8 @@ export const adapters = Object.freeze({
|
|
|
38
39
|
novita,
|
|
39
40
|
openai,
|
|
40
41
|
openrouter,
|
|
41
|
-
xai
|
|
42
|
+
xai,
|
|
43
|
+
xiaomi
|
|
42
44
|
})
|
|
43
45
|
|
|
44
46
|
/**
|
package/js/session/adapters/xiaomi.js
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Xiaomi MiMo adapter — OpenAI-compatible chat completions against
|
|
3
|
+
* api.xiaomimimo.com. Standard `reasoning_content` field handling
|
|
4
|
+
* is provided by the shared core; this module just wires the base
|
|
5
|
+
* URL and provider tag.
|
|
6
|
+
*
|
|
7
|
+
* @module session/adapters/xiaomi
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import OpenAI from 'openai'
|
|
11
|
+
|
|
12
|
+
import { runChatCompletions } from './_chat_completions.js'
|
|
13
|
+
import { streamingDispatcher } from './_dispatcher.js'
|
|
14
|
+
|
|
15
|
+
const BASE_URL = 'https://api.xiaomimimo.com/v1'
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* @param {import('#core/envelope.js').CallEnvelope} envelope
|
|
19
|
+
* @param {{client?: any, signal?: AbortSignal, log?: any, span?: any}} [deps]
|
|
20
|
+
* @returns {AsyncGenerator<import('#core/events.js').Event>}
|
|
21
|
+
*/
|
|
22
|
+
export async function * xiaomi (envelope, deps = {}) {
|
|
23
|
+
const client = deps.client ?? new OpenAI({
|
|
24
|
+
apiKey: envelope.auth.key,
|
|
25
|
+
baseURL: envelope.auth.baseURL || BASE_URL,
|
|
26
|
+
fetchOptions: { dispatcher: streamingDispatcher() }
|
|
27
|
+
})
|
|
28
|
+
yield * runChatCompletions(envelope, client, {
|
|
29
|
+
provider: 'xiaomi'
|
|
30
|
+
}, {
|
|
31
|
+
signal: deps.signal,
|
|
32
|
+
log: deps.log,
|
|
33
|
+
span: deps.span
|
|
34
|
+
})
|
|
35
|
+
}
|
package/js/session/run.js
CHANGED
|
@@ -29,6 +29,7 @@ import { getProviderLimits } from './adapters/_providers.js'
|
|
|
29
29
|
import { providerOf, catalogKey, effortOf } from '#core/model-id.js'
|
|
30
30
|
import * as defaultCooldown from './_cooldown.js'
|
|
31
31
|
import * as defaultLimiter from './_rate_limiter.js'
|
|
32
|
+
import { withIdleHeartbeat } from './_idle_heartbeat.js'
|
|
32
33
|
import { logger as defaultLogger } from './_logger.js'
|
|
33
34
|
import {
|
|
34
35
|
startSpan,
|
|
@@ -175,7 +176,18 @@ export async function * run (envelope, {
|
|
|
175
176
|
let lastFrameAt = startedAt
|
|
176
177
|
let maxInterFrameMs = 0
|
|
177
178
|
try {
|
|
178
|
-
for await (const ev of adapter(envelope, { signal, log, span })) {
|
|
179
|
+
const adapterStream = adapter(envelope, { signal, log, span })
|
|
180
|
+
const heartbeated = withIdleHeartbeat(adapterStream, envelope.idleHeartbeatMs)
|
|
181
|
+
for await (const ev of heartbeated) {
|
|
182
|
+
// Idle events are synthetic — they're emitted *because* nothing
|
|
183
|
+
// else has happened. Don't reset the inter-frame clock (that
|
|
184
|
+
// would mask the very stall the heartbeat is reporting) and
|
|
185
|
+
// don't run them through the terminal/cooldown branches.
|
|
186
|
+
if (ev.type === 'idle') {
|
|
187
|
+
yield ev
|
|
188
|
+
continue
|
|
189
|
+
}
|
|
190
|
+
|
|
179
191
|
const now = Date.now()
|
|
180
192
|
const gap = now - lastFrameAt
|
|
181
193
|
if (gap > maxInterFrameMs) maxInterFrameMs = gap
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mohdel",
|
|
3
|
-
"version": "0.105.1",
|
|
3
|
+
"version": "0.106.0",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Christophe Le Bars",
|
|
@@ -87,7 +87,7 @@
|
|
|
87
87
|
"@opentelemetry/exporter-trace-otlp-grpc": "^0.218.0",
|
|
88
88
|
"@opentelemetry/sdk-node": "^0.218.0",
|
|
89
89
|
"chalk": "^5.4.0",
|
|
90
|
-
"mohdel-thin-gate-linux-x64-gnu": "0.105.1"
|
|
90
|
+
"mohdel-thin-gate-linux-x64-gnu": "0.106.0"
|
|
91
91
|
},
|
|
92
92
|
"dependencies": {
|
|
93
93
|
"@anthropic-ai/sdk": "^0.95.2",
|
package/src/lib/creators.js
CHANGED
|
@@ -39,6 +39,11 @@ const creators = {
|
|
|
39
39
|
logo: 'minimax.svg',
|
|
40
40
|
description: 'Minimax offers versatile Chinese-first chat and coding models tuned for fast, cost-aware assistants and enterprise integrations.'
|
|
41
41
|
},
|
|
42
|
+
mistral: {
|
|
43
|
+
label: 'Mistral',
|
|
44
|
+
logo: 'mistral.svg',
|
|
45
|
+
description: 'Mistral AI ships strong open-weight and proprietary models with a focus on European hosting, multilingual quality, and efficient deployment.'
|
|
46
|
+
},
|
|
42
47
|
moonshotai: {
|
|
43
48
|
label: 'Moonshot AI',
|
|
44
49
|
logo: 'moonshotai.svg',
|
package/src/lib/providers.js
CHANGED
|
@@ -95,6 +95,15 @@ const providers = {
|
|
|
95
95
|
creators: ['xai'],
|
|
96
96
|
contextSemantics: 'shared',
|
|
97
97
|
outputCapStrategy: 'accept'
|
|
98
|
+
},
|
|
99
|
+
xiaomi: {
|
|
100
|
+
sdk: 'openai',
|
|
101
|
+
api: 'chatCompletions',
|
|
102
|
+
apiKeyEnv: 'XIAOMI_API_SK',
|
|
103
|
+
createConfiguration: apiKey => ({ baseURL: 'https://api.xiaomimimo.com/v1', apiKey }),
|
|
104
|
+
creators: ['xiaomi'],
|
|
105
|
+
contextSemantics: 'shared',
|
|
106
|
+
outputCapStrategy: 'accept'
|
|
98
107
|
}
|
|
99
108
|
}
|
|
100
109
|
|