mohdel 0.111.0 → 0.113.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/js/client/call_transcription.js +85 -0
- package/js/client/index.js +2 -0
- package/js/session/adapters/_chat_completions.js +10 -1
- package/js/session/adapters/_errors.js +4 -0
- package/js/session/adapters/index.js +2 -0
- package/js/session/adapters/qwen.js +37 -0
- package/js/session/driver.js +11 -0
- package/package.json +4 -4
- package/src/cli/onboard.js +7 -0
- package/src/lib/providers.js +9 -0
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Mohdel
|
|
2
2
|
|
|
3
|
-
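Self-hosted LLM gateway and SDK for Node — think LiteLLM, for the JS world. One `answer()` call for 11 providers; swap models by changing one string; get real per-call USD cost back on every result, with OpenTelemetry built in and process isolation when you need it. Your keys, your infra, no SaaS proxy in the path.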
|
|
3
|
+
Self-hosted LLM gateway and SDK for Node — think LiteLLM, for the JS world. One `answer()` call for 13 providers; swap models by changing one string; get real per-call USD cost back on every result, with OpenTelemetry built in and process isolation when you need it. Your keys, your infra, no SaaS proxy in the path.
|
|
4
4
|
|
|
5
5
|
```bash
|
|
6
6
|
npm install -g mohdel
|
|
@@ -8,7 +8,7 @@ mo # interactive setup — pick a provider,
|
|
|
8
8
|
mo ask gemini/gemini-3-flash-preview "why is the sky blue"
|
|
9
9
|
```
|
|
10
10
|
|
|
11
|
-
Providers: Anthropic, OpenAI, Gemini, Mistral, Groq, xAI, Cerebras, Fireworks, DeepSeek, OpenRouter, Novita. Node 22+, ES modules.
|
|
11
|
+
Providers: Anthropic, OpenAI, Gemini, Mistral, Groq, xAI, Cerebras, Fireworks, DeepSeek, Qwen Cloud, Xiaomi, OpenRouter, Novita. Node 22+, ES modules.
|
|
12
12
|
|
|
13
13
|
## Why mohdel
|
|
14
14
|
|
|
@@ -306,6 +306,8 @@ What each provider supports through mohdel's unified interface:
|
|
|
306
306
|
| DeepSeek | No | Yes | Yes | No | No | DSML tool-call fallback when model emits tags in content |
|
|
307
307
|
| Fireworks | Yes | Yes | Yes | No | Yes (`reasoning_effort`) | OpenAI SDK + `baseURL`; model id auto-prefixed |
|
|
308
308
|
| Mistral | No | Yes | Yes | No | No | `tool_choice: "any"` = required |
|
|
309
|
+
| Qwen Cloud | No | Yes | No | No | Yes (`enable_thinking` + `thinking_budget`) | Alibaba DashScope intl; hybrid models think by default — effort `none` sends explicit off |
|
|
310
|
+
| Xiaomi | No | Yes | Yes | No | Auto | MiMo; shared chat-completions path, `reasoning_content` captured |
|
|
309
311
|
| OpenRouter | Yes | Yes | Yes | No | Varies | Meta-provider; `providerOptions.openrouter` for routing prefs |
|
|
310
312
|
| Novita | No | No | No | No | No | Image generation only |
|
|
311
313
|
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Send a TranscriptionEnvelope to thin-gate's `POST /v1/transcription`.
|
|
3
|
+
*
|
|
4
|
+
* Transcription is one-shot: single JSON response body, no streaming,
|
|
5
|
+
* no cooldown/rate-limit. `audio.fileUri` must be a `file://` or
|
|
6
|
+
* `data:` URI — `file://` requires that the gate's sessions share a
|
|
7
|
+
* filesystem with the caller; `data:` carries the bytes inline subject
|
|
8
|
+
* to the gate's body-size cap.
|
|
9
|
+
*
|
|
10
|
+
* @module client/call_transcription
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { requestUnix } from './transport.js'
|
|
14
|
+
import { MohdelTypedError } from '#core'
|
|
15
|
+
|
|
16
|
+
/**
 * POST a TranscriptionEnvelope to thin-gate over a Unix socket and
 * return the validated one-shot result.
 *
 * The full response body is read before the status is inspected. A
 * non-200 status is thrown as a typed error built from the body (or a
 * synthetic HTTP error when the body carries none). A 200 whose body is
 * not JSON, or which lacks `status: 'completed'` and a string `text`,
 * is rejected as a non-retryable protocol error.
 *
 * @param {import('#core/transcription.js').TranscriptionEnvelope} envelope
 * @param {object} options
 * @param {string} options.socketPath
 * @param {AbortSignal} [options.signal]
 * @param {string} [options.path] HTTP path; defaults to '/v1/transcription'
 * @returns {Promise<import('#core/transcription.js').TranscriptionResult>}
 * @throws {MohdelTypedError} on a non-200 status or an invalid 200 body
 */
export async function callTranscription (envelope, { socketPath, signal, path = '/v1/transcription' }) {
  // Both protocol failures below share the same error shape.
  const protocolError = (message) => new MohdelTypedError(
    message,
    { type: 'PROTOCOL_INVALID_EVENT', retryable: false }
  )

  const response = await requestUnix({ socketPath, path, method: 'POST', body: envelope, signal })
  const raw = await readAll(response)

  if (response.statusCode !== 200) {
    const errorPayload = parseErrorBody(raw, response.statusCode ?? 0)
    throw MohdelTypedError.fromJSON(errorPayload)
  }

  let result
  try {
    result = JSON.parse(raw)
  } catch {
    throw protocolError('thin-gate returned non-JSON transcription response')
  }

  const wellFormed = Boolean(result) &&
    typeof result === 'object' &&
    result.status === 'completed' &&
    typeof result.text === 'string'
  if (wellFormed) return result

  throw protocolError('thin-gate returned malformed TranscriptionResult')
}
|
|
57
|
+
|
|
58
|
+
/**
 * Drain a response stream into a single UTF-8 string.
 *
 * Binary chunks go through one streaming decoder rather than being
 * decoded one by one: a multi-byte UTF-8 character may be split across
 * two chunks, and decoding each chunk on its own would turn both halves
 * into U+FFFD.
 *
 * @param {AsyncIterable<Buffer|string>} stream
 * @returns {Promise<string>}
 */
async function readAll (stream) {
  // ignoreBOM keeps a leading BOM in the output, as Buffer#toString('utf8') does.
  const decoder = new TextDecoder('utf-8', { ignoreBOM: true })
  let text = ''
  for await (const chunk of stream) {
    if (typeof chunk === 'string') {
      // Flush any bytes still pending so output order follows chunk order.
      text += decoder.decode() + chunk
    } else {
      text += decoder.decode(chunk, { stream: true })
    }
  }
  // Final flush: a trailing incomplete sequence becomes U+FFFD.
  return text + decoder.decode()
}
|
|
67
|
+
|
|
68
|
+
/**
 * Turn the body of a non-200 response into a typed-error payload.
 *
 * When the body is JSON holding an object with a string `type`, that
 * object is returned untouched. Anything else (unparseable text, a JSON
 * primitive, an object without `type`) yields a synthetic
 * `PROTOCOL_HTTP_ERROR` that is retryable only for 5xx-and-above statuses.
 *
 * @param {string} body
 * @param {number} status
 * @returns {import('#core/errors.js').TypedError}
 */
function parseErrorBody (body, status) {
  let candidate
  try {
    candidate = JSON.parse(body)
  } catch {
    // Not JSON — handled by the synthetic fallback below.
    candidate = undefined
  }

  const isTypedError = Boolean(candidate) &&
    typeof candidate === 'object' &&
    typeof candidate.type === 'string'
  if (isTypedError) return candidate

  const retryable = status >= 500
  return {
    type: 'PROTOCOL_HTTP_ERROR',
    message: `thin-gate returned HTTP ${status}`,
    retryable
  }
}
|
package/js/client/index.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
* Public surface (0.90):
|
|
5
5
|
* - call(envelope, { socketPath, signal }): AsyncGenerator<Event>
|
|
6
6
|
* - callImage(envelope, { socketPath, signal }): Promise<ImageResult>
|
|
7
|
+
* - callTranscription(envelope, { socketPath, signal }): Promise<TranscriptionResult>
|
|
7
8
|
*
|
|
8
9
|
* No provider SDKs are imported transitively. This module can be
|
|
9
10
|
* consumed by callers that must not pull openai-node, anthropic-sdk,
|
|
@@ -14,3 +15,4 @@
|
|
|
14
15
|
|
|
15
16
|
export { call } from './call.js'
|
|
16
17
|
export { callImage } from './call_image.js'
|
|
18
|
+
export { callTranscription } from './call_transcription.js'
|
|
@@ -55,10 +55,14 @@ const DSML_PARAM_RE = /<\uFF5CDSML\uFF5Cparameter\s+name="([^"]+)"(?:\s+string="
|
|
|
55
55
|
* @property {'openai'|'mistral'|'cerebras'} [toolChoiceFlavor]
|
|
56
56
|
* @property {'user'|'safety_identifier'} [identifierField]
|
|
57
57
|
* Defaults to 'user'.
|
|
58
|
-
* @property {'reasoning_effort'|'cerebras_zai'} [reasoningField]
|
|
58
|
+
* @property {'reasoning_effort'|'cerebras_zai'|'qwen'} [reasoningField]
|
|
59
59
|
* How to wire outputEffort into the request. `reasoning_effort`
|
|
60
60
|
* sets `args.reasoning_effort = effort`. `cerebras_zai` flips
|
|
61
61
|
* `args.disable_reasoning = false` instead (zai-family only).
|
|
62
|
+
* `qwen` sets `args.enable_thinking` plus a numeric
|
|
63
|
+
* `args.thinking_budget` from the spec's effort level — Qwen hybrid
|
|
64
|
+
* models think by default, so `enable_thinking: false` must be sent
|
|
65
|
+
* explicitly to switch thinking off.
|
|
62
66
|
* @property {boolean} [parseDsml]
|
|
63
67
|
* Extract DeepSeek DSML function-call blocks from message content
|
|
64
68
|
* when native `tool_calls` is absent.
|
|
@@ -351,6 +355,11 @@ function buildRequest (envelope, spec, config) {
|
|
|
351
355
|
if (effort !== 'none') delete args.temperature
|
|
352
356
|
if (config.reasoningField === 'cerebras_zai' && /zai/i.test(bareOf(envelope.model))) {
|
|
353
357
|
args.disable_reasoning = (effort === 'none')
|
|
358
|
+
} else if (config.reasoningField === 'qwen') {
|
|
359
|
+
args.enable_thinking = (effort !== 'none')
|
|
360
|
+
if (effort !== 'none' && typeof headroom === 'number') {
|
|
361
|
+
args.thinking_budget = headroom
|
|
362
|
+
}
|
|
354
363
|
} else {
|
|
355
364
|
args.reasoning_effort = effort
|
|
356
365
|
}
|
|
@@ -243,6 +243,10 @@ const providerOverrides = {
|
|
|
243
243
|
if (code === 'rate_limit_exceeded') return tierResult(detail)
|
|
244
244
|
return undefined
|
|
245
245
|
},
|
|
246
|
+
qwen (_err, code, detail) {
|
|
247
|
+
if (code === 'rate_limit_exceeded') return tierResult(detail)
|
|
248
|
+
return undefined
|
|
249
|
+
},
|
|
246
250
|
novita (_err, code, detail) {
|
|
247
251
|
if (code === 'rate_limit_exceeded') return tierResult(detail)
|
|
248
252
|
return undefined
|
|
@@ -23,6 +23,7 @@ import { mistral } from './mistral.js'
|
|
|
23
23
|
import { novita } from './novita.js'
|
|
24
24
|
import { openai } from './openai.js'
|
|
25
25
|
import { openrouter } from './openrouter.js'
|
|
26
|
+
import { qwen } from './qwen.js'
|
|
26
27
|
import { xai } from './xai.js'
|
|
27
28
|
import { xiaomi } from './xiaomi.js'
|
|
28
29
|
|
|
@@ -39,6 +40,7 @@ export const adapters = Object.freeze({
|
|
|
39
40
|
novita,
|
|
40
41
|
openai,
|
|
41
42
|
openrouter,
|
|
43
|
+
qwen,
|
|
42
44
|
xai,
|
|
43
45
|
xiaomi
|
|
44
46
|
})
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Qwen Cloud adapter — OpenAI-compatible chat completions against
|
|
3
|
+
* Alibaba's international DashScope endpoint. Reasoning arrives as
|
|
4
|
+
* the standard `reasoning_content` field handled by the shared core;
|
|
5
|
+
* thinking is wired via `reasoningField: 'qwen'` (`enable_thinking`
|
|
6
|
+
* + `thinking_budget`) because Qwen hybrid models think by default.
|
|
7
|
+
*
|
|
8
|
+
* @module session/adapters/qwen
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import OpenAI from 'openai'
|
|
12
|
+
|
|
13
|
+
import { runChatCompletions } from './_chat_completions.js'
|
|
14
|
+
import { streamingDispatcher } from './_dispatcher.js'
|
|
15
|
+
|
|
16
|
+
const BASE_URL = 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1'
|
|
17
|
+
|
|
18
|
+
/**
 * Qwen Cloud adapter entry point: delegates the envelope to the shared
 * chat-completions runner and re-yields its events.
 *
 * An injected `deps.client` is used as-is. Otherwise an OpenAI SDK
 * client is built from the envelope's key, pointed at
 * `envelope.auth.baseURL` or, when that is falsy, the module's default
 * DashScope `BASE_URL`.
 *
 * @param {import('#core/envelope.js').CallEnvelope} envelope
 * @param {{client?: any, signal?: AbortSignal, log?: any, span?: any}} [deps]
 * @returns {AsyncGenerator<import('#core/events.js').Event>}
 */
export async function * qwen (envelope, deps = {}) {
  const { client: injectedClient, signal, log, span } = deps

  // `??` keeps construction lazy: the envelope's auth is only read when
  // no client was injected.
  const client = injectedClient ?? new OpenAI({
    apiKey: envelope.auth.key,
    baseURL: envelope.auth.baseURL || BASE_URL,
    fetchOptions: { dispatcher: streamingDispatcher() }
  })

  const adapterConfig = { provider: 'qwen', reasoningField: 'qwen' }
  yield * runChatCompletions(envelope, client, adapterConfig, { signal, log, span })
}
|
package/js/session/driver.js
CHANGED
|
@@ -16,6 +16,7 @@ import readline from 'node:readline'
|
|
|
16
16
|
|
|
17
17
|
import { run } from './run.js'
|
|
18
18
|
import { runImage } from './run_image.js'
|
|
19
|
+
import { runTranscription } from './run_transcription.js'
|
|
19
20
|
import { setCatalog } from './adapters/_catalog.js'
|
|
20
21
|
|
|
21
22
|
// Bounded memory for pre-dequeue cancels. Hostile/buggy supervisors
|
|
@@ -148,6 +149,16 @@ export async function drive (stdin, stdout) {
|
|
|
148
149
|
} else {
|
|
149
150
|
stdout.write(JSON.stringify({ type: 'error', error: out.error }) + '\n')
|
|
150
151
|
}
|
|
152
|
+
} else if (envelope.op === 'transcription') {
|
|
153
|
+
// Same one-shot contract as the image path; shape matches
|
|
154
|
+
// `js/core/transcription.js` after the tag strip.
|
|
155
|
+
const { op: _op, ...trEnv } = envelope
|
|
156
|
+
const out = await runTranscription(trEnv)
|
|
157
|
+
if (out.ok) {
|
|
158
|
+
stdout.write(JSON.stringify({ type: 'transcription_done', result: out.result }) + '\n')
|
|
159
|
+
} else {
|
|
160
|
+
stdout.write(JSON.stringify({ type: 'error', error: out.error }) + '\n')
|
|
161
|
+
}
|
|
151
162
|
} else {
|
|
152
163
|
for await (const ev of run(envelope, { signal: controller.signal })) {
|
|
153
164
|
stdout.write(JSON.stringify(ev) + '\n')
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mohdel",
|
|
3
|
-
"version": "0.111.0",
|
|
3
|
+
"version": "0.113.0",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Christophe Le Bars",
|
|
@@ -84,10 +84,10 @@
|
|
|
84
84
|
},
|
|
85
85
|
"optionalDependencies": {
|
|
86
86
|
"@clack/prompts": "^1.5.1",
|
|
87
|
-
"@opentelemetry/exporter-trace-otlp-grpc": "^0.
|
|
88
|
-
"@opentelemetry/sdk-node": "^0.
|
|
87
|
+
"@opentelemetry/exporter-trace-otlp-grpc": "^0.219.0",
|
|
88
|
+
"@opentelemetry/sdk-node": "^0.219.0",
|
|
89
89
|
"chalk": "^5.4.0",
|
|
90
|
-
"mohdel-thin-gate-linux-x64-gnu": "0.
|
|
90
|
+
"mohdel-thin-gate-linux-x64-gnu": "0.113.0"
|
|
91
91
|
},
|
|
92
92
|
"dependencies": {
|
|
93
93
|
"@anthropic-ai/sdk": "^0.104.1",
|
package/src/cli/onboard.js
CHANGED
|
@@ -83,6 +83,13 @@ const PROVIDER_INFO = {
|
|
|
83
83
|
url: 'https://novita.ai/dashboard/key',
|
|
84
84
|
hint: 'Create an API key at novita.ai → Dashboard → API Key',
|
|
85
85
|
free: false
|
|
86
|
+
},
|
|
87
|
+
qwen: {
|
|
88
|
+
label: 'Qwen Cloud',
|
|
89
|
+
description: 'Qwen 3.7 Max/Plus, 3.6 Flash — reasoning, coding, long context. Free quota for new users.',
|
|
90
|
+
url: 'https://home.qwencloud.com/api-keys',
|
|
91
|
+
hint: 'Create an API key at home.qwencloud.com → API Keys',
|
|
92
|
+
free: false
|
|
86
93
|
}
|
|
87
94
|
}
|
|
88
95
|
|
package/src/lib/providers.js
CHANGED
|
@@ -88,6 +88,15 @@ const providers = {
|
|
|
88
88
|
},
|
|
89
89
|
creators: []
|
|
90
90
|
},
|
|
91
|
+
qwen: {
|
|
92
|
+
sdk: 'openai',
|
|
93
|
+
api: 'chatCompletions',
|
|
94
|
+
apiKeyEnv: 'QWEN_API_SK',
|
|
95
|
+
createConfiguration: apiKey => ({ baseURL: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1', apiKey }),
|
|
96
|
+
creators: ['alibaba'],
|
|
97
|
+
contextSemantics: 'shared',
|
|
98
|
+
outputCapStrategy: 'accept'
|
|
99
|
+
},
|
|
91
100
|
xai: {
|
|
92
101
|
sdk: 'openai',
|
|
93
102
|
apiKeyEnv: 'XAI_API_SK',
|