mohdel 0.111.0 → 0.113.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Mohdel
2
2
 
3
- Self-hosted LLM gateway and SDK for Node — think LiteLLM, for the JS world. One `answer()` call for 11 providers; swap models by changing one string; get real per-call USD cost back on every result, with OpenTelemetry built in and process isolation when you need it. Your keys, your infra, no SaaS proxy in the path.
3
+ Self-hosted LLM gateway and SDK for Node — think LiteLLM, for the JS world. One `answer()` call for 13 providers; swap models by changing one string; get real per-call USD cost back on every result, with OpenTelemetry built in and process isolation when you need it. Your keys, your infra, no SaaS proxy in the path.
4
4
 
5
5
  ```bash
6
6
  npm install -g mohdel
@@ -8,7 +8,7 @@ mo # interactive setup — pick a provider,
8
8
  mo ask gemini/gemini-3-flash-preview "why is the sky blue"
9
9
  ```
10
10
 
11
- Providers: Anthropic, OpenAI, Gemini, Mistral, Groq, xAI, Cerebras, Fireworks, DeepSeek, OpenRouter, Novita. Node 22+, ES modules.
11
+ Providers: Anthropic, OpenAI, Gemini, Mistral, Groq, xAI, Cerebras, Fireworks, DeepSeek, Qwen Cloud, Xiaomi, OpenRouter, Novita. Node 22+, ES modules.
12
12
 
13
13
  ## Why mohdel
14
14
 
@@ -306,6 +306,8 @@ What each provider supports through mohdel's unified interface:
306
306
  | DeepSeek | No | Yes | Yes | No | No | DSML tool-call fallback when model emits tags in content |
307
307
  | Fireworks | Yes | Yes | Yes | No | Yes (`reasoning_effort`) | OpenAI SDK + `baseURL`; model id auto-prefixed |
308
308
  | Mistral | No | Yes | Yes | No | No | `tool_choice: "any"` = required |
309
+ | Qwen Cloud | No | Yes | No | No | Yes (`enable_thinking` + `thinking_budget`) | Alibaba DashScope intl; hybrid models think by default — effort `none` sends explicit off |
310
+ | Xiaomi | No | Yes | Yes | No | Auto | MiMo; shared chat-completions path, `reasoning_content` captured |
309
311
  | OpenRouter | Yes | Yes | Yes | No | Varies | Meta-provider; `providerOptions.openrouter` for routing prefs |
310
312
  | Novita | No | No | No | No | No | Image generation only |
311
313
 
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Send a TranscriptionEnvelope to thin-gate's `POST /v1/transcription`.
3
+ *
4
+ * Transcription is one-shot: single JSON response body, no streaming,
5
+ * no cooldown/rate-limit. `audio.fileUri` must be a `file://` or
6
+ * `data:` URI — `file://` requires that the gate's sessions share a
7
+ * filesystem with the caller; `data:` carries the bytes inline subject
8
+ * to the gate's body-size cap.
9
+ *
10
+ * @module client/call_transcription
11
+ */
12
+
13
+ import { requestUnix } from './transport.js'
14
+ import { MohdelTypedError } from '#core'
15
+
16
+ /**
17
+ * @param {import('#core/transcription.js').TranscriptionEnvelope} envelope
18
+ * @param {object} options
19
+ * @param {string} options.socketPath
20
+ * @param {AbortSignal} [options.signal]
21
+ * @param {string} [options.path] HTTP path; defaults to '/v1/transcription'
22
+ * @returns {Promise<import('#core/transcription.js').TranscriptionResult>}
23
+ */
24
+ export async function callTranscription (envelope, { socketPath, signal, path = '/v1/transcription' }) {
25
+ const res = await requestUnix({
26
+ socketPath,
27
+ path,
28
+ method: 'POST',
29
+ body: envelope,
30
+ signal
31
+ })
32
+
33
+ const body = await readAll(res)
34
+
35
+ if (res.statusCode !== 200) {
36
+ throw MohdelTypedError.fromJSON(parseErrorBody(body, res.statusCode ?? 0))
37
+ }
38
+
39
+ let parsed
40
+ try {
41
+ parsed = JSON.parse(body)
42
+ } catch (e) {
43
+ throw new MohdelTypedError(
44
+ 'thin-gate returned non-JSON transcription response',
45
+ { type: 'PROTOCOL_INVALID_EVENT', retryable: false }
46
+ )
47
+ }
48
+
49
+ if (!parsed || typeof parsed !== 'object' || parsed.status !== 'completed' || typeof parsed.text !== 'string') {
50
+ throw new MohdelTypedError(
51
+ 'thin-gate returned malformed TranscriptionResult',
52
+ { type: 'PROTOCOL_INVALID_EVENT', retryable: false }
53
+ )
54
+ }
55
+ return parsed
56
+ }
57
+
58
+ /**
59
+ * @param {AsyncIterable<Buffer|string>} stream
60
+ * @returns {Promise<string>}
61
+ */
62
+ async function readAll (stream) {
63
+ let s = ''
64
+ for await (const c of stream) s += typeof c === 'string' ? c : c.toString('utf8')
65
+ return s
66
+ }
67
+
68
+ /**
69
+ * @param {string} body
70
+ * @param {number} status
71
+ * @returns {import('#core/errors.js').TypedError}
72
+ */
73
+ function parseErrorBody (body, status) {
74
+ try {
75
+ const parsed = JSON.parse(body)
76
+ if (parsed && typeof parsed === 'object' && typeof parsed.type === 'string') {
77
+ return parsed
78
+ }
79
+ } catch {}
80
+ return {
81
+ type: 'PROTOCOL_HTTP_ERROR',
82
+ message: `thin-gate returned HTTP ${status}`,
83
+ retryable: status >= 500
84
+ }
85
+ }
@@ -4,6 +4,7 @@
4
4
  * Public surface (0.90):
5
5
  * - call(envelope, { socketPath, signal }): AsyncGenerator<Event>
6
6
  * - callImage(envelope, { socketPath, signal }): Promise<ImageResult>
7
+ * - callTranscription(envelope, { socketPath, signal }): Promise<TranscriptionResult>
7
8
  *
8
9
  * No provider SDKs are imported transitively. This module can be
9
10
  * consumed by callers that must not pull openai-node, anthropic-sdk,
@@ -14,3 +15,4 @@
14
15
 
15
16
  export { call } from './call.js'
16
17
  export { callImage } from './call_image.js'
18
+ export { callTranscription } from './call_transcription.js'
@@ -55,10 +55,14 @@ const DSML_PARAM_RE = /<\uFF5CDSML\uFF5Cparameter\s+name="([^"]+)"(?:\s+string="
55
55
  * @property {'openai'|'mistral'|'cerebras'} [toolChoiceFlavor]
56
56
  * @property {'user'|'safety_identifier'} [identifierField]
57
57
  * Defaults to 'user'.
58
- * @property {'reasoning_effort'|'cerebras_zai'} [reasoningField]
58
+ * @property {'reasoning_effort'|'cerebras_zai'|'qwen'} [reasoningField]
59
59
  * How to wire outputEffort into the request. `reasoning_effort`
60
60
  * sets `args.reasoning_effort = effort`. `cerebras_zai` flips
61
61
  * `args.disable_reasoning = false` instead (zai-family only).
62
+ * `qwen` sets `args.enable_thinking` plus a numeric
63
+ * `args.thinking_budget` from the spec's effort level — Qwen hybrid
64
+ * models think by default, so `enable_thinking: false` must be sent
65
+ * explicitly to switch thinking off.
62
66
  * @property {boolean} [parseDsml]
63
67
  * Extract DeepSeek DSML function-call blocks from message content
64
68
  * when native `tool_calls` is absent.
@@ -351,6 +355,11 @@ function buildRequest (envelope, spec, config) {
351
355
  if (effort !== 'none') delete args.temperature
352
356
  if (config.reasoningField === 'cerebras_zai' && /zai/i.test(bareOf(envelope.model))) {
353
357
  args.disable_reasoning = (effort === 'none')
358
+ } else if (config.reasoningField === 'qwen') {
359
+ args.enable_thinking = (effort !== 'none')
360
+ if (effort !== 'none' && typeof headroom === 'number') {
361
+ args.thinking_budget = headroom
362
+ }
354
363
  } else {
355
364
  args.reasoning_effort = effort
356
365
  }
@@ -243,6 +243,10 @@ const providerOverrides = {
243
243
  if (code === 'rate_limit_exceeded') return tierResult(detail)
244
244
  return undefined
245
245
  },
246
+ qwen (_err, code, detail) {
247
+ if (code === 'rate_limit_exceeded') return tierResult(detail)
248
+ return undefined
249
+ },
246
250
  novita (_err, code, detail) {
247
251
  if (code === 'rate_limit_exceeded') return tierResult(detail)
248
252
  return undefined
@@ -23,6 +23,7 @@ import { mistral } from './mistral.js'
23
23
  import { novita } from './novita.js'
24
24
  import { openai } from './openai.js'
25
25
  import { openrouter } from './openrouter.js'
26
+ import { qwen } from './qwen.js'
26
27
  import { xai } from './xai.js'
27
28
  import { xiaomi } from './xiaomi.js'
28
29
 
@@ -39,6 +40,7 @@ export const adapters = Object.freeze({
39
40
  novita,
40
41
  openai,
41
42
  openrouter,
43
+ qwen,
42
44
  xai,
43
45
  xiaomi
44
46
  })
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Qwen Cloud adapter — OpenAI-compatible chat completions against
3
+ * Alibaba's international DashScope endpoint. Reasoning arrives as
4
+ * the standard `reasoning_content` field handled by the shared core;
5
+ * thinking is wired via `reasoningField: 'qwen'` (`enable_thinking`
6
+ * + `thinking_budget`) because Qwen hybrid models think by default.
7
+ *
8
+ * @module session/adapters/qwen
9
+ */
10
+
11
+ import OpenAI from 'openai'
12
+
13
+ import { runChatCompletions } from './_chat_completions.js'
14
+ import { streamingDispatcher } from './_dispatcher.js'
15
+
16
+ const BASE_URL = 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1'
17
+
18
+ /**
19
+ * @param {import('#core/envelope.js').CallEnvelope} envelope
20
+ * @param {{client?: any, signal?: AbortSignal, log?: any, span?: any}} [deps]
21
+ * @returns {AsyncGenerator<import('#core/events.js').Event>}
22
+ */
23
+ export async function * qwen (envelope, deps = {}) {
24
+ const client = deps.client ?? new OpenAI({
25
+ apiKey: envelope.auth.key,
26
+ baseURL: envelope.auth.baseURL || BASE_URL,
27
+ fetchOptions: { dispatcher: streamingDispatcher() }
28
+ })
29
+ yield * runChatCompletions(envelope, client, {
30
+ provider: 'qwen',
31
+ reasoningField: 'qwen'
32
+ }, {
33
+ signal: deps.signal,
34
+ log: deps.log,
35
+ span: deps.span
36
+ })
37
+ }
@@ -16,6 +16,7 @@ import readline from 'node:readline'
16
16
 
17
17
  import { run } from './run.js'
18
18
  import { runImage } from './run_image.js'
19
+ import { runTranscription } from './run_transcription.js'
19
20
  import { setCatalog } from './adapters/_catalog.js'
20
21
 
21
22
  // Bounded memory for pre-dequeue cancels. Hostile/buggy supervisors
@@ -148,6 +149,16 @@ export async function drive (stdin, stdout) {
148
149
  } else {
149
150
  stdout.write(JSON.stringify({ type: 'error', error: out.error }) + '\n')
150
151
  }
152
+ } else if (envelope.op === 'transcription') {
153
+ // Same one-shot contract as the image path; shape matches
154
+ // `js/core/transcription.js` after the tag strip.
155
+ const { op: _op, ...trEnv } = envelope
156
+ const out = await runTranscription(trEnv)
157
+ if (out.ok) {
158
+ stdout.write(JSON.stringify({ type: 'transcription_done', result: out.result }) + '\n')
159
+ } else {
160
+ stdout.write(JSON.stringify({ type: 'error', error: out.error }) + '\n')
161
+ }
151
162
  } else {
152
163
  for await (const ev of run(envelope, { signal: controller.signal })) {
153
164
  stdout.write(JSON.stringify(ev) + '\n')
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mohdel",
3
- "version": "0.111.0",
3
+ "version": "0.113.0",
4
4
  "license": "MIT",
5
5
  "author": {
6
6
  "name": "Christophe Le Bars",
@@ -84,10 +84,10 @@
84
84
  },
85
85
  "optionalDependencies": {
86
86
  "@clack/prompts": "^1.5.1",
87
- "@opentelemetry/exporter-trace-otlp-grpc": "^0.218.0",
88
- "@opentelemetry/sdk-node": "^0.218.0",
87
+ "@opentelemetry/exporter-trace-otlp-grpc": "^0.219.0",
88
+ "@opentelemetry/sdk-node": "^0.219.0",
89
89
  "chalk": "^5.4.0",
90
- "mohdel-thin-gate-linux-x64-gnu": "0.111.0"
90
+ "mohdel-thin-gate-linux-x64-gnu": "0.113.0"
91
91
  },
92
92
  "dependencies": {
93
93
  "@anthropic-ai/sdk": "^0.104.1",
@@ -83,6 +83,13 @@ const PROVIDER_INFO = {
83
83
  url: 'https://novita.ai/dashboard/key',
84
84
  hint: 'Create an API key at novita.ai → Dashboard → API Key',
85
85
  free: false
86
+ },
87
+ qwen: {
88
+ label: 'Qwen Cloud',
89
+ description: 'Qwen 3.7 Max/Plus, 3.6 Flash — reasoning, coding, long context. Free quota for new users.',
90
+ url: 'https://home.qwencloud.com/api-keys',
91
+ hint: 'Create an API key at home.qwencloud.com → API Keys',
92
+ free: false
86
93
  }
87
94
  }
88
95
 
@@ -88,6 +88,15 @@ const providers = {
88
88
  },
89
89
  creators: []
90
90
  },
91
+ qwen: {
92
+ sdk: 'openai',
93
+ api: 'chatCompletions',
94
+ apiKeyEnv: 'QWEN_API_SK',
95
+ createConfiguration: apiKey => ({ baseURL: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1', apiKey }),
96
+ creators: ['alibaba'],
97
+ contextSemantics: 'shared',
98
+ outputCapStrategy: 'accept'
99
+ },
91
100
  xai: {
92
101
  sdk: 'openai',
93
102
  apiKeyEnv: 'XAI_API_SK',