mohdel 0.112.0 → 0.113.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Mohdel
2
2
 
3
- Self-hosted LLM gateway and SDK for Node — think LiteLLM, for the JS world. One `answer()` call for 11 providers; swap models by changing one string; get real per-call USD cost back on every result, with OpenTelemetry built in and process isolation when you need it. Your keys, your infra, no SaaS proxy in the path.
3
+ Self-hosted LLM gateway and SDK for Node — think LiteLLM, for the JS world. One `answer()` call for 13 providers; swap models by changing one string; get real per-call USD cost back on every result, with OpenTelemetry built in and process isolation when you need it. Your keys, your infra, no SaaS proxy in the path.
4
4
 
5
5
  ```bash
6
6
  npm install -g mohdel
@@ -8,7 +8,7 @@ mo # interactive setup — pick a provider,
8
8
  mo ask gemini/gemini-3-flash-preview "why is the sky blue"
9
9
  ```
10
10
 
11
- Providers: Anthropic, OpenAI, Gemini, Mistral, Groq, xAI, Cerebras, Fireworks, DeepSeek, OpenRouter, Novita. Node 22+, ES modules.
11
+ Providers: Anthropic, OpenAI, Gemini, Mistral, Groq, xAI, Cerebras, Fireworks, DeepSeek, Qwen Cloud, Xiaomi, OpenRouter, Novita. Node 22+, ES modules.
12
12
 
13
13
  ## Why mohdel
14
14
 
@@ -306,6 +306,8 @@ What each provider supports through mohdel's unified interface:
306
306
  | DeepSeek | No | Yes | Yes | No | No | DSML tool-call fallback when model emits tags in content |
307
307
  | Fireworks | Yes | Yes | Yes | No | Yes (`reasoning_effort`) | OpenAI SDK + `baseURL`; model id auto-prefixed |
308
308
  | Mistral | No | Yes | Yes | No | No | `tool_choice: "any"` = required |
309
+ | Qwen Cloud | No | Yes | No | No | Yes (`enable_thinking` + `thinking_budget`) | Alibaba DashScope intl; hybrid models think by default — effort `none` sends explicit off |
310
+ | Xiaomi | No | Yes | Yes | No | Auto | MiMo; shared chat-completions path, `reasoning_content` captured |
309
311
  | OpenRouter | Yes | Yes | Yes | No | Varies | Meta-provider; `providerOptions.openrouter` for routing prefs |
310
312
  | Novita | No | No | No | No | No | Image generation only |
311
313
 
@@ -55,10 +55,14 @@ const DSML_PARAM_RE = /<\uFF5CDSML\uFF5Cparameter\s+name="([^"]+)"(?:\s+string="
55
55
  * @property {'openai'|'mistral'|'cerebras'} [toolChoiceFlavor]
56
56
  * @property {'user'|'safety_identifier'} [identifierField]
57
57
  * Defaults to 'user'.
58
- * @property {'reasoning_effort'|'cerebras_zai'} [reasoningField]
58
+ * @property {'reasoning_effort'|'cerebras_zai'|'qwen'} [reasoningField]
59
59
  * How to wire outputEffort into the request. `reasoning_effort`
60
60
  * sets `args.reasoning_effort = effort`. `cerebras_zai` flips
61
61
  * `args.disable_reasoning` (`true` only for effort `none`) instead (zai-family only).
62
+ * `qwen` sets `args.enable_thinking` plus a numeric
63
+ * `args.thinking_budget` from the spec's effort level — Qwen hybrid
64
+ * models think by default, so `enable_thinking: false` must be sent
65
+ * explicitly to switch thinking off.
62
66
  * @property {boolean} [parseDsml]
63
67
  * Extract DeepSeek DSML function-call blocks from message content
64
68
  * when native `tool_calls` is absent.
@@ -351,6 +355,11 @@ function buildRequest (envelope, spec, config) {
351
355
  if (effort !== 'none') delete args.temperature
352
356
  if (config.reasoningField === 'cerebras_zai' && /zai/i.test(bareOf(envelope.model))) {
353
357
  args.disable_reasoning = (effort === 'none')
358
+ } else if (config.reasoningField === 'qwen') {
359
+ args.enable_thinking = (effort !== 'none')
360
+ if (effort !== 'none' && typeof headroom === 'number') {
361
+ args.thinking_budget = headroom
362
+ }
354
363
  } else {
355
364
  args.reasoning_effort = effort
356
365
  }
@@ -243,6 +243,10 @@ const providerOverrides = {
243
243
  if (code === 'rate_limit_exceeded') return tierResult(detail)
244
244
  return undefined
245
245
  },
246
+ qwen (_err, code, detail) {
247
+ if (code === 'rate_limit_exceeded') return tierResult(detail)
248
+ return undefined
249
+ },
246
250
  novita (_err, code, detail) {
247
251
  if (code === 'rate_limit_exceeded') return tierResult(detail)
248
252
  return undefined
@@ -23,6 +23,7 @@ import { mistral } from './mistral.js'
23
23
  import { novita } from './novita.js'
24
24
  import { openai } from './openai.js'
25
25
  import { openrouter } from './openrouter.js'
26
+ import { qwen } from './qwen.js'
26
27
  import { xai } from './xai.js'
27
28
  import { xiaomi } from './xiaomi.js'
28
29
 
@@ -39,6 +40,7 @@ export const adapters = Object.freeze({
39
40
  novita,
40
41
  openai,
41
42
  openrouter,
43
+ qwen,
42
44
  xai,
43
45
  xiaomi
44
46
  })
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Qwen Cloud adapter — OpenAI-compatible chat completions against
3
+ * Alibaba's international DashScope endpoint. Reasoning arrives as
4
+ * the standard `reasoning_content` field handled by the shared core;
5
+ * thinking is wired via `reasoningField: 'qwen'` (`enable_thinking`
6
+ * + `thinking_budget`) because Qwen hybrid models think by default.
7
+ *
8
+ * @module session/adapters/qwen
9
+ */
10
+
11
+ import OpenAI from 'openai'
12
+
13
+ import { runChatCompletions } from './_chat_completions.js'
14
+ import { streamingDispatcher } from './_dispatcher.js'
15
+
16
+ const BASE_URL = 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1'
17
+
18
+ /**
19
+ * @param {import('#core/envelope.js').CallEnvelope} envelope
20
+ * @param {{client?: any, signal?: AbortSignal, log?: any, span?: any}} [deps]
21
+ * @returns {AsyncGenerator<import('#core/events.js').Event>}
22
+ */
23
+ export async function * qwen (envelope, deps = {}) {
24
+ const client = deps.client ?? new OpenAI({
25
+ apiKey: envelope.auth.key,
26
+ baseURL: envelope.auth.baseURL || BASE_URL,
27
+ fetchOptions: { dispatcher: streamingDispatcher() }
28
+ })
29
+ yield * runChatCompletions(envelope, client, {
30
+ provider: 'qwen',
31
+ reasoningField: 'qwen'
32
+ }, {
33
+ signal: deps.signal,
34
+ log: deps.log,
35
+ span: deps.span
36
+ })
37
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mohdel",
3
- "version": "0.112.0",
3
+ "version": "0.113.0",
4
4
  "license": "MIT",
5
5
  "author": {
6
6
  "name": "Christophe Le Bars",
@@ -84,10 +84,10 @@
84
84
  },
85
85
  "optionalDependencies": {
86
86
  "@clack/prompts": "^1.5.1",
87
- "@opentelemetry/exporter-trace-otlp-grpc": "^0.218.0",
88
- "@opentelemetry/sdk-node": "^0.218.0",
87
+ "@opentelemetry/exporter-trace-otlp-grpc": "^0.219.0",
88
+ "@opentelemetry/sdk-node": "^0.219.0",
89
89
  "chalk": "^5.4.0",
90
- "mohdel-thin-gate-linux-x64-gnu": "0.112.0"
90
+ "mohdel-thin-gate-linux-x64-gnu": "0.113.0"
91
91
  },
92
92
  "dependencies": {
93
93
  "@anthropic-ai/sdk": "^0.104.1",
@@ -83,6 +83,13 @@ const PROVIDER_INFO = {
83
83
  url: 'https://novita.ai/dashboard/key',
84
84
  hint: 'Create an API key at novita.ai → Dashboard → API Key',
85
85
  free: false
86
+ },
87
+ qwen: {
88
+ label: 'Qwen Cloud',
89
+ description: 'Qwen 3.7 Max/Plus, 3.6 Flash — reasoning, coding, long context. Free quota for new users.',
90
+ url: 'https://home.qwencloud.com/api-keys',
91
+ hint: 'Create an API key at home.qwencloud.com → API Keys',
92
+ free: false
86
93
  }
87
94
  }
88
95
 
@@ -88,6 +88,15 @@ const providers = {
88
88
  },
89
89
  creators: []
90
90
  },
91
+ qwen: {
92
+ sdk: 'openai',
93
+ api: 'chatCompletions',
94
+ apiKeyEnv: 'QWEN_API_SK',
95
+ createConfiguration: apiKey => ({ baseURL: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1', apiKey }),
96
+ creators: ['alibaba'],
97
+ contextSemantics: 'shared',
98
+ outputCapStrategy: 'accept'
99
+ },
91
100
  xai: {
92
101
  sdk: 'openai',
93
102
  apiKeyEnv: 'XAI_API_SK',