@qwen-code/qwen-code 0.15.12-preview.3 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/bundled/qc-helper/docs/configuration/settings.md +59 -61
  2. package/bundled/qc-helper/docs/features/_meta.ts +2 -0
  3. package/bundled/qc-helper/docs/features/approval-mode.md +119 -2
  4. package/bundled/qc-helper/docs/features/auto-mode.md +263 -0
  5. package/bundled/qc-helper/docs/features/commands.md +11 -10
  6. package/bundled/qc-helper/docs/features/skills.md +3 -0
  7. package/bundled/qc-helper/docs/features/structured-output.md +309 -0
  8. package/bundled/qc-helper/docs/features/sub-agents.md +47 -5
  9. package/bundled/qc-helper/docs/qwen-serve.md +134 -10
  10. package/bundled/review/SKILL.md +12 -3
  11. package/chunks/{agent-LIAWUWAO.js → agent-K6OWOMBN.js} +15 -17
  12. package/chunks/{anthropicContentGenerator-4QE6LTVV.js → anthropicContentGenerator-RQJNXJIY.js} +7 -4
  13. package/chunks/{askUserQuestion-QFSCBTUO.js → askUserQuestion-PQPMPNM3.js} +2 -2
  14. package/chunks/{ca-S3XJMT6P.js → ca-UZ7BANMN.js} +3 -3
  15. package/chunks/{chunk-AJSOD5IR.js → chunk-3T4ZT63H.js} +8833 -3600
  16. package/chunks/{chunk-B7ZL7HUA.js → chunk-4AOCVI6J.js} +2 -1
  17. package/chunks/{chunk-AOJ3BBY7.js → chunk-4J63U5QO.js} +17 -349
  18. package/chunks/{chunk-AEJ2DKLP.js → chunk-C6WMLUNB.js} +1 -1
  19. package/chunks/{chunk-JMZQICAL.js → chunk-CAVZVZX6.js} +2 -2
  20. package/chunks/{chunk-CAWKL3UC.js → chunk-CSWBPY3P.js} +2 -2
  21. package/chunks/{chunk-G27O2LD2.js → chunk-D5NTAHYL.js} +1 -1
  22. package/chunks/{chunk-BXNCPI75.js → chunk-DMIMF3CG.js} +2 -2
  23. package/chunks/{chunk-OCC4MZRS.js → chunk-F23NCRJ2.js} +1 -1
  24. package/chunks/{chunk-5QQ5FGTU.js → chunk-G7YTSRES.js} +1 -1
  25. package/chunks/{chunk-SQNQIOD5.js → chunk-GGNTZ2NH.js} +92 -21
  26. package/chunks/{chunk-FKVKVE6N.js → chunk-KXZ4TJB4.js} +1 -1
  27. package/chunks/chunk-L34E6AGL.js +19126 -0
  28. package/chunks/{chunk-CBVB66WY.js → chunk-L5E26RN6.js} +2 -2
  29. package/chunks/{chunk-UXW7MYAW.js → chunk-MAY32HXD.js} +376 -1
  30. package/chunks/{chunk-GC5RXNL2.js → chunk-NOAHME6A.js} +115 -23
  31. package/chunks/{chunk-CM2IESUE.js → chunk-PR4T27R7.js} +1 -1
  32. package/chunks/{chunk-FYMSCRHM.js → chunk-PVVL5Q3W.js} +32 -1
  33. package/chunks/{chunk-YHEAJFCI.js → chunk-USE2VQ5P.js} +3 -0
  34. package/chunks/chunk-VMOAQVBP.js +379 -0
  35. package/chunks/{chunk-XLQ4E5PS.js → chunk-WCZWAKFG.js} +795 -142
  36. package/chunks/{chunk-GJXIKCKL.js → chunk-XP27SJMH.js} +76 -5
  37. package/chunks/{chunk-TPGOGCWM.js → chunk-YJLGXDQJ.js} +1 -1
  38. package/chunks/{contextCommand-SVLAZMQL.js → contextCommand-7CPNXBLO.js} +17 -19
  39. package/chunks/{cron-create-WUTD5ZTH.js → cron-create-IGYXQVG4.js} +28 -2
  40. package/chunks/{cron-delete-N3UQYCRA.js → cron-delete-ETKIZCWT.js} +2 -2
  41. package/chunks/{cron-list-Z6RJJ4YH.js → cron-list-BVCUSWRU.js} +2 -2
  42. package/chunks/{de-MNR4SMAI.js → de-V4IE2OOZ.js} +3 -3
  43. package/chunks/{dist-RRYNPBOE.js → dist-4L54HRX2.js} +2 -2
  44. package/chunks/{dist-WP4AH3VK.js → dist-BXDUQ2QY.js} +1 -1
  45. package/chunks/{dist-M6GFCZ7S.js → dist-MN2PDDPR.js} +1 -1
  46. package/chunks/{edit-VNAZBIZR.js → edit-CBM5NDVK.js} +28 -18
  47. package/chunks/{en-NRN4QBAT.js → en-HGJ2SPLM.js} +5 -3
  48. package/chunks/{enter-worktree-FOF5YZIV.js → enter-worktree-XABKPLO6.js} +41 -17
  49. package/chunks/{exit-worktree-Y6QVAO3C.js → exit-worktree-56MN2PCL.js} +41 -17
  50. package/chunks/{exitPlanMode-QZKO7GH7.js → exitPlanMode-YDNPCSCJ.js} +15 -17
  51. package/chunks/{fr-OFJFHLCR.js → fr-CJULI7ZX.js} +3 -3
  52. package/chunks/{geminiContentGenerator-DYHZPKJX.js → geminiContentGenerator-ZGPNBFDS.js} +3 -3
  53. package/chunks/{glob-G7XATELV.js → glob-ZHA35VO5.js} +15 -17
  54. package/chunks/{grep-4SETMY47.js → grep-RV6V6T52.js} +15 -17
  55. package/chunks/{ja-V6OQ6VL7.js → ja-L7CHRQEW.js} +3 -3
  56. package/chunks/{keychain-token-storage-DMFP5IJM.js → keychain-token-storage-335UOLJ6.js} +2 -2
  57. package/chunks/{ls-SUILOZZB.js → ls-7HD6XG3V.js} +3 -3
  58. package/chunks/{lsp-6TQBWVMZ.js → lsp-ZZSFCIWD.js} +2 -2
  59. package/chunks/{monitor-JTLJBJ7H.js → monitor-5G2OBGE5.js} +27 -17
  60. package/chunks/notebook-edit-XUBTCT6L.js +756 -0
  61. package/chunks/{openaiContentGenerator-3H7XOZBW.js → openaiContentGenerator-POYAZQ6I.js} +12 -11
  62. package/chunks/{pt-ZLE6SA4A.js → pt-M6JULLEQ.js} +3 -3
  63. package/chunks/{qwenContentGenerator-FAU3QPYO.js → qwenContentGenerator-2E4H56DK.js} +17 -19
  64. package/chunks/{qwenOAuth2-JSQ7EPR3.js → qwenOAuth2-EEJGROP7.js} +9 -3
  65. package/chunks/{read-file-WWUQVNCZ.js → read-file-3JIOOXFT.js} +7 -8
  66. package/chunks/{ripGrep-WCOAIWL6.js → ripGrep-LEI3L6PM.js} +15 -17
  67. package/chunks/{ru-A4OHIUNN.js → ru-QILM4HBC.js} +3 -3
  68. package/chunks/{send-message-Q2JRAC3J.js → send-message-ULK4MQXJ.js} +23 -2
  69. package/chunks/{serve-VJEEEXA6.js → serve-CFVRMD4W.js} +8500 -1663
  70. package/chunks/{shell-IAOKGIJ6.js → shell-3B5DZ437.js} +15 -17
  71. package/chunks/{skill-NHW6222K.js → skill-STSZUBXR.js} +23 -10
  72. package/chunks/{src-OWV5HVQQ.js → src-ROFXAPEP.js} +211 -19
  73. package/chunks/{syntheticOutput-S4DRGMQM.js → syntheticOutput-IS2X5OZ2.js} +3 -3
  74. package/chunks/{task-stop-7THHVAQS.js → task-stop-7QSJGSSP.js} +2 -2
  75. package/chunks/{todoWrite-WKUGUTPX.js → todoWrite-7CVACFUX.js} +3 -3
  76. package/chunks/{tool-search-MSJ6SXLI.js → tool-search-ARWOD3GD.js} +7 -8
  77. package/chunks/{web-fetch-OZE6ZQUF.js → web-fetch-ENQ2I5JA.js} +7 -4
  78. package/chunks/{write-file-RKCENFZ5.js → write-file-6MRT7TEW.js} +25 -18
  79. package/chunks/{zh-RN3JULHO.js → zh-PWL2NKY3.js} +5 -3
  80. package/chunks/{zh-TW-XZEHEV5S.js → zh-TW-S3YGWICZ.js} +5 -3
  81. package/cli.js +58070 -75930
  82. package/locales/ca.js +4 -5
  83. package/locales/de.js +4 -5
  84. package/locales/en.js +8 -5
  85. package/locales/fr.js +4 -5
  86. package/locales/ja.js +4 -5
  87. package/locales/pt.js +4 -5
  88. package/locales/ru.js +4 -5
  89. package/locales/zh-TW.js +6 -4
  90. package/locales/zh.js +6 -4
  91. package/package.json +2 -2
  92. package/chunks/chunk-5P5XGNYH.js +0 -93
  93. package/chunks/chunk-SYCJMSIJ.js +0 -82
  94. package/chunks/chunk-Y6Z2O3WR.js +0 -33
@@ -134,7 +134,7 @@ Subagents are configured using Markdown files with YAML frontmatter. This format
134
134
  ---
135
135
  name: agent-name
136
136
  description: Brief description of when and how to use this agent
137
- model: inherit # Optional: inherit or model-id
137
+ model: inherit # Optional: inherit, fast, modelId, or authType:modelId
138
138
  approvalMode: auto-edit # Optional: default, plan, auto-edit, yolo
139
139
  tools: # Optional: allowlist of tools
140
140
  - tool1
@@ -151,10 +151,48 @@ Multiple paragraphs are supported.
151
151
 
152
152
  Use the optional `model` frontmatter field to control which model a subagent uses:
153
153
 
154
- - `inherit`: Use the same model as the main conversation
155
- - Omit the field: Same as `inherit`
156
- - `glm-5`: Use that model ID with the main conversation's auth type
157
- - `openai:gpt-4o`: Use a different provider (resolves credentials from env vars)
154
+ - `inherit`: Use the same model as the main conversation.
155
+ - Omit the field: Same as `inherit`.
156
+ - `fast`: Use the configured `fastModel`. If no valid fast model is configured,
157
+ the subagent falls back to `inherit`.
158
+ - `glm-5`: Use that model ID. Qwen Code first checks the main conversation's
159
+ auth type; if the model is not available there, it can resolve the model from
160
+ another configured provider.
161
+ - `openai:gpt-4o`: Use an explicit provider and model ID. This is useful when a
162
+ subagent should run on a model registered under a different auth type from the
163
+ main conversation.
164
+
165
+ For example:
166
+
167
+ ```
168
+ ---
169
+ name: fast-reviewer
170
+ description: Reviews small diffs with the configured fast model
171
+ model: fast
172
+ tools:
173
+ - read_file
174
+ - grep_search
175
+ ---
176
+ ```
177
+
178
+ ```
179
+ ---
180
+ name: openai-researcher
181
+ description: Uses an OpenAI-compatible provider for research tasks
182
+ model: openai:gpt-4o
183
+ tools:
184
+ - read_file
185
+ - grep_search
186
+ - glob
187
+ ---
188
+ ```
189
+
190
+ The `fast` selector uses the same `fastModel` setting configured in
191
+ `settings.json` or with `/model --fast`. That setting may itself refer to a
192
+ model under another configured auth type, such as `openai:deepseek-v4-flash`.
193
+ When the selector resolves to another auth type, Qwen Code creates a dedicated
194
+ runtime provider for that subagent request and sends the provider only the bare
195
+ model ID.
158
196
 
159
197
  #### Permission Mode
160
198
 
@@ -620,6 +658,10 @@ Always follow these standards:
620
658
 
621
659
  - **Tool Restrictions**: Use `tools` to limit which tools a subagent can access, or `disallowedTools` to block specific tools while inheriting everything else
622
660
  - **Permission Mode**: Subagents inherit their parent's permission mode by default. Plan-mode sessions cannot escalate to auto-edit through delegated agents. Privileged modes (auto-edit, yolo) are blocked in untrusted folders.
661
+ - **Provider Selection**: A subagent with `model: authType:modelId`, or
662
+ `model: fast` where `fastModel` resolves to another auth type, sends that
663
+ subagent's model requests to the selected provider. Make sure that provider is
664
+ appropriate for the subagent's task and data.
623
665
  - **Sandboxing**: All tool execution follows the same security model as direct tool use
624
666
  - **Audit Trail**: All subagent actions are logged and visible in real time
625
667
  - **Access Control**: Project and user-level separation provides appropriate boundaries
@@ -12,6 +12,7 @@ Run Qwen Code as a local HTTP daemon so multiple clients (IDE plugins, web UIs,
12
12
  - **Reconnect-safe streaming** — SSE with `Last-Event-ID` reconnect lets a client drop and pick up exactly where it left off (within the ring's replay window).
13
13
  - **First-responder permissions** — when the agent asks for permission to run a tool, every connected client sees the request; whichever client answers first wins.
14
14
  - **One daemon, one workspace** — each `qwen serve` process binds to exactly one workspace at boot (per [#3803](https://github.com/QwenLM/qwen-code/issues/3803) §02). Multi-workspace deployments run one daemon per workspace on separate ports (or behind an orchestrator).
15
+ - **Remote runtime control** ([#4175](https://github.com/QwenLM/qwen-code/issues/4175) PR 17) — change a session's approval mode (`POST /session/:id/approval-mode`), toggle a tool per workspace (`POST /workspace/tools/:name/enable`), scaffold an empty `QWEN.md` (`POST /workspace/init`, mechanical only — does NOT call the model; for AI-fill, follow up with `POST /session/:id/prompt`), or restart a single MCP server with a budget pre-check (`POST /workspace/mcp/:server/restart`). All four are strict-gated — configure `--token` first.
15
16
 
16
17
  ## Quickstart
17
18
 
@@ -38,6 +39,53 @@ curl http://127.0.0.1:4170/capabilities
38
39
 
39
40
  The `workspaceCwd` field surfaces the bound workspace so clients can pre-flight check + omit `cwd` on `POST /session`.
40
41
 
42
+ The daemon also exposes read-only runtime snapshots for client UIs:
43
+ `GET /workspace/mcp`, `GET /workspace/skills`, `GET /workspace/providers`,
44
+ `GET /workspace/env`, `GET /workspace/preflight`,
45
+ `GET /session/:id/context`, and `GET /session/:id/supported-commands`.
46
+
47
+ `GET /workspace/mcp`, `GET /workspace/skills`, and `GET /workspace/providers`
48
+ report the live ACP runtime and do not start the ACP child when idle; an
49
+ idle daemon returns `initialized: false` with an empty snapshot. Once a
50
+ session is alive they switch to `initialized: true` and surface the real
51
+ state.
52
+
53
+ `GET /workspace/env` and `GET /workspace/preflight` always answer with
54
+ `initialized: true` regardless of ACP state. `env` never consults ACP
55
+ (daemon-process info only); `preflight` answers daemon-level cells from
56
+ `process.*` and emits `status: 'not_started'` placeholders for ACP-level
57
+ cells when the child is idle.
58
+
59
+ `GET /workspace/env` reports the daemon process's runtime, platform, sandbox,
60
+ proxy, and the **presence** (never the value) of whitelisted secret env vars
61
+ such as `OPENAI_API_KEY`. Proxy URLs are stripped of credentials and reduced
62
+ to `host:port` before they hit the wire. The route always answers from the
63
+ daemon process directly and never spawns an ACP child.
64
+
65
+ `GET /workspace/preflight` returns a list of readiness checks. **Daemon-level
66
+ cells** (Node version, CLI entry, workspace directory, ripgrep, git, npm)
67
+ always render. **ACP-level cells** (auth, MCP discovery, skills, providers,
68
+ tool registry, egress) require a live ACP child — when the daemon is idle
69
+ they emit `status: 'not_started'` placeholders rather than spawning ACP just
70
+ to populate them. Failures map to a closed `errorKind` enum (`missing_binary`,
71
+ `auth_env_error`, `init_timeout`, `protocol_error`, `missing_file`,
72
+ `parse_error`, `blocked_egress`) so client UIs can render structured
73
+ remediation.
74
+
75
+ The daemon also exposes workspace file helpers:
76
+
77
+ - `GET /file` reads text files and returns a raw-byte `sha256:<hex>` hash.
78
+ - `GET /file/bytes` reads bounded raw byte windows and returns base64 content.
79
+ - `POST /file/write` creates or replaces text files.
80
+ - `POST /file/edit` applies one exact text replacement.
81
+
82
+ Write/edit are **strict mutation routes**: even on loopback they require a
83
+ configured bearer token, otherwise they return `token_required`. Replacements
84
+ and edits require the latest `expectedHash` from `GET /file` (or a full-window
85
+ `GET /file/bytes`). `create` never overwrites. Explicit writes to ignored paths
86
+ are allowed but audited. Binary writes, delete/move/mkdir, and recursive parent
87
+ creation are not part of this surface.
88
+
41
89
  ### 3. Open a session
42
90
 
43
91
  ```bash
@@ -97,6 +145,17 @@ qwen serve --hostname 0.0.0.0 --port 4170
97
145
 
98
146
  Clients then send `Authorization: Bearer $QWEN_SERVER_TOKEN` on every request. `/health` is exempted **only on loopback binds** so k8s/Compose liveness probes inside the pod (where the daemon listens on `127.0.0.1`) don't need credentials. On non-loopback binds (`--hostname 0.0.0.0` etc.) `/health` requires the token like every other route — otherwise an attacker can probe arbitrary addresses to confirm the daemon's existence. Use `/capabilities` to verify your token is correct end-to-end (it always requires auth):
99
147
 
148
+ > **Hardened loopback (`--require-auth`).** The default loopback no-token behavior is fine for a single-user laptop but unsafe on shared dev hosts, CI runners, or multi-tenant workstations where any local user can `curl 127.0.0.1:4170`. Pass `--require-auth` to make the bearer token mandatory on every route — including `/health` and `/capabilities` — even when bound to `127.0.0.1`. Boot fails without a token. With the flag on, an **unauthenticated** client can't read `/capabilities` to discover that auth is required; the discovery surface is the 401 response body itself. Once authenticated, the `caps.features.require_auth` tag is a post-auth confirmation that the deployment is hardened (useful for audit / compliance UIs):
149
+ >
150
+ > ```bash
151
+ > qwen serve --require-auth --token "$(openssl rand -hex 32)"
152
+ > # → /health, /capabilities, /session, … all require Authorization: Bearer …
153
+ > curl http://127.0.0.1:4170/health
154
+ > # → 401
155
+ > curl -H "Authorization: Bearer $TOKEN" http://127.0.0.1:4170/capabilities | jq '.features | index("require_auth")'
156
+ > # → 13 (or whatever index — non-null after authenticating means the tag is present)
157
+ > ```
158
+
100
159
  ```bash
101
160
  curl -H "Authorization: Bearer $QWEN_SERVER_TOKEN" http://your-host:4170/capabilities
102
161
  # → {"v":1,"mode":"http-bridge","features":[...],"modelServices":[],"workspaceCwd":"/path/to/your-project"}
@@ -107,15 +166,19 @@ The token comparison is constant-time (SHA-256 + `crypto.timingSafeEqual`); 401
107
166
 
108
167
  ## CLI flags
109
168
 
110
- | Flag | Default | Purpose |
111
- | ----------------------- | --------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
112
- | `--port <n>` | `4170` | TCP port. `0` = OS-assigned ephemeral port. |
113
- | `--hostname <addr>` | `127.0.0.1` | Bind interface. Anything beyond loopback requires a token. |
114
- | `--token <str>` | — | Bearer token. Falls back to `QWEN_SERVER_TOKEN` env var (with leading/trailing whitespace stripped — handy for `$(cat token.txt)`). |
115
- | `--max-sessions <n>` | `20` | Cap on concurrent live sessions. New `POST /session` requests that would spawn a fresh child return `503` (with `Retry-After: 5`) when the cap is hit; attaches to existing sessions are NOT counted. Set to `0` to disable. Sized for single-user / small-team usage; raise it if your deployment has the RAM/FD headroom (~30–50 MB per session). |
116
- | `--workspace <path>` | `process.cwd()` | Absolute workspace path this daemon binds to (per [#3803](https://github.com/QwenLM/qwen-code/issues/3803) §02 1 daemon = 1 workspace). `POST /session` requests with a mismatched `cwd` return `400 workspace_mismatch`. For multi-workspace deployments, run one `qwen serve` per workspace on separate ports. |
117
- | `--max-connections <n>` | `256` | Listener-level TCP connection cap (`server.maxConnections`). Bounds raw socket count irrespective of session count slow / phantom SSE clients get rejected at accept time once full. Raise alongside `--max-sessions` if your deployment expects many SSE subscribers per session. |
118
- | `--http-bridge` | `true` | Stage 1 mode: one `qwen --acp` child per daemon (bound to one workspace at boot, per [#3803](https://github.com/QwenLM/qwen-code/issues/3803) §02); N sessions multiplex onto that child via ACP `newSession()`. Stage 2 native in-process becomes available later. |
169
+ | Flag | Default | Purpose |
170
+ | ------------------------- | --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
171
+ | `--port <n>` | `4170` | TCP port. `0` = OS-assigned ephemeral port. |
172
+ | `--hostname <addr>` | `127.0.0.1` | Bind interface. Anything beyond loopback requires a token. |
173
+ | `--token <str>` | — | Bearer token. Falls back to `QWEN_SERVER_TOKEN` env var (with leading/trailing whitespace stripped — handy for `$(cat token.txt)`). |
174
+ | `--require-auth` | `false` | Refuse to start without a bearer token, even on loopback. Hardens the `127.0.0.1` developer default for shared dev hosts / CI runners / multi-tenant workstations where any local user can hit the listener. Boots only with `--token` or `QWEN_SERVER_TOKEN` set; gates `/health` behind the bearer too. |
175
+ | `--max-sessions <n>` | `20` | Cap on concurrent live sessions. New `POST /session` requests that would spawn a fresh child return `503` (with `Retry-After: 5`) when the cap is hit; attaches to existing sessions are NOT counted. Set to `0` to disable. Sized for single-user / small-team usage; raise it if your deployment has the RAM/FD headroom (~30–50 MB per session). |
176
+ | `--workspace <path>` | `process.cwd()` | Absolute workspace path this daemon binds to (per [#3803](https://github.com/QwenLM/qwen-code/issues/3803) §02 — 1 daemon = 1 workspace). `POST /session` requests with a mismatched `cwd` return `400 workspace_mismatch`. For multi-workspace deployments, run one `qwen serve` per workspace on separate ports. |
177
+ | `--max-connections <n>` | `256` | Listener-level TCP connection cap (`server.maxConnections`). Bounds raw socket count irrespective of session count — slow / phantom SSE clients get rejected at accept time once full. Raise alongside `--max-sessions` if your deployment expects many SSE subscribers per session. |
178
+ | `--event-ring-size <n>` | `8000` | Per-session SSE replay ring depth (#3803 §02 target). Sets the backlog available to `GET /session/:id/events` with `Last-Event-ID: N`. Larger = more reconnect headroom at the cost of a few hundred KB extra RAM per session. SDK clients can additionally request a larger per-subscriber backlog cap on a specific subscription via `?maxQueued=N` (range `[16, 2048]`, default 256). Daemons also emit a non-terminal `slow_client_warning` SSE frame at 75% queue fill so clients can drain / reconnect before getting evicted. Pre-flight `caps.features.slow_client_warning`. |
179
+ | `--mcp-client-budget <n>` | — | Positive integer cap on live MCP clients **per ACP session** (issue [#4175](https://github.com/QwenLM/qwen-code/issues/4175) PR 14 v1; PR 23 graduates this to per-workspace via the shared MCP pool). Combine with `--mcp-budget-mode`. When unset, no accounting-driven enforcement (but `GET /workspace/mcp` still reports `clientCount`). Distinct from claude-code's `MCP_SERVER_CONNECTION_BATCH_SIZE` which gates startup concurrency, not the total client count. Pre-flight `caps.features.mcp_guardrails`. |
180
+ | `--mcp-budget-mode <m>` | `warn` / `off` | How `--mcp-client-budget` is enforced. `warn` (default when budget set): no refusal, snapshot's `budgets[0].status` flips to `warning` at ≥75% of budget. `enforce`: connects past the cap are refused, per-server cell shows `disabledReason: 'budget'`, deterministic by `mcpServers` declaration order. `off` (default when budget unset): pure observability. Boot rejects `enforce` without a budget. |
181
+ | `--http-bridge` | `true` | Stage 1 mode: one `qwen --acp` child per daemon (bound to one workspace at boot, per [#3803](https://github.com/QwenLM/qwen-code/issues/3803) §02); N sessions multiplex onto that child via ACP `newSession()`. Stage 2 native in-process becomes available later. |
119
182
 
120
183
  > **Sizing the load knobs.** `--max-sessions` is the **new-child** cap.
121
184
  > Three other layers also limit load — when sizing for a high-concurrency
@@ -135,6 +198,20 @@ The token comparison is constant-time (SHA-256 + `crypto.timingSafeEqual`); 401
135
198
  > sizing assumes single-user / small-team load; raise progressively
136
199
  > (and watch RSS) for multi-tenant deployments.
137
200
 
201
+ > **MCP client guardrails (issue [#4175](https://github.com/QwenLM/qwen-code/issues/4175) PR 14).** A workspace declaring 30 MCP servers in `mcpServers` will start 30 clients with no upstream cap unless you set one. `--mcp-client-budget=N` caps the live MCP client count; `--mcp-budget-mode={enforce,warn,off}` chooses the behavior. Default is `warn` when a budget is set (snapshot surfaces the warning but no client is refused — useful for measuring real-world fanout before flipping on enforcement). Refused servers under `enforce` mode get `disabledReason: 'budget'` on their per-server cell, and the `budgets[0]` cell shows `status: 'error'` + `errorKind: 'budget_exhausted'`. Slot reservation is by server name and survives reconnects / discovery timeouts — a refused server can't take a slot from a healthy one.
202
+ >
203
+ > ⚠️ **v1 scope: per-session, not per-workspace.** Each ACP session inside the daemon has its own `Config`/`McpClientManager` (created via `newSessionConfig` per session). The budget caps live MCP clients **per session**, not aggregated across all sessions in the workspace. Snapshot at `GET /workspace/mcp` reflects the bootstrap session's view (the cell carries `scope: 'session'` for honesty). If you run 5 concurrent ACP sessions with `--mcp-client-budget=10`, you may have up to 50 live MCP clients across the daemon — the cap holds per session. **Wave 5 PR 23 (shared MCP pool)** introduces a workspace-scoped manager and graduates this to true per-workspace enforcement.
204
+ >
205
+ > ```sh
206
+ > qwen serve --mcp-client-budget=10 --mcp-budget-mode=warn
207
+ > # later, after telemetry shows your real-world distribution:
208
+ > qwen serve --mcp-client-budget=10 --mcp-budget-mode=enforce
209
+ > ```
210
+ >
211
+ > This is **not** the same as claude-code's `MCP_SERVER_CONNECTION_BATCH_SIZE` (which gates startup concurrency); they're orthogonal. PR 23 will add a real shared MCP pool (a `scope: 'workspace'` cell in `budgets[]` alongside the per-session cell); PR 14 v1 is the in-process counter + soft enforcement on the existing per-session manager.
212
+ >
213
+ > **Push events (issue [#4175](https://github.com/QwenLM/qwen-code/issues/4175) PR 14b).** SDK clients subscribed to `GET /session/:id/events` receive typed frames when budget thresholds cross — `mcp_budget_warning` (synthetic, fires once per upward 75% crossing with hysteresis re-arm at 37.5%, advertised via `mcp_guardrail_events`) and `mcp_child_refused_batch` (coalesced once per discovery pass under `enforce` mode; length-1 from `readResource` lazy-spawn refusal). The snapshot at `GET /workspace/mcp` is still the source-of-truth for state-after-reconnect; events are change-edges. Useful when dashboarding in real-time without polling.
214
+
138
215
  ## Default deployment threat model
139
216
 
140
217
  - **127.0.0.1 only** — loopback bind, no auth needed.
@@ -227,7 +304,7 @@ Stage 1's contract is sized for prototyping. Per [#3889 chiga0 downstream-consum
227
304
 
228
305
  **Reliability baseline:**
229
306
 
230
- 3. **Client-initiated heartbeat path** — distinguish "agent thinking" from "daemon dead" without waiting for the 15s server heartbeat.
307
+ 3. ~~**Client-initiated heartbeat path**~~ — shipped via [#4175](https://github.com/QwenLM/qwen-code/issues/4175) PR 9. `POST /session/:id/heartbeat` records last-seen timestamps on the daemon (capability tag `client_heartbeat`); SDK helpers are `DaemonClient.heartbeat()` / `DaemonSessionClient.heartbeat()`.
231
308
  4. **`permission_already_resolved` event** when a vote loses the first-responder race — currently UIs have to infer state from a `404`.
232
309
  5. **Larger / per-session-configurable replay ring** — default 4000 covers short drops; mobile / chatty-turn workloads need 8000+ or per-session config.
233
310
  6. **`slow_client_warning` event before `client_evicted`** — soft backpressure so well-behaved slow clients can self-throttle (trim render depth, drop chunks) before being terminated.
@@ -300,6 +377,53 @@ The bridge keeps **one channel per daemon** (one daemon per workspace, per §02)
300
377
 
301
378
  **Peer agents (Cursor / Continue / Claude Code / OpenCode / Gemini CLI) all do single-process multi-session.** qwen-code matches them at the agent layer; the Stage 1 bridge in this PR makes the same architecture visible over HTTP.
302
379
 
380
+ ## Logging in to a remote daemon (issue #4175 PR 21)
381
+
382
+ When the daemon runs on a remote pod (no shared display with you), you can still log in to a Qwen account by triggering an OAuth device flow over HTTP. The daemon polls the IdP itself; your job is just to open a URL on whatever device has a browser.
383
+
384
+ ```bash
385
+ # 1. Start a flow. The daemon contacts the IdP, returns a code + URL.
386
+ curl -X POST http://127.0.0.1:4170/workspace/auth/device-flow \
387
+ -H "Authorization: Bearer $TOKEN" \
388
+ -H "Content-Type: application/json" \
389
+ -d '{"providerId":"qwen-oauth"}'
390
+ # → 201 {
391
+ # "deviceFlowId": "fa07c61b-…",
392
+ # "userCode": "USER-1",
393
+ # "verificationUri": "https://chat.qwen.ai/api/v1/oauth2/device",
394
+ # "verificationUriComplete": "https://chat.qwen.ai/...?user_code=USER-1",
395
+ # "expiresAt": 1700000600000,
396
+ # "intervalMs": 5000,
397
+ # "attached": false
398
+ # }
399
+
400
+ # 2. Visit the URL on your phone / laptop, enter the user code.
401
+ # 3. Poll for completion (or subscribe to SSE for the auth_device_flow_authorized event):
402
+ curl http://127.0.0.1:4170/workspace/auth/device-flow/fa07c61b-… \
403
+ -H "Authorization: Bearer $TOKEN"
404
+ # → status transitions: pending → authorized
405
+ ```
406
+
407
+ The TypeScript SDK wraps both steps into a single helper:
408
+
409
+ ```ts
410
+ import { DaemonClient } from '@qwen-code/sdk';
411
+
412
+ const client = new DaemonClient({ baseUrl, token });
413
+ const flow = await client.auth.start({ providerId: 'qwen-oauth' });
414
+ console.log(`Open ${flow.verificationUri}\nCode: ${flow.userCode}`);
415
+ const result = await flow.awaitCompletion({ signal: abortCtrl.signal });
416
+ // result.status === 'authorized'
417
+ ```
418
+
419
+ **The daemon never opens a browser on your behalf.** Even when running locally, the daemon stays passive — it returns the URL and lets the SDK / user choose where to open it. This is intentional: a daemon on a headless pod that called `xdg-open` would silently fail, masking the actual auth surface. Mirror `gh auth login`'s "Press Enter to open browser" UX in your client.
420
+
421
+ **`--require-auth` and dev convenience.** The device-flow routes use the strict mutation gate (PR 15), which means a token-less loopback default returns `401 token_required`. Locally, the simplest way around this during development is `qwen serve --token=dev-token`; you don't need `--require-auth` unless you're hardening the loopback default.
422
+
423
+ **Cross-daemon limitation.** `oauth_creds.json` is daemon-shared (`~/.qwen/oauth_creds.json`), so a successful login in daemon A is automatically picked up by daemon B's next token refresh — but daemon B's SDK clients won't receive the `auth_device_flow_authorized` event (events are per-daemon).
424
+
425
+ **Cross-client take-over.** Two SDK clients on the same daemon that both `POST /workspace/auth/device-flow` for the same provider get the per-provider singleton: the first call starts a fresh IdP request and returns `attached: false`; the second call returns the EXISTING in-flight entry with `attached: true`. The take-over is recorded on the audit trail (under the second client's `X-Qwen-Client-Id`) but does NOT emit a separate event — both clients eventually observe the SAME `auth_device_flow_authorized` once the user finishes the IdP page. If your UI distinguishes "I started this" from "someone else's flow I joined", branch on the `attached` field returned by `start()`.
426
+
303
427
  ## What's next
304
428
 
305
429
  - **Build a client?** See the [DaemonClient TypeScript quickstart](../developers/examples/daemon-client-quickstart.md) and the [HTTP protocol reference](../developers/qwen-serve-protocol.md).
@@ -18,8 +18,10 @@ You are an expert code reviewer. Your job is to review code changes and provide
18
18
 
19
19
  **Critical rules (most commonly violated — read these first):**
20
20
 
21
- 1. **Match the language of the PR.** If the PR is in English, ALL your output (terminal + PR comments) MUST be in English. If in Chinese, use Chinese. Do NOT switch languages. For **local reviews** (no PR), if the system prompt includes an output language preference, use that language; otherwise follow the user's input language.
22
- 2. **Step 9: use Create Review API** with `comments` array for inline comments. Do NOT use `gh api .../pulls/.../comments` to post individual comments. See Step 9 for the JSON format.
21
+ 1. **For same-repo PR reviews (PR number, or URL whose owner/repo matches a local remote), the worktree is MANDATORY.** After argument parsing and remote detection (early in Step 1), the first command that touches code state MUST be `qwen review fetch-pr`. Do NOT use `gh pr checkout`, `git checkout <branch>`, `git switch`, `git pull`, `git reset --hard`, or any other command that modifies the user's current HEAD or working tree. After `fetch-pr` returns, ALL subsequent reads, linters, builds, tests, and edits MUST happen inside the `worktreePath` it created. Violating this contaminates the user's local branch state. (Cross-repo PRs with no matching remote use lightweight mode and do NOT create a worktree — see Step 1.)
22
+ 2. **If `--comment` was specified, Step 8 (Autofix) is SKIPPED entirely.** `--comment` means the user wants inline PR comments posted, not code mutations. Do not ask "Apply auto-fixes? (y/n)" — go straight from Step 7 to Step 9.
23
+ 3. **Match the language of the PR.** If the PR is in English, ALL your output (terminal + PR comments) MUST be in English. If in Chinese, use Chinese. Do NOT switch languages. For **local reviews** (no PR), if the system prompt includes an output language preference, use that language; otherwise follow the user's input language.
24
+ 4. **Step 9: use Create Review API** with `comments` array for inline comments. Do NOT use `gh api .../pulls/.../comments` to post individual comments. See Step 9 for the JSON format.
23
25
 
24
26
  **Design philosophy: Silence is better than noise.** Every comment you make should be worth the reader's time. If you're unsure whether something is a problem, DO NOT MENTION IT. Low-quality feedback causes "cry wolf" fatigue — developers stop reading all AI comments and miss real issues.
25
27
 
@@ -44,6 +46,8 @@ Based on the remaining arguments:
44
46
  - If both diffs are empty, inform the user there are no changes to review and stop here — do not proceed to the review agents
45
47
 
46
48
  - **PR number or same-repo URL** (e.g., `123` or a URL whose owner/repo matches the current repo — cross-repo URLs are handled by the lightweight mode above):
49
+
50
+ > ⚠️ **MANDATORY worktree flow.** Do NOT use `gh pr checkout`, `git checkout <branch>`, `git switch`, `git pull`, `git reset --hard`, or any other command that changes the user's current HEAD or working tree contents. The ONLY entry point is `qwen review fetch-pr` (below) — it isolates the PR into an ephemeral worktree so the user's local state is never touched. After it returns, every subsequent command in Steps 2-8 MUST operate inside the returned `worktreePath` (e.g. `cd <worktreePath>` first, or pass the path as a `--cwd` / explicit argument).
47
51
  - **Run `qwen review fetch-pr`** to set up the working state in one pass — it cleans any stale worktree, fetches the PR HEAD into `qwen-review/pr-<n>`, queries `gh pr view` for metadata, and creates an ephemeral worktree at `.qwen/tmp/review-pr-<n>`:
48
52
 
49
53
  ```bash
@@ -442,7 +446,12 @@ If the user responds with "post comments" (or similar intent like "yes post them
442
446
 
443
447
  ## Step 8: Autofix
444
448
 
445
- If there are **Critical** or **Suggestion** findings with clear, unambiguous fixes, offer to auto-apply them.
449
+ **Skip this entire step (do not even ask) if EITHER of the following is true:**
450
+
451
+ - `--comment` was specified in the arguments — the user explicitly asked for inline PR comments, not code edits. Go straight to Step 9.
452
+ - The review target is a cross-repo PR running in lightweight mode (no local files to edit).
453
+
454
+ Otherwise, if there are **Critical** or **Suggestion** findings with clear, unambiguous fixes, offer to auto-apply them. (If there are no such findings, this step is also a no-op — fall through to Step 9.)
446
455
 
447
456
  1. Count the number of auto-fixable findings (those with concrete suggested fixes that can be expressed as file edits).
448
457
  2. If there are fixable findings, ask the user:
@@ -7,32 +7,30 @@ import {
7
7
  hasRebuiltToolRegistry,
8
8
  rebuildToolRegistryOnOverride,
9
9
  resolveSubagentApprovalMode
10
- } from "./chunk-AJSOD5IR.js";
11
- import "./chunk-5P5XGNYH.js";
10
+ } from "./chunk-3T4ZT63H.js";
12
11
  import "./chunk-K5PGHDBN.js";
13
12
  import "./chunk-O4PICXES.js";
14
13
  import "./chunk-TW522KN6.js";
15
14
  import "./chunk-MLZQVCF3.js";
16
- import "./chunk-JMZQICAL.js";
17
- import "./chunk-5QQ5FGTU.js";
18
- import "./chunk-B7ZL7HUA.js";
15
+ import "./chunk-CAVZVZX6.js";
16
+ import "./chunk-G7YTSRES.js";
17
+ import "./chunk-4AOCVI6J.js";
19
18
  import "./chunk-77WXWU44.js";
20
- import "./chunk-OCC4MZRS.js";
21
- import "./chunk-CAWKL3UC.js";
22
- import "./chunk-XLQ4E5PS.js";
23
- import "./chunk-SYCJMSIJ.js";
19
+ import "./chunk-F23NCRJ2.js";
20
+ import "./chunk-CSWBPY3P.js";
21
+ import "./chunk-WCZWAKFG.js";
24
22
  import "./chunk-UWCTAVOD.js";
25
23
  import "./chunk-OFEVLU4C.js";
26
- import "./chunk-CM2IESUE.js";
27
- import "./chunk-UXW7MYAW.js";
28
- import "./chunk-G27O2LD2.js";
24
+ import "./chunk-PR4T27R7.js";
25
+ import "./chunk-MAY32HXD.js";
26
+ import "./chunk-D5NTAHYL.js";
29
27
  import "./chunk-T4VD6OJ4.js";
30
28
  import "./chunk-RDYWTWEM.js";
31
- import "./chunk-TPGOGCWM.js";
32
- import "./chunk-FYMSCRHM.js";
33
- import "./chunk-SQNQIOD5.js";
34
- import "./chunk-FKVKVE6N.js";
35
- import "./chunk-GJXIKCKL.js";
29
+ import "./chunk-YJLGXDQJ.js";
30
+ import "./chunk-PVVL5Q3W.js";
31
+ import "./chunk-GGNTZ2NH.js";
32
+ import "./chunk-KXZ4TJB4.js";
33
+ import "./chunk-XP27SJMH.js";
36
34
  import "./chunk-E7E2MFYM.js";
37
35
  import "./chunk-ZERZSAZL.js";
38
36
  import "./chunk-QN5NZ3UQ.js";
@@ -6,7 +6,7 @@ import {
6
6
  } from "./chunk-KQIKOTQJ.js";
7
7
  import {
8
8
  RequestTokenizer
9
- } from "./chunk-BXNCPI75.js";
9
+ } from "./chunk-DMIMF3CG.js";
10
10
  import {
11
11
  Blob,
12
12
  File,
@@ -16,15 +16,16 @@ import {
16
16
  import {
17
17
  buildRuntimeFetchOptions,
18
18
  redactProxyError
19
- } from "./chunk-CAWKL3UC.js";
19
+ } from "./chunk-CSWBPY3P.js";
20
20
  import {
21
21
  CAPPED_DEFAULT_MAX_TOKENS,
22
22
  DEFAULT_TIMEOUT,
23
23
  convertSchema,
24
24
  hasExplicitOutputLimit,
25
+ runtimeDiagnostics,
25
26
  safeJsonParse,
26
27
  tokenLimit
27
- } from "./chunk-UXW7MYAW.js";
28
+ } from "./chunk-MAY32HXD.js";
28
29
  import {
29
30
  FinishReason,
30
31
  GenerateContentResponse
@@ -32,7 +33,7 @@ import {
32
33
  import "./chunk-RDYWTWEM.js";
33
34
  import {
34
35
  createDebugLogger
35
- } from "./chunk-GJXIKCKL.js";
36
+ } from "./chunk-XP27SJMH.js";
36
37
  import "./chunk-E7E2MFYM.js";
37
38
  import {
38
39
  require_ms
@@ -4810,6 +4811,7 @@ var AnthropicContentGenerator = class {
4810
4811
  let response;
4811
4812
  try {
4812
4813
  const anthropicRequest = await this.buildRequest(request);
4814
+ runtimeDiagnostics.recordAnthropicWireRequest(anthropicRequest);
4813
4815
  const headers = this.buildPerRequestHeaders(anthropicRequest);
4814
4816
  response = await this.client.messages.create(anthropicRequest, {
4815
4817
  signal: request.config?.abortSignal,
@@ -4827,6 +4829,7 @@ var AnthropicContentGenerator = class {
4827
4829
  ...anthropicRequest,
4828
4830
  stream: true
4829
4831
  };
4832
+ runtimeDiagnostics.recordAnthropicWireRequest(streamingRequest);
4830
4833
  let stream;
4831
4834
  try {
4832
4835
  stream = await this.client.messages.create(
@@ -6,10 +6,10 @@ import {
6
6
  BaseToolInvocation,
7
7
  ToolDisplayNames,
8
8
  ToolNames
9
- } from "./chunk-FYMSCRHM.js";
9
+ } from "./chunk-PVVL5Q3W.js";
10
10
  import {
11
11
  createDebugLogger
12
- } from "./chunk-GJXIKCKL.js";
12
+ } from "./chunk-XP27SJMH.js";
13
13
  import "./chunk-QWSRH265.js";
14
14
  import {
15
15
  init_esbuild_shims
@@ -146,7 +146,7 @@ var ca_default = {
146
146
  "Compresses the context by replacing it with a summary.": "Comprimeix el context substituint-lo per un resum.",
147
147
  "open full Qwen Code documentation in your browser": "obrir la documentaci\xF3 completa de Qwen Code al navegador",
148
148
  "Configuration not available.": "Configuraci\xF3 no disponible.",
149
- "Configure authentication information for login": "Configurar la informaci\xF3 d'autenticaci\xF3 per a iniciar sessi\xF3",
149
+ "Connect an LLM provider": "Connectar un prove\xEFdor LLM",
150
150
  "Copy the last result or code snippet to clipboard": "Copiar l'\xFAltim resultat o fragment de codi al porta-retalls",
151
151
  // ============================================================================
152
152
  // Ordres - Agents
@@ -826,8 +826,8 @@ var ca_default = {
826
826
  "Continue previous conversation": "Continuar la conversa anterior",
827
827
  "\u{1F44B} Welcome back! (Last updated: {{timeAgo}})": "\u{1F44B} Benvingut de nou! (Darrera actualitzaci\xF3: {{timeAgo}})",
828
828
  "\u{1F3AF} Overall Goal:": "\u{1F3AF} Objectiu general:",
829
- "Select Authentication Method": "Seleccioneu el m\xE8tode d'autenticaci\xF3",
830
- "You must select an auth method to proceed. Press Ctrl+C again to exit.": "Cal seleccionar un m\xE8tode d'autenticaci\xF3 per continuar. Premeu Ctrl+C de nou per sortir.",
829
+ "Connect a Provider": "Connectar un prove\xEFdor",
830
+ "You must connect a provider to proceed. Press Ctrl+C again to exit.": "Cal connectar un prove\xEFdor per continuar. Premeu Ctrl+C de nou per sortir.",
831
831
  "Terms of Services and Privacy Notice": "Termes de servei i av\xEDs de privacitat",
832
832
  "Qwen OAuth": "Qwen OAuth",
833
833
  "Discontinued \u2014 switch to Coding Plan or API Key": "Descontinuat \u2014 canvieu a Coding Plan o API Key",