@qwen-code/qwen-code 0.18.0-preview.2 → 0.18.1-nightly.20260616.a68b2e1e7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. package/bundled/loop/SKILL.md +2 -1
  2. package/bundled/qc-helper/docs/_meta.ts +1 -0
  3. package/bundled/qc-helper/docs/common-workflow.md +4 -4
  4. package/bundled/qc-helper/docs/configuration/auth.md +1 -1
  5. package/bundled/qc-helper/docs/configuration/model-providers.md +13 -6
  6. package/bundled/qc-helper/docs/configuration/settings.md +90 -89
  7. package/bundled/qc-helper/docs/features/approval-mode.md +10 -14
  8. package/bundled/qc-helper/docs/features/commands.md +33 -11
  9. package/bundled/qc-helper/docs/features/dual-output.md +37 -3
  10. package/bundled/qc-helper/docs/features/followup-suggestions.md +2 -2
  11. package/bundled/qc-helper/docs/features/skills.md +29 -3
  12. package/bundled/qc-helper/docs/features/sub-agents.md +34 -12
  13. package/bundled/qc-helper/docs/qwen-serve-deploy-local.md +221 -0
  14. package/bundled/qc-helper/docs/qwen-serve.md +250 -31
  15. package/bundled/qc-helper/docs/reference/keyboard-shortcuts.md +30 -30
  16. package/chunks/{agent-QB7TZ4HW.js → agent-PXMT2XR5.js} +25 -24
  17. package/chunks/agent-headless-SY7VJUHV.js +51 -0
  18. package/chunks/{anthropicContentGenerator-M45EVVRM.js → anthropicContentGenerator-DCI26OQF.js} +7 -7
  19. package/chunks/{askUserQuestion-WM2KHM3K.js → askUserQuestion-NDNFGC35.js} +45 -3
  20. package/chunks/{ca-BARBRL6N.js → ca-RK4QPLIX.js} +18 -1
  21. package/chunks/{chunk-CNSMKPK6.js → chunk-26QELEL2.js} +1 -1
  22. package/chunks/{chunk-CWV3SJZS.js → chunk-3NRO6NHX.js} +2 -2
  23. package/chunks/{chunk-BNESGOSJ.js → chunk-55ZMG67I.js} +1 -1
  24. package/chunks/{chunk-2ZTWI7KH.js → chunk-6WPY6ES3.js} +62 -22
  25. package/chunks/{chunk-ZK4AMNIU.js → chunk-A3OEZT2F.js} +1294 -314
  26. package/chunks/{chunk-HXJE7VOG.js → chunk-ABRZC6FA.js} +1074 -144
  27. package/chunks/{chunk-JUGRPQAB.js → chunk-B4ZF2KSI.js} +1 -1
  28. package/chunks/chunk-BJ5HQ23U.js +178 -0
  29. package/chunks/{chunk-ICOI4E4S.js → chunk-CPVI5J2L.js} +101 -23
  30. package/chunks/{chunk-GX7VH5JQ.js → chunk-FIQECJTQ.js} +1 -1
  31. package/chunks/{chunk-QCG6KPNM.js → chunk-H4ZDM3N6.js} +18039 -11622
  32. package/chunks/{chunk-SZOEIL6S.js → chunk-H6BD2ELD.js} +1 -0
  33. package/chunks/{chunk-JXAZUMDW.js → chunk-HA2UEYZP.js} +7 -4
  34. package/chunks/{chunk-MVIVIPCU.js → chunk-IDYDPBBN.js} +361 -583
  35. package/chunks/{chunk-JVQOQ3OU.js → chunk-IQHSD7K5.js} +1 -1
  36. package/chunks/{chunk-CNHFPN7T.js → chunk-JZFEL3RB.js} +1 -1
  37. package/chunks/{chunk-UAMOBVVW.js → chunk-LXYWINWF.js} +1 -1
  38. package/chunks/{chunk-P4J26VDS.js → chunk-LYRSMKLS.js} +2 -2
  39. package/chunks/{chunk-Y7R6H6FT.js → chunk-LYSND7KR.js} +9 -4
  40. package/chunks/{chunk-AVW55ZCO.js → chunk-M5PJ5QAF.js} +37 -16
  41. package/chunks/{chunk-LR62TEET.js → chunk-NNIYWQIS.js} +1 -1
  42. package/chunks/chunk-OMX7CUOE.js +356 -0
  43. package/chunks/{chunk-HV3ZZ7G4.js → chunk-OT6JA3KQ.js} +2 -2
  44. package/chunks/{chunk-C6WMLUNB.js → chunk-QP4R5FTG.js} +1 -1
  45. package/chunks/chunk-QQDPRDVW.js +25 -0
  46. package/chunks/chunk-SFRV6BGY.js +243 -0
  47. package/chunks/{chunk-NW5QBUYO.js → chunk-TSBXGR73.js} +14 -14
  48. package/chunks/{chunk-7YKXFA3D.js → chunk-UOB6KPGG.js} +11 -11
  49. package/chunks/{chunk-USE2VQ5P.js → chunk-VU6A2OBJ.js} +41 -6
  50. package/chunks/{chunk-PAEBHDIO.js → chunk-VXHYMZXW.js} +1 -1
  51. package/chunks/{chunk-HGJPQK33.js → chunk-WPTCDQN6.js} +188 -534
  52. package/chunks/{chunk-WFVXF3OM.js → chunk-Z2Z3GUXZ.js} +1 -0
  53. package/chunks/{chunk-KC6ZMJ5X.js → chunk-ZMIBJS45.js} +1 -1
  54. package/chunks/chunk-ZOFNJQNJ.js +607 -0
  55. package/chunks/computer-use-7SEQDSHB.js +2052 -0
  56. package/chunks/contextCommand-KM5OWV65.js +53 -0
  57. package/chunks/cron-create-7CXEAJ2K.js +184 -0
  58. package/chunks/{cron-delete-ZGUXWBTG.js → cron-delete-2FQYYNQ6.js} +28 -5
  59. package/chunks/{cron-list-QNNZGMN3.js → cron-list-QCAJ73XE.js} +40 -7
  60. package/chunks/{de-YGKK2BC4.js → de-FGPM4KW5.js} +18 -1
  61. package/chunks/{devtools-IXE4UP72.js → devtools-FM6GJPYG.js} +1 -1
  62. package/chunks/{dist-R2SXPG74.js → dist-2UCAYOX7.js} +2 -2
  63. package/chunks/{dist-TE5QKMGR.js → dist-33LHH26D.js} +1 -1
  64. package/chunks/{dist-BXDUQ2QY.js → dist-KF43SZZV.js} +1 -1
  65. package/chunks/{dist-ZMQ4TXD5.js → dist-PF2IYSMD.js} +2 -2
  66. package/chunks/{edit-6UBTS2J5.js → edit-BMUKPLA7.js} +27 -28
  67. package/chunks/{en-HSQQNQUB.js → en-VP6XPGEC.js} +9 -2
  68. package/chunks/{enter-worktree-NN7LIXCM.js → enter-worktree-LXJ5WJ5A.js} +25 -24
  69. package/chunks/enterPlanMode-QWRZ54ZF.js +159 -0
  70. package/chunks/{exit-worktree-GGSS5KIE.js → exit-worktree-5HTQPNZO.js} +25 -24
  71. package/chunks/exitPlanMode-5WQAXNDA.js +743 -0
  72. package/chunks/{fr-JXBKPJKQ.js → fr-ATYBVCLT.js} +18 -1
  73. package/chunks/{geminiContentGenerator-I4H2NLJG.js → geminiContentGenerator-CAKHT5YE.js} +7 -7
  74. package/chunks/{getMachineId-bsd-F7GNPTER.js → getMachineId-bsd-4CASPIU4.js} +1 -1
  75. package/chunks/{getMachineId-darwin-T73DJL27.js → getMachineId-darwin-HPQPEMZR.js} +1 -1
  76. package/chunks/{getMachineId-linux-MKQTFPQM.js → getMachineId-linux-AUARKYHL.js} +1 -1
  77. package/chunks/{getMachineId-unsupported-MUR5KOQE.js → getMachineId-unsupported-S32ZDA2T.js} +1 -1
  78. package/chunks/{getMachineId-win-CDYFC6ZM.js → getMachineId-win-4EFLHYIJ.js} +1 -1
  79. package/chunks/{glob-OLCX57MD.js → glob-5DN6NSCD.js} +25 -24
  80. package/chunks/{grep-7HXIMDOW.js → grep-BJILOLCD.js} +37 -30
  81. package/chunks/{ja-TGPZSP2B.js → ja-W2QEA2OI.js} +18 -1
  82. package/chunks/{keychain-token-storage-LB46DAEK.js → keychain-token-storage-QSTRHKKL.js} +3 -3
  83. package/chunks/{ls-6PEZUK6O.js → ls-XVGXRYWD.js} +4 -4
  84. package/chunks/{lsp-JZSJOVT7.js → lsp-S6SHPULC.js} +3 -3
  85. package/chunks/{monitor-SQO7MVAV.js → monitor-SUEMSRN3.js} +25 -24
  86. package/chunks/{notebook-edit-72L3EBAL.js → notebook-edit-6F6Z5P6U.js} +26 -25
  87. package/chunks/{openaiContentGenerator-FTR7CDWF.js → openaiContentGenerator-DO27LL6O.js} +15 -15
  88. package/chunks/{pt-TIBG6BIO.js → pt-ZKEWJFBW.js} +18 -1
  89. package/chunks/{qwenContentGenerator-U5UFQ566.js → qwenContentGenerator-DQLGLQSH.js} +27 -26
  90. package/chunks/{qwenOAuth2-EFSECGHF.js → qwenOAuth2-KK433U33.js} +6 -5
  91. package/chunks/{read-file-UA64EEQC.js → read-file-3TBLYTOQ.js} +11 -11
  92. package/chunks/ripGrep-3INYT3QV.js +49 -0
  93. package/chunks/{ru-JBCHCK4L.js → ru-VEKTPJ74.js} +18 -1
  94. package/chunks/{scheduler-VBASHOCA.js → scheduler-23KQW6CX.js} +25 -24
  95. package/chunks/{send-message-OYJZ5TPG.js → send-message-SMNR5DBG.js} +3 -3
  96. package/chunks/{serve-A7E2OJDR.js → serve-Y5E4LKUI.js} +13164 -3840
  97. package/chunks/{shell-3NFOT6F5.js → shell-4H6XQXVY.js} +25 -24
  98. package/chunks/{skill-RA5YUREY.js → skill-SE6FECZR.js} +64 -113
  99. package/chunks/{src-NFCMARMT.js → src-76DUBH3A.js} +176 -44
  100. package/chunks/{syntheticOutput-DETQ2YM6.js → syntheticOutput-KMNF7YG6.js} +4 -4
  101. package/chunks/{task-create-Y3ZKTJIG.js → task-create-LIJHK75G.js} +8 -7
  102. package/chunks/{task-list-ONXJ3I3A.js → task-list-S4GNSILM.js} +7 -6
  103. package/chunks/{task-stop-UHDC4N5B.js → task-stop-3GBRYJHM.js} +3 -3
  104. package/chunks/{task-update-TCNOU3P5.js → task-update-F3UTVJMS.js} +21 -9
  105. package/chunks/{team-create-6SR4OVRG.js → team-create-Q5DTDDH4.js} +28 -26
  106. package/chunks/{team-delete-EJ4U4DDP.js → team-delete-54434EB7.js} +9 -6
  107. package/chunks/{todoWrite-TEYDRS5L.js → todoWrite-4ENGSBUX.js} +5 -5
  108. package/chunks/{tool-search-OD435A3X.js → tool-search-ABZMSDTU.js} +11 -11
  109. package/chunks/{web-fetch-6W67H5PO.js → web-fetch-RHZMF3MP.js} +5 -5
  110. package/chunks/workflow-NEMDQB75.js +1414 -0
  111. package/chunks/{write-file-475L5OPP.js → write-file-VAEHZPSL.js} +26 -25
  112. package/chunks/{zh-VCLWO26Y.js → zh-OIXDDQHB.js} +10 -3
  113. package/chunks/{zh-TW-G3HFHVVT.js → zh-TW-6YFNCKTA.js} +10 -3
  114. package/cli-entry.js +19 -0
  115. package/cli.js +11155 -6656
  116. package/examples/starter/QWEN.md +30 -0
  117. package/examples/starter/README.md +59 -0
  118. package/examples/starter/agents/diary.md +86 -0
  119. package/examples/starter/commands/writing/polish.md +13 -0
  120. package/examples/starter/example.ts +64 -0
  121. package/examples/starter/package.json +18 -0
  122. package/examples/starter/qwen-extension.json +12 -0
  123. package/examples/starter/skills/synonyms/SKILL.md +48 -0
  124. package/examples/starter/tsconfig.json +13 -0
  125. package/fzfWorker.js +1083 -0
  126. package/locales/ca.js +20 -2
  127. package/locales/de.js +21 -2
  128. package/locales/en.js +13 -4
  129. package/locales/fr.js +22 -2
  130. package/locales/ja.js +22 -2
  131. package/locales/pt.js +21 -2
  132. package/locales/ru.js +20 -2
  133. package/locales/zh-TW.js +11 -4
  134. package/locales/zh.js +11 -4
  135. package/package.json +5 -3
  136. package/chunks/agent-headless-APVHH7QM.js +0 -50
  137. package/chunks/chunk-AJIR24J2.js +0 -59
  138. package/chunks/chunk-SKBPNJEW.js +0 -45
  139. package/chunks/chunk-XBFVXFB2.js +0 -216
  140. package/chunks/computer-use-B7VIUI7F.js +0 -825
  141. package/chunks/contextCommand-63RZ3O5R.js +0 -52
  142. package/chunks/cron-create-FI5LJVUS.js +0 -140
  143. package/chunks/exitPlanMode-H323NHB2.js +0 -235
  144. package/chunks/ripGrep-WSYCWZVK.js +0 -48
  145. package/chunks/workflow-62DHH4EO.js +0 -708
@@ -2,6 +2,8 @@
2
2
 
3
3
  Run Qwen Code as a local HTTP daemon so multiple clients (IDE plugins, web UIs, CI scripts, custom CLIs) share one agent session over HTTP + Server-Sent Events instead of each spawning their own subprocess.
4
4
 
5
+ > **🚧 v0.16-alpha**: `qwen serve` first ships to npm in v0.16-alpha as **text-only chat / coding** with **local-only deployment**. Image / file attachments on the prompt path, containerized deployment (Docker / k8s / nginx reverse-proxy), and remote / multi-daemon hardening land in a follow-up patch when an enterprise pilot is committed. See [v0.16-alpha known limits](#v016-alpha-known-limits) for the full deferred list.
6
+
5
7
  > **Status:** Stage 1 (experimental). The protocol surface is locked at the §04 routes table from issue [#3803](https://github.com/QwenLM/qwen-code/issues/3803). Stage 1.5 (`qwen --serve` flag — TUI co-hosts the same HTTP server) and Stage 2 (in-process refactor + `mDNS`/OpenAPI/WebSocket/Prometheus polish) are immediately downstream.
6
8
  >
7
9
  > **Scope honesty:** Stage 1 is sized for **developers prototyping clients against the protocol surface** and for **local single-user / small-team collaboration**. Production-grade multi-client / long-running / network-flaky workloads (mobile companions, IM bots reaching 1000+ chats) need Stage 1.5+ guarantees that aren't in this release. See [Stage 1.5+ runtime guarantees](#stage-15-runtime-guarantees) for the full gap list and #3803 for the convergence roadmap.
@@ -12,7 +14,41 @@ Run Qwen Code as a local HTTP daemon so multiple clients (IDE plugins, web UIs,
12
14
  - **Reconnect-safe streaming** — SSE with `Last-Event-ID` reconnect lets a client drop and pick up exactly where it left off (within the ring's replay window).
13
15
  - **First-responder permissions** — when the agent asks for permission to run a tool, every connected client sees the request; whichever client answers first wins.
14
16
  - **One daemon, one workspace** — each `qwen serve` process binds to exactly one workspace at boot (per [#3803](https://github.com/QwenLM/qwen-code/issues/3803) §02). Multi-workspace deployments run one daemon per workspace on separate ports (or behind an orchestrator).
15
- - **Remote runtime control** ([#4175](https://github.com/QwenLM/qwen-code/issues/4175) PR 17) — change a session's approval mode (`POST /session/:id/approval-mode`), toggle a tool per workspace (`POST /workspace/tools/:name/enable`), scaffold an empty `QWEN.md` (`POST /workspace/init`, mechanical only — does NOT call the model; for AI-fill, follow up with `POST /session/:id/prompt`), or restart a single MCP server with a budget pre-check (`POST /workspace/mcp/:server/restart`). All four are strict-gated — configure `--token` first.
17
+ - **Remote runtime control** ([#4175](https://github.com/QwenLM/qwen-code/issues/4175) PR 17) — change a session's approval mode (`POST /session/:id/approval-mode`), toggle a tool per workspace (`POST /workspace/tools/:name/enable`), scaffold an empty `QWEN.md` (`POST /workspace/init`, mechanical only — does NOT call the model; for AI-fill, follow up with `POST /session/:id/prompt`), restart a single MCP server with a budget pre-check (`POST /workspace/mcp/:server/restart`), or add/remove MCP servers at runtime without a daemon restart (`POST /workspace/mcp/servers`, `DELETE /workspace/mcp/servers/:name`). All strict-gated — configure `--token` first.
18
+ - **Session recap** ([#4175](https://github.com/QwenLM/qwen-code/issues/4175) follow-up) — fetch a one-sentence "where did I leave off" summary of an active session (`POST /session/:id/recap`). Wraps core's `generateSessionRecap` as a side-query against the fast model; pollutes neither the main chat history nor the SSE stream. Non-strict gate (same posture as `/prompt`); SDK helper `client.recapSession(sessionId)`.
19
+ - **Known limit — token-cost amplification:** the route is a pure-cost endpoint (each call is an LLM side-query, no state benefit) and the daemon has no per-route rate limit in v1. With the default no-token loopback bind, a buggy or malicious local client can spam it to burn tokens. Configure `--token` (and optionally `--require-auth`) on shared dev hosts before exposing the daemon.
20
+ - **Concurrent recap safety:** two simultaneous `/recap` calls on the same session run two independent side-queries. `generateSessionRecap` reads a snapshot of the chat history via `GeminiClient.getChat().getHistory()` and feeds it to a separate `BaseLlmClient.generateText` call (via `runSideQuery`); it never appends to or mutates the session's `GeminiChat`. Safe to call from multiple clients without coordination.
21
+
22
+ ## v0.16-alpha known limits
23
+
24
+ The first npm release of `qwen serve` (v0.16-alpha) is intentionally narrow — text-only chat / coding for developers running the daemon on their own machine. The list below makes the deferred surface explicit so adopters can plan around it; everything here is on the v0.16.x patch roadmap or a near-term follow-up release.
25
+
26
+ **Product surface — text-only:**
27
+
28
+ - ✅ Text prompts and text responses (chat, coding, tool calls, MCP integration)
29
+ - ❌ **Image / file attachments on the prompt path** — `MessageEmitter` currently only renders text; multimodal echo lands when an alpha target with image needs is committed (#4175 chiga0 #27 P0 item)
30
+ - ❌ **Streaming uploads** — same gating as multimodal
31
+
32
+ **Deployment surface — local-only:**
33
+
34
+ - ✅ Loopback (`127.0.0.1`, default) — no auth required, suitable for dev workstations
35
+ - ✅ Local launch via `systemd` / `launchd` / `nohup &` / `tmux` — see [Local launch templates](./qwen-serve-deploy-local.md)
36
+ - ✅ Bring-your-own bearer token via `QWEN_SERVER_TOKEN` env var (see [Authentication](#authentication) for setup)
37
+ - ❌ **Containerized deployment** — Docker / Compose / Kubernetes / nginx reverse-proxy with TLS termination is NOT in v0.16-alpha. Defers to v0.16.x once an enterprise pilot is committed (it would otherwise rot with no one validating it).
38
+ - ❌ **Multi-daemon coordination on one host** — `1 daemon = 1 workspace × N sessions` is enforced. Cross-host federation, instance-path token keying, and stale-token cleanup defer to v0.16.x.
39
+ - ❌ **Auto-generated daemon tokens** — alpha is BYO-token (one `openssl rand -hex 32` away). Auto-gen + token-store infrastructure defers to v0.16.x.
40
+
41
+ **Hardening — minimum viable for local single-user:**
42
+
43
+ - ✅ Boot-time security gate (refuses non-loopback bind without a token, [PR 15 / #4236](https://github.com/QwenLM/qwen-code/pull/4236))
44
+ - ✅ Mutation-route auth gate, session-scoped permission routing (Wave 4 PRs)
45
+ - ✅ MCP guardrails + multi-client permission coordination (F2 / F3)
46
+ - ✅ **Prompt absolute deadline + SSE writer idle timeout** — opt-in via `--prompt-deadline-ms` and `--writer-idle-timeout-ms`; advertised through `prompt_absolute_deadline` and `writer_idle_timeout` when enabled.
47
+ - ✅ **HTTP rate limiting** — opt-in via `--rate-limit` and per-tier thresholds; advertised through `rate_limit` when enabled.
48
+ - ⏸️ **Prometheus metrics + load test harness** — defers to v0.17 F4 Phase-1 scale instrumentation when 30-50 active sessions becomes a real target.
49
+ - ⏸️ **`--max-body-size` CLI flag** — daemon enforces `express.json({ limit: '10mb' })` by default, which comfortably covers text-only prompts (model context windows are well under 10 MiB of chars). Tunable via flag in v0.16.x.
50
+
51
+ For the deeper "what we won't fix in Stage 1" enumeration (single-host session-state mutation model + N-parallel-sessions sharing one ACP child), see [Stage 1 scope boundaries](#stage-1-scope-boundaries--what-we-wont-fix-in-stage-15) below.
16
52
 
17
53
  ## Quickstart
18
54
 
@@ -38,11 +74,13 @@ curl http://127.0.0.1:4170/capabilities
38
74
  ```
39
75
 
40
76
  The `workspaceCwd` field surfaces the bound workspace so clients can pre-flight check + omit `cwd` on `POST /session`.
77
+ The `limits.maxPendingPromptsPerSession` field advertises the active per-session prompt admission cap; `null` means the cap is disabled.
41
78
 
42
79
  The daemon also exposes read-only runtime snapshots for client UIs:
43
80
  `GET /workspace/mcp`, `GET /workspace/skills`, `GET /workspace/providers`,
44
81
  `GET /workspace/env`, `GET /workspace/preflight`,
45
- `GET /session/:id/context`, and `GET /session/:id/supported-commands`.
82
+ `GET /session/:id/context`, `GET /session/:id/supported-commands`, and
83
+ `GET /session/:id/tasks`.
46
84
 
47
85
  `GET /workspace/mcp`, `GET /workspace/skills`, and `GET /workspace/providers`
48
86
  report the live ACP runtime and do not start the ACP child when idle; an
@@ -166,19 +204,21 @@ The token comparison is constant-time (SHA-256 + `crypto.timingSafeEqual`); 401
166
204
 
167
205
  ## CLI flags
168
206
 
169
- | Flag | Default | Purpose |
170
- | ------------------------- | --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
171
- | `--port <n>` | `4170` | TCP port. `0` = OS-assigned ephemeral port. |
172
- | `--hostname <addr>` | `127.0.0.1` | Bind interface. Anything beyond loopback requires a token. |
173
- | `--token <str>` | — | Bearer token. Falls back to `QWEN_SERVER_TOKEN` env var (with leading/trailing whitespace stripped — handy for `$(cat token.txt)`). |
174
- | `--require-auth` | `false` | Refuse to start without a bearer token, even on loopback. Hardens the `127.0.0.1` developer default for shared dev hosts / CI runners / multi-tenant workstations where any local user can hit the listener. Boots only with `--token` or `QWEN_SERVER_TOKEN` set; gates `/health` behind the bearer too. |
175
- | `--max-sessions <n>` | `20` | Cap on concurrent live sessions. New `POST /session` requests that would spawn a fresh child return `503` (with `Retry-After: 5`) when the cap is hit; attaches to existing sessions are NOT counted. Set to `0` to disable. Sized for single-user / small-team usage; raise it if your deployment has the RAM/FD headroom (~30–50 MB per session). |
176
- | `--workspace <path>` | `process.cwd()` | Absolute workspace path this daemon binds to (per [#3803](https://github.com/QwenLM/qwen-code/issues/3803) §02 1 daemon = 1 workspace). `POST /session` requests with a mismatched `cwd` return `400 workspace_mismatch`. For multi-workspace deployments, run one `qwen serve` per workspace on separate ports. |
177
- | `--max-connections <n>` | `256` | Listener-level TCP connection cap (`server.maxConnections`). Bounds raw socket count irrespective of session count — slow / phantom SSE clients get rejected at accept time once full. Raise alongside `--max-sessions` if your deployment expects many SSE subscribers per session. |
178
- | `--event-ring-size <n>` | `8000` | Per-session SSE replay ring depth (#3803 §02 target). Sets the backlog available to `GET /session/:id/events` with `Last-Event-ID: N`. Larger = more reconnect headroom at the cost of a few hundred KB extra RAM per session. SDK clients can additionally request a larger per-subscriber backlog cap on a specific subscription via `?maxQueued=N` (range `[16, 2048]`, default 256). Daemons also emit a non-terminal `slow_client_warning` SSE frame at 75% queue fill so clients can drain / reconnect before getting evicted. Pre-flight `caps.features.slow_client_warning`. |
179
- | `--mcp-client-budget <n>` | | Positive integer cap on live MCP clients **per ACP session** (issue [#4175](https://github.com/QwenLM/qwen-code/issues/4175) PR 14 v1; PR 23 graduates this to per-workspace via the shared MCP pool). Combine with `--mcp-budget-mode`. When unset, no accounting-driven enforcement (but `GET /workspace/mcp` still reports `clientCount`). Distinct from claude-code's `MCP_SERVER_CONNECTION_BATCH_SIZE` which gates startup concurrency, not the total client count. Pre-flight `caps.features.mcp_guardrails`. |
180
- | `--mcp-budget-mode <m>` | `warn` / `off` | How `--mcp-client-budget` is enforced. `warn` (default when budget set): no refusal, snapshot's `budgets[0].status` flips to `warning` at ≥75% of budget. `enforce`: connects past the cap are refused, per-server cell shows `disabledReason: 'budget'`, deterministic by `mcpServers` declaration order. `off` (default when budget unset): pure observability. Boot rejects `enforce` without a budget. |
181
- | `--http-bridge` | `true` | Stage 1 mode: one `qwen --acp` child per daemon (bound to one workspace at boot, per [#3803](https://github.com/QwenLM/qwen-code/issues/3803) §02); N sessions multiplex onto that child via ACP `newSession()`. Stage 2 native in-process becomes available later. |
207
+ | Flag | Default | Purpose |
208
+ | --------------------------------------- | --------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
209
+ | `--port <n>` | `4170` | TCP port. `0` = OS-assigned ephemeral port. |
210
+ | `--hostname <addr>` | `127.0.0.1` | Bind interface. Anything beyond loopback requires a token. |
211
+ | `--token <str>` | — | Bearer token. Falls back to `QWEN_SERVER_TOKEN` env var (with leading/trailing whitespace stripped — handy for `$(cat token.txt)`). |
212
+ | `--require-auth` | `false` | Refuse to start without a bearer token, even on loopback. Hardens the `127.0.0.1` developer default for shared dev hosts / CI runners / multi-tenant workstations where any local user can hit the listener. Boots only with `--token` or `QWEN_SERVER_TOKEN` set; gates `/health` behind the bearer too. |
213
+ | `--max-sessions <n>` | `20` | Cap on concurrent live sessions. New `POST /session` requests that would spawn a fresh child return `503` (with `Retry-After: 5`) when the cap is hit; attaches to existing sessions are NOT counted. Set to `0` to disable. Sized for single-user / small-team usage; raise it if your deployment has the RAM/FD headroom (~30–50 MB per session). |
214
+ | `--max-pending-prompts-per-session <n>` | `5` | Per-session cap on prompts accepted by `POST /session/:id/prompt` but not yet settled, including queued prompts and the active prompt. The bridge rejects overflow synchronously with `503`, `Retry-After: 5`, and `code: "prompt_queue_full"` before returning a `promptId`. Set to `0` to disable. `branchSession` serializes on the same FIFO but does not count against this prompt cap. |
215
+ | `--workspace <path>` | `process.cwd()` | Absolute workspace path this daemon binds to (per [#3803](https://github.com/QwenLM/qwen-code/issues/3803) §02 1 daemon = 1 workspace). `POST /session` requests with a mismatched `cwd` return `400 workspace_mismatch`. For multi-workspace deployments, run one `qwen serve` per workspace on separate ports. |
216
+ | `--max-connections <n>` | `256` | Listener-level TCP connection cap (`server.maxConnections`). Bounds raw socket count irrespective of session count — slow / phantom SSE clients get rejected at accept time once full. Raise alongside `--max-sessions` if your deployment expects many SSE subscribers per session. |
217
+ | `--event-ring-size <n>` | `8000` | Per-session SSE replay ring depth (#3803 §02 target). Sets the backlog available to `GET /session/:id/events` with `Last-Event-ID: N`. Larger = more reconnect headroom at the cost of a few hundred KB extra RAM per session. SDK clients can additionally request a larger per-subscriber backlog cap on a specific subscription via `?maxQueued=N` (range `[16, 2048]`, default 256). Daemons also emit a non-terminal `slow_client_warning` SSE frame at 75% queue fill so clients can drain / reconnect before getting evicted. Pre-flight `caps.features.slow_client_warning`. |
218
+ | `--mcp-client-budget <n>` | | Positive integer cap on live MCP clients **per ACP session** (issue [#4175](https://github.com/QwenLM/qwen-code/issues/4175) PR 14 v1; PR 23 graduates this to per-workspace via the shared MCP pool). Combine with `--mcp-budget-mode`. When unset, no accounting-driven enforcement (but `GET /workspace/mcp` still reports `clientCount`). Distinct from claude-code's `MCP_SERVER_CONNECTION_BATCH_SIZE` which gates startup concurrency, not the total client count. Pre-flight `caps.features.mcp_guardrails`. |
219
+ | `--mcp-budget-mode <m>` | `warn` / `off` | How `--mcp-client-budget` is enforced. `warn` (default when budget set): no refusal, snapshot's `budgets[0].status` flips to `warning` at ≥75% of budget. `enforce`: connects past the cap are refused, per-server cell shows `disabledReason: 'budget'`, deterministic by `mcpServers` declaration order. `off` (default when budget unset): pure observability. Boot rejects `enforce` without a budget. |
220
+ | `--http-bridge` | `true` | Stage 1 mode: one `qwen --acp` child per daemon (bound to one workspace at boot, per [#3803](https://github.com/QwenLM/qwen-code/issues/3803) §02); N sessions multiplex onto that child via ACP `newSession()`. Stage 2 native in-process becomes available later. |
221
+ | `--allow-origin <pat>` | — | T2.4 ([#4514](https://github.com/QwenLM/qwen-code/issues/4514)). Cross-origin allowlist for browser webui clients. Repeatable. Each value is `*` (any origin — boot refuses if no bearer token is configured; `--require-auth` on loopback is recommended so `/health` and `/demo` are also bearer-gated, since both are pre-auth on loopback by default) or a canonical URL origin (`<scheme>://<host>[:<port>]`, no trailing slash / path / userinfo / query). **Subdomain wildcards (`https://*.example.com`) are intentionally unsupported** — list each subdomain explicitly, or use `*` with a configured token (and `--require-auth` for full hardening). Matched origins receive CORS response headers (`Access-Control-Allow-Origin`, `Vary: Origin`, methods, headers, max-age, and exposed `Retry-After`); unmatched origins still get a 403 with the same envelope as today's wall. `Origin: null` (sandboxed iframes, file:// docs) is always rejected, even under `*`. Pre-flight via `caps.features.allow_origin`. Loopback self-origin hits are unaffected. |
182
222
 
183
223
  > **Sizing the load knobs.** `--max-sessions` is the **new-child** cap.
184
224
  > Three other layers also limit load — when sizing for a high-concurrency
@@ -189,14 +229,19 @@ The token comparison is constant-time (SHA-256 + `crypto.timingSafeEqual`); 401
189
229
  > - **per-session subscribers**: the EventBus caps SSE subscribers at
190
230
  > 64 per session by default; the 65th client gets a terminal
191
231
  > `stream_error` and is closed.
232
+ > - **per-session prompt admissions**:
233
+ > `--max-pending-prompts-per-session=5` bounds queued + active prompts
234
+ > accepted for one session. Overflow gets `503` with `Retry-After: 5`.
192
235
  > - **per-subscriber backlog**: a 256-frame queue per SSE client; an
193
236
  > over-capacity client gets a terminal `client_evicted` frame and is
194
237
  > closed (one slow consumer can't pin the daemon).
195
238
  >
196
- > The four caps interact: `--max-sessions × 64 subscribers × 256 frames`
197
- > is the worst-case in-flight memory at the EventBus layer. Default
198
- > sizing assumes single-user / small-team load; raise progressively
199
- > (and watch RSS) for multi-tenant deployments.
239
+ > These caps interact: `--max-sessions × 64 subscribers × 256 frames`
240
+ > is the worst-case in-flight memory at the EventBus layer, while
241
+ > `--max-sessions × --max-pending-prompts-per-session` bounds accepted
242
+ > prompt work at the admission layer. Default sizing assumes single-user /
243
+ > small-team load; raise progressively (and watch RSS) for multi-tenant
244
+ > deployments.
200
245
 
201
246
  > **MCP client guardrails (issue [#4175](https://github.com/QwenLM/qwen-code/issues/4175) PR 14).** A workspace declaring 30 MCP servers in `mcpServers` will start 30 clients with no upstream cap unless you set one. `--mcp-client-budget=N` caps the live MCP client count; `--mcp-budget-mode={enforce,warn,off}` chooses the behavior. Default is `warn` when a budget is set (snapshot surfaces the warning but no client is refused — useful for measuring real-world fanout before flipping on enforcement). Refused servers under `enforce` mode get `disabledReason: 'budget'` on their per-server cell, and the `budgets[0]` cell shows `status: 'error'` + `errorKind: 'budget_exhausted'`. Slot reservation is by server name and survives reconnects / discovery timeouts — a refused server can't take a slot from a healthy one.
202
247
  >
@@ -218,9 +263,10 @@ The token comparison is constant-time (SHA-256 + `crypto.timingSafeEqual`); 401
218
263
  - **`--hostname 0.0.0.0` requires a token** — boot refuses without one.
219
264
  - **`LOOPBACK_BINDS` includes IPv6** — `::1` and `[::1]` count as loopback for the no-token rule.
220
265
  - **Host header allowlist** — on **loopback** binds the daemon checks `Host:` matches `localhost:port` / `127.0.0.1:port` / `[::1]:port` / `host.docker.internal:port` (case-insensitive per RFC 7230 §5.4) to defend against DNS rebinding. **Non-loopback binds (`--hostname 0.0.0.0`) intentionally bypass the Host allowlist** — the operator has chosen the surface area, so the bearer-token gate is the sole authentication layer; reverse proxies / SNI / client cert pinning are the operator's responsibility, not the daemon's. If you need Host-based isolation on a non-loopback bind, terminate TLS + check Host at a front proxy.
221
- - **CORS denies any browser Origin** — returns `403` JSON. **Implication for browser-served webuis** (BUy4e): any `packages/webui`-style frontend that lives on a separate origin will get 403 at the wire. Stage 1 options for browser-style consumption: (a) package the webui as a native shell (Electron/Tauri) so no `Origin` header is sent, or (b) front the daemon with a same-origin reverse proxy that strips/rewrites `Origin` for a known frontend. Stage 1.5 will add `--allow-origin <pattern>` for opt-in named frontends.
266
+ - **CORS denies any browser Origin by default** — returns `403` JSON. Pass **`--allow-origin <pattern>`** (repeatable, T2.4 #4514) to opt specific browser origins through. Each value is either the literal `*` (any origin — boot refuses if no bearer token is configured; `--require-auth` on loopback is recommended for full hardening since `/health` and `/demo` remain pre-auth on loopback by default) or a canonical URL origin (`<scheme>://<host>[:<port>]`, no trailing slash / path / userinfo). Matched origins receive proper CORS response headers (`Access-Control-Allow-Origin: <echoed>`, `Vary: Origin`, plus standard methods / headers / max-age and exposed `Retry-After`); unmatched origins still get a 403 with the same envelope as the default wall. `caps.features.allow_origin` is advertised conditionally so SDK / webui clients can pre-flight whether the daemon honors cross-origin hits before issuing them. Example: `qwen serve --allow-origin http://localhost:3000 --allow-origin http://localhost:5173`. Loopback self-origin hits (e.g. the `/demo` page) are unaffected — a separate Origin-strip shim handles them regardless of `--allow-origin`. **Browser webuis without `--allow-origin` configured** still fall back to the same Stage 1 options as before: package as a native shell (Electron/Tauri) so no `Origin` header is sent, or front the daemon with a same-origin reverse proxy.
222
267
  - **Spawned `qwen --acp` child inherits the daemon's environment** with one explicit scrub: `QWEN_SERVER_TOKEN` is removed before the child starts (the daemon's own bearer; the agent doesn't need it). Everything else — `OPENAI_API_KEY` / `ANTHROPIC_API_KEY` / `QWEN_*` / `DASHSCOPE_API_KEY` / your custom `modelProviders[].envKey` / etc. — passes through, because the agent legitimately needs those to authenticate to the LLM. **This is intentional, not a sandbox.** The agent runs as the same UID with shell-tool access, so anything in `~/.bashrc` / `~/.aws/credentials` / `~/.npmrc` is reachable by prompt injection regardless. The env passthrough is not the security boundary; the user-as-trust-root is. Don't run `qwen serve` under an identity that has env-resident credentials you wouldn't trust the agent with.
223
268
  - **Per-subscriber bounded SSE queues** — a slow client that overflows its queue gets a `client_evicted` terminal frame and is closed; one stuck consumer can't pin the daemon.
269
+ - **Per-session prompt admission cap** — defaults to 5 accepted-but-unsettled prompts per session. A buggy client cannot enqueue unbounded prompt promises or temporary SSE waits for one session.
224
270
  - **Graceful shutdown** — SIGINT/SIGTERM drain the agent children before closing the listener (10s deadline per child).
225
271
 
226
272
  > ⚠️ **Stage 1 known gap — permissions are daemon-global, not per-session (BUy4H).** `pendingPermissions` lives at daemon scope; any client holding the bearer token can vote on any `requestId` for any session it can see (and SSE `permission_request` events carry the requestId in their payload). This is acceptable under the single-user / small-team trust model where every authenticated client is the same human or collaborators they trust. Stage 1.5 will move to `POST /session/:id/permission/:requestId` + session-scoped pending map + per-client identity (must-have #3 from the downstream review); until then, don't run `qwen serve` behind a bearer shared with untrusted parties.
@@ -234,11 +280,31 @@ The token comparison is constant-time (SHA-256 + `crypto.timingSafeEqual`); 401
234
280
  > "alive" until Node's keepalive probes time out — typically ~2 hours
235
281
  > on Linux defaults. On `--hostname 0.0.0.0` deployments behind such
236
282
  > NATs, phantom SSE connections can accumulate and eventually hit the
237
- > 256 `server.maxConnections` ceiling. Stage 2 will add an
238
- > application-level idle deadline (last-byte-written tracking +
239
- > per-connection timeout). Until then, operators on networks that
240
- > swallow RSTs may want to lower `server.keepAliveTimeout` via a
241
- > reverse proxy or accept periodic daemon restarts.
283
+ > 256 `server.maxConnections` ceiling.
284
+ >
285
+ > Set [`--writer-idle-timeout-ms <n>`](#deadlines-and-writer-idle-timeout)
286
+ > (issue [#4514](https://github.com/QwenLM/qwen-code/issues/4514) T2.9)
287
+ > to close the gap with an explicit application-level idle deadline:
288
+ > when no write has successfully flushed for `n` ms the daemon emits
289
+ > a terminal `client_evicted` frame with
290
+ > `reason: 'writer_idle_timeout'` and closes the stream. The flag is
291
+ > off by default to preserve the legacy contract — operators on
292
+ > networks that swallow RSTs should pick a value well above the 15s
293
+ > heartbeat interval (e.g. `30000`–`300000`) so legitimate idle
294
+ > connections aren't evicted while genuinely stuck writers are
295
+ > reaped promptly. Pre-flight `caps.features.includes('writer_idle_timeout')`
296
+ > from your SDK to confirm the daemon supports it.
297
+
298
+ ### Deadlines and writer idle timeout
299
+
300
+ Issue [#4514](https://github.com/QwenLM/qwen-code/issues/4514) T2.9 ships two opt-in flags that close the long-running / remote-deployment gaps the 15s heartbeat + AbortSignal don't cover. Both are off by default — single-user loopback workflows stay bit-for-bit unchanged.
301
+
302
+ | Flag | Env var | Default | What it does |
303
+ | ------------------------------ | ----------------------------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
304
+ | `--prompt-deadline-ms <n>` | `QWEN_SERVE_PROMPT_DEADLINE_MS` | unset | Server-side wallclock cap on a single `POST /session/:id/prompt`. On expiry the daemon aborts the prompt's AbortController and returns HTTP `504` with `{code:"prompt_deadline_exceeded", errorKind:"prompt_deadline_exceeded", deadlineMs:n}`. A per-prompt request body field `deadlineMs` can SHORTEN the effective deadline below the flag but never extend it. Capability tag (conditional): `prompt_absolute_deadline`. |
305
+ | `--writer-idle-timeout-ms <n>` | `QWEN_SERVE_WRITER_IDLE_TIMEOUT_MS` | unset | Per-SSE-connection idle deadline. When no write has SUCCESSFULLY flushed for `n` ms — neither a real event nor the 15s heartbeat — the daemon emits a terminal `client_evicted` frame with `data.reason = 'writer_idle_timeout'` (mirrored on `data.errorKind`) and closes the stream. **Pick a value comfortably above the 15s heartbeat** (e.g. `30000`–`300000`) so legitimate idle streams aren't evicted; values `< 15000` WILL evict otherwise-healthy idle connections before the first heartbeat fires (intentional only for tests / short-lived dev sessions). Capability tag (conditional): `writer_idle_timeout`. |
306
+
307
+ Both flags accept a positive integer in milliseconds; `0`, `NaN`, non-integer, or negative values are rejected at boot with a clear error message. CLI flag wins over env var; explicit `ServeOptions` field (embedded callers) wins over env. SDK consumers should pre-flight the matching capability tag before relying on either behavior — daemons predating this PR omit both tags and the request `deadlineMs` field is silently dropped.
242
308
 
243
309
  ## Multi-session & multi-workspace deployment
244
310
 
@@ -252,12 +318,12 @@ To handle multiple **users** (each with their own quota, audit log, sandbox) or
252
318
 
253
319
  ## Loading and resuming a persisted session
254
320
 
255
- The daemon exposes ACP's `session/load` and `session/unstable_resumeSession` over HTTP via two routes:
321
+ The daemon exposes ACP's `session/load` and resume flow over HTTP via two routes:
256
322
 
257
323
  | Route | Use when |
258
324
  | -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
259
325
  | `POST /session/:id/load` | The client has **no** history rendered (cold reconnect, picker-then-open). The daemon replays every persisted turn through SSE so subscribers see the full transcript. Capability tag: `session_load`. |
260
- | `POST /session/:id/resume` | The client already has the turns on screen and only needs the daemon-side handle back. Model context is restored on the agent side without UI replay — the SSE stream stays clean. Capability tag: `unstable_session_resume`. |
326
+ | `POST /session/:id/resume` | The client already has the turns on screen and only needs the daemon-side handle back. Model context is restored on the agent side without UI replay — the SSE stream stays clean. Capability tag: `session_resume` (`unstable_session_resume` remains a deprecated alias for older clients). |
261
327
 
262
328
  The TypeScript SDK exposes both as static factories on `DaemonSessionClient`:
263
329
 
@@ -278,7 +344,7 @@ for await (const event of session.events()) {
278
344
  }
279
345
  ```
280
346
 
281
- Pre-flight `caps.features.session_load` / `caps.features.unstable_session_resume` before calling — older daemons return `404`. Concurrent same-action requests for the same id coalesce; cross-action races (a `load` racing a `resume`) get `409 restore_in_progress` with `Retry-After: 5`. See the [protocol reference](../developers/qwen-serve-protocol.md) for the full error envelope.
347
+ Pre-flight `caps.features.session_load` / `caps.features.session_resume` before calling — older daemons return `404`. `unstable_session_resume` is still advertised as a deprecated compatibility alias. Concurrent same-action requests for the same id coalesce; cross-action races (a `load` racing a `resume`) get `409 restore_in_progress` with `Retry-After: 5`. See the [protocol reference](../developers/qwen-serve-protocol.md) for the full error envelope.
282
348
 
283
349
  Note: history replay is bounded by the SSE ring (default 4000 frames). Long histories with chatty turns can exceed that — earliest frames are dropped silently. For very long sessions, prefer `resume` and rely on the client's local persisted UI.
284
350
 
@@ -334,10 +400,10 @@ The Stage 1.5 plan describes TUI as an in-process EventBus subscriber. In practi
334
400
 
335
401
  No TUI shell runs inside the daemon. The slash commands listed above **don't exist** in this mode — there's no terminal UI to issue them from. Session state is therefore:
336
402
 
337
- - **Boot-time-frozen** for `approval-mode` / `memory` / `mcp servers` / `agents` / `tools` allowlist / `auth` — all loaded from settings + disk when the daemon's `qwen --acp` child starts; immutable for the session's lifetime.
338
- - **Mutable over HTTP** only via the routes this PR exposes primarily `POST /session/:id/model` (publishes `model_switched`). Permission votes (`POST /permission/:requestId`) are per-request, not per-session-state.
403
+ - **Boot-time-frozen** for `approval-mode` / `memory` / `agents` / `tools` allowlist / `auth` — all loaded from settings + disk when the daemon's `qwen --acp` child starts; immutable for the session's lifetime. Settings-defined MCP servers are likewise frozen at boot, but **runtime servers** can be added (`POST /workspace/mcp/servers`) or removed (`DELETE /workspace/mcp/servers/:name`) without a restart.
404
+ - **Mutable over HTTP** via `POST /session/:id/model` (publishes `model_switched`), `POST /workspace/mcp/servers` / `DELETE /workspace/mcp/servers/:name` (publishes `mcp_server_added` / `mcp_server_removed`), and permission votes (`POST /permission/:requestId`).
339
405
 
340
- **Consequence:** remote clients in headless mode see the **full session state**. No TUI hides additional state; no drift is possible. If you want to change `approval-mode` or add an MCP server, restart the daemon with new settings the daemon doesn't expose runtime mutation for those today.
406
+ **Consequence:** remote clients in headless mode see the **full session state**. No TUI hides additional state; no drift is possible. If you want to change `approval-mode`, restart the daemon with new settings. MCP servers can now be added/removed at runtime via the mutation routes (`POST /workspace/mcp/servers`, `DELETE /workspace/mcp/servers/:name`) — see [Runtime MCP server management](#runtime-mcp-server-management-issue-4514).
341
407
 
342
408
  #### Mode 2 — Stage 1.5 `qwen --serve` co-hosted TUI (not in this PR)
343
409
 
@@ -424,8 +490,161 @@ const result = await flow.awaitCompletion({ signal: abortCtrl.signal });
424
490
 
425
491
  **Cross-client take-over.** Two SDK clients on the same daemon that both `POST /workspace/auth/device-flow` for the same provider get the per-provider singleton: the first call starts a fresh IdP request and returns `attached: false`; the second call returns the EXISTING in-flight entry with `attached: true`. The take-over is recorded on the audit trail (under the second client's `X-Qwen-Client-Id`) but does NOT emit a separate event — both clients eventually observe the SAME `auth_device_flow_authorized` once the user finishes the IdP page. If your UI distinguishes "I started this" from "someone else's flow I joined", branch on the `attached` field returned by `start()`.
426
492
 
493
+ ## Daemon log file
494
+
495
+ `qwen serve` writes a per-process diagnostic log to:
496
+
497
+ ```
498
+ ${QWEN_RUNTIME_DIR or ~/.qwen}/debug/daemon/serve-<pid>-<workspaceHash>.log
499
+ ```
500
+
501
+ A `latest` symlink in the same directory always points at the current process's log, so `tail -f ~/.qwen/debug/daemon/latest` will follow whichever daemon is running.
502
+
503
+ The log captures lifecycle messages, route errors (with `route=` and `sessionId=` context), ACP child stderr, and — when `QWEN_SERVE_DEBUG=1` is set — extra bridge breadcrumbs. Lines that go to stderr today still go to stderr; the file log is **additive**, not a replacement.
504
+
505
+ ### Disabling
506
+
507
+ Set `QWEN_DAEMON_LOG_FILE=0` (or `false`/`off`/`no`) to skip file logging entirely. Stderr output is unaffected.
508
+
509
+ ### Relation to session debug logs
510
+
511
+ Session-scoped debug logs (`~/.qwen/debug/<sessionId>.txt` and the `~/.qwen/debug/latest` symlink) are independent. The daemon log lives in a sibling `daemon/` subdirectory; per-session debug semantics are unchanged by this feature.
512
+
513
+ ### No rotation
514
+
515
+ The daemon log appends indefinitely. Rotate manually if it grows large. A future enhancement may add automatic rotation; track via [#4548](https://github.com/QwenLM/qwen-code/issues/4548) follow-ups.
516
+
517
+ ## Runtime MCP server management (issue [#4514](https://github.com/QwenLM/qwen-code/issues/4514))
518
+
519
+ Add or remove MCP servers at runtime without restarting the daemon. Runtime entries live in an ephemeral overlay that **shadows** settings-defined servers of the same name; the underlying `settings.json` / `mcpServers` config is never written to.
520
+
521
+ **Pre-flight:** check `caps.features` for `mcp_server_runtime_mutation` before calling either route. Older daemons without this tag return `404`.
522
+
523
+ ### `POST /workspace/mcp/servers` — add a runtime MCP server
524
+
525
+ Strict-gated (bearer token required). Connects the server immediately via the live `McpClientManager` and discovers its tools.
526
+
527
+ Request:
528
+
529
+ ```json
530
+ {
531
+ "name": "my-server",
532
+ "config": {
533
+ "command": "npx",
534
+ "args": ["-y", "@my-org/mcp-server"]
535
+ }
536
+ }
537
+ ```
538
+
539
+ `name` must be alphanumeric plus `_` and `-` (max 256 characters). `config` is the same MCP server configuration object used in `settings.json` `mcpServers` entries (transport-dependent fields: `command`/`args` for stdio, `url` for SSE/HTTP). Security-sensitive fields (`trust`, `env`, `cwd`, `oauth`, `headers`, `authProviderType`, `includeTools`, `excludeTools`, `type`) are stripped by the daemon and ignored.
540
+
541
+ Response (200) — success:
542
+
543
+ ```json
544
+ {
545
+ "name": "my-server",
546
+ "transport": "stdio",
547
+ "replaced": false,
548
+ "shadowedSettings": false,
549
+ "toolCount": 3,
550
+ "originatorClientId": "client-1"
551
+ }
552
+ ```
553
+
554
+ - `replaced: true` — a runtime entry with the same name already existed and the config fingerprint differs; old connection torn down, new one established. When the fingerprint matches (idempotent re-add), `replaced` is `false`.
555
+ - `shadowedSettings: true` — a settings-defined server with the same name exists; the runtime entry now shadows it. The settings entry is untouched and re-emerges if the runtime entry is later removed.
556
+ - `toolCount` — number of tools discovered on the newly connected server.
557
+
558
+ Response (200) — soft refuse (budget warning mode):
559
+
560
+ ```json
561
+ {
562
+ "name": "my-server",
563
+ "skipped": true,
564
+ "reason": "budget_warning_only"
565
+ }
566
+ ```
567
+
568
+ Returned when `--mcp-budget-mode=warn` and adding the server would exceed the configured `--mcp-client-budget`. The server is NOT connected. Callers should surface the budget pressure to the user.
569
+
570
+ Errors:
571
+
572
+ | Status | Code | When |
573
+ | ------ | ------------------------- | -------------------------------------------------------------------------------------------------- |
574
+ | `400` | `invalid_server_name` | Name empty, exceeds 256 chars, or contains characters outside `[A-Za-z0-9_-]` |
575
+ | `400` | `missing_required_field` | `config` missing or not a non-null object |
576
+ | `400` | `invalid_client_id` | `X-Qwen-Client-Id` header present but not registered for this workspace |
577
+ | `400` | `invalid_config` | Config shape rejected by the MCP transport validator |
578
+ | `401` | `token_required` | No bearer token configured (strict gate) |
579
+ | `409` | `mcp_budget_would_exceed` | `--mcp-budget-mode=enforce` and budget is full |
580
+ | `502` | `mcp_server_spawn_failed` | Server process exited or timed out during connect; body carries `serverName`, `exitCode`, `stderr` |
581
+ | `503` | `acp_channel_unavailable` | No live ACP child (no session has been created yet) |
582
+
583
+ ### `DELETE /workspace/mcp/servers/:name` — remove a runtime MCP server
584
+
585
+ Strict-gated. Disconnects the server and removes it from the runtime overlay. Idempotent — removing a name that was never added returns a skip response (not an error).
586
+
587
+ The `:name` path parameter is the URL-encoded server name.
588
+
589
+ Response (200) — success:
590
+
591
+ ```json
592
+ {
593
+ "name": "my-server",
594
+ "removed": true,
595
+ "wasShadowingSettings": false,
596
+ "originatorClientId": "client-1"
597
+ }
598
+ ```
599
+
600
+ - `wasShadowingSettings: true` — the removed runtime entry was shadowing a settings-defined server of the same name. That settings entry is now un-shadowed and will be used on next discovery/restart.
601
+
602
+ Response (200) — idempotent skip:
603
+
604
+ ```json
605
+ {
606
+ "name": "ghost",
607
+ "skipped": true,
608
+ "reason": "not_present"
609
+ }
610
+ ```
611
+
612
+ Returned when the name was not in the runtime overlay (it may still exist in settings — settings entries cannot be removed via this route).
613
+
614
+ Errors:
615
+
616
+ | Status | Code | When |
617
+ | ------ | ------------------------- | ----------------------------------------------------------------------------- |
618
+ | `400` | `invalid_server_name` | Name empty, exceeds 256 chars, or contains characters outside `[A-Za-z0-9_-]` |
619
+ | `400` | `invalid_client_id` | `X-Qwen-Client-Id` header present but not registered for this workspace |
620
+ | `401` | `token_required` | No bearer token configured (strict gate) |
621
+ | `503` | `acp_channel_unavailable` | No live ACP child |
622
+
623
+ ### Shadow semantics
624
+
625
+ Runtime entries form an ephemeral overlay on top of settings-defined MCP servers:
626
+
627
+ - **Adding** a runtime server with the same name as a settings entry **shadows** it — the runtime config takes precedence. The original settings entry is not modified.
628
+ - **Removing** a runtime server that was shadowing a settings entry **un-shadows** it — the settings-defined config becomes active again on next connection.
629
+ - **Daemon restart** discards all runtime entries. Only settings-defined servers survive across restarts. Runtime servers are session-lifetime scoped.
630
+ - **`GET /workspace/mcp`** reports the merged view — both settings-defined and runtime servers appear in the `servers[]` array. There is no wire-level distinction between the two origins in the snapshot today.
631
+
632
+ ### Events
633
+
634
+ Both routes emit **workspace-scoped** SSE events (all active session buses receive them):
635
+
636
+ | Event | Emitted when | Payload fields |
637
+ | -------------------- | ------------------------------- | -------------------------------------------------------------------------------------- |
638
+ | `mcp_server_added` | `POST` succeeds (not skipped) | `name`, `transport`, `replaced`, `shadowedSettings`, `toolCount`, `originatorClientId` |
639
+ | `mcp_server_removed` | `DELETE` succeeds (not skipped) | `name`, `wasShadowingSettings`, `originatorClientId` |
640
+
641
+ Skipped responses (`budget_warning_only`, `not_present`) do NOT emit events.
642
+
643
+ Budget-related events from the existing `mcp_guardrail_events` surface (`mcp_budget_warning`, `mcp_child_refused_batch`) also fire when runtime additions cross the budget threshold.
644
+
427
645
  ## What's next
428
646
 
647
+ - **Setting up a long-running daemon?** [Local launch templates (systemd / launchd / nohup / tmux)](./qwen-serve-deploy-local.md) for v0.16-alpha (local-only).
429
648
  - **Build a client?** See the [DaemonClient TypeScript quickstart](../developers/examples/daemon-client-quickstart.md) and the [HTTP protocol reference](../developers/qwen-serve-protocol.md).
430
649
  - **Reading the source?** Bridge code lives at `packages/cli/src/serve/`; SDK client at `packages/sdk-typescript/src/daemon/`.
431
650
  - **Tracking the roadmap?** Stage 1.5 / Stage 2 progress is tracked on issue [#3803](https://github.com/QwenLM/qwen-code/issues/3803).
@@ -15,39 +15,39 @@ This document lists the available keyboard shortcuts in Qwen Code.
15
15
  | `Ctrl+T` | Toggle the display of tool descriptions. |
16
16
  | `Ctrl+B` | While a foreground shell command is running: promote it to a background task. The child keeps running, the agent's turn unblocks, and the shell appears in `/tasks` + the Background tasks dialog. No-op when no shell is executing — Ctrl+B then falls through to its prompt-area binding (cursor-left). |
17
17
  | `Alt/Option+M` | Toggle Markdown output between rich rendered previews and raw/source mode. On macOS, the terminal must send Option as Meta. |
18
- | `Shift+Tab` (`Tab` on Windows) | Cycle approval modes (`plan` → `default` → `auto-edit` → `yolo`) |
18
+ | `Shift+Tab` (`Tab` on Windows) | Cycle approval modes (`plan` → `default` → `auto-edit` → `auto` → `yolo`) |
19
19
 
20
20
  ## Input Prompt
21
21
 
22
- | Shortcut | Description |
23
- | -------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
24
- | `!` | Toggle shell mode when the input is empty. |
25
- | `?` | Toggle keyboard shortcuts display when the input is empty. |
26
- | `\` (at end of line) + `Enter` | Insert a newline. |
27
- | `Down Arrow` | Row down, then snap to end, then history next. |
28
- | `Enter` | Submit the current prompt. |
29
- | `Meta+Delete` / `Ctrl+Delete` | Delete the word to the right of the cursor. |
30
- | `Tab` | Autocomplete the current suggestion if one exists. |
31
- | `Up Arrow` | Row up, then snap to start, then history prev. |
32
- | `Ctrl+A` / `Home` | Move the cursor to the beginning of the line. |
33
- | `Ctrl+B` / `Left Arrow` | Move the cursor one character to the left. |
34
- | `Ctrl+C` | Clear the input prompt |
35
- | `Esc` (double press) | Clear the input prompt. |
36
- | `Ctrl+D` / `Delete` | Delete the character to the right of the cursor. |
37
- | `Ctrl+E` / `End` | Move the cursor to the end of the line. |
38
- | `Ctrl+F` / `Right Arrow` | Move the cursor one character to the right. |
39
- | `Ctrl+H` / `Backspace` | Delete the character to the left of the cursor. |
40
- | `Ctrl+K` | Delete from the cursor to the end of the line. |
41
- | `Ctrl+Left Arrow` / `Meta+Left Arrow` / `Meta+B` | Move the cursor one word to the left. |
42
- | `Ctrl+N` | Row down, then snap to end, then history next. |
43
- | `Ctrl+P` | Row up, then snap to start, then history prev. |
44
- | `Ctrl+R` | Reverse search through input/shell history. |
45
- | `Ctrl+Y` | Retry the last failed request. |
46
- | `Ctrl+Right Arrow` / `Meta+Right Arrow` / `Meta+F` | Move the cursor one word to the right. |
47
- | `Ctrl+U` | Delete from the cursor to the beginning of the line. |
48
- | `Ctrl+V` (Windows: `Alt+V`) | Paste clipboard content. If the clipboard contains an image, it will be saved and a reference to it will be inserted in the prompt. |
49
- | `Ctrl+W` / `Meta+Backspace` / `Ctrl+Backspace` | Delete the word to the left of the cursor. |
50
- | `Ctrl+X` / `Meta+Enter` | Open the current input in an external editor. |
22
+ | Shortcut | Description |
23
+ | ----------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |
24
+ | `!` | Toggle shell mode when the input is empty. |
25
+ | `?` | Toggle keyboard shortcuts display when the input is empty. |
26
+ | `Ctrl+Enter` / `Cmd+Enter` / `Shift+Enter` / `Ctrl+J` | Insert a newline. |
27
+ | `Down Arrow` | Row down, then snap to end, then history next. |
28
+ | `Enter` | Submit the current prompt. |
29
+ | `Meta+Delete` / `Ctrl+Delete` | Delete the word to the right of the cursor. |
30
+ | `Tab` | Autocomplete the current suggestion if one exists. |
31
+ | `Up Arrow` | Row up, then snap to start, then history prev. |
32
+ | `Ctrl+A` / `Home` | Move the cursor to the beginning of the line. |
33
+ | `Ctrl+B` / `Left Arrow` | Move the cursor one character to the left. |
34
+ | `Ctrl+C` | Clear the input prompt. |
35
+ | `Esc` (double press) | Clear the input prompt. |
36
+ | `Ctrl+D` / `Delete` | Delete the character to the right of the cursor. |
37
+ | `Ctrl+E` / `End` | Move the cursor to the end of the line. |
38
+ | `Ctrl+F` / `Right Arrow` | Move the cursor one character to the right. |
39
+ | `Ctrl+H` / `Backspace` | Delete the character to the left of the cursor. |
40
+ | `Ctrl+K` | Delete from the cursor to the end of the line. |
41
+ | `Ctrl+Left Arrow` / `Meta+Left Arrow` / `Meta+B` | Move the cursor one word to the left. |
42
+ | `Ctrl+N` | Row down, then snap to end, then history next. |
43
+ | `Ctrl+P` | Row up, then snap to start, then history prev. |
44
+ | `Ctrl+R` | Reverse search through input/shell history. |
45
+ | `Ctrl+Y` | Retry the last failed request. |
46
+ | `Ctrl+Right Arrow` / `Meta+Right Arrow` / `Meta+F` | Move the cursor one word to the right. |
47
+ | `Ctrl+U` | Delete from the cursor to the beginning of the line. |
48
+ | `Ctrl+V` (Windows: `Alt+V`) | Paste clipboard content. If the clipboard contains an image, it will be saved and a reference to it will be inserted in the prompt. |
49
+ | `Ctrl+W` / `Meta+Backspace` / `Ctrl+Backspace` | Delete the word to the left of the cursor. |
50
+ | `Ctrl+X` / `Meta+Enter` | Open the current input in an external editor. |
51
51
 
52
52
  ## Suggestions
53
53