qwen-agent-server 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/server.js ADDED
@@ -0,0 +1,1050 @@
1
+ #!/usr/bin/env node
2
+ // SPDX-License-Identifier: MIT
3
+ //
4
+ // qwen-agent-server MCP entrypoint.
5
+ //
6
+ // Exports createToolHandlers() for testing and wires a McpServer +
7
+ // StdioServerTransport for production use when run as `node dist/server.js`.
8
+ //
9
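+ // The 5 core session tools (createToolHandlers() additionally exposes qwen_sessions, qwen_oneshot, qwen_oneshot_vision, qwen_embed, qwen_rerank, qwen_tokenize, qwen_extensions and, when a cache is wired, qwen_reload_extensions):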
10
+ // qwen_spawn — create a new session
11
+ // qwen_poll — read events / state
12
+ // qwen_send — push the next user message into a session
13
+ // qwen_stop — cancel a session
14
+ // qwen_backends — list backend health
15
+ import { isAbsolute } from "node:path";
16
+ import { createLogger } from "./log.js";
17
+ import { SUPERVISOR_VERSION } from "./version.js";
18
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
19
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
20
+ import { McpError, ErrorCode } from "@modelcontextprotocol/sdk/types.js";
21
+ import { z } from "zod";
22
+ import { chooseBackendByModality, getCachedHealth, refreshPoolBackends, } from "./backends.js";
23
+ import { createPool, reapSweep, removeSession, spawnSession, } from "./pool.js";
24
+ import { setupShutdown } from "./shutdown.js";
25
+ import { createInstalledExtensionsCache, ExtensionResolutionError, getSessionDefaultExtensions, listInstalledExtensions, resolveExtensions, resolveQwenRealBin, resolveWrapperPath, } from "./extensions.js";
26
+ import { dispatchVisionOneshot, } from "./vision.js";
27
+ import { formatChatPrelude, formatTextPrelude, ThreadStore, } from "./threads.js";
28
+ import { dispatchEmbed } from "./embed.js";
29
+ import { dispatchRerank } from "./rerank.js";
30
+ import { dispatchTokenize, } from "./tokenize.js";
31
+ const NOOP_PROGRESS = () => { };
32
+ const log = createLogger("qwen-agent-server");
33
+ // ─────────────────────────────────────────────────────────────────
34
+ // qwen_spawn opts schema
35
+ //
36
+ // Extracted to a top-level export so tests can parse payloads against it
37
+ // directly without needing a live MCP transport. The same schema is wired
38
+ // into mcpServer.tool registration in main(). Keep these in sync.
39
/**
 * Zod schema for the `opts` argument of the qwen_spawn tool.
 *
 * The object as a whole is optional, and so is every top-level field.
 * The only required nested field is `prior_context.conversation_summary`,
 * which must be present whenever `prior_context` itself is supplied.
 *
 * The parsed result of this schema is the input expected by
 * buildSpawnOptsFromRaw().
 */
export const qwenSpawnOptsSchema = z.object({
    // Routing hints. NOTE(review): presumably `backend` pins a backend id while
    // `tier` / `capacity` narrow automatic selection — confirm against pool.js.
    backend: z.string().optional(),
    tier: z.enum(["local", "remote"]).optional(),
    capacity: z.enum(["fast", "heavy"]).optional(),
    write_authority: z.boolean().optional(),
    allow_subagents: z.boolean().optional(),
    system: z.string().optional(),
    // Carry-over from an earlier conversation; only the summary is mandatory.
    prior_context: z.object({
        conversation_summary: z.string(),
        last_user_message: z.string().optional(),
        prior_session_id: z.string().optional(),
    }).optional(),
    // Must be an absolute path: it is handed verbatim to the inner Qwen Code
    // subprocess as its working directory. A relative path would resolve
    // against the supervisor's cwd (not the caller's intent) and silently
    // run the agent in the wrong tree. Reject at the schema boundary rather
    // than fail opaquely downstream (RDR-006 40v.1 stacked-review hardening).
    cwd: z.string().refine(isAbsolute, { message: "cwd must be an absolute path" }).optional(),
    // positive (not nonnegative): a per-turn output floor of 0 is meaningless;
    // "no floor" is expressed by omitting the field. Keeps the schema honest with
    // the session.ts `> 0` guard (else 0 passes validation then is silently
    // dropped).
    max_output_tokens: z.number().int().positive().optional(),
    // Inner-Qwen HOME override (40v.13). Absolute path; isolates the inner
    // model's config from the operator's real ~/.qwen without touching the
    // supervisor's own HOME (which resolves the backend registry).
    home: z.string().refine(isAbsolute, { message: "home must be an absolute path" }).optional(),
    // Per-spawn extension selection; each list is independently optional.
    extensions: z.object({
        enable: z.array(z.string()).optional(),
        disable: z.array(z.string()).optional(),
        only: z.array(z.string()).optional(),
    }).optional(),
    // Unlike max_output_tokens, these two budgets accept 0 (nonnegative).
    max_context_tokens: z.number().int().nonnegative().optional(),
    max_tool_calls: z.number().int().nonnegative().optional(),
    thinking_mode: z.boolean().optional(),
    // Free-form object with string keys; its contents are not validated here.
    json_schema: z.record(z.string(), z.unknown()).optional(),
}).optional();
76
/**
 * Convert the Zod-parsed qwen_spawn `opts` payload into a spawn-options
 * object that contains only the fields the caller actually supplied.
 *
 * Fields whose value is `undefined` are omitted entirely (rather than copied
 * as explicit `undefined`) so the result satisfies
 * `exactOptionalPropertyTypes`. `prior_context` and `extensions` are rebuilt
 * as fresh objects; every other value is copied by reference.
 *
 * Exported for testability — production wiring in main() funnels every
 * qwen_spawn invocation through this helper.
 *
 * @param {object | undefined} rawOpts - parsed `opts` payload, or undefined.
 * @returns {object} a new object holding only the defined fields.
 */
export function buildSpawnOptsFromRaw(rawOpts) {
    const result = {};
    if (rawOpts === undefined) {
        return result;
    }
    // Copy each listed key from `source` to `target` when it is defined.
    const copyDefined = (source, target, keys) => {
        for (const key of keys) {
            const value = source[key];
            if (value !== undefined) {
                target[key] = value;
            }
        }
    };
    copyDefined(rawOpts, result, [
        "backend",
        "tier",
        "capacity",
        "write_authority",
        "allow_subagents",
        "system",
    ]);
    const { prior_context: prior, extensions: requested } = rawOpts;
    if (prior !== undefined) {
        // conversation_summary is always carried over; the rest only if set.
        const context = { conversation_summary: prior.conversation_summary };
        copyDefined(prior, context, ["last_user_message", "prior_session_id"]);
        result.prior_context = context;
    }
    if (requested !== undefined) {
        const selection = {};
        copyDefined(requested, selection, ["enable", "disable", "only"]);
        result.extensions = selection;
    }
    copyDefined(rawOpts, result, [
        "cwd",
        "home",
        "max_output_tokens",
        "max_context_tokens",
        "max_tool_calls",
        "thinking_mode",
        "json_schema",
    ]);
    return result;
}
133
+ export function createToolHandlers(existingPool, installedExtensionsCache, threadStore) {
134
+ const pool = existingPool ?? createPool();
135
+ // One thread store per handler set. In production main() one is wired
136
+ // explicitly; tests omit the arg to get a default (no-reaper) instance.
137
+ const threads = threadStore ?? new ThreadStore({ reap_interval_ms: 0 });
138
+ let shuttingDown = false;
139
+ // ── qwen_spawn ─────────────────────────────────────────────
140
/**
 * Create a new session for `task`.
 *
 * Returns `{ task_id, chosen_backend }` on success. Two conditions are
 * reported as an `{ error: { code, message } }` envelope instead of a throw:
 * the server is shutting down, or the requested extensions fail to resolve.
 * Any failure inside spawnSession() is rethrown as an
 * McpError(InternalError).
 */
const qwen_spawn = async ({ task, opts = {} }) => {
    if (shuttingDown) {
        log.warn({ event_type: "spawn_rejected" }, "qwen_spawn rejected: server shutting down");
        const error = { code: "shutting_down", message: "server is shutting down; cannot spawn new sessions" };
        return { error };
    }
    // RDR-002 step 6 — pre-spawn extension validation. It only runs when an
    // installed-extensions cache was handed to createToolHandlers; without
    // one, `resolvedExtensions` stays undefined and is forwarded as such.
    let resolvedExtensions;
    if (installedExtensionsCache !== undefined) {
        try {
            const installed = installedExtensionsCache.get();
            resolvedExtensions = resolveExtensions(opts.extensions, getSessionDefaultExtensions(process.env), installed);
        }
        catch (err) {
            // Only resolution failures become an envelope; anything else is
            // unexpected and keeps propagating.
            if (!(err instanceof ExtensionResolutionError)) {
                throw err;
            }
            log.warn({ event_type: "spawn_rejected", reason: "extension_resolution", err: err.message }, "qwen_spawn rejected: extension resolution");
            return { error: { code: "spawn_error", message: err.message } };
        }
    }
    // Hot-reload the backend list before each spawn so operator edits via
    // `/qwen-backends add|remove` apply without restarting the supervisor.
    // Existing sessions stay pinned to their backend (RDR-001 §Q3).
    refreshPoolBackends(pool);
    // Budget defaults are filled inside pool.spawnSession once the backend is
    // known, so the per-backend ctx_size tier can apply (RDR-002 v0.7).
    let spawned;
    try {
        spawned = await spawnSession(pool, task, opts, resolvedExtensions);
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        log.warn({ event_type: "spawn_no_backend", err: message }, "spawnSession failed");
        throw new McpError(ErrorCode.InternalError, message);
    }
    const { task_id, backend } = spawned;
    return { task_id, chosen_backend: backend.id };
};
190
+ // ── qwen_poll ──────────────────────────────────────────────
191
/**
 * Read events and state for a session.
 *
 * An unknown `task_id` yields a synthetic `state: "error"` result with a
 * `task_id_not_found` error rather than a throw. Otherwise the session's
 * `last_polled_at` is refreshed and the call is delegated to
 * `session.poll()` with only the defined options forwarded.
 */
const qwen_poll = async ({ task_id, opts }) => {
    const pooled = pool.sessions.get(task_id);
    if (!pooled) {
        const error = {
            code: "task_id_not_found",
            message: `task_id ${task_id} not found; session may have been evicted`,
        };
        return {
            state: "error",
            recent_events: [],
            more_events_available: false,
            latest_event_id: "",
            error,
        };
    }
    pooled.last_polled_at = Date.now();
    // Forward only the fields that were supplied (exactOptionalPropertyTypes).
    const { since, max_events } = opts ?? {};
    const forwarded = {
        ...(since !== undefined ? { since } : {}),
        ...(max_events !== undefined ? { max_events } : {}),
    };
    return pooled.poll(forwarded);
};
212
+ // ── qwen_send ──────────────────────────────────────────────
213
/**
 * Push the next user message into an existing session.
 *
 * @throws {McpError} InvalidParams when `task_id` is not in the pool.
 * @returns {{ ack: true }} once the message has been handed to the session.
 */
const qwen_send = async ({ task_id, message }) => {
    const target = pool.sessions.get(task_id);
    if (target) {
        target.send(message);
        return { ack: true };
    }
    throw new McpError(ErrorCode.InvalidParams, `task_id ${task_id} not found`);
};
222
+ // ── qwen_stop ──────────────────────────────────────────────
223
/**
 * Stop a session and drop it from the pool.
 *
 * Idempotent: an unknown `task_id` is not an error and simply reports
 * `{ ack: false }`. A session that was found and stopped reports
 * `{ ack: true }`.
 */
const qwen_stop = async ({ task_id }) => {
    const pooled = pool.sessions.get(task_id);
    if (pooled) {
        pooled.stop();
        removeSession(pool, task_id);
        // The state is read after stop() so the log reflects the post-stop value.
        log.info({ task_id, event_type: "stop", state: pooled.state }, "session stopped via qwen_stop");
        return { ack: true };
    }
    return { ack: false };
};
234
+ // ── qwen_backends ─────────────────────────────────────────
235
/**
 * List every configured backend together with its cached health and the
 * number of live sessions currently routed to it.
 *
 * The backend list is hot-reloaded first so operator edits show up in the
 * next listing without a supervisor restart.
 */
const qwen_backends = async () => {
    refreshPoolBackends(pool);
    // Single pass over the live session map: backend id -> session count.
    const liveCounts = new Map();
    for (const { backend } of pool.sessions.values()) {
        const key = backend.id;
        liveCounts.set(key, (liveCounts.get(key) ?? 0) + 1);
    }
    const describe = async (backend) => {
        const healthy = await getCachedHealth(backend);
        const entry = {
            id: backend.id,
            url: backend.url,
            model: backend.model,
            tier: backend.tier,
            capacity: backend.capacity,
            healthy,
            active_sessions: liveCounts.get(backend.id) ?? 0,
        };
        // modality is only reported when the backend declares one.
        if (backend.modality !== undefined) {
            entry.modality = backend.modality;
        }
        return entry;
    };
    // Health lookups for all backends run concurrently.
    return Promise.all(pool.backends.map((backend) => describe(backend)));
};
264
+ // ── qwen_sessions (live overview) ──────────────────────────
265
/**
 * Snapshot of every live session in the pool: id, backend, state, last poll
 * time, completed turn count and budget statistics.
 */
const qwen_sessions = async () => Array.from(pool.sessions, ([task_id, pooled]) => ({
    task_id,
    backend_id: pooled.backend.id,
    state: pooled.state,
    last_polled_at: pooled.last_polled_at,
    turns_completed: pooled.turns_completed,
    budget: pooled.budgetStats(),
}));
280
+ // ── qwen_oneshot (stateless dispatch, RDR-002 v0.8 amendment) ──
281
+ //
282
+ // Single-turn wrapper around spawn + poll-until-done + optional
283
+ // JSON.parse + stop. The schema-aware return shape exists to drop
284
+ // into nexus operator dispatch as a Qwen alternative to `claude -p
285
+ // --json-schema`. The supervisor itself does not run a full Ajv
286
+ // validator; callers either rely on Qwen3.6's instruction-following
287
+ // (system-prompt directive in session.ts) or post-validate.
288
+ // Validation-failure retry is bounded by `max_attempts` so a model
289
+ // that consistently emits prose doesn't burn budget infinitely.
290
// Delay between successive polls of a oneshot session.
const ONESHOT_POLL_INTERVAL_MS = 250;
/**
 * Stateless single-turn dispatch: spawn a session, poll it until it settles,
 * stop it, and (when `opts.json_schema` is set) JSON.parse the final
 * assistant message.
 *
 * Oneshot-only options, stripped before the rest is forwarded to qwen_spawn:
 *   - `timeout_ms`      per-attempt polling budget (default 300000)
 *   - `max_attempts`    upper bound on attempts (default 1, minimum 1);
 *                       only a JSON.parse failure triggers another attempt
 *   - `continuation_id` thread whose prior turns are prepended to the task
 *
 * Always resolves to a result object carrying `ok`, `task_id`, `attempts`,
 * `state`, `elapsed_ms` and `continuation_id`; failures add `error`,
 * successes add `result` (and `parsed` when a schema was requested). Turns
 * are appended to the thread only on success.
 *
 * @param {{ task: string, opts?: object }} args
 * @param {Function} [progress] best-effort progress callback.
 */
const qwen_oneshot = async ({ task, opts }, progress = NOOP_PROGRESS) => {
    const oneshot_start = Date.now();
    const timeout_ms = opts?.timeout_ms ?? 300_000;
    const max_attempts = Math.max(1, opts?.max_attempts ?? 1);
    // Strip the oneshot-specific fields before forwarding to qwen_spawn.
    const spawnOpts = { ...opts };
    delete spawnOpts["timeout_ms"];
    delete spawnOpts["max_attempts"];
    delete spawnOpts["continuation_id"];
    // Thread resolution. If continuation_id is supplied, fetch prior
    // turns and prepend as a text prelude to the task. Always allocate
    // a thread id (new or existing) so the caller can chain on success.
    const thread = threads.resolve(opts?.continuation_id);
    const prelude = formatTextPrelude(thread.turns);
    const effective_task = prelude.length > 0 ? `${prelude}${task}` : task;
    const continuation_id = thread.id;
    let attempts = 0;
    let last_task_id = "";
    let last_state = "error";
    let last_result;
    let last_budget;
    let last_error;
    while (attempts < max_attempts) {
        attempts++;
        progress({
            progress: attempts - 1,
            total: max_attempts,
            message: `attempt ${attempts}/${max_attempts}: spawning`,
        });
        const spawn = await qwen_spawn({ task: effective_task, opts: spawnOpts });
        if ("error" in spawn) {
            last_error = { code: "session_error", message: spawn.error.message };
            break;
        }
        last_task_id = spawn.task_id;
        // Per-attempt timeout origin. NOT to be confused with
        // `oneshot_start` (function-scope, total wall-clock for elapsed_ms).
        const attempt_start = Date.now();
        try {
            // Poll until idle/complete/error/timeout.
            // eslint-disable-next-line no-constant-condition
            while (true) {
                const polled = await qwen_poll({ task_id: last_task_id, opts: {} });
                last_state = polled.state;
                last_budget = polled.budget;
                if (polled.state === "idle" || polled.state === "complete") {
                    last_result = polled.last_message;
                    break;
                }
                if (polled.state === "error") {
                    last_error = {
                        code: "session_error",
                        message: polled.error?.message ?? "session aborted without message",
                    };
                    break;
                }
                if (Date.now() - attempt_start > timeout_ms) {
                    last_error = {
                        code: "timeout",
                        message: `oneshot timed out after ${timeout_ms}ms (state=${polled.state})`,
                    };
                    break;
                }
                await new Promise((resolve) => setTimeout(resolve, ONESHOT_POLL_INTERVAL_MS));
            }
        }
        finally {
            // Always stop the session — oneshot is stateless by contract.
            // Running this in `finally` also covers a rejected poll, which
            // would otherwise leave the spawned session in the pool.
            await qwen_stop({ task_id: last_task_id });
        }
        if (last_error?.code === "session_error" || last_error?.code === "timeout") {
            // Don't retry on session errors / timeouts; those are real failures
            // and retrying is expensive.
            break;
        }
        // Idle/complete reached.
        if (last_result === undefined || last_result === "") {
            last_error = { code: "no_result", message: "session ended with no assistant message" };
            break; // retrying won't help if model produced nothing
        }
        // Qwen CLI passes upstream HTTP failures through to stdout as
        // "[API Error: ...]" / "[Stream Error: ...]" / "[Tool Error: ...]"
        // and exits 0. Without detecting these we'd report ok:true with an
        // error string as the answer. Don't retry — upstream failures
        // (auth, model not loaded, server-side config) won't self-heal in
        // the next 30 s and retrying burns tokens.
        const upstream = matchUpstreamCliError(last_result);
        if (upstream !== undefined) {
            last_error = { code: "upstream_api_error", message: upstream };
            break;
        }
        const schemaRequested = spawnOpts.json_schema !== undefined;
        let parsed;
        if (schemaRequested) {
            // Defensive: Qwen3.6 frequently wraps schema-conforming JSON in
            // markdown code fences (```json ... ```) despite the system-prompt
            // directive. Strip them before JSON.parse.
            try {
                parsed = JSON.parse(stripCodeFences(last_result));
            }
            catch (err) {
                last_error = {
                    code: "validation_failed",
                    message: `JSON.parse failed: ${err instanceof Error ? err.message : String(err)}`,
                };
                // Retry (if attempts remain).
                continue;
            }
        }
        // Success: record the caller's original task (not the prelude-expanded
        // one) and the raw assistant text on the thread.
        threads.append(continuation_id, { role: "user", content: task });
        threads.append(continuation_id, { role: "assistant", content: last_result });
        return {
            ok: true,
            task_id: last_task_id,
            attempts,
            state: last_state,
            result: last_result,
            ...(schemaRequested ? { parsed } : {}),
            ...(last_budget !== undefined ? { budget: last_budget } : {}),
            elapsed_ms: Date.now() - oneshot_start,
            continuation_id,
        };
    }
    // Exhausted attempts or hit a terminal error. We do NOT append
    // failed turns to the thread — there's no useful "assistant" turn
    // to carry forward — but we still emit the continuation_id so the
    // caller can chain another attempt or recover the thread.
    return {
        ok: false,
        task_id: last_task_id,
        attempts,
        state: last_state,
        ...(last_result !== undefined ? { result: last_result } : {}),
        ...(last_error !== undefined ? { error: last_error } : {}),
        ...(last_budget !== undefined ? { budget: last_budget } : {}),
        elapsed_ms: Date.now() - oneshot_start,
        continuation_id,
    };
};
439
+ // ── qwen_oneshot_vision (multimodal direct-HTTP dispatch) ──
440
+ //
441
+ // Bypasses the SDK / Qwen CLI subprocess entirely. POSTs OpenAI-compat
442
+ // multimodal content arrays directly to a backend's /v1/chat/completions.
443
+ // The chosen backend must be running llama-server with --mmproj loaded
444
+ // or the call fails with backend_no_mmproj.
445
/**
 * Single-turn multimodal dispatch: send `task` plus `images` to a backend
 * through dispatchVisionOneshot().
 *
 * Failures are reported as `{ ok: false, elapsed_ms, backend_id, error }`
 * and never thrown by this wrapper. On success the user and assistant turns
 * are appended to the resolved thread, and `continuation_id` is attached to
 * the dispatch result so the caller can chain a follow-up.
 *
 * @param {{ task: string, images: unknown[], opts?: object }} args
 *        `opts.backend` pins a backend id; `opts.continuation_id` resumes a
 *        thread. Both are stripped before the remaining opts are forwarded.
 * @param {Function} [progress] best-effort progress callback.
 */
const qwen_oneshot_vision = async ({ task, images, opts }, progress = NOOP_PROGRESS) => {
    const elapsed_start = Date.now();
    // Build a failure envelope in the shape shared by the direct-HTTP tools.
    const fail = (elapsed_ms, backend_id, code, message) => ({
        ok: false,
        elapsed_ms,
        backend_id,
        error: { code, message },
    });
    if (shuttingDown) {
        return fail(0, "", "backend_error", "supervisor shutting down");
    }
    if (!Array.isArray(images) || images.length === 0) {
        return fail(0, "", "backend_error", "qwen_oneshot_vision requires at least one image");
    }
    // Hot-reload the backend list, as the other backend-routing tools do,
    // so operator edits apply to vision dispatch without a restart.
    refreshPoolBackends(pool);
    // Vision requires a backend whose loaded model can accept image
    // inputs (llama-server with --mmproj). Route by modality directly
    // rather than through chooseBackend (which targets text chat).
    // See bead qwen-coprocessor-stack-w63.
    const backend = await chooseBackendByModality(pool.backends, "multimodal", opts?.backend);
    if (!backend) {
        // Selection was awaited, so report the real elapsed time rather than 0.
        return fail(Date.now() - elapsed_start, "", "backend_error", opts?.backend
            ? `no backend matches pin "${opts.backend}"`
            : "no multimodal backends configured (need modality:'multimodal')");
    }
    // Pin bypasses the modality filter in chooseBackendByModality so the
    // caller's authority is respected — but for vision the upstream would
    // fail with the misleading "image input is not supported" hint
    // (surfaced as backend_no_mmproj after a roundtrip). Reject the
    // mismatch upfront with a specific code, matching qwen_embed/qwen_rerank.
    if (opts?.backend !== undefined && (backend.modality ?? "text") !== "multimodal") {
        return fail(Date.now() - elapsed_start, backend.id, "wrong_modality", `backend "${backend.id}" has modality=${backend.modality ?? "text"}, not 'multimodal'`);
    }
    const dispatchOpts = { ...opts };
    delete dispatchOpts.backend;
    delete dispatchOpts.continuation_id;
    // Thread resolution. continuation_id is optional; either way we
    // allocate one and return it.
    const thread = threads.resolve(opts?.continuation_id);
    const prior_messages = formatChatPrelude(thread.turns);
    const continuation_id = thread.id;
    progress({ progress: 0, total: 1, message: `dispatching to ${backend.id}` });
    const result = await dispatchVisionOneshot(backend, task, images, dispatchOpts, prior_messages);
    progress({
        progress: 1,
        total: 1,
        message: result.ok ? "done" : `error: ${result.error?.code}`,
    });
    // Only successful text answers are recorded on the thread. The image
    // payload itself is not stored; `had_images` just flags the turn.
    if (result.ok && typeof result.result === "string") {
        threads.append(continuation_id, {
            role: "user",
            content: task,
            had_images: true,
        });
        threads.append(continuation_id, {
            role: "assistant",
            content: result.result,
        });
    }
    return { ...result, continuation_id };
};
528
+ // ── qwen_embed / qwen_rerank / qwen_tokenize ────────────────
529
+ //
530
+ // All three bypass the SDK and POST directly to llama-server
531
+ // endpoints. Embed and rerank require backends declared with the
532
+ // corresponding modality; tokenize accepts any text/multimodal
533
+ // backend (the tokenizer is colocated with the loaded model).
534
/**
 * Embed `texts` on a backend declared with modality 'embedding'.
 *
 * Input and routing problems come back as
 * `{ ok: false, elapsed_ms, backend_id, error }`; otherwise the call is
 * delegated to dispatchEmbed() with `opts.backend` removed.
 */
const qwen_embed = async ({ texts, opts }) => {
    const startedAt = Date.now();
    const failure = (elapsed_ms, backend_id, code, message) => ({
        ok: false,
        elapsed_ms,
        backend_id,
        error: { code, message },
    });
    const hasTexts = Array.isArray(texts) && texts.length > 0;
    if (!hasTexts) {
        return failure(0, "", "backend_error", "texts must be a non-empty array");
    }
    refreshPoolBackends(pool);
    const pinnedId = opts?.backend;
    const backend = await chooseBackendByModality(pool.backends, "embedding", pinnedId);
    if (!backend) {
        const reason = pinnedId
            ? `no backend matches pin "${pinnedId}"`
            : "no backend declared with modality='embedding'";
        return failure(Date.now() - startedAt, "", "backend_error", reason);
    }
    // A pin can select a backend of any modality; refuse a mismatch before
    // dispatching.
    const declared = backend.modality ?? "text";
    if (pinnedId !== undefined && declared !== "embedding") {
        return failure(Date.now() - startedAt, backend.id, "wrong_modality", `backend "${backend.id}" has modality=${declared}, not 'embedding'`);
    }
    // Forward every option except the routing pin.
    const { backend: _pin, ...dispatchOpts } = opts ?? {};
    return dispatchEmbed(backend, texts, dispatchOpts);
};
574
/**
 * Rerank `documents` against `query` on a backend declared with modality
 * 'rerank'.
 *
 * Input and routing problems come back as
 * `{ ok: false, elapsed_ms, backend_id, error }`; otherwise the call is
 * delegated to dispatchRerank() with `opts.backend` removed.
 */
const qwen_rerank = async ({ query, documents, opts, }) => {
    const startedAt = Date.now();
    const failure = (elapsed_ms, backend_id, code, message) => ({
        ok: false,
        elapsed_ms,
        backend_id,
        error: { code, message },
    });
    const hasQuery = typeof query === "string" && query.length > 0;
    if (!hasQuery) {
        return failure(0, "", "backend_error", "query must be a non-empty string");
    }
    const hasDocuments = Array.isArray(documents) && documents.length > 0;
    if (!hasDocuments) {
        return failure(0, "", "backend_error", "documents must be a non-empty array");
    }
    refreshPoolBackends(pool);
    const pinnedId = opts?.backend;
    const backend = await chooseBackendByModality(pool.backends, "rerank", pinnedId);
    if (!backend) {
        const reason = pinnedId
            ? `no backend matches pin "${pinnedId}"`
            : "no backend declared with modality='rerank'";
        return failure(Date.now() - startedAt, "", "backend_error", reason);
    }
    // A pin can select a backend of any modality; refuse a mismatch before
    // dispatching.
    const declared = backend.modality ?? "text";
    if (pinnedId !== undefined && declared !== "rerank") {
        return failure(Date.now() - startedAt, backend.id, "wrong_modality", `backend "${backend.id}" has modality=${declared}, not 'rerank'`);
    }
    // Forward every option except the routing pin.
    const { backend: _pin, ...dispatchOpts } = opts ?? {};
    return dispatchRerank(backend, query, documents, dispatchOpts);
};
625
/**
 * Tokenize `content` on a text-capable backend.
 *
 * A pinned `opts.backend` is looked up by id with no modality check.
 * Without a pin, a 'text' backend is preferred and a 'multimodal' one is the
 * fallback. Failures come back as
 * `{ ok: false, elapsed_ms, backend_id, error }`.
 */
const qwen_tokenize = async ({ content, opts }) => {
    const startedAt = Date.now();
    const failure = (elapsed_ms, message) => ({
        ok: false,
        elapsed_ms,
        backend_id: "",
        error: { code: "backend_error", message },
    });
    if (typeof content !== "string") {
        return failure(0, "content must be a string");
    }
    refreshPoolBackends(pool);
    // Tokenize accepts any text/multimodal backend. Honour pin; otherwise
    // try 'text', then 'multimodal'. We do NOT route to embedding /
    // rerank backends — their tokenizer endpoint may be disabled
    // depending on llama-server build flags.
    const pinnedId = opts?.backend;
    const backend = pinnedId !== undefined
        ? pool.backends.find((candidate) => candidate.id === pinnedId) ?? null
        : (await chooseBackendByModality(pool.backends, "text")) ??
            (await chooseBackendByModality(pool.backends, "multimodal"));
    if (!backend) {
        const reason = pinnedId
            ? `no backend matches pin "${pinnedId}"`
            : "no healthy text/multimodal backend available";
        return failure(Date.now() - startedAt, reason);
    }
    // Forward every option except the routing pin.
    const { backend: _pin, ...dispatchOpts } = opts ?? {};
    return dispatchTokenize(backend, content, dispatchOpts);
};
666
+ // ── qwen_extensions (read-only listing) ────────────────────
667
/**
 * Read-only listing of installed extensions.
 *
 * Degrades to an empty list, with a warning, when the pool has no
 * `qwenRealBin` or when listInstalledExtensions() fails; it never throws.
 */
const qwen_extensions = async () => {
    const bin = pool.qwenRealBin;
    if (!bin) {
        log.warn({ event_type: "qwen_extensions_no_bin" }, "qwen_extensions called but pool.qwenRealBin is unset; returning empty list");
        return [];
    }
    try {
        // Awaited inside the try so a rejection is handled by the catch below.
        const installed = await listInstalledExtensions(bin);
        return installed;
    }
    catch (err) {
        const detail = err instanceof Error ? err.message : String(err);
        log.warn({ event_type: "qwen_extensions_exec_failed", err: detail }, "qwen extensions list shell-out failed");
        return [];
    }
};
680
+ // ── qwen_reload_extensions ─────────────────────────────────
681
+ //
682
+ // RDR-002 amendment 2026-05-09: ungated. Single-operator stdio
683
+ // supervisor; the prior QWEN_ADMIN_TOOLS gate solved a
684
+ // multi-tenant-untrusted-client problem we don't have. Available
685
+ // whenever a cache was wired into createToolHandlers (production
686
+ // main() always wires one).
687
// Defined only when an installed-extensions cache was supplied; otherwise it
// stays undefined. When defined, it reloads the cache and reports the
// refreshed extension names together with their count.
const qwen_reload_extensions = installedExtensionsCache === undefined
    ? undefined
    : async () => {
        const reloaded = await installedExtensionsCache.reload();
        const names = Array.from(reloaded);
        const size = names.length;
        log.info({ event_type: "extensions_reloaded", size }, "installed-extensions cache reloaded");
        return { size, names };
    };
696
+ return {
697
+ qwen_spawn,
698
+ qwen_poll,
699
+ qwen_send,
700
+ qwen_stop,
701
+ qwen_backends,
702
+ qwen_sessions,
703
+ qwen_oneshot,
704
+ qwen_oneshot_vision,
705
+ qwen_embed,
706
+ qwen_rerank,
707
+ qwen_tokenize,
708
+ qwen_extensions,
709
+ ...(qwen_reload_extensions !== undefined ? { qwen_reload_extensions } : {}),
710
+ __setShuttingDown: (v) => { shuttingDown = v; },
711
+ };
712
+ }
713
+ // ─────────────────────────────────────────────────────────────────
714
+ // MCP server wiring (production entrypoint)
715
/**
 * Build a progress emitter bound to the current MCP request.
 *
 * When the request carries `_meta.progressToken` and `extra` exposes a
 * `sendNotification` function, each call to the returned emitter sends one
 * `notifications/progress` message tagged with that token. Otherwise the
 * shared no-op emitter is returned.
 *
 * Emission is strictly best-effort: a synchronous throw and an asynchronous
 * rejection from `sendNotification` are both swallowed, so a failed
 * notification can never abort the underlying tool call or surface as an
 * unhandled promise rejection.
 *
 * `extra` is treated as an opaque value and narrowed here, which keeps this
 * helper independent of the SDK's exact RequestHandlerExtra type.
 *
 * @param {unknown} extra - the per-request `extra` object handed to a tool callback.
 * @returns {(event: { progress: number, total?: number, message?: string }) => void}
 */
function makeProgressEmitter(extra) {
    const token = extra?._meta?.progressToken;
    if (token === undefined || typeof extra?.sendNotification !== "function") {
        return NOOP_PROGRESS;
    }
    const send = extra.sendNotification.bind(extra);
    const ignoreFailure = () => { };
    return ({ progress, total, message }) => {
        try {
            const pending = send({
                method: "notifications/progress",
                params: {
                    progressToken: token,
                    progress,
                    // Optional fields are omitted rather than sent as undefined.
                    ...(total !== undefined ? { total } : {}),
                    ...(message !== undefined ? { message } : {}),
                },
            });
            // try/catch only covers a synchronous throw. Attach a rejection
            // handler so an async send failure does not become an unhandled
            // rejection.
            void Promise.resolve(pending).catch(ignoreFailure);
        }
        catch {
            // Progress notifications are best-effort; never let an emission
            // failure abort the underlying tool call.
        }
    };
}
751
/**
 * Production entrypoint.
 *
 * Resolves the real qwen binary and the wrapper path, primes the
 * installed-extensions cache, builds the session pool and tool handlers,
 * registers every MCP tool on a McpServer, starts the periodic reaper,
 * installs SIGTERM/SIGINT handlers and finally connects the server to a
 * stdio transport.
 */
async function main() {
    // Every tool answers with a single text item holding the JSON-encoded
    // handler result.
    const asTextResult = (result) => ({
        content: [{ type: "text", text: JSON.stringify(result) }],
    });
    // Forward `opts` to a handler only when the caller actually supplied it.
    const optionalOpts = (opts) => (opts !== undefined ? { opts } : {});

    log.info("qwen-agent-server starting");

    // RDR-002 §The wrapper-script bridge: locate the real qwen binary once,
    // at startup, and stash it on the pool. A missing Qwen Code install can
    // only be fixed by the operator, so there is no point deferring this.
    const qwenRealBin = resolveQwenRealBin(process.env);
    const wrapperPath = resolveWrapperPath();
    log.info({ qwen_real_bin: qwenRealBin, wrapper_path: wrapperPath }, "extension bridge resolved");

    // Prime the installed-extensions cache once. Per RDR-002 audit-note #4,
    // exec errors propagate while unparseable output degrades to an empty
    // cache plus a warning.
    const installedExtensionsCache = await createInstalledExtensionsCache(qwenRealBin);
    log.info({ event_type: "extensions_cache_loaded", size: installedExtensionsCache.size() }, "installed-extensions cache primed");

    const pool = createPool({ qwenRealBin, wrapperPath });
    const handlers = createToolHandlers(pool, installedExtensionsCache);
    const mcpServer = new McpServer({
        name: "qwen-agent-server",
        version: SUPERVISOR_VERSION,
    });

    // ── Session lifecycle tools ─────────────────────────────────
    mcpServer.tool("qwen_spawn", "Spawn a new Qwen Code session. Returns task_id and chosen_backend immediately; inference runs async.", {
        task: z.string().describe("The task/prompt to run"),
        opts: qwenSpawnOptsSchema,
    }, async ({ task, opts }) => {
        const spawnOpts = buildSpawnOptsFromRaw(opts);
        return asTextResult(await handlers.qwen_spawn({ task, opts: spawnOpts }));
    });

    mcpServer.tool("qwen_poll", "Poll a session for events and current state. Pass opts.since as the previous latest_event_id for incremental reads.", {
        task_id: z.string().describe("Session task ID returned by qwen_spawn"),
        opts: z.object({
            since: z.string().optional().describe("Event cursor: only return events with id > since"),
            max_events: z.number().int().positive().optional().describe("Cap on events per call (default 16)"),
        }).optional(),
    }, async ({ task_id, opts }) => {
        // Copy across only the fields that were explicitly provided.
        const pollOpts = {
            ...(opts?.since !== undefined ? { since: opts.since } : {}),
            ...(opts?.max_events !== undefined ? { max_events: opts.max_events } : {}),
        };
        return asTextResult(await handlers.qwen_poll({ task_id, opts: pollOpts }));
    });

    mcpServer.tool("qwen_send", "Push the next user message into a running or idle session. Wakes idle sessions for the next turn.", {
        task_id: z.string().describe("Session task ID"),
        message: z.string().describe("The answer or message to deliver"),
    }, async (args) => asTextResult(await handlers.qwen_send(args)));

    mcpServer.tool("qwen_stop", "Stop and remove a session. Idempotent — stopping an unknown task_id returns { ack: false }.", {
        task_id: z.string().describe("Session task ID to stop"),
    }, async (args) => asTextResult(await handlers.qwen_stop(args)));

    // ── Argument-less, read-only overview tools ─────────────────
    const overviewTools = [
        ["qwen_backends", "List configured backends and their cached health status."],
        ["qwen_extensions", "List installed Qwen Code extensions with version, path, source, enabled state, and declared commands/skills/agents/MCP servers. Read-only."],
        ["qwen_sessions", "List live sessions in the pool with state, last-polled timestamp, turns completed, and live budget counters. Read-only operator overview."],
    ];
    for (const [toolName, description] of overviewTools) {
        mcpServer.tool(toolName, description, {}, async (_args) => asTextResult(await handlers[toolName]({})));
    }

    // ── qwen_oneshot ────────────────────────────────────────────
    mcpServer.tool("qwen_oneshot", "Stateless single-turn dispatch: spawn → wait until idle → optional JSON parse + retry → stop → return. Schema-aware where opts.json_schema is supplied. Drop-in shape for nexus operator dispatch as a Qwen alternative to `claude -p --json-schema`.", {
        task: z.string().describe("Prompt for the inner Qwen"),
        opts: qwenSpawnOptsSchema.unwrap().extend({
            timeout_ms: z.number().int().positive().optional().describe("Per-attempt hard limit in ms; default 300000. Note: with max_attempts > 1 the returned OneshotResult.elapsed_ms (total wall-clock across all attempts) can exceed this; do not use elapsed_ms > timeout_ms as a timeout signal."),
            max_attempts: z.number().int().positive().optional().describe("Retry on JSON-parse failure; default 1"),
            continuation_id: z.string().optional().describe("Thread id returned by a prior call's OneshotResult.continuation_id; the supervisor prepends prior turns to this task. Omit for a fresh thread. Threads live in-process only (3h TTL, 20-turn cap, no cross-process persistence). The returned continuation_id is always present so callers can chain — even on failure."),
        }).optional(),
    }, async (args, extra) => {
        // Start from the shared spawn options, then layer the oneshot-only
        // knobs on top when they were supplied.
        const oneshotOpts = { ...buildSpawnOptsFromRaw(args.opts) };
        for (const key of ["timeout_ms", "max_attempts", "continuation_id"]) {
            const supplied = args.opts?.[key];
            if (supplied !== undefined) {
                oneshotOpts[key] = supplied;
            }
        }
        const progress = makeProgressEmitter(extra);
        return asTextResult(await handlers.qwen_oneshot({ task: args.task, opts: oneshotOpts }, progress));
    });

    // ── qwen_oneshot_vision ─────────────────────────────────────
    //
    // Direct-HTTP multimodal dispatch. It bypasses the SDK because the SDK's
    // ContentBlock union has no ImageBlock. The backend must be running with
    // --mmproj loaded (see scripts/start-stack.sh and
    // scripts/launch-llama-vulkan.cmd in this repo for the launch shape).
    const imageFromPath = z.object({
        path: z.string().describe("Filesystem path readable by the supervisor process."),
        mime: z.string().optional().describe("MIME type override; inferred from extension if omitted."),
    });
    const imageFromUrl = z.object({
        url: z.string().describe("http(s):// or data: URL passed through verbatim."),
    });
    const imageFromBase64 = z.object({
        base64: z.string().describe("Raw base64-encoded image bytes (no data: prefix)."),
        mime: z.string().describe("MIME type, e.g. image/png, image/jpeg, image/webp."),
    });
    const visionImageInputSchema = z.union([imageFromPath, imageFromUrl, imageFromBase64]);

    mcpServer.tool("qwen_oneshot_vision", "Stateless multimodal dispatch: image(s) + text → JSON-or-text response. Bypasses the SDK (which is text-only) and POSTs OpenAI-compat content arrays directly to a backend's /v1/chat/completions. The chosen backend must be running llama-server with --mmproj loaded; otherwise the call fails with error.code='backend_no_mmproj'.", {
        task: z.string().describe("Text prompt accompanying the image(s)."),
        images: z.array(visionImageInputSchema).min(1).describe("One or more images. Discriminated union of {path}, {url}, or {base64,mime}."),
        opts: z.object({
            json_schema: z.record(z.string(), z.unknown()).optional().describe("JSON Schema constraint; emitted as response_format.json_schema."),
            timeout_ms: z.number().int().positive().optional().describe("Per-request timeout in ms; default 300000."),
            max_tokens: z.number().int().positive().optional().describe("Max tokens to generate; default 2048."),
            temperature: z.number().min(0).max(2).optional().describe("Sampling temperature; default 0.3."),
            system: z.string().optional().describe("Optional system-role prefix."),
            no_think: z.boolean().optional().describe("Prepend /no_think to suppress Qwen thinking-mode reasoning; default true."),
            grammar: z.string().optional().describe("GBNF grammar string for token-by-token output enforcement (llama-server `grammar` field). Strictly stronger than json_schema (which is post-hoc validated). Use for non-JSON constrained output or when json_schema validation has been observed to fail. Vision-only — qwen_oneshot's SDK path cannot accept GBNF; this is an architectural constraint, not a gap."),
            backend: z.string().optional().describe("Pin to a specific backend by id; defaults to chooseBackend selection."),
            continuation_id: z.string().optional().describe("Thread id from a prior qwen_oneshot or qwen_oneshot_vision call. Prior turns are injected as messages[] entries before the current user turn; images from prior turns are NOT carried forward in v1 (a `[image attached]` placeholder is emitted). Same thread store as qwen_oneshot — cross-tool threading works."),
        }).optional(),
    }, async (args, extra) => {
        const progress = makeProgressEmitter(extra);
        const request = {
            task: args.task,
            images: args.images,
            ...optionalOpts(args.opts),
        };
        return asTextResult(await handlers.qwen_oneshot_vision(request, progress));
    });

    // ── qwen_embed / qwen_rerank / qwen_tokenize ────────────────
    //
    // Expose llama-server's /v1/embeddings, /v1/rerank and /tokenize as
    // first-class MCP tools. Each bypasses the SDK because the SDK is
    // text-chat only. Backend selection is modality-based: the operator
    // declares which loaded model serves which role.
    mcpServer.tool("qwen_embed", "Generate embeddings for one or many text inputs via /v1/embeddings. Routes to a backend declared with modality='embedding' (e.g. bge-m3, qwen3-embedding-0.6b). Order of returned embeddings matches the input order.", {
        texts: z.array(z.string()).min(1).describe("One or more text inputs to embed."),
        opts: z.object({
            timeout_ms: z.number().int().positive().optional().describe("Per-request timeout in ms; default 60000."),
            encoding_format: z.enum(["float", "base64"]).optional().describe("'float' (default) returns number[]; 'base64' is a llama-server passthrough."),
            backend: z.string().optional().describe("Pin to a specific backend by id; bypasses modality routing."),
        }).optional(),
    }, async (args) => asTextResult(await handlers.qwen_embed({
        texts: args.texts,
        ...optionalOpts(args.opts),
    })));

    mcpServer.tool("qwen_rerank", "Rerank documents by relevance to a query via /v1/rerank. Routes to a backend declared with modality='rerank' (e.g. qwen3-reranker, bge-reranker). Results are sorted by relevance_score descending; the original input index is preserved on each result.", {
        query: z.string().describe("Query against which documents will be scored."),
        documents: z.array(z.string()).min(1).describe("Documents to rerank."),
        opts: z.object({
            timeout_ms: z.number().int().positive().optional().describe("Per-request timeout in ms; default 60000."),
            top_n: z.number().int().positive().optional().describe("Return only the top-N results server-side."),
            return_documents: z.boolean().optional().describe("If true, include each document's text in its result entry; default false."),
            backend: z.string().optional().describe("Pin to a specific backend by id."),
        }).optional(),
    }, async (args) => asTextResult(await handlers.qwen_rerank({
        query: args.query,
        documents: args.documents,
        ...optionalOpts(args.opts),
    })));

    mcpServer.tool("qwen_tokenize", "Return exact token IDs and count for `content` against a backend's loaded model. Hits llama-server's /tokenize endpoint (sits outside /v1). Used for pre-flight budget arithmetic and chunk sizing. Routes to any healthy text/multimodal backend (embedding/rerank backends are excluded).", {
        content: z.string().describe("Text to tokenize."),
        opts: z.object({
            timeout_ms: z.number().int().positive().optional().describe("Per-request timeout in ms; default 30000."),
            add_special: z.boolean().optional().describe("Include the model's special tokens (BOS etc) in the output; default false."),
            with_pieces: z.boolean().optional().describe("Also return token pieces (string form) under result.pieces; default false."),
            backend: z.string().optional().describe("Pin to a specific backend by id."),
        }).optional(),
    }, async (args) => asTextResult(await handlers.qwen_tokenize({
        content: args.content,
        ...optionalOpts(args.opts),
    })));

    // RDR-002 amendment 2026-05-09: ungated. The tool is registered whenever
    // createToolHandlers produced a reload handler (production main() always
    // wires a cache). Single-operator stdio supervisor; there is no
    // untrusted-client surface to protect against.
    const reloadHandler = handlers.qwen_reload_extensions;
    if (reloadHandler !== undefined) {
        mcpServer.tool("qwen_reload_extensions", "Reload the supervisor's installed-extensions cache from `qwen extensions list`. Affects future spawns; running sessions are unaffected.", {}, async (_args) => asTextResult(await reloadHandler({})));
        log.info("qwen_reload_extensions tool registered");
    }

    // ── Reaper interval ─────────────────────────────────────────
    const reapEveryMs = 5 * 60 * 1000;
    const reaperInterval = setInterval(() => {
        reapSweep(pool);
    }, reapEveryMs);
    // unref() so the pending timer alone never keeps the process alive.
    reaperInterval.unref();

    // ── Signal handlers ─────────────────────────────────────────
    const { handleSignal } = setupShutdown(mcpServer, pool, process.exit);
    for (const signal of ["SIGTERM", "SIGINT"]) {
        process.on(signal, () => {
            clearInterval(reaperInterval);
            void handleSignal(signal);
        });
    }

    // ── Connect transport ───────────────────────────────────────
    const transport = new StdioServerTransport();
    await mcpServer.connect(transport);
    log.info("qwen-agent-server ready on stdio");
}
996
+ /**
997
+ * stripCodeFences (defined further down, after matchUpstreamCliError):
+ * strip surrounding markdown code fences from a candidate JSON string.
998
+ * Qwen3.6 frequently wraps schema-conforming output in ```json ... ```
999
+ * (or plain ```) despite system-prompt directives forbidding it. The
1000
+ * content is right; defending against the jacket is cheaper than
1001
+ * fighting the model. Returns the input unchanged if no fences are
1002
+ * detected — `JSON.parse` then runs on the original.
1003
+ *
1004
+ * Recognises:
1005
+ * - ```json\n{...}\n```
1006
+ * - ```\n{...}\n```
1007
+ * - leading/trailing whitespace around the fence
1008
+ * - a single trailing newline before the closing fence
1009
+ *
1010
+ * Does NOT attempt heroics: if the input has prose before/after the
1011
+ * fences, or multiple fenced blocks, or unbalanced fences, returns
1012
+ * the original. The retry loop in qwen_oneshot is the safety net.
1013
+ */
1014
// The Qwen CLI surfaces upstream HTTP / streaming / tool failures by
// writing a bracketed sentinel to stdout and exiting 0. Without
// recognising the shape, the supervisor would forward the error string
// as the assistant's answer with ok:true. Match an exact-prefix sentinel
// at the start of the trimmed message; if a model legitimately wraps
// its own answer in a `[API Error: ...]` quote it won't be at the head
// of the message.
const UPSTREAM_CLI_ERROR_PREFIXES = ["[API Error:", "[Stream Error:", "[Tool Error:"];
/**
 * Detect an upstream CLI error sentinel at the head of `raw`.
 *
 * The sentinel body ends at the `]` that balances the opening `[`, so
 * messages that themselves contain bracket pairs (JSON arrays, `[500]`)
 * are kept whole. If the brackets never balance, the first `]` is used;
 * if there is no `]` at all (truncated output), everything after the
 * prefix is taken as the message.
 *
 * @param {string} raw - Assistant output as produced by the CLI.
 * @returns {string | undefined} `"<Kind> Error: <message>"` when `raw`
 *   starts (after leading whitespace) with a known sentinel, otherwise
 *   `undefined`. Non-string input yields `undefined`.
 */
export function matchUpstreamCliError(raw) {
    if (typeof raw !== "string") {
        return undefined;
    }
    const trimmed = raw.trimStart();
    const prefix = UPSTREAM_CLI_ERROR_PREFIXES.find((candidate) => trimmed.startsWith(candidate));
    if (prefix === undefined) {
        return undefined;
    }
    // Walk from the opening "[" and stop at the bracket that closes it.
    let end = -1;
    let depth = 0;
    for (let i = 0; i < trimmed.length; i += 1) {
        const ch = trimmed[i];
        if (ch === "[") {
            depth += 1;
        }
        else if (ch === "]") {
            depth -= 1;
            if (depth === 0) {
                end = i;
                break;
            }
        }
    }
    if (end === -1) {
        // Unbalanced brackets: fall back to the first "]" after the prefix.
        end = trimmed.indexOf("]", prefix.length);
    }
    // With no closing bracket at all, keep the remainder rather than
    // repeating the sentinel prefix inside the message.
    const inner = (end === -1 ? trimmed.slice(prefix.length) : trimmed.slice(prefix.length, end)).trim();
    return `${prefix.slice(1, -1)}: ${inner}`.trim();
}
1033
/**
 * Strip one surrounding markdown code fence from a candidate JSON string.
 *
 * Accepts ```json / ```JSON / bare ``` openers, with optional whitespace
 * around the whole fence. The input is returned unchanged when it is not
 * exactly one fenced block: prose before or after the fence, several
 * fenced blocks, or unbalanced fences.
 *
 * @param {string} raw - Model output that may be wrapped in a code fence.
 * @returns {string} The trimmed fence body, or `raw` untouched when no
 *   single enclosing fence is recognised.
 */
export function stripCodeFences(raw) {
    const trimmed = raw.trim();
    // Optional language tag, body, closing fence. Anchored at both ends so
    // fences in the middle of prose are never stripped.
    const m = /^```(?:json|JSON)?\s*\n([\s\S]*?)\n?```$/.exec(trimmed);
    if (!m || m[1] === undefined) {
        return raw;
    }
    const body = m[1];
    // The lazy body still stretches across intermediate fences when the
    // input holds several blocks or a stray fence. A line that begins with
    // ``` cannot occur in valid JSON, so treat it as "not a single block".
    if (/^[ \t]*```/m.test(body)) {
        return raw;
    }
    return body.trim();
}
1042
// Boot the server only when this file is the script Node was launched
// with (its path ends in server.js / server.ts); a plain import, e.g.
// from tests, must not start anything.
const isMain = process.argv[1]?.endsWith("server.js") || process.argv[1]?.endsWith("server.ts");
if (isMain) {
    // Any startup failure is logged and turned into a non-zero exit.
    const exitOnFatal = (err) => {
        log.error({ err }, "fatal startup error");
        process.exit(1);
    };
    main().catch(exitOnFatal);
}
1050
+ //# sourceMappingURL=server.js.map