muonroi-cli 1.6.2 → 1.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
- export declare const PACKAGE_VERSION = "1.6.2";
1
+ export declare const PACKAGE_VERSION = "1.6.4";
2
2
  export declare const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
@@ -1,5 +1,5 @@
1
1
  // AUTO-GENERATED by scripts/sync-version.cjs. DO NOT EDIT BY HAND.
2
2
  // Sourced from package.json at build time so it survives bun --compile bundling.
3
- export const PACKAGE_VERSION = "1.6.2";
3
+ export const PACKAGE_VERSION = "1.6.4";
4
4
  export const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
5
5
  //# sourceMappingURL=version.js.map
@@ -109,6 +109,27 @@ describe("acquireMcpTools — cross-turn client pool", () => {
109
109
  expect(results.every((r) => r === "pong")).toBe(true);
110
110
  expect(connectOneServer).toHaveBeenCalledTimes(2); // 14 failures → exactly ONE shared reconnect
111
111
  });
112
+ it("waits for a criticalServerId past the normal deadline so it lands THIS turn (session 584ba476c07a)", async () => {
113
+ // Normal deadline is 500ms (mock). docs connects at ~700ms — past the normal
114
+ // deadline but within the critical window → must be included when critical.
115
+ connectOneServer.mockImplementation((s) => new Promise((res) => {
116
+ if (s.id === "docs")
117
+ setTimeout(() => res(connected(s.id)), 700);
118
+ else
119
+ res(connected(s.id));
120
+ }));
121
+ const b = await acquireMcpTools([srv("docs")], { criticalServerIds: ["docs"], criticalDeadlineMs: 3000 });
122
+ expect(Object.keys(b.tools)).toContain("mcp_docs__ping");
123
+ expect(b.errors).toHaveLength(0);
124
+ });
125
+ it("without criticalServerIds, a slow server is reported still-connecting (available next turn)", async () => {
126
+ connectOneServer.mockImplementation((s) => new Promise((res) => {
127
+ setTimeout(() => res(connected(s.id)), 700);
128
+ }));
129
+ const b = await acquireMcpTools([srv("docs")]);
130
+ expect(Object.keys(b.tools)).not.toContain("mcp_docs__ping");
131
+ expect(b.errors.some((e) => /still connecting/.test(e))).toBe(true);
132
+ });
112
133
  it("keys by cwd/config — a different command reconnects rather than reusing", async () => {
113
134
  connectOneServer.mockImplementation(async (s) => connected(s.id));
114
135
  await acquireMcpTools([
@@ -179,6 +179,28 @@ export async function acquireMcpTools(servers, opts) {
179
179
  await Promise.race([Promise.allSettled(attempts), deadline]);
180
180
  if (deadlineTimer)
181
181
  clearTimeout(deadlineTimer);
182
+ // Critical-server extended wait: a turn that MUST have a specific server (e.g.
183
+ // muonroi-docs on an ecosystem question) waits for just that server's connect
184
+ // beyond the normal deadline, so a cold first-connect is included THIS turn
185
+ // rather than reported "still connecting → next turn". Only the named servers
186
+ // are awaited; everything already settled is untouched (no added latency for
187
+ // normal turns, which pass no criticalServerIds).
188
+ const critical = new Set((opts?.criticalServerIds ?? []).filter(Boolean));
189
+ if (critical.size > 0) {
190
+ const pendingIdx = enabled.map((s, i) => ({ s, i })).filter(({ s, i }) => critical.has(s.id) && !slots[i].done);
191
+ if (pendingIdx.length > 0) {
192
+ const criticalDeadlineMs = Math.max(deadlineMs, opts?.criticalDeadlineMs ?? 8000);
193
+ const extraMs = Math.max(0, criticalDeadlineMs - deadlineMs);
194
+ let extraTimer;
195
+ const extraDeadline = new Promise((resolve) => {
196
+ extraTimer = setTimeout(resolve, extraMs);
197
+ extraTimer.unref?.();
198
+ });
199
+ await Promise.race([Promise.allSettled(pendingIdx.map(({ i }) => attempts[i])), extraDeadline]);
200
+ if (extraTimer)
201
+ clearTimeout(extraTimer);
202
+ }
203
+ }
182
204
  for (let i = 0; i < slots.length; i++) {
183
205
  const slot = slots[i];
184
206
  if (slot.done) {
@@ -8,6 +8,17 @@ export interface McpToolBundle {
8
8
  }
9
9
  export interface McpBuildOptions {
10
10
  onOAuthRequired?: (serverId: string, url: URL) => void;
11
+ /**
12
+ * Server ids the CURRENT turn critically needs (e.g. muonroi-docs on an
13
+ * ecosystem question). acquireMcpTools waits for these specifically beyond the
14
+ * normal build deadline — up to `criticalDeadlineMs` — so a cold first-connect
15
+ * is included THIS turn instead of "ready next turn" (session 584ba476c07a:
16
+ * first ecosystem question missed muonroi-docs while it was still warming).
17
+ * Other servers are unaffected — only the named ones get the extended wait.
18
+ */
19
+ criticalServerIds?: string[];
20
+ /** Extended ceiling (ms) for criticalServerIds. Default 8000. */
21
+ criticalDeadlineMs?: number;
11
22
  }
12
23
  /**
13
24
  * Total wall-clock budget for building the MCP tool set. Servers connect in
@@ -60,6 +60,7 @@ import * as phaseTracker from "../ee/phase-tracker.js";
60
60
  import { buildScope as buildScopeForVeto } from "../ee/scope.js";
61
61
  import { fireTrajectoryEvent } from "../ee/session-trajectory.js";
62
62
  import { getTenantId as getTenantIdForVeto } from "../ee/tenant.js";
63
+ import { mentionsEcosystemScope } from "../gsd/directives.js";
63
64
  import { acquireMcpTools } from "../mcp/client-pool.js";
64
65
  import { dropRedundantFsMcpTools, filterMcpServersByMessage } from "../mcp/smart-filter.js";
65
66
  import { getModelInfo } from "../models/registry.js";
@@ -88,7 +89,7 @@ import { statusBarStore } from "../ui/status-bar/store.js";
88
89
  import { appendDecisionLog } from "../usage/decision-log.js";
89
90
  import { openUrl } from "../utils/open-url.js";
90
91
  import { appendAudit, toolNeedsApproval } from "../utils/permission-mode.js";
91
- import { getAutoCouncilConfidence, getAutoCouncilMinRoles, getProviderStallTimeoutMs, getRoleModels, getTopLevelCompactKeepLast, getTopLevelCompactThresholdChars, getTopLevelToolBudgetChars, isAutoCouncilEnabled, isProviderDisabled, loadMcpServers, loadValidSubAgents, } from "../utils/settings.js";
92
+ import { getAutoCouncilConfidence, getAutoCouncilMinRoles, getProviderStallRetries, getProviderStallTimeoutMs, getRoleModels, getTopLevelCompactKeepLast, getTopLevelCompactThresholdChars, getTopLevelToolBudgetChars, isAutoCouncilEnabled, isProviderDisabled, loadMcpServers, loadValidSubAgents, } from "../utils/settings.js";
92
93
  import { resolveShell } from "../utils/shell.js";
93
94
  import { relaxCompactionSettings } from "./compaction.js";
94
95
  import { wrapToolSetWithDedup } from "./cross-turn-dedup.js";
@@ -107,7 +108,7 @@ import { forcedFinalize, getSessionLastTask, incSessionStep, parseBudgetOverride
107
108
  import { attachReminderToMessages, buildCheckpointReminder, buildScopeReminder, cadenceForSize, shouldInjectCeilingCrossing, shouldInjectReminder, shouldInjectSoftWarn, shouldPreWarnCompaction, } from "./scope-reminder.js";
108
109
  import { formatElisionManifest, getSessionExperienceCounts, recordCompaction, recordElision, } from "./session-experience.js";
109
110
  import { attemptStallRescue, pushStallToolResult } from "./stall-rescue.js";
110
- import { createStallWatchdog, STALL_ERROR_MESSAGE } from "./stall-watchdog.js";
111
+ import { createStallWatchdog, STALL_ERROR_MESSAGE, shouldRepromptStall, stallRepromptBackoffMs, } from "./stall-watchdog.js";
111
112
  import { wrapToolSetWithCap } from "./sub-agent-cap.js";
112
113
  import { compactSubAgentMessages, cumulativeMessageChars } from "./subagent-compactor.js";
113
114
  import { detectTextEmittedToolCall, parseDsmlToolCalls } from "./text-tool-call-detector.js";
@@ -812,6 +813,16 @@ export class MessageProcessor {
812
813
  // clear toast and SKIP the transient-retry (a stalled provider just stalls
813
814
  // again, wasting another full timeout of silence).
814
815
  let stallTriggered = false;
816
+ // Time-to-first-byte stall RE-PROMPT: some providers (observed:
817
+ // xai/grok-build-0.1) accept the request then never send the first byte —
818
+ // a single wedged socket, not a down backend, so a fresh request usually
819
+ // goes through. When the watchdog fires with ZERO chunks received this
820
+ // attempt, we re-issue the SAME request up to `maxStallRetries` times
821
+ // (loop-persistent counter). Gated on zero-chunks so it can NEVER restart a
822
+ // turn that already ran tools or emitted text — those go to the partial-
823
+ // answer rescue path instead. maxStallRetries = 0 restores legacy behaviour.
824
+ let stallRetryCount = 0;
825
+ const maxStallRetries = getProviderStallRetries();
815
826
  // Auto-council: route to multi-model debate when EITHER
816
827
  // (a) PIL classified taskType=plan|analyze with high confidence AND the
817
828
  // prompt is complex enough to justify the debate cost, OR
@@ -927,7 +938,7 @@ export class MessageProcessor {
927
938
  return;
928
939
  }
929
940
  try {
930
- while (true) {
941
+ streamAttempt: while (true) {
931
942
  // SAMR Phase 2: switch to fast model for tool-execution steps
932
943
  if (stepRouterPhase === "phase2" && phase2Runtime) {
933
944
  runtime = phase2Runtime;
@@ -935,6 +946,65 @@ export class MessageProcessor {
935
946
  }
936
947
  deps.setCompactedThisTurn(false);
937
948
  let assistantText = "";
949
+ // Count of stream parts received in THIS attempt. Stays 0 only when the
950
+ // provider never sent a first byte → the safe-to-re-prompt stall case.
951
+ let chunksThisAttempt = 0;
952
+ // Decide whether a fired stall watchdog should re-prompt (re-issue the
953
+ // same request) instead of falling through to rescue/error. Returns the
954
+ // backoff ms to wait before re-issuing, or null to NOT re-prompt. Reads
955
+ // the live per-attempt locals; safe to call only when stallTriggered.
956
+ const planStallReprompt = () => {
957
+ if (!shouldRepromptStall({
958
+ stallTriggered,
959
+ stallRetryCount,
960
+ maxStallRetries,
961
+ chunksThisAttempt,
962
+ assistantTextEmpty: assistantText.trim() === "",
963
+ aborted: signal.aborted,
964
+ })) {
965
+ return null;
966
+ }
967
+ stallRetryCount++;
968
+ const backoffMs = stallRepromptBackoffMs(stallRetryCount);
969
+ try {
970
+ const _ar = globalThis.__muonroiAgentRuntime;
971
+ _ar?.emitEvent({
972
+ t: "event",
973
+ kind: "stream-retry",
974
+ attempt: stallRetryCount,
975
+ maxAttempts: maxStallRetries + 1,
976
+ errorName: "TimeoutError",
977
+ errorMessage: "provider-stall (no first byte) — re-prompting",
978
+ nextDelayMs: backoffMs,
979
+ });
980
+ _ar?.emitEvent({
981
+ t: "event",
982
+ kind: "toast",
983
+ level: "warning",
984
+ text: `Model stalled — re-prompting (attempt ${stallRetryCount}/${maxStallRetries})…`,
985
+ });
986
+ }
987
+ catch (emitErr) {
988
+ console.error(`[message-processor] stall-reprompt telemetry failed: ${emitErr?.message}`);
989
+ }
990
+ try {
991
+ if (deps.session) {
992
+ logInteraction(deps.session.id, "stream_retry", {
993
+ data: {
994
+ attempt: stallRetryCount,
995
+ maxAttempts: maxStallRetries + 1,
996
+ errorName: "provider-stall",
997
+ errorMessage: "no first byte within stall timeout — re-prompted",
998
+ nextDelayMs: backoffMs,
999
+ },
1000
+ });
1001
+ }
1002
+ }
1003
+ catch (logErr) {
1004
+ console.error(`[message-processor] stall-reprompt log failed: ${logErr?.message}`);
1005
+ }
1006
+ return backoffMs;
1007
+ };
938
1008
  // Tracks where `assistantText` was at the previous step boundary so
939
1009
  // `onStepFinish` can compute the text emitted within the just-finished
940
1010
  // step (input to the self-repetition detector).
@@ -1022,6 +1092,16 @@ export class MessageProcessor {
1022
1092
  const filteredServers = filterMcpServersByMessage(loadMcpServers(), userMessage, {
1023
1093
  disabled: process.env.MUONROI_DISABLE_SMART_MCP === "1",
1024
1094
  });
1095
+ // Ecosystem question → muonroi-docs is the authoritative source the
1096
+ // agent is nudged to consult FIRST. Wait for it specifically beyond the
1097
+ // normal deadline so a cold first-connect lands THIS turn instead of
1098
+ // "ready next turn" (session 584ba476c07a: first ecosystem question
1099
+ // missed docs while warming → agent guessed from local files).
1100
+ const criticalServerIds = mentionsEcosystemScope(userMessage)
1101
+ ? filteredServers
1102
+ .filter((s) => /(^|[-_])docs([-_]|$)/.test(s.id) && /muonroi/i.test(s.id))
1103
+ .map((s) => s.id)
1104
+ : undefined;
1025
1105
  // MCP non-blocking: acquireMcpTools self-bounds — it connects servers
1026
1106
  // in parallel and returns PARTIAL results at its internal deadline
1027
1107
  // (fast/cached servers included; slow first-connects reported in
@@ -1038,6 +1118,7 @@ export class MessageProcessor {
1038
1118
  // command-injection vector the old exec() opener had.
1039
1119
  openUrl(url);
1040
1120
  },
1121
+ ...(criticalServerIds && criticalServerIds.length > 0 ? { criticalServerIds } : {}),
1041
1122
  });
1042
1123
  }
1043
1124
  catch (err) {
@@ -1718,6 +1799,11 @@ export class MessageProcessor {
1718
1799
  const _wireProviderIdTop = runtime.modelInfo?.provider ?? "unknown";
1719
1800
  for await (const part of result.fullStream) {
1720
1801
  stall.pet(); // chunk arrived — reset the stall watchdog
1802
+ // Count only real content parts. The watchdog abort itself surfaces
1803
+ // as an "abort" part — counting it would defeat the TTFB-stall gate
1804
+ // (a frozen-before-first-byte stall yields ONLY the abort part).
1805
+ if (part.type !== "abort")
1806
+ chunksThisAttempt++;
1721
1807
  if (signal.aborted) {
1722
1808
  yield { type: "content", content: "\n\n[Cancelled]" };
1723
1809
  break;
@@ -2271,6 +2357,19 @@ export class MessageProcessor {
2271
2357
  // instead of a benign "[Cancelled]" so a hung provider no longer
2272
2358
  // looks like a silent freeze.
2273
2359
  if (stallTriggered) {
2360
+ // Time-to-first-byte stall (no real chunk this attempt): the
2361
+ // socket wedged before any output — re-issue the SAME request
2362
+ // rather than giving up. Bounded by maxStallRetries; never
2363
+ // fires once tools ran or text flowed (planStallReprompt gate).
2364
+ const _stallBackoff = planStallReprompt();
2365
+ if (_stallBackoff != null) {
2366
+ stall.dispose();
2367
+ await new Promise((r) => setTimeout(r, _stallBackoff));
2368
+ if (!signal.aborted) {
2369
+ stallTriggered = false;
2370
+ continue streamAttempt;
2371
+ }
2372
+ }
2274
2373
  stall.dispose();
2275
2374
  // A response tool already produced the terminal structured
2276
2375
  // answer (buffered from its call args) before the provider
@@ -2901,6 +3000,20 @@ export class MessageProcessor {
2901
3000
  attemptedOverflowRecovery = true;
2902
3001
  continue;
2903
3002
  }
3003
+ // Stall surfaced as a throw (rather than an "abort" stream part):
3004
+ // apply the SAME time-to-first-byte re-prompt as the abort-part path.
3005
+ // The watchdog already fired (stallTriggered) so its timer is spent —
3006
+ // no dispose needed; the next attempt arms a fresh watchdog.
3007
+ if (stallTriggered) {
3008
+ const _stallBackoff = planStallReprompt();
3009
+ if (_stallBackoff != null) {
3010
+ await new Promise((r) => setTimeout(r, _stallBackoff));
3011
+ if (!signal.aborted) {
3012
+ stallTriggered = false;
3013
+ continue;
3014
+ }
3015
+ }
3016
+ }
2904
3017
  // Transient network/server error retry — up to MAX_STREAM_RETRIES extra attempts.
2905
3018
  // Only retry when no content has flowed yet (assistantText empty) to avoid
2906
3019
  // partial-output corruption. Honour the abort signal between retries.
@@ -31,4 +31,35 @@ export interface StallWatchdog {
31
31
  export declare const STALL_ABORT_REASON = "provider-stall";
32
32
  /** User-facing message surfaced when the stall watchdog fires. */
33
33
  export declare const STALL_ERROR_MESSAGE: string;
34
+ /** Inputs to the stall re-prompt decision — see {@link shouldRepromptStall}. */
35
+ export interface StallRepromptState {
36
+ /** The watchdog fired for this attempt. */
37
+ stallTriggered: boolean;
38
+ /** How many stall re-prompts have already happened this turn. */
39
+ stallRetryCount: number;
40
+ /** Configured cap (getProviderStallRetries); 0 disables re-prompt. */
41
+ maxStallRetries: number;
42
+ /** Real content parts received this attempt (the abort part is NOT counted). */
43
+ chunksThisAttempt: number;
44
+ /** True when no assistant text has flowed this attempt. */
45
+ assistantTextEmpty: boolean;
46
+ /** True on genuine user cancel (never re-prompt over a cancel). */
47
+ aborted: boolean;
48
+ }
49
+ /**
50
+ * Decide whether a fired stall watchdog should trigger a re-prompt (re-issue
51
+ * the same request) instead of surfacing the stall.
52
+ *
53
+ * ONLY a time-to-first-byte stall qualifies: zero real chunks AND no assistant
54
+ * text this attempt, under the retry cap, and not a user cancel. Re-issuing
55
+ * after tools ran or text flowed would corrupt/duplicate output — those cases
56
+ * fall through to the partial-answer rescue path instead. Pure (no side
57
+ * effects) so it is unit-testable in isolation from the orchestrator loop.
58
+ */
59
+ export declare function shouldRepromptStall(s: StallRepromptState): boolean;
60
+ /**
61
+ * Exponential backoff (ms, capped at 4s) before the Nth stall re-prompt
62
+ * (1-based): 500 → 1000 → 2000 → 4000 → 4000.
63
+ */
64
+ export declare function stallRepromptBackoffMs(attempt: number): number;
34
65
  export declare function createStallWatchdog(timeoutMs: number, onFire?: () => void): StallWatchdog;
@@ -23,6 +23,30 @@ export const STALL_ABORT_REASON = "provider-stall";
23
23
  export const STALL_ERROR_MESSAGE = "Model not responding — no output received within the stall timeout. " +
24
24
  "The provider may be out of balance, rate-limited, or unreachable. " +
25
25
  "Tune MUONROI_PROVIDER_STALL_TIMEOUT_MS (0 disables) or switch model/provider.";
26
/**
 * Pure predicate: should a fired stall watchdog lead to re-issuing the same
 * request rather than surfacing the stall?
 *
 * The answer is yes only for a time-to-first-byte stall, i.e. when ALL hold:
 *   - the watchdog fired for this attempt (`stallTriggered`),
 *   - the turn was not cancelled (`aborted` is false),
 *   - no real chunk arrived and no assistant text flowed this attempt,
 *   - fewer than `maxStallRetries` re-prompts have been spent
 *     (so `maxStallRetries` of 0 never re-prompts).
 *
 * No side effects — it only reads the fields of `s`.
 *
 * @param s Snapshot of the per-attempt stall state.
 * @returns `true` when the caller should re-prompt, otherwise `false`.
 */
export function shouldRepromptStall(s) {
    const { stallTriggered, stallRetryCount, maxStallRetries, chunksThisAttempt, assistantTextEmpty, aborted } = s;
    // Nothing to retry if the watchdog never fired or the turn was cancelled.
    if (aborted || !stallTriggered) {
        return false;
    }
    // Any received chunk or emitted text means this is not a first-byte stall.
    const outputSeen = chunksThisAttempt !== 0 || !assistantTextEmpty;
    if (outputSeen) {
        return false;
    }
    // Still within the configured retry budget?
    return stallRetryCount < maxStallRetries;
}
43
/**
 * Exponential backoff (ms, capped at 4s) before the Nth stall re-prompt
 * (1-based): 500 → 1000 → 2000 → 4000 → 4000.
 *
 * The attempt number is normalised before use so the result is always a
 * finite whole number of milliseconds that is safe to hand to `setTimeout`:
 *   - `NaN` is treated as the first attempt (previously the result was `NaN`),
 *   - fractional values are floored to a whole attempt,
 *   - anything below 1 is treated as the first attempt.
 *
 * @param attempt 1-based index of the re-prompt about to be issued.
 * @returns Delay in milliseconds, between 500 and 4000 inclusive.
 */
export function stallRepromptBackoffMs(attempt) {
    // Math.max(1, NaN) is NaN, which would poison the whole expression.
    const nth = Number.isNaN(attempt) ? 1 : Math.max(1, Math.floor(attempt));
    return Math.min(500 * 2 ** (nth - 1), 4_000);
}
26
50
  export function createStallWatchdog(timeoutMs, onFire) {
27
51
  const controller = new AbortController();
28
52
  let firedFlag = false;
@@ -1,5 +1,5 @@
1
1
  import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
2
- import { createStallWatchdog, STALL_ABORT_REASON } from "./stall-watchdog.js";
2
+ import { createStallWatchdog, STALL_ABORT_REASON, shouldRepromptStall, stallRepromptBackoffMs, } from "./stall-watchdog.js";
3
3
  describe("createStallWatchdog", () => {
4
4
  beforeEach(() => vi.useFakeTimers());
5
5
  afterEach(() => vi.useRealTimers());
@@ -70,4 +70,49 @@ describe("createStallWatchdog", () => {
70
70
  expect(wd.signal.aborted).toBe(true);
71
71
  });
72
72
  });
73
+ describe("shouldRepromptStall", () => {
74
+ // A clean time-to-first-byte stall: watchdog fired, zero chunks, no text,
75
+ // under the cap, not cancelled — the ONLY case that re-prompts.
76
+ const ttfb = (over = {}) => ({
77
+ stallTriggered: true,
78
+ stallRetryCount: 0,
79
+ maxStallRetries: 1,
80
+ chunksThisAttempt: 0,
81
+ assistantTextEmpty: true,
82
+ aborted: false,
83
+ ...over,
84
+ });
85
+ it("re-prompts a time-to-first-byte stall under the cap", () => {
86
+ expect(shouldRepromptStall(ttfb())).toBe(true);
87
+ });
88
+ it("does NOT re-prompt when the watchdog never fired", () => {
89
+ expect(shouldRepromptStall(ttfb({ stallTriggered: false }))).toBe(false);
90
+ });
91
+ it("does NOT re-prompt once the retry cap is reached", () => {
92
+ expect(shouldRepromptStall(ttfb({ stallRetryCount: 1, maxStallRetries: 1 }))).toBe(false);
93
+ // maxStallRetries=0 means the feature is disabled — never re-prompt.
94
+ expect(shouldRepromptStall(ttfb({ stallRetryCount: 0, maxStallRetries: 0 }))).toBe(false);
95
+ });
96
+ it("does NOT re-prompt once a real chunk has arrived (mid-stream stall → rescue)", () => {
97
+ expect(shouldRepromptStall(ttfb({ chunksThisAttempt: 1 }))).toBe(false);
98
+ });
99
+ it("does NOT re-prompt once assistant text has flowed (output would corrupt)", () => {
100
+ expect(shouldRepromptStall(ttfb({ assistantTextEmpty: false }))).toBe(false);
101
+ });
102
+ it("does NOT re-prompt over a genuine user cancel", () => {
103
+ expect(shouldRepromptStall(ttfb({ aborted: true }))).toBe(false);
104
+ });
105
+ });
106
+ describe("stallRepromptBackoffMs", () => {
107
+ it("grows exponentially and caps at 4s", () => {
108
+ expect(stallRepromptBackoffMs(1)).toBe(500);
109
+ expect(stallRepromptBackoffMs(2)).toBe(1000);
110
+ expect(stallRepromptBackoffMs(3)).toBe(2000);
111
+ expect(stallRepromptBackoffMs(4)).toBe(4000);
112
+ expect(stallRepromptBackoffMs(5)).toBe(4000);
113
+ });
114
+ it("treats attempt < 1 as the first attempt", () => {
115
+ expect(stallRepromptBackoffMs(0)).toBe(500);
116
+ });
117
+ });
73
118
  //# sourceMappingURL=stall-watchdog.test.js.map
@@ -104,6 +104,16 @@ describe("layer4Gsd (gsd-native)", () => {
104
104
  const result = await layer4Gsd(makeCtx({ raw, enriched: raw, taskType: "analyze", intentKind: "task", deliverableKind: "answer" }));
105
105
  expect(result.enriched).toContain("QUESTION / explanatory");
106
106
  });
107
+ it("deliverableKind='report' is informational (no council/discuss scaffold) — session 666630479c1a", async () => {
108
+ // "Đọc và tóm tắt kiến trúc…" classifies as deliverableKind 'report'. A
109
+ // report is human-facing with NO code change, so it must route to the
110
+ // QUESTION directive, not the heavy implement/discuss/council scaffold that
111
+ // over-asked with askcards on a read/summarize task.
112
+ const raw = "đọc và tóm tắt kiến trúc src/orchestrator, src/pil, src/mcp kèm file:line";
113
+ const result = await layer4Gsd(makeCtx({ raw, enriched: raw, taskType: "analyze", intentKind: "task", deliverableKind: "report" }));
114
+ expect(result.enriched).toContain("QUESTION / explanatory");
115
+ expect(result.enriched).not.toContain("MANDATORY");
116
+ });
107
117
  it("Phase 2b: deliverableKind='code' is NOT informational even for a question-shaped prompt", async () => {
108
118
  // The raw text reads as a question — the legacy regex would mark it
109
119
  // informational. The model's deliverableKind='code' must override that so
@@ -84,15 +84,20 @@ export async function layer4Gsd(ctx) {
84
84
  // into the human-facing reply as a "2-3 line plan" + process narration
85
85
  // (session 829a83888dd2). Route them to the human-facing question directive.
86
86
  //
87
- // Phase 2b: when the model classified the deliverable, CONSUME it an
88
- // "answer" deliverable IS informational. Only when the model didn't emit one
89
- // (deliverableKind null legacy cascade, or the model omitted the word) do
90
- // we fall back to the legacy regex predicates:
87
+ // Phase 2b: when the model classified the deliverable, CONSUME it. Both an
88
+ // "answer" AND a "report" deliverable are HUMAN-FACING with no code change, so
89
+ // both are informational only "code" routes through the implement/verify (and
90
+ // heavy discuss/council) scaffold. Treating "report" as non-informational sent
91
+ // read/summarize/architecture tasks (deliverableKind "report") down the heavy
92
+ // council + AskUserQuestion path, over-asking on a task that just wanted a
93
+ // written summary (session 666630479c1a: "Đọc và tóm tắt kiến trúc…" raised 2
94
+ // askcards + a council loop). Only when the model emitted no deliverable
95
+ // (deliverableKind null → legacy cascade) do we fall back to regex predicates:
91
96
  // 1. isMetaAnalysisPrompt — self/CLI evaluation, prior-turn reflection.
92
97
  // 2. taskType "general" classified as a real task by L1.
93
98
  // 3. question-shaped prompt that is NOT an implementation request.
94
99
  const informational = ctx.deliverableKind
95
- ? ctx.deliverableKind === "answer"
100
+ ? ctx.deliverableKind !== "code"
96
101
  : isMetaAnalysisPrompt(ctx.raw) ||
97
102
  (ctx.taskType === "general" && ctx.intentKind === "task") ||
98
103
  (isQuestionLike(ctx.raw) && !isImplementationIntent(ctx.raw));
@@ -2525,7 +2525,7 @@ export function App({ agent, startupConfig, initialMessage, onExit }) {
2525
2525
  applyLocalAssistantDelta(`\n⚠ [Experience] ${eeChunk.experienceWarning?.message ?? eeChunk.content ?? ""}\nWhy: ${eeChunk.experienceWarning?.why ?? ""}\n`);
2526
2526
  }
2527
2527
  else if (eeChunk.type === "experience_injected") {
2528
- applyLocalAssistantDelta(`\n💡 [Experience Injected] ${eeChunk.experienceInjected?.pointCount ?? 0} point(s) loaded (score ≥ ${eeChunk.experienceInjected?.scoreFloor ?? 0})\n`);
2528
+ applyLocalAssistantDelta(formatExperienceInjectedBlock(eeChunk.experienceInjected ?? {}));
2529
2529
  }
2530
2530
  });
2531
2531
  for await (const chunk of agent.processMessage(text.trim(), undefined, images)) {
@@ -3472,10 +3472,7 @@ export function App({ agent, startupConfig, initialMessage, onExit }) {
3472
3472
  },
3473
3473
  ];
3474
3474
  }
3475
- return [
3476
- ...prev,
3477
- buildAssistantEntry(`💡 [Experience Injected] ${chunk.experienceInjected.pointCount} point(s)`),
3478
- ];
3475
+ return [...prev, buildAssistantEntry(formatExperienceInjectedBlock(chunk.experienceInjected))];
3479
3476
  });
3480
3477
  }
3481
3478
  if (chunk.type === "halt" && chunk.haltChunk) {
@@ -3694,10 +3691,7 @@ export function App({ agent, startupConfig, initialMessage, onExit }) {
3694
3691
  },
3695
3692
  ];
3696
3693
  }
3697
- return [
3698
- ...prev,
3699
- buildAssistantEntry(`💡 [Experience Injected] ${chunk.experienceInjected.pointCount} point(s)`),
3700
- ];
3694
+ return [...prev, buildAssistantEntry(formatExperienceInjectedBlock(chunk.experienceInjected))];
3701
3695
  });
3702
3696
  }
3703
3697
  if (chunk.type === "done")
@@ -317,6 +317,19 @@ export declare function getSubAgentBudgetChars(): number;
317
317
  * Default 120_000 (2 min). Env override: MUONROI_PROVIDER_STALL_TIMEOUT_MS.
318
318
  */
319
319
  export declare function getProviderStallTimeoutMs(): number;
320
+ /**
321
+ * Number of times to AUTOMATICALLY re-issue a streaming model call after the
322
+ * stall watchdog fires WITHOUT any chunk having arrived (a time-to-first-byte
323
+ * "frozen" stall). Some providers (observed: xai/grok-build-0.1) accept a
324
+ * request then never send the first byte, yet a fresh request goes through —
325
+ * a single dead socket, not a down backend. Re-prompting is gated on
326
+ * zero-chunks-this-attempt so it can NEVER restart a turn that already ran
327
+ * tools or emitted text (that would corrupt/duplicate output — the partial-
328
+ * answer rescue path handles those). Each re-prompt waits a short backoff.
329
+ * Range 0–5; 0 restores the legacy "surface the stall, never retry" behaviour.
330
+ * Default 1. Env override: MUONROI_PROVIDER_STALL_RETRIES.
331
+ */
332
+ export declare function getProviderStallRetries(): number;
320
333
  /**
321
334
  * Phase B3 — threshold (in chars of cumulative message content) above which
322
335
  * the sub-agent `prepareStep` compactor rewrites older tool_result parts
@@ -693,6 +693,27 @@ export function getProviderStallTimeoutMs() {
693
693
  }
694
694
  return 120_000;
695
695
  }
696
/**
 * Number of times to AUTOMATICALLY re-issue a streaming model call after the
 * stall watchdog fires WITHOUT any chunk having arrived (a time-to-first-byte
 * "frozen" stall). Some providers (observed: xai/grok-build-0.1) accept a
 * request then never send the first byte, yet a fresh request goes through —
 * a single dead socket, not a down backend. Re-prompting is gated on
 * zero-chunks-this-attempt so it can NEVER restart a turn that already ran
 * tools or emitted text (that would corrupt/duplicate output — the partial-
 * answer rescue path handles those). Each re-prompt waits a short backoff.
 * Range 0–5; 0 restores the legacy "surface the stall, never retry" behaviour.
 * Default 1. Env override: MUONROI_PROVIDER_STALL_RETRIES.
 *
 * Parsing rules for the env override:
 *   - unset, empty, or whitespace-only → default (1). Whitespace is trimmed
 *     first because `Number("  ")` is 0, which would otherwise silently
 *     disable re-prompting.
 *   - a finite number in [0, 5] → that value, floored (e.g. "2.5" → 2).
 *   - anything else (non-numeric, negative, above 5) → default (1).
 *
 * @returns The retry cap, an integer from 0 to 5.
 */
export function getProviderStallRetries() {
    const envRaw = process.env.MUONROI_PROVIDER_STALL_RETRIES?.trim();
    if (envRaw) {
        const n = Number(envRaw);
        if (Number.isFinite(n) && n >= 0 && n <= 5)
            return Math.floor(n);
    }
    return 1;
}
696
717
  /**
697
718
  * Phase B3 — threshold (in chars of cumulative message content) above which
698
719
  * the sub-agent `prepareStep` compactor rewrites older tool_result parts
@@ -157,4 +157,35 @@ describe("resolveTelegramAudioInputSettings", () => {
157
157
  expect(result.language).toBe("vi");
158
158
  });
159
159
  });
160
// Env-override parsing for the stall re-prompt cap. Each case stubs
// MUONROI_PROVIDER_STALL_RETRIES and reads the setting back.
describe("getProviderStallRetries", () => {
    it("defaults to 1 when the env var is unset or blank", async () => {
        vi.unstubAllEnvs();
        const { getProviderStallRetries } = await import("./settings.js");
        expect(getProviderStallRetries()).toBe(1);
        vi.stubEnv("MUONROI_PROVIDER_STALL_RETRIES", "");
        expect(getProviderStallRetries()).toBe(1);
    });
    it("honours an in-range override (0 disables, up to 5)", async () => {
        const { getProviderStallRetries } = await import("./settings.js");
        vi.stubEnv("MUONROI_PROVIDER_STALL_RETRIES", "0");
        expect(getProviderStallRetries()).toBe(0);
        vi.stubEnv("MUONROI_PROVIDER_STALL_RETRIES", "3");
        expect(getProviderStallRetries()).toBe(3);
        vi.stubEnv("MUONROI_PROVIDER_STALL_RETRIES", "5");
        expect(getProviderStallRetries()).toBe(5);
    });
    it("falls back to the default for out-of-range or non-numeric values", async () => {
        const { getProviderStallRetries } = await import("./settings.js");
        for (const bad of ["6", "-1", "abc"]) {
            vi.stubEnv("MUONROI_PROVIDER_STALL_RETRIES", bad);
            expect(getProviderStallRetries()).toBe(1);
        }
    });
    // Kept separate from the fallback cases: a fractional value inside the
    // range is accepted and floored, it does NOT fall back to the default.
    it("floors a fractional in-range value instead of falling back", async () => {
        const { getProviderStallRetries } = await import("./settings.js");
        vi.stubEnv("MUONROI_PROVIDER_STALL_RETRIES", "2.5");
        expect(getProviderStallRetries()).toBe(2);
    });
});
160
191
  //# sourceMappingURL=settings.test.js.map
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "workspaces": [
4
4
  "packages/*"
5
5
  ],
6
- "version": "1.6.2",
6
+ "version": "1.6.4",
7
7
  "description": "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.",
8
8
  "repository": {
9
9
  "type": "git",