muonroi-cli 1.6.3 → 1.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/dist/packages/agent-harness-core/src/event-filter.js +1 -0
  2. package/dist/packages/agent-harness-core/src/event-redact.js +7 -2
  3. package/dist/packages/agent-harness-core/src/protocol.d.ts +8 -0
  4. package/dist/src/generated/version.d.ts +1 -1
  5. package/dist/src/generated/version.js +1 -1
  6. package/dist/src/gsd/__tests__/directives.test.js +37 -0
  7. package/dist/src/gsd/directives.d.ts +18 -0
  8. package/dist/src/gsd/directives.js +23 -2
  9. package/dist/src/orchestrator/message-processor.d.ts +8 -0
  10. package/dist/src/orchestrator/message-processor.js +159 -9
  11. package/dist/src/orchestrator/orchestrator.d.ts +10 -0
  12. package/dist/src/orchestrator/orchestrator.js +11 -0
  13. package/dist/src/orchestrator/stall-rescue.d.ts +1 -0
  14. package/dist/src/orchestrator/stall-rescue.js +20 -1
  15. package/dist/src/orchestrator/stall-rescue.test.js +30 -1
  16. package/dist/src/orchestrator/stall-watchdog.d.ts +31 -0
  17. package/dist/src/orchestrator/stall-watchdog.js +24 -0
  18. package/dist/src/orchestrator/stall-watchdog.test.js +46 -1
  19. package/dist/src/orchestrator/steer-inbox.d.ts +32 -0
  20. package/dist/src/orchestrator/steer-inbox.js +20 -0
  21. package/dist/src/orchestrator/steer-inbox.test.d.ts +1 -0
  22. package/dist/src/orchestrator/steer-inbox.test.js +33 -0
  23. package/dist/src/orchestrator/tool-loop-askcard.d.ts +59 -0
  24. package/dist/src/orchestrator/tool-loop-askcard.js +86 -0
  25. package/dist/src/orchestrator/tool-loop-askcard.test.d.ts +1 -0
  26. package/dist/src/orchestrator/tool-loop-askcard.test.js +71 -0
  27. package/dist/src/pil/layer4-gsd.js +5 -1
  28. package/dist/src/ui/app.js +51 -35
  29. package/dist/src/utils/settings.d.ts +23 -0
  30. package/dist/src/utils/settings.js +33 -0
  31. package/dist/src/utils/settings.test.js +52 -0
  32. package/package.json +1 -1
@@ -23,6 +23,30 @@ export const STALL_ABORT_REASON = "provider-stall";
23
23
  export const STALL_ERROR_MESSAGE = "Model not responding — no output received within the stall timeout. " +
24
24
  "The provider may be out of balance, rate-limited, or unreachable. " +
25
25
  "Tune MUONROI_PROVIDER_STALL_TIMEOUT_MS (0 disables) or switch model/provider.";
26
+ /**
27
+ * Decide whether a fired stall watchdog should trigger a re-prompt (re-issue
28
+ * the same request) instead of surfacing the stall.
29
+ *
30
+ * ONLY a time-to-first-byte stall qualifies: zero real chunks AND no assistant
31
+ * text this attempt, under the retry cap, and not a user cancel. Re-issuing
32
+ * after tools ran or text flowed would corrupt/duplicate output — those cases
33
+ * fall through to the partial-answer rescue path instead. Pure (no side
34
+ * effects) so it is unit-testable in isolation from the orchestrator loop.
35
+ */
36
+ export function shouldRepromptStall(s) {
37
+ return (s.stallTriggered &&
38
+ s.stallRetryCount < s.maxStallRetries &&
39
+ s.chunksThisAttempt === 0 &&
40
+ s.assistantTextEmpty &&
41
+ !s.aborted);
42
+ }
43
+ /**
44
+ * Exponential backoff (ms, capped at 4s) before the Nth stall re-prompt
45
+ * (1-based): 500 → 1000 → 2000 → 4000 → 4000.
46
+ */
47
+ export function stallRepromptBackoffMs(attempt) {
48
+ return Math.min(500 * 2 ** (Math.max(1, attempt) - 1), 4_000);
49
+ }
26
50
  export function createStallWatchdog(timeoutMs, onFire) {
27
51
  const controller = new AbortController();
28
52
  let firedFlag = false;
@@ -1,5 +1,5 @@
1
1
  import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
2
- import { createStallWatchdog, STALL_ABORT_REASON } from "./stall-watchdog.js";
2
+ import { createStallWatchdog, STALL_ABORT_REASON, shouldRepromptStall, stallRepromptBackoffMs, } from "./stall-watchdog.js";
3
3
  describe("createStallWatchdog", () => {
4
4
  beforeEach(() => vi.useFakeTimers());
5
5
  afterEach(() => vi.useRealTimers());
@@ -70,4 +70,49 @@ describe("createStallWatchdog", () => {
70
70
  expect(wd.signal.aborted).toBe(true);
71
71
  });
72
72
  });
73
+ describe("shouldRepromptStall", () => {
74
+ // A clean time-to-first-byte stall: watchdog fired, zero chunks, no text,
75
+ // under the cap, not cancelled — the ONLY case that re-prompts.
76
+ const ttfb = (over = {}) => ({
77
+ stallTriggered: true,
78
+ stallRetryCount: 0,
79
+ maxStallRetries: 1,
80
+ chunksThisAttempt: 0,
81
+ assistantTextEmpty: true,
82
+ aborted: false,
83
+ ...over,
84
+ });
85
+ it("re-prompts a time-to-first-byte stall under the cap", () => {
86
+ expect(shouldRepromptStall(ttfb())).toBe(true);
87
+ });
88
+ it("does NOT re-prompt when the watchdog never fired", () => {
89
+ expect(shouldRepromptStall(ttfb({ stallTriggered: false }))).toBe(false);
90
+ });
91
+ it("does NOT re-prompt once the retry cap is reached", () => {
92
+ expect(shouldRepromptStall(ttfb({ stallRetryCount: 1, maxStallRetries: 1 }))).toBe(false);
93
+ // maxStallRetries=0 means the feature is disabled — never re-prompt.
94
+ expect(shouldRepromptStall(ttfb({ stallRetryCount: 0, maxStallRetries: 0 }))).toBe(false);
95
+ });
96
+ it("does NOT re-prompt once a real chunk has arrived (mid-stream stall → rescue)", () => {
97
+ expect(shouldRepromptStall(ttfb({ chunksThisAttempt: 1 }))).toBe(false);
98
+ });
99
+ it("does NOT re-prompt once assistant text has flowed (output would corrupt)", () => {
100
+ expect(shouldRepromptStall(ttfb({ assistantTextEmpty: false }))).toBe(false);
101
+ });
102
+ it("does NOT re-prompt over a genuine user cancel", () => {
103
+ expect(shouldRepromptStall(ttfb({ aborted: true }))).toBe(false);
104
+ });
105
+ });
106
+ describe("stallRepromptBackoffMs", () => {
107
+ it("grows exponentially and caps at 4s", () => {
108
+ expect(stallRepromptBackoffMs(1)).toBe(500);
109
+ expect(stallRepromptBackoffMs(2)).toBe(1000);
110
+ expect(stallRepromptBackoffMs(3)).toBe(2000);
111
+ expect(stallRepromptBackoffMs(4)).toBe(4000);
112
+ expect(stallRepromptBackoffMs(5)).toBe(4000);
113
+ });
114
+ it("treats attempt < 1 as the first attempt", () => {
115
+ expect(stallRepromptBackoffMs(0)).toBe(500);
116
+ });
117
+ });
73
118
  //# sourceMappingURL=stall-watchdog.test.js.map
@@ -0,0 +1,32 @@
1
+ /**
2
+ * src/orchestrator/steer-inbox.ts
3
+ *
4
+ * Live-queue steering — pure decision helper.
5
+ *
6
+ * When the user types a message while a turn is streaming, the UI queue is
7
+ * drained at the next prepareStep boundary and the messages are injected into
8
+ * the running turn as `user` interjections (Claude-Code-style steering). This
9
+ * module holds the PURE mapping/gating decision so it is unit-testable in
10
+ * isolation from the orchestrator loop. The orchestrator owns the side effects
11
+ * (draining the queue, the pendingSteers accumulator, emitting telemetry).
12
+ */
13
+ import type { ModelMessage } from "ai";
14
+ /** Inputs to the steer-injection decision — see {@link planSteerInjection}. */
15
+ export interface SteerInjectionState {
16
+ /** Raw messages drained from the UI steer queue this step. */
17
+ drained: {
18
+ text: string;
19
+ }[];
20
+ /** True on a genuine user cancel — never steer an aborted turn. */
21
+ aborted: boolean;
22
+ /** Feature flag (getSteerInjectionEnabled). */
23
+ enabled: boolean;
24
+ }
25
+ /**
26
+ * Decide which (if any) drained messages to inject into the running turn.
27
+ *
28
+ * Returns user-role ModelMessages in FIFO order, trimmed, with empty/whitespace
29
+ * entries dropped. Returns `[]` when the feature is disabled or the turn was
30
+ * cancelled. Pure (no side effects).
31
+ */
32
+ export declare function planSteerInjection(s: SteerInjectionState): ModelMessage[];
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Decide which (if any) drained messages to inject into the running turn.
3
+ *
4
+ * Returns user-role ModelMessages in FIFO order, trimmed, with empty/whitespace
5
+ * entries dropped. Returns `[]` when the feature is disabled or the turn was
6
+ * cancelled. Pure (no side effects).
7
+ */
8
+ export function planSteerInjection(s) {
9
+ if (!s.enabled || s.aborted)
10
+ return [];
11
+ const out = [];
12
+ for (const m of s.drained) {
13
+ const text = m.text?.trim();
14
+ if (!text)
15
+ continue;
16
+ out.push({ role: "user", content: text });
17
+ }
18
+ return out;
19
+ }
20
+ //# sourceMappingURL=steer-inbox.js.map
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,33 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { planSteerInjection } from "./steer-inbox.js";
3
+ describe("planSteerInjection", () => {
4
+ // A valid baseline: feature enabled, not cancelled, one queued message.
5
+ const base = (over = {}) => ({
6
+ drained: [{ text: "also add tests" }],
7
+ aborted: false,
8
+ enabled: true,
9
+ ...over,
10
+ });
11
+ it("maps drained text into a single user ModelMessage", () => {
12
+ const out = planSteerInjection(base());
13
+ expect(out).toEqual([{ role: "user", content: "also add tests" }]);
14
+ });
15
+ it("preserves FIFO order across multiple drained messages", () => {
16
+ const out = planSteerInjection(base({ drained: [{ text: "a" }, { text: "b" }] }));
17
+ expect(out.map((m) => m.content)).toEqual(["a", "b"]);
18
+ });
19
+ it("returns [] when the feature is disabled", () => {
20
+ expect(planSteerInjection(base({ enabled: false }))).toEqual([]);
21
+ });
22
+ it("returns [] over a genuine user cancel (never steer an aborted turn)", () => {
23
+ expect(planSteerInjection(base({ aborted: true }))).toEqual([]);
24
+ });
25
+ it("returns [] when nothing was drained", () => {
26
+ expect(planSteerInjection(base({ drained: [] }))).toEqual([]);
27
+ });
28
+ it("skips empty / whitespace-only messages and trims the rest", () => {
29
+ const out = planSteerInjection(base({ drained: [{ text: " " }, { text: " keep me " }, { text: "" }] }));
30
+ expect(out).toEqual([{ role: "user", content: "keep me" }]);
31
+ });
32
+ });
33
+ //# sourceMappingURL=steer-inbox.test.js.map
@@ -0,0 +1,59 @@
1
+ /**
2
+ * src/orchestrator/tool-loop-askcard.ts
3
+ *
4
+ * Pure helper that computes the tool-loop-cap askcard tier (label set + default
5
+ * action) from the current step number and the resolved natural ceiling for
6
+ * the (taskType, size) matrix.
7
+ *
8
+ * Four tiers (open intervals — boundaries belong to the higher tier):
9
+ * - early : step < 0.5 × ceiling — a transient fixation. Default Continue.
10
+ * - normal : 0.5× ≤ step ≤ 2× ceiling — used cheap budget; Default Stop.
11
+ * - overBudget : 2× < step ≤ 5× ceiling — Continue still available but the
12
+ * label carries the overage multiplier so the cost of
13
+ * continuing is visible at decision time. Default Stop.
14
+ * - extreme : step > 5× ceiling — Stop is moved FIRST in the option
15
+ * array (Enter = Stop) and Continue is labelled "expensive".
16
+ * Default Stop (now at index 0).
17
+ *
18
+ * Live miss this tier set fixes (session 1f29e238, step 77/6 = 12.8×): extreme
19
+ * tier put Stop first with a warning — good. But the storyflow_ui session
20
+ * 22661c8de9f2 ran step 29/12 = 2.4× — the OLD code had no middle warning, so
21
+ * the askcard showed a plain "Continue (let agent try)" with no signal that
22
+ * continuing costs more. User chose Continue, the model stalled 4 tool-calls
23
+ * later, and forced-finalize had to rescue a degraded answer.
24
+ *
25
+ * Pure — no React, no DOM, no side effects. Unit-testable in isolation.
26
+ */
27
+ export type LoopCapTier = "early" | "normal" | "overBudget" | "extreme";
28
+ export interface LoopCapAskcardOptions {
29
+ /** AI-SDK step number when the pattern fired. */
30
+ stepNumber: number;
31
+ /**
32
+ * Natural step ceiling for (taskType, size). Optional — when undefined we
33
+ * cannot compute multipliers, so the askcard falls back to the legacy
34
+ * step-threshold heuristic (0 < step ≤ 15 = early-ish, else normal).
35
+ */
36
+ naturalCeiling?: number;
37
+ }
38
+ export interface LoopCapAskcardLayout {
39
+ tier: LoopCapTier;
40
+ /** Index into `optionLabels` of the option pre-selected (Enter applies). */
41
+ defaultIndex: 0 | 1;
42
+ /**
43
+ * Exactly two labels in render order. The first is at index 0, the second
44
+ * at index 1 — order matters for the askcard UI (arrow-key navigation,
45
+ * Enter-applies-default).
46
+ */
47
+ optionLabels: [continueOrStop: string, stopOrContinue: string];
48
+ /** Values parallel to optionLabels — what the resolver returns to the loop. */
49
+ optionValues: [string, string];
50
+ /**
51
+ * x.x string (e.g. "2.4") when the tier is overBudget or extreme, else
52
+ * null. Caller can also surface this in the askcard context message.
53
+ */
54
+ overageMultiplier: string | null;
55
+ }
56
+ /**
57
+ * Decide the askcard layout for a tool-loop-cap pattern hit. Pure.
58
+ */
59
+ export declare function planLoopCapAskcard(opts: LoopCapAskcardOptions): LoopCapAskcardLayout;
@@ -0,0 +1,86 @@
1
+ /**
2
+ * src/orchestrator/tool-loop-askcard.ts
3
+ *
4
+ * Pure helper that computes the tool-loop-cap askcard tier (label set + default
5
+ * action) from the current step number and the resolved natural ceiling for
6
+ * the (taskType, size) matrix.
7
+ *
8
+ * Four tiers (open intervals — boundaries belong to the higher tier):
9
+ * - early : step < 0.5 × ceiling — a transient fixation. Default Continue.
10
+ * - normal : 0.5× ≤ step ≤ 2× ceiling — used cheap budget; Default Stop.
11
+ * - overBudget : 2× < step ≤ 5× ceiling — Continue still available but the
12
+ * label carries the overage multiplier so the cost of
13
+ * continuing is visible at decision time. Default Stop.
14
+ * - extreme : step > 5× ceiling — Stop is moved FIRST in the option
15
+ * array (Enter = Stop) and Continue is labelled "expensive".
16
+ * Default Stop (now at index 0).
17
+ *
18
+ * Live miss this tier set fixes (session 1f29e238, step 77/6 = 12.8×): extreme
19
+ * tier put Stop first with a warning — good. But the storyflow_ui session
20
+ * 22661c8de9f2 ran step 29/12 = 2.4× — the OLD code had no middle warning, so
21
+ * the askcard showed a plain "Continue (let agent try)" with no signal that
22
+ * continuing costs more. User chose Continue, the model stalled 4 tool-calls
23
+ * later, and forced-finalize had to rescue a degraded answer.
24
+ *
25
+ * Pure — no React, no DOM, no side effects. Unit-testable in isolation.
26
+ */
27
+ const NORMAL_LABELS = ["Continue (let agent try)", "Stop and answer"];
28
+ const NORMAL_VALUES = ["continue", "stop"];
29
+ /**
30
+ * Decide the askcard layout for a tool-loop-cap pattern hit. Pure.
31
+ */
32
+ export function planLoopCapAskcard(opts) {
33
+ const { stepNumber, naturalCeiling } = opts;
34
+ // No ceiling → cannot compute multipliers. Fall back to a static threshold:
35
+ // 0 < step ≤ 15 looks "early" enough to default Continue, else default Stop.
36
+ if (!naturalCeiling || naturalCeiling <= 0) {
37
+ const tier = stepNumber > 0 && stepNumber <= 15 ? "early" : "normal";
38
+ return {
39
+ tier,
40
+ defaultIndex: tier === "early" ? 0 : 1,
41
+ optionLabels: NORMAL_LABELS,
42
+ optionValues: NORMAL_VALUES,
43
+ overageMultiplier: null,
44
+ };
45
+ }
46
+ const ratio = stepNumber / naturalCeiling;
47
+ const multiplier = ratio.toFixed(1);
48
+ if (ratio > 5) {
49
+ return {
50
+ tier: "extreme",
51
+ defaultIndex: 0,
52
+ optionLabels: ["Stop and answer (recommended)", `Continue anyway (⚠ ${multiplier}× over budget — expensive)`],
53
+ optionValues: ["stop", "continue"],
54
+ overageMultiplier: multiplier,
55
+ };
56
+ }
57
+ if (ratio > 2) {
58
+ return {
59
+ tier: "overBudget",
60
+ defaultIndex: 1,
61
+ optionLabels: [
62
+ `Continue (⚠ ${multiplier}× past natural budget — quality may degrade)`,
63
+ "Stop and answer (recommended)",
64
+ ],
65
+ optionValues: NORMAL_VALUES,
66
+ overageMultiplier: multiplier,
67
+ };
68
+ }
69
+ if (ratio < 0.5) {
70
+ return {
71
+ tier: "early",
72
+ defaultIndex: 0,
73
+ optionLabels: NORMAL_LABELS,
74
+ optionValues: NORMAL_VALUES,
75
+ overageMultiplier: null,
76
+ };
77
+ }
78
+ return {
79
+ tier: "normal",
80
+ defaultIndex: 1,
81
+ optionLabels: NORMAL_LABELS,
82
+ optionValues: NORMAL_VALUES,
83
+ overageMultiplier: null,
84
+ };
85
+ }
86
+ //# sourceMappingURL=tool-loop-askcard.js.map
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,71 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { planLoopCapAskcard } from "./tool-loop-askcard.js";
3
+ describe("planLoopCapAskcard", () => {
4
+ it("early tier (< 0.5× ceiling): default Continue, no warning", () => {
5
+ const r = planLoopCapAskcard({ stepNumber: 5, naturalCeiling: 12 });
6
+ expect(r.tier).toBe("early");
7
+ expect(r.defaultIndex).toBe(0);
8
+ expect(r.optionLabels[0]).toMatch(/Continue/);
9
+ expect(r.optionValues[0]).toBe("continue");
10
+ expect(r.overageMultiplier).toBeNull();
11
+ // no warning emoji on the Continue label
12
+ expect(r.optionLabels[0]).not.toMatch(/⚠/);
13
+ });
14
+ it("normal tier (0.5×–2× ceiling): default Stop, no warning, Continue first", () => {
15
+ const r = planLoopCapAskcard({ stepNumber: 18, naturalCeiling: 12 });
16
+ expect(r.tier).toBe("normal");
17
+ expect(r.defaultIndex).toBe(1);
18
+ expect(r.optionLabels[0]).toBe("Continue (let agent try)");
19
+ expect(r.optionLabels[1]).toBe("Stop and answer");
20
+ expect(r.overageMultiplier).toBeNull();
21
+ });
22
+ it("overBudget tier (2×–5× ceiling): Continue carries the overage multiplier, default Stop", () => {
23
+ // The storyflow_ui case: step 29 / ceiling 12 = 2.4×
24
+ const r = planLoopCapAskcard({ stepNumber: 29, naturalCeiling: 12 });
25
+ expect(r.tier).toBe("overBudget");
26
+ expect(r.defaultIndex).toBe(1);
27
+ expect(r.optionLabels[0]).toMatch(/⚠ 2\.4× past natural budget/);
28
+ expect(r.optionLabels[1]).toMatch(/Stop and answer \(recommended\)/);
29
+ expect(r.overageMultiplier).toBe("2.4");
30
+ // order preserved: Continue at 0, Stop at 1
31
+ expect(r.optionValues).toEqual(["continue", "stop"]);
32
+ });
33
+ it("extreme tier (> 5× ceiling): Stop FIRST in the array, Continue labelled expensive", () => {
34
+ // session 1f29e238 — step 77 / ceiling 6 = 12.8×
35
+ const r = planLoopCapAskcard({ stepNumber: 77, naturalCeiling: 6 });
36
+ expect(r.tier).toBe("extreme");
37
+ expect(r.defaultIndex).toBe(0);
38
+ expect(r.optionLabels[0]).toMatch(/Stop and answer \(recommended\)/);
39
+ expect(r.optionLabels[1]).toMatch(/⚠ 12\.8× over budget — expensive/);
40
+ expect(r.optionValues).toEqual(["stop", "continue"]); // ORDER REVERSED at extreme
41
+ expect(r.overageMultiplier).toBe("12.8");
42
+ });
43
+ it("tier boundaries are open-on-the-lower-side (ratio==2 → normal; ratio==5 → overBudget; ratio==0.5 → normal)", () => {
44
+ // ratio === 2.0 exactly → still normal (the > 2 gate excludes 2.0)
45
+ expect(planLoopCapAskcard({ stepNumber: 24, naturalCeiling: 12 }).tier).toBe("normal");
46
+ // ratio === 5.0 exactly → still overBudget (the > 5 gate excludes 5.0)
47
+ expect(planLoopCapAskcard({ stepNumber: 60, naturalCeiling: 12 }).tier).toBe("overBudget");
48
+ // ratio === 0.5 exactly → normal (the < 0.5 gate excludes 0.5)
49
+ expect(planLoopCapAskcard({ stepNumber: 6, naturalCeiling: 12 }).tier).toBe("normal");
50
+ });
51
+ it("falls back to step-threshold heuristic when naturalCeiling is missing", () => {
52
+ const early = planLoopCapAskcard({ stepNumber: 8 });
53
+ expect(early.tier).toBe("early");
54
+ expect(early.defaultIndex).toBe(0);
55
+ const normal = planLoopCapAskcard({ stepNumber: 22 });
56
+ expect(normal.tier).toBe("normal");
57
+ expect(normal.defaultIndex).toBe(1);
58
+ // boundary: step === 15 → still early
59
+ expect(planLoopCapAskcard({ stepNumber: 15 }).tier).toBe("early");
60
+ // step === 16 → normal
61
+ expect(planLoopCapAskcard({ stepNumber: 16 }).tier).toBe("normal");
62
+ // step === 0 → normal (no early credit for nothing)
63
+ expect(planLoopCapAskcard({ stepNumber: 0 }).tier).toBe("normal");
64
+ });
65
+ it("treats naturalCeiling=0 the same as undefined (no multiplier possible)", () => {
66
+ const r = planLoopCapAskcard({ stepNumber: 30, naturalCeiling: 0 });
67
+ expect(r.overageMultiplier).toBeNull();
68
+ expect(r.tier).toBe("normal");
69
+ });
70
+ });
71
+ //# sourceMappingURL=tool-loop-askcard.test.js.map
@@ -102,7 +102,11 @@ export async function layer4Gsd(ctx) {
102
102
  (ctx.taskType === "general" && ctx.intentKind === "task") ||
103
103
  (isQuestionLike(ctx.raw) && !isImplementationIntent(ctx.raw));
104
104
  const ecosystem = mentionsEcosystemScope(ctx.raw);
105
- const directive = buildDirective({ complexity, phase, grayAreas, informational, ecosystem });
105
+ // Heuristic: VN diacritics → user wrote Vietnamese → re-anchor language rule
106
+ // inside the directive (storyflow_ui session 22661c8de9f2 — base rule
107
+ // crowded out by brevity/FIX-FIRST directives).
108
+ const replyLanguage = /[à-ỹÀ-Ỹ]/.test(ctx.raw) ? "Vietnamese" : undefined;
109
+ const directive = buildDirective({ complexity, phase, grayAreas, informational, ecosystem, replyLanguage });
106
110
  const budgetChars = Math.floor(ctx.tokenBudget * DIRECTIVE_BUDGET_FRACTION);
107
111
  const trimmed = truncateToBudget(directive.text, budgetChars);
108
112
  return {
@@ -13,6 +13,7 @@ import { POPULAR_MCP_CATALOG } from "../mcp/catalog.js";
13
13
  import { parseEnvLines, parseHeaderLines } from "../mcp/parse-headers.js";
14
14
  import { toMcpServerId, validateMcpServerConfig } from "../mcp/validate.js";
15
15
  import { Agent } from "../orchestrator/orchestrator.js";
16
+ import { planLoopCapAskcard } from "../orchestrator/tool-loop-askcard.js";
16
17
  import { getConfiguredProviders, setKeyForProvider } from "../providers/keychain.js";
17
18
  import { buildIdealContinuationPrompt } from "../scaffold/continuation-prompt.js";
18
19
  import { continueAsCouncil } from "../scaffold/continue-as-council.js";
@@ -24,7 +25,7 @@ import { processAtMentions } from "../utils/at-mentions.js";
24
25
  import { readClipboardImage } from "../utils/clipboard-image.js";
25
26
  import { FileIndex } from "../utils/file-index.js";
26
27
  import { copyTextToHostClipboard, readTextFromHostClipboard } from "../utils/host-clipboard.js";
27
- import { getApiKey, getCurrentModel, getTelegramBotToken, isModelDisabled, isReservedSubagentName, loadMcpServers, loadPaymentSettings, loadUserSettings, loadValidSubAgents, saveApprovedTelegramUserId, saveMcpServers, savePaymentSettings, saveProjectSettings, saveUserSettings, setDefaultProvider, setModelDisabled, setProviderDisabled, } from "../utils/settings.js";
28
+ import { getApiKey, getCurrentModel, getSteerInjectionEnabled, getTelegramBotToken, isModelDisabled, isReservedSubagentName, loadMcpServers, loadPaymentSettings, loadUserSettings, loadValidSubAgents, saveApprovedTelegramUserId, saveMcpServers, savePaymentSettings, saveProjectSettings, saveUserSettings, setDefaultProvider, setModelDisabled, setProviderDisabled, } from "../utils/settings.js";
28
29
  import { discoverSkills, formatSkillsForChat } from "../utils/skills.js";
29
30
  import { formatSubagentName } from "../utils/subagent-display.js";
30
31
  import { checkForUpdate, runUpdate } from "../utils/update-checker.js";
@@ -483,6 +484,11 @@ export function App({ agent, startupConfig, initialMessage, onExit }) {
483
484
  pushToast(lvl, text);
484
485
  return;
485
486
  }
487
+ if (e.kind === "steer-inject") {
488
+ const count = typeof e.count === "number" ? e.count : 1;
489
+ pushToast("info", `↳ steering applied (${count} message${count === 1 ? "" : "s"})`);
490
+ return;
491
+ }
486
492
  if (e.kind === "ee-timeout" || e.kind === "ee-error") {
487
493
  const source = typeof e.source === "string" ? e.source : "unknown";
488
494
  const kind = e.kind === "ee-timeout" ? "timeout" : "error";
@@ -539,6 +545,23 @@ export function App({ agent, startupConfig, initialMessage, onExit }) {
539
545
  }
540
546
  return undefined;
541
547
  }, [handleHarnessEvent]);
548
+ // Live-queue steering: expose the mid-turn queue to the running turn so
549
+ // prepareStep can inject typed-while-busy messages at the next step boundary
550
+ // instead of deferring them to a new turn. Disabled → callback not wired, so
551
+ // finishTurnProcessing drains the queue post-turn exactly as before.
552
+ useEffect(() => {
553
+ if (!getSteerInjectionEnabled())
554
+ return;
555
+ agent.setSteerDrain(() => {
556
+ if (queuedMessagesRef.current.length === 0)
557
+ return [];
558
+ const drained = queuedMessagesRef.current.map((m) => ({ text: m.text }));
559
+ queuedMessagesRef.current = [];
560
+ setQueuedMessages([]);
561
+ return drained;
562
+ });
563
+ return () => agent.setSteerDrain(null);
564
+ }, [agent]);
542
565
  const dismissToast = useCallback(() => setActiveToast(null), []);
543
566
  // ─── /Phase 21 toast subscriber ────────────────────────────────────────────
544
567
  const { model, setModel, showModelPicker, setShowModelPicker, modelPickerIndex, setModelPickerIndex, modelSearchQuery, setModelSearchQuery, configuredProviders, setConfiguredProviders, disabledProviders, setDisabledProvidersState, defaultProvider, setDefaultProviderState, disabledModels, setDisabledModelsState, modelPickerFocus, setModelPickerFocus, providerChipIndex, setProviderChipIndex, reasoningEffortByModel, setReasoningEffortByModel, } = useModelPicker(agent.getModel());
@@ -1779,50 +1802,43 @@ export function App({ agent, startupConfig, initialMessage, onExit }) {
1779
1802
  const isPattern = info.kind === "pattern";
1780
1803
  const qid = isPattern ? `tool-pattern-loop-${Date.now()}` : `tool-loop-cap-${info.stepNumber}-${Date.now()}`;
1781
1804
  toolLoopCapResolversRef.current.set(qid, resolve);
1782
- // Phase 5 BUG-H — context-aware default:
1783
- // - Early in the run (step < natural ceiling × 0.5) loops are
1784
- // usually a temporary fixation on a single file/cmd; "continue"
1785
- // is the right default.
1786
- // - Past the soft-warn line (≥ 50% of natural ceiling) we've used
1787
- // up the cheap budget — "stop" becomes the safer default.
1788
- // Falls back to a static stepNumber heuristic (≤ 15) when caller
1789
- // didn't supply a naturalCeiling.
1805
+ // Tier-aware askcard layout (planLoopCapAskcard) — 4 tiers:
1806
+ // early (< 0.5× ceiling) → Default Continue, no warning
1807
+ // normal (0.5×–2× ceiling) → Default Stop, no warning
1808
+ // overBudget (2×–5× ceiling) → Default Stop, Continue label carries
1809
+ // the overage multiplier so cost is
1810
+ // visible (storyflow_ui 22661c8de9f2:
1811
+ // 2.4× hit had no warning before)
1812
+ // extreme (> 5× ceiling) → Stop FIRST in the array (Enter=Stop),
1813
+ // Continue labelled "expensive"
1814
+ // (session 1f29e238: 12.8× past ceiling)
1790
1815
  const patternStep = isPattern ? info.stepNumber : 0;
1791
1816
  const patternCeiling = isPattern ? info.naturalCeiling : undefined;
1792
- const patternEarly = patternCeiling !== undefined
1793
- ? patternStep < Math.floor(patternCeiling * 0.5)
1794
- : patternStep > 0 && patternStep <= 15;
1795
- // Extreme-overage trip: stepNumber > 5× naturalCeiling. Evidence
1796
- // (session 1f29e238): at step 77/6 = 12.8× ceiling the askcard still
1797
- // showed Continue as a first-class option and user chose Continue
1798
- // within 4s. At extreme overage we put Stop FIRST (Enter = Stop) and
1799
- // label Continue with the explicit overage multiplier so the cost is
1800
- // visible at decision time.
1801
- const patternExtreme = patternCeiling !== undefined && patternCeiling > 0 && patternStep > patternCeiling * 5;
1802
- const overageMultiplier = patternExtreme && patternCeiling ? (patternStep / patternCeiling).toFixed(1) : null;
1803
- const patternDefaultIdx = patternEarly ? 0 : patternExtreme ? 0 : 1;
1804
- const patternOptions = patternExtreme
1817
+ const layout = isPattern
1818
+ ? planLoopCapAskcard({ stepNumber: patternStep, naturalCeiling: patternCeiling })
1819
+ : null;
1820
+ const patternEarly = layout?.tier === "early";
1821
+ const patternOverBudget = layout?.tier === "overBudget";
1822
+ const patternExtreme = layout?.tier === "extreme";
1823
+ const overageMultiplier = layout?.overageMultiplier ?? null;
1824
+ const patternDefaultIdx = layout?.defaultIndex ?? 0;
1825
+ const patternOptions = layout
1805
1826
  ? [
1806
- { label: "Stop and answer (recommended)", value: "stop", kind: "choice" },
1807
- {
1808
- label: `Continue anyway (⚠ ${overageMultiplier}× over budget — expensive)`,
1809
- value: "continue",
1810
- kind: "choice",
1811
- },
1827
+ { label: layout.optionLabels[0], value: layout.optionValues[0], kind: "choice" },
1828
+ { label: layout.optionLabels[1], value: layout.optionValues[1], kind: "choice" },
1812
1829
  ]
1813
- : [
1814
- { label: "Continue (let agent try)", value: "continue", kind: "choice" },
1815
- { label: "Stop and answer", value: "stop", kind: "choice" },
1816
- ];
1830
+ : [];
1817
1831
  const question = isPattern
1818
1832
  ? {
1819
1833
  questionId: qid,
1820
1834
  question: `Tool \`${info.toolName}\` đã chạy ${info.count}/${info.windowSize} lần với args gần giống (step ${info.stepNumber}${patternCeiling ? `/${patternCeiling}` : ""}) — có thể đang loop. Tiếp tục?`,
1821
1835
  context: patternExtreme
1822
1836
  ? `EXTREME OVERAGE — ${overageMultiplier}× past natural budget. Continuing has historically not converged in this regime (see session 1f29e238: 8× over budget, still failed). Stop returns the agent's best answer with current context.`
1823
- : patternEarly
1824
- ? "Continue lets the agent keep trying likely the right call this early in the run. Stop returns the agent's best answer with current context."
1825
- : "You're past the natural budget for this task type. Stop usually recovers a clean answer; Continue keeps spending tokens.",
1837
+ : patternOverBudget
1838
+ ? `Past natural budget — ${overageMultiplier}× the typical step count for this task type. Continuing may still converge but quality often degrades (longer compaction, stale tool results, forced-finalize on stall). Stop returns the agent's best answer with current context.`
1839
+ : patternEarly
1840
+ ? "Continue lets the agent keep trying — likely the right call this early in the run. Stop returns the agent's best answer with current context."
1841
+ : "You're past the natural budget for this task type. Stop usually recovers a clean answer; Continue keeps spending tokens.",
1826
1842
  isRequired: true,
1827
1843
  phase: "tool-loop-cap",
1828
1844
  options: patternOptions,
@@ -317,6 +317,29 @@ export declare function getSubAgentBudgetChars(): number;
317
317
  * Default 120_000 (2 min). Env override: MUONROI_PROVIDER_STALL_TIMEOUT_MS.
318
318
  */
319
319
  export declare function getProviderStallTimeoutMs(): number;
320
+ /**
321
+ * Number of times to AUTOMATICALLY re-issue a streaming model call after the
322
+ * stall watchdog fires WITHOUT any chunk having arrived (a time-to-first-byte
323
+ * "frozen" stall). Some providers (observed: xai/grok-build-0.1) accept a
324
+ * request then never send the first byte, yet a fresh request goes through —
325
+ * a single dead socket, not a down backend. Re-prompting is gated on
326
+ * zero-chunks-this-attempt so it can NEVER restart a turn that already ran
327
+ * tools or emitted text (that would corrupt/duplicate output — the partial-
328
+ * answer rescue path handles those). Each re-prompt waits a short backoff.
329
+ * Range 0–5; 0 restores the legacy "surface the stall, never retry" behaviour.
330
+ * Default 1. Env override: MUONROI_PROVIDER_STALL_RETRIES.
331
+ */
332
+ export declare function getProviderStallRetries(): number;
333
+ /**
334
+ * Live-queue steering: when true, a message typed while a turn is streaming is
335
+ * injected into the running turn at the next prepareStep boundary (as a `user`
336
+ * interjection) instead of waiting for the turn to finish and running as a new
337
+ * turn. When false, the legacy deferred-queue behaviour is preserved (the
338
+ * message runs only after the current turn completes). House convention for a
339
+ * default-true boolean knob: only an explicit "0" disables; unset/blank/any
340
+ * other value = enabled. Env override: MUONROI_STEER_INJECTION.
341
+ */
342
+ export declare function getSteerInjectionEnabled(): boolean;
320
343
  /**
321
344
  * Phase B3 — threshold (in chars of cumulative message content) above which
322
345
  * the sub-agent `prepareStep` compactor rewrites older tool_result parts
@@ -693,6 +693,39 @@ export function getProviderStallTimeoutMs() {
693
693
  }
694
694
  return 120_000;
695
695
  }
696
+ /**
697
+ * Number of times to AUTOMATICALLY re-issue a streaming model call after the
698
+ * stall watchdog fires WITHOUT any chunk having arrived (a time-to-first-byte
699
+ * "frozen" stall). Some providers (observed: xai/grok-build-0.1) accept a
700
+ * request then never send the first byte, yet a fresh request goes through —
701
+ * a single dead socket, not a down backend. Re-prompting is gated on
702
+ * zero-chunks-this-attempt so it can NEVER restart a turn that already ran
703
+ * tools or emitted text (that would corrupt/duplicate output — the partial-
704
+ * answer rescue path handles those). Each re-prompt waits a short backoff.
705
+ * Range 0–5; 0 restores the legacy "surface the stall, never retry" behaviour.
706
+ * Default 1. Env override: MUONROI_PROVIDER_STALL_RETRIES.
707
+ */
708
+ export function getProviderStallRetries() {
709
+ const envRaw = process.env.MUONROI_PROVIDER_STALL_RETRIES;
710
+ if (envRaw !== undefined && envRaw !== "") {
711
+ const n = Number(envRaw);
712
+ if (Number.isFinite(n) && n >= 0 && n <= 5)
713
+ return Math.floor(n);
714
+ }
715
+ return 1;
716
+ }
717
+ /**
718
+ * Live-queue steering: when true, a message typed while a turn is streaming is
719
+ * injected into the running turn at the next prepareStep boundary (as a `user`
720
+ * interjection) instead of waiting for the turn to finish and running as a new
721
+ * turn. When false, the legacy deferred-queue behaviour is preserved (the
722
+ * message runs only after the current turn completes). House convention for a
723
+ * default-true boolean knob: only an explicit "0" disables; unset/blank/any
724
+ * other value = enabled. Env override: MUONROI_STEER_INJECTION.
725
+ */
726
+ export function getSteerInjectionEnabled() {
727
+ return process.env.MUONROI_STEER_INJECTION !== "0";
728
+ }
696
729
  /**
697
730
  * Phase B3 — threshold (in chars of cumulative message content) above which
698
731
  * the sub-agent `prepareStep` compactor rewrites older tool_result parts