muonroi-cli 1.6.3 → 1.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/packages/agent-harness-core/src/event-filter.js +1 -0
- package/dist/packages/agent-harness-core/src/event-redact.js +7 -2
- package/dist/packages/agent-harness-core/src/protocol.d.ts +8 -0
- package/dist/src/generated/version.d.ts +1 -1
- package/dist/src/generated/version.js +1 -1
- package/dist/src/gsd/__tests__/directives.test.js +37 -0
- package/dist/src/gsd/directives.d.ts +18 -0
- package/dist/src/gsd/directives.js +23 -2
- package/dist/src/orchestrator/message-processor.d.ts +8 -0
- package/dist/src/orchestrator/message-processor.js +159 -9
- package/dist/src/orchestrator/orchestrator.d.ts +10 -0
- package/dist/src/orchestrator/orchestrator.js +11 -0
- package/dist/src/orchestrator/stall-rescue.d.ts +1 -0
- package/dist/src/orchestrator/stall-rescue.js +20 -1
- package/dist/src/orchestrator/stall-rescue.test.js +30 -1
- package/dist/src/orchestrator/stall-watchdog.d.ts +31 -0
- package/dist/src/orchestrator/stall-watchdog.js +24 -0
- package/dist/src/orchestrator/stall-watchdog.test.js +46 -1
- package/dist/src/orchestrator/steer-inbox.d.ts +32 -0
- package/dist/src/orchestrator/steer-inbox.js +20 -0
- package/dist/src/orchestrator/steer-inbox.test.d.ts +1 -0
- package/dist/src/orchestrator/steer-inbox.test.js +33 -0
- package/dist/src/orchestrator/tool-loop-askcard.d.ts +59 -0
- package/dist/src/orchestrator/tool-loop-askcard.js +86 -0
- package/dist/src/orchestrator/tool-loop-askcard.test.d.ts +1 -0
- package/dist/src/orchestrator/tool-loop-askcard.test.js +71 -0
- package/dist/src/pil/layer4-gsd.js +5 -1
- package/dist/src/ui/app.js +51 -35
- package/dist/src/utils/settings.d.ts +23 -0
- package/dist/src/utils/settings.js +33 -0
- package/dist/src/utils/settings.test.js +52 -0
- package/package.json +1 -1
|
@@ -23,6 +23,30 @@ export const STALL_ABORT_REASON = "provider-stall";
|
|
|
23
23
|
export const STALL_ERROR_MESSAGE = "Model not responding — no output received within the stall timeout. " +
|
|
24
24
|
"The provider may be out of balance, rate-limited, or unreachable. " +
|
|
25
25
|
"Tune MUONROI_PROVIDER_STALL_TIMEOUT_MS (0 disables) or switch model/provider.";
|
|
26
|
+
/**
 * Decide whether a fired stall watchdog should trigger a re-prompt (re-issue
 * the same request) instead of surfacing the stall.
 *
 * ONLY a time-to-first-byte stall qualifies: zero real chunks AND no assistant
 * text this attempt, under the retry cap, and not a user cancel. Re-issuing
 * after tools ran or text flowed would corrupt/duplicate output — those cases
 * fall through to the partial-answer rescue path instead. Pure (no side
 * effects) so it is unit-testable in isolation from the orchestrator loop.
 *
 * @param {object} s - Snapshot of the current attempt.
 * @param {boolean} s.stallTriggered - True when the stall watchdog fired.
 * @param {number} s.stallRetryCount - Re-prompts already spent.
 * @param {number} s.maxStallRetries - Retry cap; 0 means never re-prompt.
 * @param {number} s.chunksThisAttempt - Chunks received during this attempt.
 * @param {boolean} s.assistantTextEmpty - True when no assistant text was produced.
 * @param {boolean} s.aborted - True on a user cancel.
 * @returns {boolean} `true` only when every condition holds. Always a strict
 *   boolean: a missing or falsy field yields `false`, never `undefined`/`0`.
 */
export function shouldRepromptStall(s) {
  // Boolean() keeps the `&&` chain from leaking a falsy operand (undefined,
  // 0, "") to callers that compare against `false` strictly.
  return Boolean(
    s.stallTriggered &&
      s.stallRetryCount < s.maxStallRetries &&
      s.chunksThisAttempt === 0 &&
      s.assistantTextEmpty &&
      !s.aborted,
  );
}
|
|
43
|
+
/**
 * Exponential backoff (ms, capped at 4s) before the Nth stall re-prompt
 * (1-based): 500 → 1000 → 2000 → 4000 → 4000.
 *
 * @param {number} attempt - 1-based re-prompt number. Values below 1 and
 *   non-numeric values (NaN, undefined) are treated as the first attempt.
 * @returns {number} Delay in milliseconds, between 500 and 4000 inclusive.
 */
export function stallRepromptBackoffMs(attempt) {
  const numeric = Number(attempt);
  // Math.max(1, NaN) is NaN, which would make the whole delay NaN; fall back
  // to the first attempt so the caller always gets a usable delay.
  const step = Number.isNaN(numeric) ? 1 : Math.max(1, numeric);
  return Math.min(500 * 2 ** (step - 1), 4_000);
}
|
|
26
50
|
export function createStallWatchdog(timeoutMs, onFire) {
|
|
27
51
|
const controller = new AbortController();
|
|
28
52
|
let firedFlag = false;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
|
2
|
-
import { createStallWatchdog, STALL_ABORT_REASON } from "./stall-watchdog.js";
|
|
2
|
+
import { createStallWatchdog, STALL_ABORT_REASON, shouldRepromptStall, stallRepromptBackoffMs, } from "./stall-watchdog.js";
|
|
3
3
|
describe("createStallWatchdog", () => {
|
|
4
4
|
beforeEach(() => vi.useFakeTimers());
|
|
5
5
|
afterEach(() => vi.useRealTimers());
|
|
@@ -70,4 +70,49 @@ describe("createStallWatchdog", () => {
|
|
|
70
70
|
expect(wd.signal.aborted).toBe(true);
|
|
71
71
|
});
|
|
72
72
|
});
|
|
73
|
+
describe("shouldRepromptStall", () => {
|
|
74
|
+
// A clean time-to-first-byte stall: watchdog fired, zero chunks, no text,
|
|
75
|
+
// under the cap, not cancelled — the ONLY case that re-prompts.
|
|
76
|
+
const ttfb = (over = {}) => ({
|
|
77
|
+
stallTriggered: true,
|
|
78
|
+
stallRetryCount: 0,
|
|
79
|
+
maxStallRetries: 1,
|
|
80
|
+
chunksThisAttempt: 0,
|
|
81
|
+
assistantTextEmpty: true,
|
|
82
|
+
aborted: false,
|
|
83
|
+
...over,
|
|
84
|
+
});
|
|
85
|
+
it("re-prompts a time-to-first-byte stall under the cap", () => {
|
|
86
|
+
expect(shouldRepromptStall(ttfb())).toBe(true);
|
|
87
|
+
});
|
|
88
|
+
it("does NOT re-prompt when the watchdog never fired", () => {
|
|
89
|
+
expect(shouldRepromptStall(ttfb({ stallTriggered: false }))).toBe(false);
|
|
90
|
+
});
|
|
91
|
+
it("does NOT re-prompt once the retry cap is reached", () => {
|
|
92
|
+
expect(shouldRepromptStall(ttfb({ stallRetryCount: 1, maxStallRetries: 1 }))).toBe(false);
|
|
93
|
+
// maxStallRetries=0 means the feature is disabled — never re-prompt.
|
|
94
|
+
expect(shouldRepromptStall(ttfb({ stallRetryCount: 0, maxStallRetries: 0 }))).toBe(false);
|
|
95
|
+
});
|
|
96
|
+
it("does NOT re-prompt once a real chunk has arrived (mid-stream stall → rescue)", () => {
|
|
97
|
+
expect(shouldRepromptStall(ttfb({ chunksThisAttempt: 1 }))).toBe(false);
|
|
98
|
+
});
|
|
99
|
+
it("does NOT re-prompt once assistant text has flowed (output would corrupt)", () => {
|
|
100
|
+
expect(shouldRepromptStall(ttfb({ assistantTextEmpty: false }))).toBe(false);
|
|
101
|
+
});
|
|
102
|
+
it("does NOT re-prompt over a genuine user cancel", () => {
|
|
103
|
+
expect(shouldRepromptStall(ttfb({ aborted: true }))).toBe(false);
|
|
104
|
+
});
|
|
105
|
+
});
|
|
106
|
+
describe("stallRepromptBackoffMs", () => {
|
|
107
|
+
it("grows exponentially and caps at 4s", () => {
|
|
108
|
+
expect(stallRepromptBackoffMs(1)).toBe(500);
|
|
109
|
+
expect(stallRepromptBackoffMs(2)).toBe(1000);
|
|
110
|
+
expect(stallRepromptBackoffMs(3)).toBe(2000);
|
|
111
|
+
expect(stallRepromptBackoffMs(4)).toBe(4000);
|
|
112
|
+
expect(stallRepromptBackoffMs(5)).toBe(4000);
|
|
113
|
+
});
|
|
114
|
+
it("treats attempt < 1 as the first attempt", () => {
|
|
115
|
+
expect(stallRepromptBackoffMs(0)).toBe(500);
|
|
116
|
+
});
|
|
117
|
+
});
|
|
73
118
|
//# sourceMappingURL=stall-watchdog.test.js.map
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/orchestrator/steer-inbox.ts
|
|
3
|
+
*
|
|
4
|
+
* Live-queue steering — pure decision helper.
|
|
5
|
+
*
|
|
6
|
+
* When the user types a message while a turn is streaming, the UI queue is
|
|
7
|
+
* drained at the next prepareStep boundary and the messages are injected into
|
|
8
|
+
* the running turn as `user` interjections (Claude-Code-style steering). This
|
|
9
|
+
* module holds the PURE mapping/gating decision so it is unit-testable in
|
|
10
|
+
* isolation from the orchestrator loop. The orchestrator owns the side effects
|
|
11
|
+
* (draining the queue, the pendingSteers accumulator, emitting telemetry).
|
|
12
|
+
*/
|
|
13
|
+
import type { ModelMessage } from "ai";
|
|
14
|
+
/** Inputs to the steer-injection decision — see {@link planSteerInjection}. */
|
|
15
|
+
export interface SteerInjectionState {
|
|
16
|
+
/** Raw messages drained from the UI steer queue this step. */
|
|
17
|
+
drained: {
|
|
18
|
+
text: string;
|
|
19
|
+
}[];
|
|
20
|
+
/** True on a genuine user cancel — never steer an aborted turn. */
|
|
21
|
+
aborted: boolean;
|
|
22
|
+
/** Feature flag (getSteerInjectionEnabled). */
|
|
23
|
+
enabled: boolean;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Decide which (if any) drained messages to inject into the running turn.
|
|
27
|
+
*
|
|
28
|
+
* Returns user-role ModelMessages in FIFO order, trimmed, with empty/whitespace
|
|
29
|
+
* entries dropped. Returns `[]` when the feature is disabled or the turn was
|
|
30
|
+
* cancelled. Pure (no side effects).
|
|
31
|
+
*/
|
|
32
|
+
export declare function planSteerInjection(s: SteerInjectionState): ModelMessage[];
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * Decide which (if any) drained messages to inject into the running turn.
 *
 * Returns user-role ModelMessages in FIFO order, trimmed, with empty/whitespace
 * entries dropped. Returns `[]` when the feature is disabled or the turn was
 * cancelled. Pure (no side effects).
 *
 * Malformed input is skipped rather than thrown on: a missing `drained` list
 * is treated as empty, and entries that are nullish or whose `text` is not a
 * string are dropped.
 *
 * @param {object} s - Steer-injection state.
 * @param {{ text: string }[]} s.drained - Messages drained from the steer queue.
 * @param {boolean} s.aborted - True on a user cancel.
 * @param {boolean} s.enabled - Feature flag.
 * @returns {{ role: "user", content: string }[]} Messages to inject, possibly empty.
 */
export function planSteerInjection(s) {
  if (!s.enabled || s.aborted) {
    return [];
  }
  const out = [];
  for (const m of s.drained ?? []) {
    const raw = m?.text;
    // Only strings can be trimmed; anything else is not a usable message.
    if (typeof raw !== "string") {
      continue;
    }
    const text = raw.trim();
    if (!text) {
      continue;
    }
    out.push({ role: "user", content: text });
  }
  return out;
}
|
|
20
|
+
//# sourceMappingURL=steer-inbox.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { planSteerInjection } from "./steer-inbox.js";
|
|
3
|
+
describe("planSteerInjection", () => {
|
|
4
|
+
// A valid baseline: feature enabled, not cancelled, one queued message.
|
|
5
|
+
const base = (over = {}) => ({
|
|
6
|
+
drained: [{ text: "also add tests" }],
|
|
7
|
+
aborted: false,
|
|
8
|
+
enabled: true,
|
|
9
|
+
...over,
|
|
10
|
+
});
|
|
11
|
+
it("maps drained text into a single user ModelMessage", () => {
|
|
12
|
+
const out = planSteerInjection(base());
|
|
13
|
+
expect(out).toEqual([{ role: "user", content: "also add tests" }]);
|
|
14
|
+
});
|
|
15
|
+
it("preserves FIFO order across multiple drained messages", () => {
|
|
16
|
+
const out = planSteerInjection(base({ drained: [{ text: "a" }, { text: "b" }] }));
|
|
17
|
+
expect(out.map((m) => m.content)).toEqual(["a", "b"]);
|
|
18
|
+
});
|
|
19
|
+
it("returns [] when the feature is disabled", () => {
|
|
20
|
+
expect(planSteerInjection(base({ enabled: false }))).toEqual([]);
|
|
21
|
+
});
|
|
22
|
+
it("returns [] over a genuine user cancel (never steer an aborted turn)", () => {
|
|
23
|
+
expect(planSteerInjection(base({ aborted: true }))).toEqual([]);
|
|
24
|
+
});
|
|
25
|
+
it("returns [] when nothing was drained", () => {
|
|
26
|
+
expect(planSteerInjection(base({ drained: [] }))).toEqual([]);
|
|
27
|
+
});
|
|
28
|
+
it("skips empty / whitespace-only messages and trims the rest", () => {
|
|
29
|
+
const out = planSteerInjection(base({ drained: [{ text: " " }, { text: " keep me " }, { text: "" }] }));
|
|
30
|
+
expect(out).toEqual([{ role: "user", content: "keep me" }]);
|
|
31
|
+
});
|
|
32
|
+
});
|
|
33
|
+
//# sourceMappingURL=steer-inbox.test.js.map
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/orchestrator/tool-loop-askcard.ts
|
|
3
|
+
*
|
|
4
|
+
* Pure helper that computes the tool-loop-cap askcard tier (label set + default
|
|
5
|
+
* action) from the current step number and the resolved natural ceiling for
|
|
6
|
+
* the (taskType, size) matrix.
|
|
7
|
+
*
|
|
8
|
+
* Four tiers (the 0.5× boundary belongs to normal; the 2× and 5× boundaries belong to the lower tier):
|
|
9
|
+
* - early : step < 0.5 × ceiling — a transient fixation. Default Continue.
|
|
10
|
+
* - normal : 0.5× ≤ step ≤ 2× ceiling — used cheap budget; Default Stop.
|
|
11
|
+
* - overBudget : 2× < step ≤ 5× ceiling — Continue still available but the
|
|
12
|
+
* label carries the overage multiplier so the cost of
|
|
13
|
+
* continuing is visible at decision time. Default Stop.
|
|
14
|
+
* - extreme : step > 5× ceiling — Stop is moved FIRST in the option
|
|
15
|
+
* array (Enter = Stop) and Continue is labelled "expensive".
|
|
16
|
+
* Default Stop (now at index 0).
|
|
17
|
+
*
|
|
18
|
+
* Live miss this tier set fixes (session 1f29e238, step 77/6 = 12.8×): extreme
|
|
19
|
+
* tier put Stop first with a warning — good. But the storyflow_ui session
|
|
20
|
+
* 22661c8de9f2 ran step 29/12 = 2.4× — the OLD code had no middle warning, so
|
|
21
|
+
* the askcard showed a plain "Continue (let agent try)" with no signal that
|
|
22
|
+
* continuing costs more. User chose Continue, the model stalled 4 tool-calls
|
|
23
|
+
* later, and forced-finalize had to rescue a degraded answer.
|
|
24
|
+
*
|
|
25
|
+
* Pure — no React, no DOM, no side effects. Unit-testable in isolation.
|
|
26
|
+
*/
|
|
27
|
+
export type LoopCapTier = "early" | "normal" | "overBudget" | "extreme";
|
|
28
|
+
export interface LoopCapAskcardOptions {
|
|
29
|
+
/** AI-SDK step number when the pattern fired. */
|
|
30
|
+
stepNumber: number;
|
|
31
|
+
/**
|
|
32
|
+
* Natural step ceiling for (taskType, size). Optional — when undefined we
|
|
33
|
+
* cannot compute multipliers, so the askcard falls back to the legacy
|
|
34
|
+
* step-threshold heuristic (0 < step ≤ 15 = early, else normal).
|
|
35
|
+
*/
|
|
36
|
+
naturalCeiling?: number;
|
|
37
|
+
}
|
|
38
|
+
export interface LoopCapAskcardLayout {
|
|
39
|
+
tier: LoopCapTier;
|
|
40
|
+
/** Index into `optionLabels` of the option pre-selected (Enter applies). */
|
|
41
|
+
defaultIndex: 0 | 1;
|
|
42
|
+
/**
|
|
43
|
+
* Exactly two labels in render order. The first is at index 0, the second
|
|
44
|
+
* at index 1 — order matters for the askcard UI (arrow-key navigation,
|
|
45
|
+
* Enter-applies-default).
|
|
46
|
+
*/
|
|
47
|
+
optionLabels: [continueOrStop: string, stopOrContinue: string];
|
|
48
|
+
/** Values parallel to optionLabels — what the resolver returns to the loop. */
|
|
49
|
+
optionValues: [string, string];
|
|
50
|
+
/**
|
|
51
|
+
* x.x string (e.g. "2.4") when the tier is overBudget or extreme, else
|
|
52
|
+
* null. Caller can also surface this in the askcard context message.
|
|
53
|
+
*/
|
|
54
|
+
overageMultiplier: string | null;
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* Decide the askcard layout for a tool-loop-cap pattern hit. Pure.
|
|
58
|
+
*/
|
|
59
|
+
export declare function planLoopCapAskcard(opts: LoopCapAskcardOptions): LoopCapAskcardLayout;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/orchestrator/tool-loop-askcard.ts
|
|
3
|
+
*
|
|
4
|
+
* Pure helper that computes the tool-loop-cap askcard tier (label set + default
|
|
5
|
+
* action) from the current step number and the resolved natural ceiling for
|
|
6
|
+
* the (taskType, size) matrix.
|
|
7
|
+
*
|
|
8
|
+
* Four tiers (the 0.5× boundary belongs to normal; the 2× and 5× boundaries belong to the lower tier):
|
|
9
|
+
* - early : step < 0.5 × ceiling — a transient fixation. Default Continue.
|
|
10
|
+
* - normal : 0.5× ≤ step ≤ 2× ceiling — used cheap budget; Default Stop.
|
|
11
|
+
* - overBudget : 2× < step ≤ 5× ceiling — Continue still available but the
|
|
12
|
+
* label carries the overage multiplier so the cost of
|
|
13
|
+
* continuing is visible at decision time. Default Stop.
|
|
14
|
+
* - extreme : step > 5× ceiling — Stop is moved FIRST in the option
|
|
15
|
+
* array (Enter = Stop) and Continue is labelled "expensive".
|
|
16
|
+
* Default Stop (now at index 0).
|
|
17
|
+
*
|
|
18
|
+
* Live miss this tier set fixes (session 1f29e238, step 77/6 = 12.8×): extreme
|
|
19
|
+
* tier put Stop first with a warning — good. But the storyflow_ui session
|
|
20
|
+
* 22661c8de9f2 ran step 29/12 = 2.4× — the OLD code had no middle warning, so
|
|
21
|
+
* the askcard showed a plain "Continue (let agent try)" with no signal that
|
|
22
|
+
* continuing costs more. User chose Continue, the model stalled 4 tool-calls
|
|
23
|
+
* later, and forced-finalize had to rescue a degraded answer.
|
|
24
|
+
*
|
|
25
|
+
* Pure — no React, no DOM, no side effects. Unit-testable in isolation.
|
|
26
|
+
*/
|
|
27
|
+
// Default option set: Continue at index 0, Stop at index 1. Frozen because the
// arrays are module-level state; callers always receive copies.
const NORMAL_LABELS = Object.freeze(["Continue (let agent try)", "Stop and answer"]);
const NORMAL_VALUES = Object.freeze(["continue", "stop"]);
/**
 * Decide the askcard layout for a tool-loop-cap pattern hit. Pure.
 *
 * Tier selection by `ratio = stepNumber / naturalCeiling`:
 *   - ratio > 5        → "extreme"    (Stop first, default index 0)
 *   - 2 < ratio ≤ 5    → "overBudget" (Continue first, default Stop at index 1)
 *   - ratio < 0.5      → "early"      (default Continue at index 0)
 *   - otherwise        → "normal"     (default Stop at index 1)
 * Without a positive `naturalCeiling` no ratio can be computed, so the tier
 * falls back to a fixed step threshold: 0 < step ≤ 15 is "early", else "normal".
 *
 * @param {object} opts
 * @param {number} opts.stepNumber - Step number when the pattern fired.
 * @param {number} [opts.naturalCeiling] - Natural step ceiling; missing, zero
 *   or negative selects the fallback heuristic.
 * @returns {{tier: string, defaultIndex: number, optionLabels: string[], optionValues: string[], overageMultiplier: (string|null)}}
 *   A new layout object on every call. Its arrays are fresh copies, so callers
 *   may mutate them without affecting later calls. `overageMultiplier` is the
 *   ratio formatted to one decimal for overBudget/extreme, otherwise `null`.
 */
export function planLoopCapAskcard(opts) {
  const { stepNumber, naturalCeiling } = opts;
  // No ceiling → cannot compute multipliers. Fall back to a static threshold:
  // 0 < step ≤ 15 looks "early" enough to default Continue, else default Stop.
  if (!naturalCeiling || naturalCeiling <= 0) {
    const tier = stepNumber > 0 && stepNumber <= 15 ? "early" : "normal";
    return {
      tier,
      defaultIndex: tier === "early" ? 0 : 1,
      optionLabels: [...NORMAL_LABELS],
      optionValues: [...NORMAL_VALUES],
      overageMultiplier: null,
    };
  }
  const ratio = stepNumber / naturalCeiling;
  const multiplier = ratio.toFixed(1);
  if (ratio > 5) {
    // Order is reversed here: Stop sits at index 0 so Enter applies Stop.
    return {
      tier: "extreme",
      defaultIndex: 0,
      optionLabels: ["Stop and answer (recommended)", `Continue anyway (⚠ ${multiplier}× over budget — expensive)`],
      optionValues: ["stop", "continue"],
      overageMultiplier: multiplier,
    };
  }
  if (ratio > 2) {
    return {
      tier: "overBudget",
      defaultIndex: 1,
      optionLabels: [
        `Continue (⚠ ${multiplier}× past natural budget — quality may degrade)`,
        "Stop and answer (recommended)",
      ],
      optionValues: [...NORMAL_VALUES],
      overageMultiplier: multiplier,
    };
  }
  if (ratio < 0.5) {
    return {
      tier: "early",
      defaultIndex: 0,
      optionLabels: [...NORMAL_LABELS],
      optionValues: [...NORMAL_VALUES],
      overageMultiplier: null,
    };
  }
  return {
    tier: "normal",
    defaultIndex: 1,
    optionLabels: [...NORMAL_LABELS],
    optionValues: [...NORMAL_VALUES],
    overageMultiplier: null,
  };
}
|
|
86
|
+
//# sourceMappingURL=tool-loop-askcard.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { planLoopCapAskcard } from "./tool-loop-askcard.js";
|
|
3
|
+
describe("planLoopCapAskcard", () => {
|
|
4
|
+
it("early tier (< 0.5× ceiling): default Continue, no warning", () => {
|
|
5
|
+
const r = planLoopCapAskcard({ stepNumber: 5, naturalCeiling: 12 });
|
|
6
|
+
expect(r.tier).toBe("early");
|
|
7
|
+
expect(r.defaultIndex).toBe(0);
|
|
8
|
+
expect(r.optionLabels[0]).toMatch(/Continue/);
|
|
9
|
+
expect(r.optionValues[0]).toBe("continue");
|
|
10
|
+
expect(r.overageMultiplier).toBeNull();
|
|
11
|
+
// no warning emoji on the Continue label
|
|
12
|
+
expect(r.optionLabels[0]).not.toMatch(/⚠/);
|
|
13
|
+
});
|
|
14
|
+
it("normal tier (0.5×–2× ceiling): default Stop, no warning, Continue first", () => {
|
|
15
|
+
const r = planLoopCapAskcard({ stepNumber: 18, naturalCeiling: 12 });
|
|
16
|
+
expect(r.tier).toBe("normal");
|
|
17
|
+
expect(r.defaultIndex).toBe(1);
|
|
18
|
+
expect(r.optionLabels[0]).toBe("Continue (let agent try)");
|
|
19
|
+
expect(r.optionLabels[1]).toBe("Stop and answer");
|
|
20
|
+
expect(r.overageMultiplier).toBeNull();
|
|
21
|
+
});
|
|
22
|
+
it("overBudget tier (2×–5× ceiling): Continue carries the overage multiplier, default Stop", () => {
|
|
23
|
+
// The storyflow_ui case: step 29 / ceiling 12 = 2.4×
|
|
24
|
+
const r = planLoopCapAskcard({ stepNumber: 29, naturalCeiling: 12 });
|
|
25
|
+
expect(r.tier).toBe("overBudget");
|
|
26
|
+
expect(r.defaultIndex).toBe(1);
|
|
27
|
+
expect(r.optionLabels[0]).toMatch(/⚠ 2\.4× past natural budget/);
|
|
28
|
+
expect(r.optionLabels[1]).toMatch(/Stop and answer \(recommended\)/);
|
|
29
|
+
expect(r.overageMultiplier).toBe("2.4");
|
|
30
|
+
// order preserved: Continue at 0, Stop at 1
|
|
31
|
+
expect(r.optionValues).toEqual(["continue", "stop"]);
|
|
32
|
+
});
|
|
33
|
+
it("extreme tier (> 5× ceiling): Stop FIRST in the array, Continue labelled expensive", () => {
|
|
34
|
+
// session 1f29e238 — step 77 / ceiling 6 = 12.8×
|
|
35
|
+
const r = planLoopCapAskcard({ stepNumber: 77, naturalCeiling: 6 });
|
|
36
|
+
expect(r.tier).toBe("extreme");
|
|
37
|
+
expect(r.defaultIndex).toBe(0);
|
|
38
|
+
expect(r.optionLabels[0]).toMatch(/Stop and answer \(recommended\)/);
|
|
39
|
+
expect(r.optionLabels[1]).toMatch(/⚠ 12\.8× over budget — expensive/);
|
|
40
|
+
expect(r.optionValues).toEqual(["stop", "continue"]); // ORDER REVERSED at extreme
|
|
41
|
+
expect(r.overageMultiplier).toBe("12.8");
|
|
42
|
+
});
|
|
43
|
+
it("tier boundaries are open-on-the-lower-side (ratio==2 → normal; ratio==5 → overBudget; ratio==0.5 → normal)", () => {
|
|
44
|
+
// ratio === 2.0 exactly → still normal (the > 2 gate excludes 2.0)
|
|
45
|
+
expect(planLoopCapAskcard({ stepNumber: 24, naturalCeiling: 12 }).tier).toBe("normal");
|
|
46
|
+
// ratio === 5.0 exactly → still overBudget (the > 5 gate excludes 5.0)
|
|
47
|
+
expect(planLoopCapAskcard({ stepNumber: 60, naturalCeiling: 12 }).tier).toBe("overBudget");
|
|
48
|
+
// ratio === 0.5 exactly → normal (the < 0.5 gate excludes 0.5)
|
|
49
|
+
expect(planLoopCapAskcard({ stepNumber: 6, naturalCeiling: 12 }).tier).toBe("normal");
|
|
50
|
+
});
|
|
51
|
+
it("falls back to step-threshold heuristic when naturalCeiling is missing", () => {
|
|
52
|
+
const early = planLoopCapAskcard({ stepNumber: 8 });
|
|
53
|
+
expect(early.tier).toBe("early");
|
|
54
|
+
expect(early.defaultIndex).toBe(0);
|
|
55
|
+
const normal = planLoopCapAskcard({ stepNumber: 22 });
|
|
56
|
+
expect(normal.tier).toBe("normal");
|
|
57
|
+
expect(normal.defaultIndex).toBe(1);
|
|
58
|
+
// boundary: step === 15 → still early
|
|
59
|
+
expect(planLoopCapAskcard({ stepNumber: 15 }).tier).toBe("early");
|
|
60
|
+
// step === 16 → normal
|
|
61
|
+
expect(planLoopCapAskcard({ stepNumber: 16 }).tier).toBe("normal");
|
|
62
|
+
// step === 0 → normal (no early credit for nothing)
|
|
63
|
+
expect(planLoopCapAskcard({ stepNumber: 0 }).tier).toBe("normal");
|
|
64
|
+
});
|
|
65
|
+
it("treats naturalCeiling=0 the same as undefined (no multiplier possible)", () => {
|
|
66
|
+
const r = planLoopCapAskcard({ stepNumber: 30, naturalCeiling: 0 });
|
|
67
|
+
expect(r.overageMultiplier).toBeNull();
|
|
68
|
+
expect(r.tier).toBe("normal");
|
|
69
|
+
});
|
|
70
|
+
});
|
|
71
|
+
//# sourceMappingURL=tool-loop-askcard.test.js.map
|
|
@@ -102,7 +102,11 @@ export async function layer4Gsd(ctx) {
|
|
|
102
102
|
(ctx.taskType === "general" && ctx.intentKind === "task") ||
|
|
103
103
|
(isQuestionLike(ctx.raw) && !isImplementationIntent(ctx.raw));
|
|
104
104
|
const ecosystem = mentionsEcosystemScope(ctx.raw);
|
|
105
|
-
|
|
105
|
+
// Heuristic: VN diacritics → user wrote Vietnamese → re-anchor language rule
|
|
106
|
+
// inside the directive (storyflow_ui session 22661c8de9f2 — base rule
|
|
107
|
+
// crowded out by brevity/FIX-FIRST directives).
|
|
108
|
+
const replyLanguage = /[à-ỹÀ-Ỹ]/.test(ctx.raw) ? "Vietnamese" : undefined;
|
|
109
|
+
const directive = buildDirective({ complexity, phase, grayAreas, informational, ecosystem, replyLanguage });
|
|
106
110
|
const budgetChars = Math.floor(ctx.tokenBudget * DIRECTIVE_BUDGET_FRACTION);
|
|
107
111
|
const trimmed = truncateToBudget(directive.text, budgetChars);
|
|
108
112
|
return {
|
package/dist/src/ui/app.js
CHANGED
|
@@ -13,6 +13,7 @@ import { POPULAR_MCP_CATALOG } from "../mcp/catalog.js";
|
|
|
13
13
|
import { parseEnvLines, parseHeaderLines } from "../mcp/parse-headers.js";
|
|
14
14
|
import { toMcpServerId, validateMcpServerConfig } from "../mcp/validate.js";
|
|
15
15
|
import { Agent } from "../orchestrator/orchestrator.js";
|
|
16
|
+
import { planLoopCapAskcard } from "../orchestrator/tool-loop-askcard.js";
|
|
16
17
|
import { getConfiguredProviders, setKeyForProvider } from "../providers/keychain.js";
|
|
17
18
|
import { buildIdealContinuationPrompt } from "../scaffold/continuation-prompt.js";
|
|
18
19
|
import { continueAsCouncil } from "../scaffold/continue-as-council.js";
|
|
@@ -24,7 +25,7 @@ import { processAtMentions } from "../utils/at-mentions.js";
|
|
|
24
25
|
import { readClipboardImage } from "../utils/clipboard-image.js";
|
|
25
26
|
import { FileIndex } from "../utils/file-index.js";
|
|
26
27
|
import { copyTextToHostClipboard, readTextFromHostClipboard } from "../utils/host-clipboard.js";
|
|
27
|
-
import { getApiKey, getCurrentModel, getTelegramBotToken, isModelDisabled, isReservedSubagentName, loadMcpServers, loadPaymentSettings, loadUserSettings, loadValidSubAgents, saveApprovedTelegramUserId, saveMcpServers, savePaymentSettings, saveProjectSettings, saveUserSettings, setDefaultProvider, setModelDisabled, setProviderDisabled, } from "../utils/settings.js";
|
|
28
|
+
import { getApiKey, getCurrentModel, getSteerInjectionEnabled, getTelegramBotToken, isModelDisabled, isReservedSubagentName, loadMcpServers, loadPaymentSettings, loadUserSettings, loadValidSubAgents, saveApprovedTelegramUserId, saveMcpServers, savePaymentSettings, saveProjectSettings, saveUserSettings, setDefaultProvider, setModelDisabled, setProviderDisabled, } from "../utils/settings.js";
|
|
28
29
|
import { discoverSkills, formatSkillsForChat } from "../utils/skills.js";
|
|
29
30
|
import { formatSubagentName } from "../utils/subagent-display.js";
|
|
30
31
|
import { checkForUpdate, runUpdate } from "../utils/update-checker.js";
|
|
@@ -483,6 +484,11 @@ export function App({ agent, startupConfig, initialMessage, onExit }) {
|
|
|
483
484
|
pushToast(lvl, text);
|
|
484
485
|
return;
|
|
485
486
|
}
|
|
487
|
+
if (e.kind === "steer-inject") {
|
|
488
|
+
const count = typeof e.count === "number" ? e.count : 1;
|
|
489
|
+
pushToast("info", `↳ steering applied (${count} message${count === 1 ? "" : "s"})`);
|
|
490
|
+
return;
|
|
491
|
+
}
|
|
486
492
|
if (e.kind === "ee-timeout" || e.kind === "ee-error") {
|
|
487
493
|
const source = typeof e.source === "string" ? e.source : "unknown";
|
|
488
494
|
const kind = e.kind === "ee-timeout" ? "timeout" : "error";
|
|
@@ -539,6 +545,23 @@ export function App({ agent, startupConfig, initialMessage, onExit }) {
|
|
|
539
545
|
}
|
|
540
546
|
return undefined;
|
|
541
547
|
}, [handleHarnessEvent]);
|
|
548
|
+
// Live-queue steering: expose the mid-turn queue to the running turn so
|
|
549
|
+
// prepareStep can inject typed-while-busy messages at the next step boundary
|
|
550
|
+
// instead of deferring them to a new turn. Disabled → callback not wired, so
|
|
551
|
+
// finishTurnProcessing drains the queue post-turn exactly as before.
|
|
552
|
+
useEffect(() => {
|
|
553
|
+
if (!getSteerInjectionEnabled())
|
|
554
|
+
return;
|
|
555
|
+
agent.setSteerDrain(() => {
|
|
556
|
+
if (queuedMessagesRef.current.length === 0)
|
|
557
|
+
return [];
|
|
558
|
+
const drained = queuedMessagesRef.current.map((m) => ({ text: m.text }));
|
|
559
|
+
queuedMessagesRef.current = [];
|
|
560
|
+
setQueuedMessages([]);
|
|
561
|
+
return drained;
|
|
562
|
+
});
|
|
563
|
+
return () => agent.setSteerDrain(null);
|
|
564
|
+
}, [agent]);
|
|
542
565
|
const dismissToast = useCallback(() => setActiveToast(null), []);
|
|
543
566
|
// ─── /Phase 21 toast subscriber ────────────────────────────────────────────
|
|
544
567
|
const { model, setModel, showModelPicker, setShowModelPicker, modelPickerIndex, setModelPickerIndex, modelSearchQuery, setModelSearchQuery, configuredProviders, setConfiguredProviders, disabledProviders, setDisabledProvidersState, defaultProvider, setDefaultProviderState, disabledModels, setDisabledModelsState, modelPickerFocus, setModelPickerFocus, providerChipIndex, setProviderChipIndex, reasoningEffortByModel, setReasoningEffortByModel, } = useModelPicker(agent.getModel());
|
|
@@ -1779,50 +1802,43 @@ export function App({ agent, startupConfig, initialMessage, onExit }) {
|
|
|
1779
1802
|
const isPattern = info.kind === "pattern";
|
|
1780
1803
|
const qid = isPattern ? `tool-pattern-loop-${Date.now()}` : `tool-loop-cap-${info.stepNumber}-${Date.now()}`;
|
|
1781
1804
|
toolLoopCapResolversRef.current.set(qid, resolve);
|
|
1782
|
-
//
|
|
1783
|
-
//
|
|
1784
|
-
//
|
|
1785
|
-
//
|
|
1786
|
-
//
|
|
1787
|
-
//
|
|
1788
|
-
//
|
|
1789
|
-
//
|
|
1805
|
+
// Tier-aware askcard layout (planLoopCapAskcard) — 4 tiers:
|
|
1806
|
+
// early (< 0.5× ceiling) → Default Continue, no warning
|
|
1807
|
+
// normal (0.5×–2× ceiling) → Default Stop, no warning
|
|
1808
|
+
// overBudget (2×–5× ceiling) → Default Stop, Continue label carries
|
|
1809
|
+
// the overage multiplier so cost is
|
|
1810
|
+
// visible (storyflow_ui 22661c8de9f2:
|
|
1811
|
+
// 2.4× hit had no warning before)
|
|
1812
|
+
// extreme (> 5× ceiling) → Stop FIRST in the array (Enter=Stop),
|
|
1813
|
+
// Continue labelled "expensive"
|
|
1814
|
+
// (session 1f29e238: 12.8× past ceiling)
|
|
1790
1815
|
const patternStep = isPattern ? info.stepNumber : 0;
|
|
1791
1816
|
const patternCeiling = isPattern ? info.naturalCeiling : undefined;
|
|
1792
|
-
const
|
|
1793
|
-
? patternStep
|
|
1794
|
-
:
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
|
|
1801
|
-
const patternExtreme = patternCeiling !== undefined && patternCeiling > 0 && patternStep > patternCeiling * 5;
|
|
1802
|
-
const overageMultiplier = patternExtreme && patternCeiling ? (patternStep / patternCeiling).toFixed(1) : null;
|
|
1803
|
-
const patternDefaultIdx = patternEarly ? 0 : patternExtreme ? 0 : 1;
|
|
1804
|
-
const patternOptions = patternExtreme
|
|
1817
|
+
const layout = isPattern
|
|
1818
|
+
? planLoopCapAskcard({ stepNumber: patternStep, naturalCeiling: patternCeiling })
|
|
1819
|
+
: null;
|
|
1820
|
+
const patternEarly = layout?.tier === "early";
|
|
1821
|
+
const patternOverBudget = layout?.tier === "overBudget";
|
|
1822
|
+
const patternExtreme = layout?.tier === "extreme";
|
|
1823
|
+
const overageMultiplier = layout?.overageMultiplier ?? null;
|
|
1824
|
+
const patternDefaultIdx = layout?.defaultIndex ?? 0;
|
|
1825
|
+
const patternOptions = layout
|
|
1805
1826
|
? [
|
|
1806
|
-
{ label:
|
|
1807
|
-
{
|
|
1808
|
-
label: `Continue anyway (⚠ ${overageMultiplier}× over budget — expensive)`,
|
|
1809
|
-
value: "continue",
|
|
1810
|
-
kind: "choice",
|
|
1811
|
-
},
|
|
1827
|
+
{ label: layout.optionLabels[0], value: layout.optionValues[0], kind: "choice" },
|
|
1828
|
+
{ label: layout.optionLabels[1], value: layout.optionValues[1], kind: "choice" },
|
|
1812
1829
|
]
|
|
1813
|
-
: [
|
|
1814
|
-
{ label: "Continue (let agent try)", value: "continue", kind: "choice" },
|
|
1815
|
-
{ label: "Stop and answer", value: "stop", kind: "choice" },
|
|
1816
|
-
];
|
|
1830
|
+
: [];
|
|
1817
1831
|
const question = isPattern
|
|
1818
1832
|
? {
|
|
1819
1833
|
questionId: qid,
|
|
1820
1834
|
question: `Tool \`${info.toolName}\` đã chạy ${info.count}/${info.windowSize} lần với args gần giống (step ${info.stepNumber}${patternCeiling ? `/${patternCeiling}` : ""}) — có thể đang loop. Tiếp tục?`,
|
|
1821
1835
|
context: patternExtreme
|
|
1822
1836
|
? `EXTREME OVERAGE — ${overageMultiplier}× past natural budget. Continuing has historically not converged in this regime (see session 1f29e238: 8× over budget, still failed). Stop returns the agent's best answer with current context.`
|
|
1823
|
-
:
|
|
1824
|
-
?
|
|
1825
|
-
:
|
|
1837
|
+
: patternOverBudget
|
|
1838
|
+
? `Past natural budget — ${overageMultiplier}× the typical step count for this task type. Continuing may still converge but quality often degrades (longer compaction, stale tool results, forced-finalize on stall). Stop returns the agent's best answer with current context.`
|
|
1839
|
+
: patternEarly
|
|
1840
|
+
? "Continue lets the agent keep trying — likely the right call this early in the run. Stop returns the agent's best answer with current context."
|
|
1841
|
+
: "You're past the natural budget for this task type. Stop usually recovers a clean answer; Continue keeps spending tokens.",
|
|
1826
1842
|
isRequired: true,
|
|
1827
1843
|
phase: "tool-loop-cap",
|
|
1828
1844
|
options: patternOptions,
|
|
@@ -317,6 +317,29 @@ export declare function getSubAgentBudgetChars(): number;
|
|
|
317
317
|
* Default 120_000 (2 min). Env override: MUONROI_PROVIDER_STALL_TIMEOUT_MS.
|
|
318
318
|
*/
|
|
319
319
|
export declare function getProviderStallTimeoutMs(): number;
|
|
320
|
+
/**
|
|
321
|
+
* Number of times to AUTOMATICALLY re-issue a streaming model call after the
|
|
322
|
+
* stall watchdog fires WITHOUT any chunk having arrived (a time-to-first-byte
|
|
323
|
+
* "frozen" stall). Some providers (observed: xai/grok-build-0.1) accept a
|
|
324
|
+
* request then never send the first byte, yet a fresh request goes through —
|
|
325
|
+
* a single dead socket, not a down backend. Re-prompting is gated on
|
|
326
|
+
* zero-chunks-this-attempt so it can NEVER restart a turn that already ran
|
|
327
|
+
* tools or emitted text (that would corrupt/duplicate output — the partial-
|
|
328
|
+
* answer rescue path handles those). Each re-prompt waits a short backoff.
|
|
329
|
+
* Range 0–5; 0 restores the legacy "surface the stall, never retry" behaviour.
|
|
330
|
+
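* Default 1. Env override: MUONROI_PROVIDER_STALL_RETRIES (fractional values are floored; an unset, empty, non-numeric or out-of-range value falls back to the default).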
|
|
331
|
+
*/
|
|
332
|
+
export declare function getProviderStallRetries(): number;
|
|
333
|
+
/**
|
|
334
|
+
* Live-queue steering: when true, a message typed while a turn is streaming is
|
|
335
|
+
* injected into the running turn at the next prepareStep boundary (as a `user`
|
|
336
|
+
* interjection) instead of waiting for the turn to finish and running as a new
|
|
337
|
+
* turn. When false, the legacy deferred-queue behaviour is preserved (the
|
|
338
|
+
* message runs only after the current turn completes). House convention for a
|
|
339
|
+
* default-true boolean knob: only an explicit "0" disables; unset/blank/any
|
|
340
|
+
* other value = enabled. Env override: MUONROI_STEER_INJECTION.
|
|
341
|
+
*/
|
|
342
|
+
export declare function getSteerInjectionEnabled(): boolean;
|
|
320
343
|
/**
|
|
321
344
|
* Phase B3 — threshold (in chars of cumulative message content) above which
|
|
322
345
|
* the sub-agent `prepareStep` compactor rewrites older tool_result parts
|
|
@@ -693,6 +693,39 @@ export function getProviderStallTimeoutMs() {
|
|
|
693
693
|
}
|
|
694
694
|
return 120_000;
|
|
695
695
|
}
|
|
696
|
+
/**
 * Number of times to AUTOMATICALLY re-issue a streaming model call after the
 * stall watchdog fires WITHOUT any chunk having arrived (a time-to-first-byte
 * "frozen" stall). Some providers (observed: xai/grok-build-0.1) accept a
 * request then never send the first byte, yet a fresh request goes through —
 * a single dead socket, not a down backend. Re-prompting is gated on
 * zero-chunks-this-attempt so it can NEVER restart a turn that already ran
 * tools or emitted text (that would corrupt/duplicate output — the partial-
 * answer rescue path handles those). Each re-prompt waits a short backoff.
 * Range 0–5; 0 restores the legacy "surface the stall, never retry" behaviour.
 * Default 1. Env override: MUONROI_PROVIDER_STALL_RETRIES.
 *
 * Parsing of the env override:
 *  - unset, empty or whitespace-only → default (1);
 *  - a finite number within 0–5 → floored to an integer;
 *  - anything else (non-numeric, negative, above 5) → default (1).
 *
 * @returns {number} The retry count to use.
 */
export function getProviderStallRetries() {
    const DEFAULT_RETRIES = 1;
    const MAX_RETRIES = 5;
    // Trim before the blank check: Number("  ") coerces to 0, which would
    // silently disable retries for a value that is effectively unset.
    const raw = process.env.MUONROI_PROVIDER_STALL_RETRIES?.trim();
    if (raw === undefined || raw === "") {
        return DEFAULT_RETRIES;
    }
    const parsed = Number(raw);
    if (!Number.isFinite(parsed) || parsed < 0 || parsed > MAX_RETRIES) {
        return DEFAULT_RETRIES;
    }
    return Math.floor(parsed);
}
|
|
717
|
+
/**
 * Reports whether live-queue steering is switched on.
 *
 * With steering enabled, a message typed while a turn is streaming is injected
 * into the running turn at the next prepareStep boundary (as a `user`
 * interjection). With it disabled, the legacy deferred-queue behaviour applies:
 * the message runs as a new turn only after the current one completes.
 *
 * This follows the house convention for a default-true boolean knob: the
 * MUONROI_STEER_INJECTION env override disables the feature only when it is
 * exactly "0"; unset, blank, or any other value leaves it enabled.
 *
 * @returns {boolean} `false` only when MUONROI_STEER_INJECTION is exactly "0".
 */
export function getSteerInjectionEnabled() {
    const { MUONROI_STEER_INJECTION: override } = process.env;
    const explicitlyDisabled = override === "0";
    return !explicitlyDisabled;
}
|
|
696
729
|
/**
|
|
697
730
|
* Phase B3 — threshold (in chars of cumulative message content) above which
|
|
698
731
|
* the sub-agent `prepareStep` compactor rewrites older tool_result parts
|