muonroi-cli 1.6.4 → 1.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/packages/agent-harness-core/src/event-filter.js +1 -0
- package/dist/packages/agent-harness-core/src/event-redact.js +7 -2
- package/dist/packages/agent-harness-core/src/protocol.d.ts +8 -0
- package/dist/src/generated/version.d.ts +1 -1
- package/dist/src/generated/version.js +1 -1
- package/dist/src/gsd/__tests__/directives.test.js +37 -0
- package/dist/src/gsd/directives.d.ts +18 -0
- package/dist/src/gsd/directives.js +23 -2
- package/dist/src/orchestrator/message-processor.d.ts +8 -0
- package/dist/src/orchestrator/message-processor.js +56 -7
- package/dist/src/orchestrator/orchestrator.d.ts +10 -0
- package/dist/src/orchestrator/orchestrator.js +11 -0
- package/dist/src/orchestrator/stall-rescue.d.ts +1 -0
- package/dist/src/orchestrator/stall-rescue.js +20 -1
- package/dist/src/orchestrator/stall-rescue.test.js +30 -1
- package/dist/src/orchestrator/steer-inbox.d.ts +32 -0
- package/dist/src/orchestrator/steer-inbox.js +20 -0
- package/dist/src/orchestrator/steer-inbox.test.d.ts +1 -0
- package/dist/src/orchestrator/steer-inbox.test.js +33 -0
- package/dist/src/orchestrator/tool-loop-askcard.d.ts +59 -0
- package/dist/src/orchestrator/tool-loop-askcard.js +86 -0
- package/dist/src/orchestrator/tool-loop-askcard.test.d.ts +1 -0
- package/dist/src/orchestrator/tool-loop-askcard.test.js +71 -0
- package/dist/src/pil/layer4-gsd.js +5 -1
- package/dist/src/ui/app.js +51 -35
- package/dist/src/utils/settings.d.ts +10 -0
- package/dist/src/utils/settings.js +12 -0
- package/dist/src/utils/settings.test.js +21 -0
- package/package.json +1 -1
|
@@ -85,7 +85,12 @@ const ALLOWED_FIELDS = {
|
|
|
85
85
|
forceCouncil: "pass",
|
|
86
86
|
runId: "pass",
|
|
87
87
|
},
|
|
88
|
-
"
|
|
88
|
+
"steer-inject": {
|
|
89
|
+
count: "pass",
|
|
90
|
+
atStep: "pass",
|
|
91
|
+
runId: "pass",
|
|
92
|
+
},
|
|
93
|
+
toast: {
|
|
89
94
|
level: "pass",
|
|
90
95
|
text: 500, // cap to 500 chars, then scrub
|
|
91
96
|
ttlMs: "pass",
|
|
@@ -94,7 +99,7 @@ const ALLOWED_FIELDS = {
|
|
|
94
99
|
target: "pass",
|
|
95
100
|
text: 500, // cap to 500 chars
|
|
96
101
|
},
|
|
97
|
-
|
|
102
|
+
usage: {
|
|
98
103
|
source: "pass",
|
|
99
104
|
model: "pass",
|
|
100
105
|
inputTokens: "pass",
|
|
@@ -177,6 +177,14 @@ export type LiveEvent = {
|
|
|
177
177
|
/** Total number of unverified claims in this turn. */
|
|
178
178
|
count: number;
|
|
179
179
|
ts: number;
|
|
180
|
+
} | {
|
|
181
|
+
t: "event";
|
|
182
|
+
kind: "steer-inject";
|
|
183
|
+
/** How many queued messages were injected at this boundary. */
|
|
184
|
+
count: number;
|
|
185
|
+
/** The prepareStep step number at which injection occurred (>= 1). */
|
|
186
|
+
atStep: number;
|
|
187
|
+
runId: string;
|
|
180
188
|
} | {
|
|
181
189
|
t: "idle";
|
|
182
190
|
};
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const PACKAGE_VERSION = "1.6.4";
|
|
1
|
+
export declare const PACKAGE_VERSION = "1.6.5";
|
|
2
2
|
export declare const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/sync-version.cjs. DO NOT EDIT BY HAND.
|
|
2
2
|
// Sourced from package.json at build time so it survives bun --compile bundling.
|
|
3
|
-
export const PACKAGE_VERSION = "1.6.4";
|
|
3
|
+
export const PACKAGE_VERSION = "1.6.5";
|
|
4
4
|
export const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
|
|
5
5
|
//# sourceMappingURL=version.js.map
|
|
@@ -98,5 +98,42 @@ describe("buildDirective", () => {
|
|
|
98
98
|
expect(out.text).toMatch(/\[recommended\]/);
|
|
99
99
|
}
|
|
100
100
|
});
|
|
101
|
+
// Language nudge — re-anchors the "reply in user's language" rule INSIDE the
|
|
102
|
+
// directive so layered brevity / FIX-FIRST directives can't drown it (live
|
|
103
|
+
// miss: storyflow_ui session 22661c8de9f2).
|
|
104
|
+
describe("language nudge", () => {
|
|
105
|
+
it("appends the nudge when replyLanguage is set", () => {
|
|
106
|
+
const out = buildDirective({
|
|
107
|
+
complexity: scoreComplexity("fix CI fail"),
|
|
108
|
+
phase: "debug",
|
|
109
|
+
grayAreas: [],
|
|
110
|
+
replyLanguage: "Vietnamese",
|
|
111
|
+
});
|
|
112
|
+
expect(out.text).toMatch(/LANGUAGE — the user wrote in Vietnamese/);
|
|
113
|
+
expect(out.text).toMatch(/Reply in Vietnamese/);
|
|
114
|
+
expect(out.text).toMatch(/OVERRIDES any brevity/);
|
|
115
|
+
});
|
|
116
|
+
it("omits the nudge when replyLanguage is undefined", () => {
|
|
117
|
+
const out = buildDirective({
|
|
118
|
+
complexity: scoreComplexity("fix CI fail"),
|
|
119
|
+
phase: "debug",
|
|
120
|
+
grayAreas: [],
|
|
121
|
+
});
|
|
122
|
+
expect(out.text).not.toMatch(/LANGUAGE —/);
|
|
123
|
+
});
|
|
124
|
+
it("stacks with the ecosystem nudge when both apply", () => {
|
|
125
|
+
const out = buildDirective({
|
|
126
|
+
complexity: scoreComplexity("how does the muonroi ecosystem work"),
|
|
127
|
+
phase: null,
|
|
128
|
+
grayAreas: [],
|
|
129
|
+
ecosystem: true,
|
|
130
|
+
replyLanguage: "Vietnamese",
|
|
131
|
+
});
|
|
132
|
+
expect(out.text).toMatch(/ECOSYSTEM SCOPE/);
|
|
133
|
+
expect(out.text).toMatch(/LANGUAGE —/);
|
|
134
|
+
// ecosystem nudge precedes language nudge (deterministic order)
|
|
135
|
+
expect(out.text.indexOf("ECOSYSTEM SCOPE")).toBeLessThan(out.text.indexOf("LANGUAGE —"));
|
|
136
|
+
});
|
|
137
|
+
});
|
|
101
138
|
});
|
|
102
139
|
//# sourceMappingURL=directives.test.js.map
|
|
@@ -46,6 +46,15 @@ export interface DirectiveInput {
|
|
|
46
46
|
* the shipped authoritative source.
|
|
47
47
|
*/
|
|
48
48
|
ecosystem?: boolean;
|
|
49
|
+
/**
|
|
50
|
+
* User's reply language (heuristic — Vietnamese|undefined). When set, the
|
|
51
|
+
* directive appends an explicit language nudge so the rule survives the
|
|
52
|
+
* personality/GSD instructions stacking on top of it (storyflow_ui session
|
|
53
|
+
* 22661c8de9f2: user wrote Vietnamese, layered directives + a stalled
|
|
54
|
+
* forced-finalize drowned out the base "reply in user's language" rule and
|
|
55
|
+
* the agent answered in English).
|
|
56
|
+
*/
|
|
57
|
+
replyLanguage?: string;
|
|
49
58
|
}
|
|
50
59
|
export interface DirectiveOutput {
|
|
51
60
|
text: string;
|
|
@@ -60,4 +69,13 @@ export declare function mentionsEcosystemScope(message: string): boolean;
|
|
|
60
69
|
* configured — the model simply finds no such tool and falls back to local files.
|
|
61
70
|
*/
|
|
62
71
|
export declare const ECOSYSTEM_DOCS_NUDGE: string;
|
|
72
|
+
/**
|
|
73
|
+
* Appended to any directive when the user's reply language is non-English.
|
|
74
|
+
* The base system prompt's "reply in user's language" rule normally suffices,
|
|
75
|
+
* but `concise` / `FIX-FIRST` / GSD-debug directive bodies stack on top of it
|
|
76
|
+
* with strong "be terse / code over prose" language that crowds the rule out
|
|
77
|
+
* — observed live (storyflow_ui 22661c8de9f2). This NUDGE re-anchors the rule
|
|
78
|
+
* inside the directive itself so brevity preferences cannot override it.
|
|
79
|
+
*/
|
|
80
|
+
export declare function buildLanguageNudge(lang: string): string;
|
|
63
81
|
export declare function buildDirective(input: DirectiveInput): DirectiveOutput;
|
|
@@ -36,6 +36,20 @@ export const ECOSYSTEM_DOCS_NUDGE = [
|
|
|
36
36
|
`${HEADER} ECOSYSTEM SCOPE — this turn concerns the Muonroi ecosystem (platform overview, BB/.NET packages, building-block, open-core boundary, setup).`,
|
|
37
37
|
"If the muonroi-docs MCP is available, it is the AUTHORITATIVE source — call it FIRST (docs_search / setup_guide / bb_recipe_list / bb_package_describe), THEN ground with local files. Do NOT characterize the ecosystem from local repo files alone.",
|
|
38
38
|
].join("\n");
|
|
39
|
+
/**
|
|
40
|
+
* Appended to any directive when the user's reply language is non-English.
|
|
41
|
+
* The base system prompt's "reply in user's language" rule normally suffices,
|
|
42
|
+
* but `concise` / `FIX-FIRST` / GSD-debug directive bodies stack on top of it
|
|
43
|
+
* with strong "be terse / code over prose" language that crowds the rule out
|
|
44
|
+
* — observed live (storyflow_ui 22661c8de9f2). This NUDGE re-anchors the rule
|
|
45
|
+
* inside the directive itself so brevity preferences cannot override it.
|
|
46
|
+
*/
|
|
47
|
+
export function buildLanguageNudge(lang) {
|
|
48
|
+
return [
|
|
49
|
+
`${HEADER} LANGUAGE — the user wrote in ${lang}. Reply in ${lang}.`,
|
|
50
|
+
"This rule OVERRIDES any brevity / concise / code-over-prose directive: terseness is fine, but the response language stays the user's.",
|
|
51
|
+
].join("\n");
|
|
52
|
+
}
|
|
39
53
|
function renderGrayAreas(qs) {
|
|
40
54
|
if (qs.length === 0)
|
|
41
55
|
return " (no gray areas detected — confirm the request is fully specified before proceeding)";
|
|
@@ -125,9 +139,16 @@ export function buildDirective(input) {
|
|
|
125
139
|
// Ecosystem-scoped turns get a docs-first nudge regardless of tier (question
|
|
126
140
|
// OR task): muonroi-docs is the authoritative source and must not be skipped
|
|
127
141
|
// in favour of guessing from local files (session 41ccfeb2ceee turn 1).
|
|
142
|
+
let text = base.text;
|
|
128
143
|
if (input.ecosystem) {
|
|
129
|
-
|
|
144
|
+
text = `${text}\n${ECOSYSTEM_DOCS_NUDGE}`;
|
|
145
|
+
}
|
|
146
|
+
// Language nudge: re-anchor the "reply in user's language" rule INSIDE the
|
|
147
|
+
// directive when the user wrote in a non-English language, so layered
|
|
148
|
+
// brevity/concise directives can't drown it (storyflow_ui 22661c8de9f2).
|
|
149
|
+
if (input.replyLanguage) {
|
|
150
|
+
text = `${text}\n${buildLanguageNudge(input.replyLanguage)}`;
|
|
130
151
|
}
|
|
131
|
-
return base;
|
|
152
|
+
return { ...base, text };
|
|
132
153
|
}
|
|
133
154
|
//# sourceMappingURL=directives.js.map
|
|
@@ -100,6 +100,14 @@ export interface MessageProcessorDeps extends TurnRunnerDepsBase {
|
|
|
100
100
|
* — preserves backward compat for batch / headless paths that have no UI to
|
|
101
101
|
* surface the askcard.
|
|
102
102
|
*/
|
|
103
|
+
/**
|
|
104
|
+
* Live-queue steering drain (UI-provided). Returns and CLEARS any messages
|
|
105
|
+
* the user typed while this turn is streaming, so prepareStep can inject them
|
|
106
|
+
* mid-turn. Undefined / returns [] → no steering (legacy deferred queue).
|
|
107
|
+
*/
|
|
108
|
+
drainSteerMessages?: () => {
|
|
109
|
+
text: string;
|
|
110
|
+
}[];
|
|
103
111
|
askToolLoopContinue?: ToolLoopCapAsk;
|
|
104
112
|
runCouncilV2(userMessage: string, opts: {
|
|
105
113
|
skipClarification: boolean;
|
|
@@ -89,7 +89,7 @@ import { statusBarStore } from "../ui/status-bar/store.js";
|
|
|
89
89
|
import { appendDecisionLog } from "../usage/decision-log.js";
|
|
90
90
|
import { openUrl } from "../utils/open-url.js";
|
|
91
91
|
import { appendAudit, toolNeedsApproval } from "../utils/permission-mode.js";
|
|
92
|
-
import { getAutoCouncilConfidence, getAutoCouncilMinRoles, getProviderStallRetries, getProviderStallTimeoutMs, getRoleModels, getTopLevelCompactKeepLast, getTopLevelCompactThresholdChars, getTopLevelToolBudgetChars, isAutoCouncilEnabled, isProviderDisabled, loadMcpServers, loadValidSubAgents, } from "../utils/settings.js";
|
|
92
|
+
import { getAutoCouncilConfidence, getAutoCouncilMinRoles, getProviderStallRetries, getProviderStallTimeoutMs, getRoleModels, getSteerInjectionEnabled, getTopLevelCompactKeepLast, getTopLevelCompactThresholdChars, getTopLevelToolBudgetChars, isAutoCouncilEnabled, isProviderDisabled, loadMcpServers, loadValidSubAgents, } from "../utils/settings.js";
|
|
93
93
|
import { resolveShell } from "../utils/shell.js";
|
|
94
94
|
import { relaxCompactionSettings } from "./compaction.js";
|
|
95
95
|
import { wrapToolSetWithDedup } from "./cross-turn-dedup.js";
|
|
@@ -109,6 +109,7 @@ import { attachReminderToMessages, buildCheckpointReminder, buildScopeReminder,
|
|
|
109
109
|
import { formatElisionManifest, getSessionExperienceCounts, recordCompaction, recordElision, } from "./session-experience.js";
|
|
110
110
|
import { attemptStallRescue, pushStallToolResult } from "./stall-rescue.js";
|
|
111
111
|
import { createStallWatchdog, STALL_ERROR_MESSAGE, shouldRepromptStall, stallRepromptBackoffMs, } from "./stall-watchdog.js";
|
|
112
|
+
import { planSteerInjection } from "./steer-inbox.js";
|
|
112
113
|
import { wrapToolSetWithCap } from "./sub-agent-cap.js";
|
|
113
114
|
import { compactSubAgentMessages, cumulativeMessageChars } from "./subagent-compactor.js";
|
|
114
115
|
import { detectTextEmittedToolCall, parseDsmlToolCalls } from "./text-tool-call-detector.js";
|
|
@@ -823,6 +824,14 @@ export class MessageProcessor {
|
|
|
823
824
|
// answer rescue path instead. maxStallRetries = 0 restores legacy behaviour.
|
|
824
825
|
let stallRetryCount = 0;
|
|
825
826
|
const maxStallRetries = getProviderStallRetries();
|
|
827
|
+
// Live-queue steering: messages the user typed mid-turn are drained at a
|
|
828
|
+
// prepareStep boundary and accumulated here, then re-appended (deduped) to
|
|
829
|
+
// the messages returned for each subsequent step. Loop-persistent so they
|
|
830
|
+
// survive a stall-reprompt restart of streamText. NOT pushed into
|
|
831
|
+
// deps.messages in v1 — model-context only; the assistant response captures
|
|
832
|
+
// the steering effect and is persisted via appendCompletedTurn.
|
|
833
|
+
const pendingSteers = [];
|
|
834
|
+
const steerEnabled = getSteerInjectionEnabled();
|
|
826
835
|
// Auto-council: route to multi-model debate when EITHER
|
|
827
836
|
// (a) PIL classified taskType=plan|analyze with high confidence AND the
|
|
828
837
|
// prompt is complex enough to justify the debate cost, OR
|
|
@@ -1538,6 +1547,46 @@ export class MessageProcessor {
|
|
|
1538
1547
|
prepareStep: ({ stepNumber: sn, messages: stepMessages }) => {
|
|
1539
1548
|
if (sn < 1)
|
|
1540
1549
|
return {};
|
|
1550
|
+
// --- Live-queue steering injection ---------------------------
|
|
1551
|
+
// Drain the UI steer queue ONCE per prepareStep call (sn >= 1),
|
|
1552
|
+
// accumulate into pendingSteers, and graft pendingSteers onto the
|
|
1553
|
+
// messages this step returns. Dedup-by-content makes re-appending
|
|
1554
|
+
// idempotent even if a stall-reprompt restart re-reads history.
|
|
1555
|
+
const withSteers = (r) => {
|
|
1556
|
+
// Guard the drain on !signal.aborted too: planSteerInjection
|
|
1557
|
+
// already refuses to inject on abort, but draining still CLEARS
|
|
1558
|
+
// the UI queue — so on a (programmatic) abort we must not drain,
|
|
1559
|
+
// or a queued-but-uninjected message is lost (spec §143).
|
|
1560
|
+
const _drained = steerEnabled && !signal.aborted ? (deps.drainSteerMessages?.() ?? []) : [];
|
|
1561
|
+
const _newSteers = planSteerInjection({
|
|
1562
|
+
drained: _drained,
|
|
1563
|
+
aborted: signal.aborted,
|
|
1564
|
+
enabled: steerEnabled,
|
|
1565
|
+
});
|
|
1566
|
+
if (_newSteers.length > 0) {
|
|
1567
|
+
pendingSteers.push(..._newSteers);
|
|
1568
|
+
try {
|
|
1569
|
+
const _ar = globalThis.__muonroiAgentRuntime;
|
|
1570
|
+
_ar?.emitEvent({
|
|
1571
|
+
t: "event",
|
|
1572
|
+
kind: "steer-inject",
|
|
1573
|
+
count: _newSteers.length,
|
|
1574
|
+
atStep: sn,
|
|
1575
|
+
runId: deps.getActiveRunId() ?? "",
|
|
1576
|
+
});
|
|
1577
|
+
}
|
|
1578
|
+
catch (emitErr) {
|
|
1579
|
+
console.error(`[message-processor] steer-inject telemetry failed: ${emitErr?.message}`);
|
|
1580
|
+
}
|
|
1581
|
+
}
|
|
1582
|
+
if (pendingSteers.length === 0)
|
|
1583
|
+
return r;
|
|
1584
|
+
const _base = r.messages ?? stepMessages;
|
|
1585
|
+
const _steerContents = new Set(pendingSteers.map((s) => (typeof s.content === "string" ? s.content : JSON.stringify(s.content))));
|
|
1586
|
+
const _deduped = _base.filter((m) => !(m.role === "user" &&
|
|
1587
|
+
_steerContents.has(typeof m.content === "string" ? m.content : JSON.stringify(m.content))));
|
|
1588
|
+
return { ...r, messages: [..._deduped, ...pendingSteers] };
|
|
1589
|
+
};
|
|
1541
1590
|
const stripped = turnCaps.sanitizeHistory(stepMessages);
|
|
1542
1591
|
// Agent-controlled veto (PRESERVE) or lighter selective keep (KEEP_TOOL_IDS) for this turn's B4 compaction.
|
|
1543
1592
|
// PRESERVE_FULL_CONTEXT skips the compactor entirely (full history).
|
|
@@ -1568,7 +1617,7 @@ export class MessageProcessor {
|
|
|
1568
1617
|
return false;
|
|
1569
1618
|
});
|
|
1570
1619
|
if (hasPreserve) {
|
|
1571
|
-
return { messages: stripped };
|
|
1620
|
+
return withSteers({ messages: stripped });
|
|
1572
1621
|
}
|
|
1573
1622
|
// F2 — envelope = system prompt + JSON-Schema of every tool
|
|
1574
1623
|
// re-sent on every step. Without this the threshold check
|
|
@@ -1642,7 +1691,7 @@ export class MessageProcessor {
|
|
|
1642
1691
|
if (compacted === stripped && shouldPreWarnCompaction(_preWarnChars, topLevelCompactThreshold)) {
|
|
1643
1692
|
const _cp = buildCheckpointReminder(sn, true);
|
|
1644
1693
|
const _pre = `[pre-compaction warning at step ${sn} — next step(s) will likely rewrite older tool results to stubs (threshold ${topLevelCompactThreshold}, keepLast=${topLevelCompactKeepLast}). ${_cp} Summarize or finish if possible.]`;
|
|
1645
|
-
return { messages: attachReminderToMessages(stripped, _pre) };
|
|
1694
|
+
return withSteers({ messages: attachReminderToMessages(stripped, _pre) });
|
|
1646
1695
|
}
|
|
1647
1696
|
// Phase 4A — scope reminder injection (REQ-005).
|
|
1648
1697
|
// Cadence K = 3/5/8 for small/medium/large. Soft-warn fires
|
|
@@ -1704,10 +1753,10 @@ export class MessageProcessor {
|
|
|
1704
1753
|
: buildRepetitionReminder(_ceilingSessionId)
|
|
1705
1754
|
: _scopePart;
|
|
1706
1755
|
const withReminder = attachReminderToMessages(compacted, _reminder);
|
|
1707
|
-
return { messages: withReminder };
|
|
1756
|
+
return withSteers({ messages: withReminder });
|
|
1708
1757
|
}
|
|
1709
1758
|
if (compacted === stripped && stripped === stepMessages)
|
|
1710
|
-
return {};
|
|
1759
|
+
return withSteers({});
|
|
1711
1760
|
// Self-awareness note: tell the model compaction happened so it
|
|
1712
1761
|
// knows earlier context was elided and can adjust its behavior.
|
|
1713
1762
|
// Enhanced per EE anti-mù plan (docs/ee-anti-mu-compaction-plan.md Phase 2): include proactive
|
|
@@ -1725,9 +1774,9 @@ export class MessageProcessor {
|
|
|
1725
1774
|
})()
|
|
1726
1775
|
: null;
|
|
1727
1776
|
if (_compactNote) {
|
|
1728
|
-
return { messages: attachReminderToMessages(compacted, _compactNote) };
|
|
1777
|
+
return withSteers({ messages: attachReminderToMessages(compacted, _compactNote) });
|
|
1729
1778
|
}
|
|
1730
|
-
return { messages: compacted };
|
|
1779
|
+
return withSteers({ messages: compacted });
|
|
1731
1780
|
},
|
|
1732
1781
|
...(dropParam("temperature") ? {} : { temperature: 0.7 }),
|
|
1733
1782
|
...(dropParam("maxOutputTokens") ? {} : { maxOutputTokens: taskTypeToMaxTokens(pilCtx.taskType) }),
|
|
@@ -20,6 +20,8 @@ export declare class Agent {
|
|
|
20
20
|
private messages;
|
|
21
21
|
private messageSeqs;
|
|
22
22
|
private abortController;
|
|
23
|
+
/** UI-registered live-queue steer drain; see Agent.setSteerDrain. */
|
|
24
|
+
private steerDrain;
|
|
23
25
|
private maxToolRounds;
|
|
24
26
|
private mode;
|
|
25
27
|
private modelId;
|
|
@@ -97,6 +99,14 @@ export declare class Agent {
|
|
|
97
99
|
*/
|
|
98
100
|
private _initFlow;
|
|
99
101
|
getModel(): string;
|
|
102
|
+
/**
|
|
103
|
+
* Register (or clear with null) the UI callback that drains messages typed
|
|
104
|
+
* while a turn is streaming, for mid-turn steering injection. Called from the
|
|
105
|
+
* TUI when MUONROI_STEER_INJECTION is enabled.
|
|
106
|
+
*/
|
|
107
|
+
setSteerDrain(fn: (() => {
|
|
108
|
+
text: string;
|
|
109
|
+
}[]) | null): void;
|
|
100
110
|
getActiveRunId(): string | null;
|
|
101
111
|
setModel(model: string): void;
|
|
102
112
|
getMode(): AgentMode;
|
|
@@ -130,6 +130,8 @@ export class Agent {
|
|
|
130
130
|
messages = [];
|
|
131
131
|
messageSeqs = [];
|
|
132
132
|
abortController = null;
|
|
133
|
+
/** UI-registered live-queue steer drain; see Agent.setSteerDrain. */
|
|
134
|
+
steerDrain = null;
|
|
133
135
|
maxToolRounds;
|
|
134
136
|
mode = "agent";
|
|
135
137
|
modelId;
|
|
@@ -309,6 +311,14 @@ export class Agent {
|
|
|
309
311
|
getModel() {
|
|
310
312
|
return this.modelId;
|
|
311
313
|
}
|
|
314
|
+
/**
|
|
315
|
+
* Register (or clear with null) the UI callback that drains messages typed
|
|
316
|
+
* while a turn is streaming, for mid-turn steering injection. Called from the
|
|
317
|
+
* TUI when MUONROI_STEER_INJECTION is enabled.
|
|
318
|
+
*/
|
|
319
|
+
setSteerDrain(fn) {
|
|
320
|
+
this.steerDrain = fn;
|
|
321
|
+
}
|
|
312
322
|
getActiveRunId() {
|
|
313
323
|
return this._activeRunId;
|
|
314
324
|
}
|
|
@@ -2123,6 +2133,7 @@ export class Agent {
|
|
|
2123
2133
|
runDelegation: (request, signal) => self.runDelegation(request, signal),
|
|
2124
2134
|
readDelegation: (id) => self.readDelegation(id),
|
|
2125
2135
|
listDelegations: () => self.listDelegations(),
|
|
2136
|
+
drainSteerMessages: () => self.steerDrain?.() ?? [],
|
|
2126
2137
|
appendCompletedTurn: (user, asst) => self.appendCompletedTurn(user, asst),
|
|
2127
2138
|
discardAbortedTurn: (user) => self.discardAbortedTurn(user),
|
|
2128
2139
|
recordUsage: (usage, source, model, shape) => self.recordUsage(usage, source, model, shape),
|
|
@@ -31,6 +31,7 @@ export interface StallToolResult {
|
|
|
31
31
|
export declare const STALL_RESCUE_MAX_RESULTS = 8;
|
|
32
32
|
/** Max chars kept per tool output in the digest. */
|
|
33
33
|
export declare const STALL_RESCUE_MAX_CHARS_PER_RESULT = 1500;
|
|
34
|
+
export declare function detectReplyLanguageHint(userText: string): string | null;
|
|
34
35
|
/**
|
|
35
36
|
* Capture a tool result into a capped ring buffer (mutates `buffer`). Keeps the
|
|
36
37
|
* buffer bounded in BOTH count and per-entry size so a long turn can't blow
|
|
@@ -26,6 +26,22 @@
|
|
|
26
26
|
export const STALL_RESCUE_MAX_RESULTS = 8;
|
|
27
27
|
/** Max chars kept per tool output in the digest. */
|
|
28
28
|
export const STALL_RESCUE_MAX_CHARS_PER_RESULT = 1500;
|
|
29
|
+
/**
|
|
30
|
+
* Heuristic: detect the user's reply language from the original request so the
|
|
31
|
+
* synthesis prompt can carry an EXPLICIT language directive. The base system
|
|
32
|
+
* prompt's "reply in user's language" rule is normally enough, but after a
|
|
33
|
+
* stall the rescue path issues a NEW synthesis call whose extra directives
|
|
34
|
+
* ("use ONLY tool outputs", "no more tools") can crowd out the language rule
|
|
35
|
+
* — observed live (storyflow_ui session 22661c8de9f2): user wrote Vietnamese,
|
|
36
|
+
* the rescued answer came back in English. Returns a language name or null
|
|
37
|
+
* (skip the hint = treat as English-default).
|
|
38
|
+
*/
|
|
39
|
+
// Matches a single letter (or combining mark) of the Vietnamese alphabet that
// carries a diacritic. The letters are enumerated explicitly instead of using
// one wide code-point range: a range such as U+00C0..U+1EF9 also contains the
// math signs "×" / "÷", German "ü" / "ß", Spanish "ñ", and the whole Greek,
// Cyrillic, Hebrew, Arabic and Thai blocks, so any of those would be reported
// as Vietnamese.
//
// Character class, in order:
//   - Latin-1 vowels with grave / acute / circumflex / tilde, plus Ý ý
//   - Ă ă, Đ đ, Ĩ ĩ, Ũ ũ            (Latin Extended-A)
//   - Ơ ơ, Ư ư                      (Latin Extended-B)
//   - U+1EA0..U+1EF9                (Ạ .. ỹ, Latin Extended Additional)
//   - combining grave, acute, circumflex, tilde, breve, hook above, horn and
//     dot below, so decomposed (NFD) input is still recognised
//
// NOTE: letters Vietnamese shares with French / Portuguese / Italian (é, à, ã,
// ô, ...) still match — they cannot be excluded without missing short
// Vietnamese inputs such as "tìm". No `g` flag, so `.test()` stays stateless.
const VI_DIACRITIC_RE = /[\u00C0-\u00C3\u00C8-\u00CA\u00CC\u00CD\u00D2-\u00D5\u00D9\u00DA\u00DD\u00E0-\u00E3\u00E8-\u00EA\u00EC\u00ED\u00F2-\u00F5\u00F9\u00FA\u00FD\u0102\u0103\u0110\u0111\u0128\u0129\u0168\u0169\u01A0\u01A1\u01AF\u01B0\u1EA0-\u1EF9\u0300-\u0303\u0306\u0309\u031B\u0323]/;
|
|
40
|
+
export function detectReplyLanguageHint(userText) {
|
|
41
|
+
if (VI_DIACRITIC_RE.test(userText))
|
|
42
|
+
return "Vietnamese";
|
|
43
|
+
return null;
|
|
44
|
+
}
|
|
29
45
|
/**
|
|
30
46
|
* Capture a tool result into a capped ring buffer (mutates `buffer`). Keeps the
|
|
31
47
|
* buffer bounded in BOTH count and per-entry size so a long turn can't blow
|
|
@@ -44,7 +60,10 @@ export function pushStallToolResult(buffer, tool, rawText) {
|
|
|
44
60
|
*/
|
|
45
61
|
export function buildStallSynthesisMessages(baseMessages, userText, toolResults) {
|
|
46
62
|
const digest = toolResults.map((r, i) => `[${i + 1}] ${r.tool}:\n${r.text}`).join("\n\n");
|
|
47
|
-
const
|
|
63
|
+
const lang = detectReplyLanguageHint(userText);
|
|
64
|
+
const langLine = lang ? `Reply in ${lang} (the user wrote in ${lang}).\n\n` : "";
|
|
65
|
+
const content = langLine +
|
|
66
|
+
"The connection to the model stalled before it could finish its answer. " +
|
|
48
67
|
"You already ran the tools below this turn — use ONLY their outputs to give " +
|
|
49
68
|
"your best final answer now. Do NOT call any more tools.\n\n" +
|
|
50
69
|
`Original request:\n${userText}\n\n` +
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
|
-
import { attemptStallRescue, buildStallSynthesisMessages, pushStallToolResult, STALL_RESCUE_MAX_CHARS_PER_RESULT, STALL_RESCUE_MAX_RESULTS, } from "./stall-rescue.js";
|
|
2
|
+
import { attemptStallRescue, buildStallSynthesisMessages, detectReplyLanguageHint, pushStallToolResult, STALL_RESCUE_MAX_CHARS_PER_RESULT, STALL_RESCUE_MAX_RESULTS, } from "./stall-rescue.js";
|
|
3
3
|
describe("pushStallToolResult", () => {
|
|
4
4
|
it("caps the buffer count, keeping the most recent results", () => {
|
|
5
5
|
const buf = [];
|
|
@@ -34,6 +34,35 @@ describe("buildStallSynthesisMessages", () => {
|
|
|
34
34
|
// does not mutate the base array
|
|
35
35
|
expect(base.length).toBe(1);
|
|
36
36
|
});
|
|
37
|
+
it("prepends a Vietnamese reply directive when user wrote in Vietnamese", () => {
|
|
38
|
+
const out = buildStallSynthesisMessages([], "sửa thêm popup reload không xoá login", [
|
|
39
|
+
{ tool: "grep", text: "found" },
|
|
40
|
+
]);
|
|
41
|
+
const last = out[out.length - 1];
|
|
42
|
+
expect(last.content).toMatch(/^Reply in Vietnamese/);
|
|
43
|
+
expect(last.content).toContain("the user wrote in Vietnamese");
|
|
44
|
+
});
|
|
45
|
+
it("omits the language directive for plain English input", () => {
|
|
46
|
+
const out = buildStallSynthesisMessages([], "find a bug please", [{ tool: "grep", text: "found" }]);
|
|
47
|
+
const last = out[out.length - 1];
|
|
48
|
+
expect(last.content).not.toMatch(/^Reply in /);
|
|
49
|
+
});
|
|
50
|
+
});
|
|
51
|
+
describe("detectReplyLanguageHint", () => {
|
|
52
|
+
it("detects Vietnamese from any diacritic", () => {
|
|
53
|
+
expect(detectReplyLanguageHint("tìm")).toBe("Vietnamese");
|
|
54
|
+
expect(detectReplyLanguageHint("sửa thêm cho tôi")).toBe("Vietnamese");
|
|
55
|
+
expect(detectReplyLanguageHint("Cập nhật")).toBe("Vietnamese");
|
|
56
|
+
});
|
|
57
|
+
it("returns null for ASCII / English text", () => {
|
|
58
|
+
expect(detectReplyLanguageHint("commit and push please")).toBeNull();
|
|
59
|
+
expect(detectReplyLanguageHint("")).toBeNull();
|
|
60
|
+
});
|
|
61
|
+
it("returns null for a VN-only word with NO diacritics (the heuristic is conservative)", () => {
|
|
62
|
+
// 'commit push de auto ci/cd len prod nhe' — VN sans diacritics is indistinguishable
|
|
63
|
+
// from English under this heuristic. Acceptable: false negative, never false positive.
|
|
64
|
+
expect(detectReplyLanguageHint("commit push de auto ci/cd len prod nhe")).toBeNull();
|
|
65
|
+
});
|
|
37
66
|
});
|
|
38
67
|
describe("attemptStallRescue", () => {
|
|
39
68
|
it("returns null when there are no tool results (nothing to synthesize)", async () => {
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/orchestrator/steer-inbox.ts
|
|
3
|
+
*
|
|
4
|
+
* Live-queue steering — pure decision helper.
|
|
5
|
+
*
|
|
6
|
+
* When the user types a message while a turn is streaming, the UI queue is
|
|
7
|
+
* drained at the next prepareStep boundary and the messages are injected into
|
|
8
|
+
* the running turn as `user` interjections (Claude-Code-style steering). This
|
|
9
|
+
* module holds the PURE mapping/gating decision so it is unit-testable in
|
|
10
|
+
* isolation from the orchestrator loop. The orchestrator owns the side effects
|
|
11
|
+
* (draining the queue, the pendingSteers accumulator, emitting telemetry).
|
|
12
|
+
*/
|
|
13
|
+
import type { ModelMessage } from "ai";
|
|
14
|
+
/** Inputs to the steer-injection decision — see {@link planSteerInjection}. */
|
|
15
|
+
export interface SteerInjectionState {
|
|
16
|
+
/** Raw messages drained from the UI steer queue this step. */
|
|
17
|
+
drained: {
|
|
18
|
+
text: string;
|
|
19
|
+
}[];
|
|
20
|
+
/** True on a genuine user cancel — never steer an aborted turn. */
|
|
21
|
+
aborted: boolean;
|
|
22
|
+
/** Feature flag (getSteerInjectionEnabled). */
|
|
23
|
+
enabled: boolean;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Decide which (if any) drained messages to inject into the running turn.
|
|
27
|
+
*
|
|
28
|
+
* Returns user-role ModelMessages in FIFO order, trimmed, with empty/whitespace
|
|
29
|
+
* entries dropped. Returns `[]` when the feature is disabled or the turn was
|
|
30
|
+
* cancelled. Pure (no side effects).
|
|
31
|
+
*/
|
|
32
|
+
export declare function planSteerInjection(s: SteerInjectionState): ModelMessage[];
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Decide which (if any) drained messages to inject into the running turn.
|
|
3
|
+
*
|
|
4
|
+
* Returns user-role ModelMessages in FIFO order, trimmed, with empty/whitespace
|
|
5
|
+
* entries dropped. Returns `[]` when the feature is disabled or the turn was
|
|
6
|
+
* cancelled. Pure (no side effects).
|
|
7
|
+
*/
|
|
8
|
+
export function planSteerInjection(s) {
|
|
9
|
+
if (!s.enabled || s.aborted)
|
|
10
|
+
return [];
|
|
11
|
+
const out = [];
|
|
12
|
+
for (const m of s.drained) {
|
|
13
|
+
const text = m.text?.trim();
|
|
14
|
+
if (!text)
|
|
15
|
+
continue;
|
|
16
|
+
out.push({ role: "user", content: text });
|
|
17
|
+
}
|
|
18
|
+
return out;
|
|
19
|
+
}
|
|
20
|
+
//# sourceMappingURL=steer-inbox.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { planSteerInjection } from "./steer-inbox.js";
|
|
3
|
+
describe("planSteerInjection", () => {
|
|
4
|
+
// A valid baseline: feature enabled, not cancelled, one queued message.
|
|
5
|
+
const base = (over = {}) => ({
|
|
6
|
+
drained: [{ text: "also add tests" }],
|
|
7
|
+
aborted: false,
|
|
8
|
+
enabled: true,
|
|
9
|
+
...over,
|
|
10
|
+
});
|
|
11
|
+
it("maps drained text into a single user ModelMessage", () => {
|
|
12
|
+
const out = planSteerInjection(base());
|
|
13
|
+
expect(out).toEqual([{ role: "user", content: "also add tests" }]);
|
|
14
|
+
});
|
|
15
|
+
it("preserves FIFO order across multiple drained messages", () => {
|
|
16
|
+
const out = planSteerInjection(base({ drained: [{ text: "a" }, { text: "b" }] }));
|
|
17
|
+
expect(out.map((m) => m.content)).toEqual(["a", "b"]);
|
|
18
|
+
});
|
|
19
|
+
it("returns [] when the feature is disabled", () => {
|
|
20
|
+
expect(planSteerInjection(base({ enabled: false }))).toEqual([]);
|
|
21
|
+
});
|
|
22
|
+
it("returns [] over a genuine user cancel (never steer an aborted turn)", () => {
|
|
23
|
+
expect(planSteerInjection(base({ aborted: true }))).toEqual([]);
|
|
24
|
+
});
|
|
25
|
+
it("returns [] when nothing was drained", () => {
|
|
26
|
+
expect(planSteerInjection(base({ drained: [] }))).toEqual([]);
|
|
27
|
+
});
|
|
28
|
+
it("skips empty / whitespace-only messages and trims the rest", () => {
|
|
29
|
+
const out = planSteerInjection(base({ drained: [{ text: " " }, { text: " keep me " }, { text: "" }] }));
|
|
30
|
+
expect(out).toEqual([{ role: "user", content: "keep me" }]);
|
|
31
|
+
});
|
|
32
|
+
});
|
|
33
|
+
//# sourceMappingURL=steer-inbox.test.js.map
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/orchestrator/tool-loop-askcard.ts
|
|
3
|
+
*
|
|
4
|
+
* Pure helper that computes the tool-loop-cap askcard tier (label set + default
|
|
5
|
+
* action) from the current step number and the resolved natural ceiling for
|
|
6
|
+
* the (taskType, size) matrix.
|
|
7
|
+
*
|
|
8
|
+
* Four tiers (the 0.5× boundary belongs to the higher tier, "normal"; the 2× and 5× boundaries belong to the lower tier):
|
|
9
|
+
* - early : step < 0.5 × ceiling — a transient fixation. Default Continue.
|
|
10
|
+
* - normal : 0.5× ≤ step ≤ 2× ceiling — used cheap budget; Default Stop.
|
|
11
|
+
* - overBudget : 2× < step ≤ 5× ceiling — Continue still available but the
|
|
12
|
+
* label carries the overage multiplier so the cost of
|
|
13
|
+
* continuing is visible at decision time. Default Stop.
|
|
14
|
+
* - extreme : step > 5× ceiling — Stop is moved FIRST in the option
|
|
15
|
+
* array (Enter = Stop) and Continue is labelled "expensive".
|
|
16
|
+
* Default Stop (now at index 0).
|
|
17
|
+
*
|
|
18
|
+
* Live miss this tier set fixes (session 1f29e238, step 77/6 = 12.8×): extreme
|
|
19
|
+
* tier put Stop first with a warning — good. But the storyflow_ui session
|
|
20
|
+
* 22661c8de9f2 ran step 29/12 = 2.4× — the OLD code had no middle warning, so
|
|
21
|
+
* the askcard showed a plain "Continue (let agent try)" with no signal that
|
|
22
|
+
* continuing costs more. User chose Continue, the model stalled 4 tool-calls
|
|
23
|
+
* later, and forced-finalize had to rescue a degraded answer.
|
|
24
|
+
*
|
|
25
|
+
* Pure — no React, no DOM, no side effects. Unit-testable in isolation.
|
|
26
|
+
*/
|
|
27
|
+
export type LoopCapTier = "early" | "normal" | "overBudget" | "extreme";
|
|
28
|
+
export interface LoopCapAskcardOptions {
|
|
29
|
+
/** AI-SDK step number when the pattern fired. */
|
|
30
|
+
stepNumber: number;
|
|
31
|
+
/**
|
|
32
|
+
* Natural step ceiling for (taskType, size). Optional — when undefined we
|
|
33
|
+
* cannot compute multipliers, so the askcard falls back to the legacy
|
|
34
|
+
* step-threshold heuristic (0 < step ≤ 15 = early, else normal).
|
|
35
|
+
*/
|
|
36
|
+
naturalCeiling?: number;
|
|
37
|
+
}
|
|
38
|
+
export interface LoopCapAskcardLayout {
|
|
39
|
+
tier: LoopCapTier;
|
|
40
|
+
/** Index into `optionLabels` of the option pre-selected (Enter applies). */
|
|
41
|
+
defaultIndex: 0 | 1;
|
|
42
|
+
/**
|
|
43
|
+
* Exactly two labels in render order. The first is at index 0, the second
|
|
44
|
+
* at index 1 — order matters for the askcard UI (arrow-key navigation,
|
|
45
|
+
* Enter-applies-default).
|
|
46
|
+
*/
|
|
47
|
+
optionLabels: [continueOrStop: string, stopOrContinue: string];
|
|
48
|
+
/** Values parallel to optionLabels — what the resolver returns to the loop. */
|
|
49
|
+
optionValues: [string, string];
|
|
50
|
+
/**
|
|
51
|
+
* x.x string (e.g. "2.4") when the tier is overBudget or extreme, else
|
|
52
|
+
* null. Caller can also surface this in the askcard context message.
|
|
53
|
+
*/
|
|
54
|
+
overageMultiplier: string | null;
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* Decide the askcard layout for a tool-loop-cap pattern hit. Pure.
|
|
58
|
+
*/
|
|
59
|
+
export declare function planLoopCapAskcard(opts: LoopCapAskcardOptions): LoopCapAskcardLayout;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/orchestrator/tool-loop-askcard.ts
|
|
3
|
+
*
|
|
4
|
+
* Pure helper that computes the tool-loop-cap askcard tier (label set + default
|
|
5
|
+
* action) from the current step number and the resolved natural ceiling for
|
|
6
|
+
* the (taskType, size) matrix.
|
|
7
|
+
*
|
|
8
|
+
* Four tiers (the 0.5× boundary belongs to the higher tier, "normal"; the 2× and 5× boundaries belong to the lower tier):
|
|
9
|
+
* - early : step < 0.5 × ceiling — a transient fixation. Default Continue.
|
|
10
|
+
* - normal : 0.5× ≤ step ≤ 2× ceiling — used cheap budget; Default Stop.
|
|
11
|
+
* - overBudget : 2× < step ≤ 5× ceiling — Continue still available but the
|
|
12
|
+
* label carries the overage multiplier so the cost of
|
|
13
|
+
* continuing is visible at decision time. Default Stop.
|
|
14
|
+
* - extreme : step > 5× ceiling — Stop is moved FIRST in the option
|
|
15
|
+
* array (Enter = Stop) and Continue is labelled "expensive".
|
|
16
|
+
* Default Stop (now at index 0).
|
|
17
|
+
*
|
|
18
|
+
* Live miss this tier set fixes (session 1f29e238, step 77/6 = 12.8×): extreme
|
|
19
|
+
* tier put Stop first with a warning — good. But the storyflow_ui session
|
|
20
|
+
* 22661c8de9f2 ran step 29/12 = 2.4× — the OLD code had no middle warning, so
|
|
21
|
+
* the askcard showed a plain "Continue (let agent try)" with no signal that
|
|
22
|
+
* continuing costs more. User chose Continue, the model stalled 4 tool-calls
|
|
23
|
+
* later, and forced-finalize had to rescue a degraded answer.
|
|
24
|
+
*
|
|
25
|
+
* Pure — no React, no DOM, no side effects. Unit-testable in isolation.
|
|
26
|
+
*/
|
|
27
|
+
/** Labels for the tiers that carry no warning, in render order (Continue first, Stop second). */
const NORMAL_LABELS = Object.freeze(["Continue (let agent try)", "Stop and answer"]);
/** Resolver values parallel to NORMAL_LABELS. */
const NORMAL_VALUES = Object.freeze(["continue", "stop"]);
/**
 * Decide the askcard layout for a tool-loop-cap pattern hit. Pure.
 *
 * Tier selection, where ratio = stepNumber / naturalCeiling:
 *   - ratio > 5          → "extreme"    (Stop listed first and pre-selected)
 *   - 2 < ratio ≤ 5      → "overBudget" (Continue first, Stop pre-selected)
 *   - ratio < 0.5        → "early"      (Continue pre-selected)
 *   - otherwise          → "normal"     (Stop pre-selected)
 * When naturalCeiling is missing, zero, or negative, no ratio can be computed
 * and the tier falls back to a fixed step threshold (see below).
 *
 * Every returned layout owns its `optionLabels` / `optionValues` arrays, so a
 * caller that mutates one layout cannot affect later calls.
 *
 * @param opts `stepNumber` (step at which the pattern fired) and an optional
 *             `naturalCeiling` (natural step budget for the task).
 * @returns Layout with `tier`, `defaultIndex`, two `optionLabels`, the parallel
 *          `optionValues`, and `overageMultiplier` (one-decimal string for the
 *          overBudget / extreme tiers, otherwise null).
 */
export function planLoopCapAskcard(opts) {
    const { stepNumber, naturalCeiling } = opts;
    // Builder for the tiers that use the plain labels. The arrays are copied
    // so the module-level constants are never handed out by reference.
    const plainLayout = (tier, defaultIndex) => ({
        tier,
        defaultIndex,
        optionLabels: [...NORMAL_LABELS],
        optionValues: [...NORMAL_VALUES],
        overageMultiplier: null,
    });
    // No usable ceiling → cannot compute multipliers. Fall back to a static
    // threshold: 0 < step ≤ 15 counts as "early" (default Continue); anything
    // else, including step 0, is "normal" (default Stop).
    if (!naturalCeiling || naturalCeiling <= 0) {
        return stepNumber > 0 && stepNumber <= 15 ? plainLayout("early", 0) : plainLayout("normal", 1);
    }
    const ratio = stepNumber / naturalCeiling;
    const multiplier = ratio.toFixed(1);
    if (ratio > 5) {
        // Order is reversed here on purpose: Stop sits at index 0 so Enter stops.
        return {
            tier: "extreme",
            defaultIndex: 0,
            optionLabels: ["Stop and answer (recommended)", `Continue anyway (⚠ ${multiplier}× over budget — expensive)`],
            optionValues: ["stop", "continue"],
            overageMultiplier: multiplier,
        };
    }
    if (ratio > 2) {
        return {
            tier: "overBudget",
            defaultIndex: 1,
            optionLabels: [
                `Continue (⚠ ${multiplier}× past natural budget — quality may degrade)`,
                "Stop and answer (recommended)",
            ],
            optionValues: [...NORMAL_VALUES],
            overageMultiplier: multiplier,
        };
    }
    // Strict "<": a ratio of exactly 0.5 is already "normal".
    return ratio < 0.5 ? plainLayout("early", 0) : plainLayout("normal", 1);
}
|
|
86
|
+
//# sourceMappingURL=tool-loop-askcard.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { planLoopCapAskcard } from "./tool-loop-askcard.js";
|
|
3
|
+
describe("planLoopCapAskcard", () => {
|
|
4
|
+
it("early tier (< 0.5× ceiling): default Continue, no warning", () => {
|
|
5
|
+
const r = planLoopCapAskcard({ stepNumber: 5, naturalCeiling: 12 });
|
|
6
|
+
expect(r.tier).toBe("early");
|
|
7
|
+
expect(r.defaultIndex).toBe(0);
|
|
8
|
+
expect(r.optionLabels[0]).toMatch(/Continue/);
|
|
9
|
+
expect(r.optionValues[0]).toBe("continue");
|
|
10
|
+
expect(r.overageMultiplier).toBeNull();
|
|
11
|
+
// no warning emoji on the Continue label
|
|
12
|
+
expect(r.optionLabels[0]).not.toMatch(/⚠/);
|
|
13
|
+
});
|
|
14
|
+
it("normal tier (0.5×–2× ceiling): default Stop, no warning, Continue first", () => {
|
|
15
|
+
const r = planLoopCapAskcard({ stepNumber: 18, naturalCeiling: 12 });
|
|
16
|
+
expect(r.tier).toBe("normal");
|
|
17
|
+
expect(r.defaultIndex).toBe(1);
|
|
18
|
+
expect(r.optionLabels[0]).toBe("Continue (let agent try)");
|
|
19
|
+
expect(r.optionLabels[1]).toBe("Stop and answer");
|
|
20
|
+
expect(r.overageMultiplier).toBeNull();
|
|
21
|
+
});
|
|
22
|
+
it("overBudget tier (2×–5× ceiling): Continue carries the overage multiplier, default Stop", () => {
|
|
23
|
+
// The storyflow_ui case: step 29 / ceiling 12 = 2.4×
|
|
24
|
+
const r = planLoopCapAskcard({ stepNumber: 29, naturalCeiling: 12 });
|
|
25
|
+
expect(r.tier).toBe("overBudget");
|
|
26
|
+
expect(r.defaultIndex).toBe(1);
|
|
27
|
+
expect(r.optionLabels[0]).toMatch(/⚠ 2\.4× past natural budget/);
|
|
28
|
+
expect(r.optionLabels[1]).toMatch(/Stop and answer \(recommended\)/);
|
|
29
|
+
expect(r.overageMultiplier).toBe("2.4");
|
|
30
|
+
// order preserved: Continue at 0, Stop at 1
|
|
31
|
+
expect(r.optionValues).toEqual(["continue", "stop"]);
|
|
32
|
+
});
|
|
33
|
+
it("extreme tier (> 5× ceiling): Stop FIRST in the array, Continue labelled expensive", () => {
|
|
34
|
+
// session 1f29e238 — step 77 / ceiling 6 = 12.8×
|
|
35
|
+
const r = planLoopCapAskcard({ stepNumber: 77, naturalCeiling: 6 });
|
|
36
|
+
expect(r.tier).toBe("extreme");
|
|
37
|
+
expect(r.defaultIndex).toBe(0);
|
|
38
|
+
expect(r.optionLabels[0]).toMatch(/Stop and answer \(recommended\)/);
|
|
39
|
+
expect(r.optionLabels[1]).toMatch(/⚠ 12\.8× over budget — expensive/);
|
|
40
|
+
expect(r.optionValues).toEqual(["stop", "continue"]); // ORDER REVERSED at extreme
|
|
41
|
+
expect(r.overageMultiplier).toBe("12.8");
|
|
42
|
+
});
|
|
43
|
+
it("tier boundaries are open-on-the-lower-side (ratio==2 → normal; ratio==5 → overBudget; ratio==0.5 → normal)", () => {
|
|
44
|
+
// ratio === 2.0 exactly → still normal (the > 2 gate excludes 2.0)
|
|
45
|
+
expect(planLoopCapAskcard({ stepNumber: 24, naturalCeiling: 12 }).tier).toBe("normal");
|
|
46
|
+
// ratio === 5.0 exactly → still overBudget (the > 5 gate excludes 5.0)
|
|
47
|
+
expect(planLoopCapAskcard({ stepNumber: 60, naturalCeiling: 12 }).tier).toBe("overBudget");
|
|
48
|
+
// ratio === 0.5 exactly → normal (the < 0.5 gate excludes 0.5)
|
|
49
|
+
expect(planLoopCapAskcard({ stepNumber: 6, naturalCeiling: 12 }).tier).toBe("normal");
|
|
50
|
+
});
|
|
51
|
+
it("falls back to step-threshold heuristic when naturalCeiling is missing", () => {
|
|
52
|
+
const early = planLoopCapAskcard({ stepNumber: 8 });
|
|
53
|
+
expect(early.tier).toBe("early");
|
|
54
|
+
expect(early.defaultIndex).toBe(0);
|
|
55
|
+
const normal = planLoopCapAskcard({ stepNumber: 22 });
|
|
56
|
+
expect(normal.tier).toBe("normal");
|
|
57
|
+
expect(normal.defaultIndex).toBe(1);
|
|
58
|
+
// boundary: step === 15 → still early
|
|
59
|
+
expect(planLoopCapAskcard({ stepNumber: 15 }).tier).toBe("early");
|
|
60
|
+
// step === 16 → normal
|
|
61
|
+
expect(planLoopCapAskcard({ stepNumber: 16 }).tier).toBe("normal");
|
|
62
|
+
// step === 0 → normal (no early credit for nothing)
|
|
63
|
+
expect(planLoopCapAskcard({ stepNumber: 0 }).tier).toBe("normal");
|
|
64
|
+
});
|
|
65
|
+
it("treats naturalCeiling=0 the same as undefined (no multiplier possible)", () => {
|
|
66
|
+
const r = planLoopCapAskcard({ stepNumber: 30, naturalCeiling: 0 });
|
|
67
|
+
expect(r.overageMultiplier).toBeNull();
|
|
68
|
+
expect(r.tier).toBe("normal");
|
|
69
|
+
});
|
|
70
|
+
});
|
|
71
|
+
//# sourceMappingURL=tool-loop-askcard.test.js.map
|
|
@@ -102,7 +102,11 @@ export async function layer4Gsd(ctx) {
|
|
|
102
102
|
(ctx.taskType === "general" && ctx.intentKind === "task") ||
|
|
103
103
|
(isQuestionLike(ctx.raw) && !isImplementationIntent(ctx.raw));
|
|
104
104
|
const ecosystem = mentionsEcosystemScope(ctx.raw);
|
|
105
|
-
|
|
105
|
+
// Heuristic: VN diacritics → user wrote Vietnamese → re-anchor language rule
|
|
106
|
+
// inside the directive (storyflow_ui session 22661c8de9f2 — base rule
|
|
107
|
+
// crowded out by brevity/FIX-FIRST directives).
|
|
108
|
+
// The Vietnamese letters are listed explicitly. A code-point range such as
// à-ỹ (U+00E0–U+1EF9) also spans Greek, Cyrillic, Hebrew, Arabic, Thai and
// other scripts, and would tag prompts in those scripts as Vietnamese.
// Input is NFC-normalised so decomposed text (base letter + combining marks)
// still matches; the `i` flag covers the upper-case forms.
const replyLanguage = /[àáâãèéêìíòóôõùúýăđĩũơư\u1EA0-\u1EF9]/i.test(String(ctx.raw).normalize("NFC")) ? "Vietnamese" : undefined;
|
|
109
|
+
const directive = buildDirective({ complexity, phase, grayAreas, informational, ecosystem, replyLanguage });
|
|
106
110
|
const budgetChars = Math.floor(ctx.tokenBudget * DIRECTIVE_BUDGET_FRACTION);
|
|
107
111
|
const trimmed = truncateToBudget(directive.text, budgetChars);
|
|
108
112
|
return {
|
package/dist/src/ui/app.js
CHANGED
|
@@ -13,6 +13,7 @@ import { POPULAR_MCP_CATALOG } from "../mcp/catalog.js";
|
|
|
13
13
|
import { parseEnvLines, parseHeaderLines } from "../mcp/parse-headers.js";
|
|
14
14
|
import { toMcpServerId, validateMcpServerConfig } from "../mcp/validate.js";
|
|
15
15
|
import { Agent } from "../orchestrator/orchestrator.js";
|
|
16
|
+
import { planLoopCapAskcard } from "../orchestrator/tool-loop-askcard.js";
|
|
16
17
|
import { getConfiguredProviders, setKeyForProvider } from "../providers/keychain.js";
|
|
17
18
|
import { buildIdealContinuationPrompt } from "../scaffold/continuation-prompt.js";
|
|
18
19
|
import { continueAsCouncil } from "../scaffold/continue-as-council.js";
|
|
@@ -24,7 +25,7 @@ import { processAtMentions } from "../utils/at-mentions.js";
|
|
|
24
25
|
import { readClipboardImage } from "../utils/clipboard-image.js";
|
|
25
26
|
import { FileIndex } from "../utils/file-index.js";
|
|
26
27
|
import { copyTextToHostClipboard, readTextFromHostClipboard } from "../utils/host-clipboard.js";
|
|
27
|
-
import { getApiKey, getCurrentModel, getTelegramBotToken, isModelDisabled, isReservedSubagentName, loadMcpServers, loadPaymentSettings, loadUserSettings, loadValidSubAgents, saveApprovedTelegramUserId, saveMcpServers, savePaymentSettings, saveProjectSettings, saveUserSettings, setDefaultProvider, setModelDisabled, setProviderDisabled, } from "../utils/settings.js";
|
|
28
|
+
import { getApiKey, getCurrentModel, getSteerInjectionEnabled, getTelegramBotToken, isModelDisabled, isReservedSubagentName, loadMcpServers, loadPaymentSettings, loadUserSettings, loadValidSubAgents, saveApprovedTelegramUserId, saveMcpServers, savePaymentSettings, saveProjectSettings, saveUserSettings, setDefaultProvider, setModelDisabled, setProviderDisabled, } from "../utils/settings.js";
|
|
28
29
|
import { discoverSkills, formatSkillsForChat } from "../utils/skills.js";
|
|
29
30
|
import { formatSubagentName } from "../utils/subagent-display.js";
|
|
30
31
|
import { checkForUpdate, runUpdate } from "../utils/update-checker.js";
|
|
@@ -483,6 +484,11 @@ export function App({ agent, startupConfig, initialMessage, onExit }) {
|
|
|
483
484
|
pushToast(lvl, text);
|
|
484
485
|
return;
|
|
485
486
|
}
|
|
487
|
+
if (e.kind === "steer-inject") {
|
|
488
|
+
const count = typeof e.count === "number" ? e.count : 1;
|
|
489
|
+
pushToast("info", `↳ steering applied (${count} message${count === 1 ? "" : "s"})`);
|
|
490
|
+
return;
|
|
491
|
+
}
|
|
486
492
|
if (e.kind === "ee-timeout" || e.kind === "ee-error") {
|
|
487
493
|
const source = typeof e.source === "string" ? e.source : "unknown";
|
|
488
494
|
const kind = e.kind === "ee-timeout" ? "timeout" : "error";
|
|
@@ -539,6 +545,23 @@ export function App({ agent, startupConfig, initialMessage, onExit }) {
|
|
|
539
545
|
}
|
|
540
546
|
return undefined;
|
|
541
547
|
}, [handleHarnessEvent]);
|
|
548
|
+
// Live-queue steering: expose the mid-turn queue to the running turn so
|
|
549
|
+
// prepareStep can inject typed-while-busy messages at the next step boundary
|
|
550
|
+
// instead of deferring them to a new turn. Disabled → callback not wired, so
|
|
551
|
+
// finishTurnProcessing drains the queue post-turn exactly as before.
|
|
552
|
+
useEffect(() => {
|
|
553
|
+
if (!getSteerInjectionEnabled())
|
|
554
|
+
return;
|
|
555
|
+
agent.setSteerDrain(() => {
|
|
556
|
+
if (queuedMessagesRef.current.length === 0)
|
|
557
|
+
return [];
|
|
558
|
+
const drained = queuedMessagesRef.current.map((m) => ({ text: m.text }));
|
|
559
|
+
queuedMessagesRef.current = [];
|
|
560
|
+
setQueuedMessages([]);
|
|
561
|
+
return drained;
|
|
562
|
+
});
|
|
563
|
+
return () => agent.setSteerDrain(null);
|
|
564
|
+
}, [agent]);
|
|
542
565
|
const dismissToast = useCallback(() => setActiveToast(null), []);
|
|
543
566
|
// ─── /Phase 21 toast subscriber ────────────────────────────────────────────
|
|
544
567
|
const { model, setModel, showModelPicker, setShowModelPicker, modelPickerIndex, setModelPickerIndex, modelSearchQuery, setModelSearchQuery, configuredProviders, setConfiguredProviders, disabledProviders, setDisabledProvidersState, defaultProvider, setDefaultProviderState, disabledModels, setDisabledModelsState, modelPickerFocus, setModelPickerFocus, providerChipIndex, setProviderChipIndex, reasoningEffortByModel, setReasoningEffortByModel, } = useModelPicker(agent.getModel());
|
|
@@ -1779,50 +1802,43 @@ export function App({ agent, startupConfig, initialMessage, onExit }) {
|
|
|
1779
1802
|
const isPattern = info.kind === "pattern";
|
|
1780
1803
|
const qid = isPattern ? `tool-pattern-loop-${Date.now()}` : `tool-loop-cap-${info.stepNumber}-${Date.now()}`;
|
|
1781
1804
|
toolLoopCapResolversRef.current.set(qid, resolve);
|
|
1782
|
-
//
|
|
1783
|
-
//
|
|
1784
|
-
//
|
|
1785
|
-
//
|
|
1786
|
-
//
|
|
1787
|
-
//
|
|
1788
|
-
//
|
|
1789
|
-
//
|
|
1805
|
+
// Tier-aware askcard layout (planLoopCapAskcard) — 4 tiers:
|
|
1806
|
+
// early (< 0.5× ceiling) → Default Continue, no warning
|
|
1807
|
+
// normal (0.5×–2× ceiling) → Default Stop, no warning
|
|
1808
|
+
// overBudget (2×–5× ceiling) → Default Stop, Continue label carries
|
|
1809
|
+
// the overage multiplier so cost is
|
|
1810
|
+
// visible (storyflow_ui 22661c8de9f2:
|
|
1811
|
+
// 2.4× hit had no warning before)
|
|
1812
|
+
// extreme (> 5× ceiling) → Stop FIRST in the array (Enter=Stop),
|
|
1813
|
+
// Continue labelled "expensive"
|
|
1814
|
+
// (session 1f29e238: 12.8× past ceiling)
|
|
1790
1815
|
const patternStep = isPattern ? info.stepNumber : 0;
|
|
1791
1816
|
const patternCeiling = isPattern ? info.naturalCeiling : undefined;
|
|
1792
|
-
const
|
|
1793
|
-
? patternStep
|
|
1794
|
-
:
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
|
|
1801
|
-
const patternExtreme = patternCeiling !== undefined && patternCeiling > 0 && patternStep > patternCeiling * 5;
|
|
1802
|
-
const overageMultiplier = patternExtreme && patternCeiling ? (patternStep / patternCeiling).toFixed(1) : null;
|
|
1803
|
-
const patternDefaultIdx = patternEarly ? 0 : patternExtreme ? 0 : 1;
|
|
1804
|
-
const patternOptions = patternExtreme
|
|
1817
|
+
const layout = isPattern
|
|
1818
|
+
? planLoopCapAskcard({ stepNumber: patternStep, naturalCeiling: patternCeiling })
|
|
1819
|
+
: null;
|
|
1820
|
+
const patternEarly = layout?.tier === "early";
|
|
1821
|
+
const patternOverBudget = layout?.tier === "overBudget";
|
|
1822
|
+
const patternExtreme = layout?.tier === "extreme";
|
|
1823
|
+
const overageMultiplier = layout?.overageMultiplier ?? null;
|
|
1824
|
+
const patternDefaultIdx = layout?.defaultIndex ?? 0;
|
|
1825
|
+
const patternOptions = layout
|
|
1805
1826
|
? [
|
|
1806
|
-
{ label:
|
|
1807
|
-
{
|
|
1808
|
-
label: `Continue anyway (⚠ ${overageMultiplier}× over budget — expensive)`,
|
|
1809
|
-
value: "continue",
|
|
1810
|
-
kind: "choice",
|
|
1811
|
-
},
|
|
1827
|
+
{ label: layout.optionLabels[0], value: layout.optionValues[0], kind: "choice" },
|
|
1828
|
+
{ label: layout.optionLabels[1], value: layout.optionValues[1], kind: "choice" },
|
|
1812
1829
|
]
|
|
1813
|
-
: [
|
|
1814
|
-
{ label: "Continue (let agent try)", value: "continue", kind: "choice" },
|
|
1815
|
-
{ label: "Stop and answer", value: "stop", kind: "choice" },
|
|
1816
|
-
];
|
|
1830
|
+
: [];
|
|
1817
1831
|
const question = isPattern
|
|
1818
1832
|
? {
|
|
1819
1833
|
questionId: qid,
|
|
1820
1834
|
question: `Tool \`${info.toolName}\` đã chạy ${info.count}/${info.windowSize} lần với args gần giống (step ${info.stepNumber}${patternCeiling ? `/${patternCeiling}` : ""}) — có thể đang loop. Tiếp tục?`,
|
|
1821
1835
|
context: patternExtreme
|
|
1822
1836
|
? `EXTREME OVERAGE — ${overageMultiplier}× past natural budget. Continuing has historically not converged in this regime (see session 1f29e238: 8× over budget, still failed). Stop returns the agent's best answer with current context.`
|
|
1823
|
-
:
|
|
1824
|
-
?
|
|
1825
|
-
:
|
|
1837
|
+
: patternOverBudget
|
|
1838
|
+
? `Past natural budget — ${overageMultiplier}× the typical step count for this task type. Continuing may still converge but quality often degrades (longer compaction, stale tool results, forced-finalize on stall). Stop returns the agent's best answer with current context.`
|
|
1839
|
+
: patternEarly
|
|
1840
|
+
? "Continue lets the agent keep trying — likely the right call this early in the run. Stop returns the agent's best answer with current context."
|
|
1841
|
+
: "You're past the natural budget for this task type. Stop usually recovers a clean answer; Continue keeps spending tokens.",
|
|
1826
1842
|
isRequired: true,
|
|
1827
1843
|
phase: "tool-loop-cap",
|
|
1828
1844
|
options: patternOptions,
|
|
@@ -330,6 +330,16 @@ export declare function getProviderStallTimeoutMs(): number;
|
|
|
330
330
|
* Default 1. Env override: MUONROI_PROVIDER_STALL_RETRIES.
|
|
331
331
|
*/
|
|
332
332
|
export declare function getProviderStallRetries(): number;
|
|
333
|
+
/**
|
|
334
|
+
* Live-queue steering: when true, a message typed while a turn is streaming is
|
|
335
|
+
* injected into the running turn at the next prepareStep boundary (as a `user`
|
|
336
|
+
* interjection) instead of waiting for the turn to finish and running as a new
|
|
337
|
+
* turn. When false, the legacy deferred-queue behaviour is preserved (the
|
|
338
|
+
* message runs only after the current turn completes). House convention for a
|
|
339
|
+
* default-true boolean knob: only an explicit "0" disables; unset/blank/any
|
|
340
|
+
* other value = enabled. Env override: MUONROI_STEER_INJECTION.
|
|
341
|
+
*/
|
|
342
|
+
export declare function getSteerInjectionEnabled(): boolean;
|
|
333
343
|
/**
|
|
334
344
|
* Phase B3 — threshold (in chars of cumulative message content) above which
|
|
335
345
|
* the sub-agent `prepareStep` compactor rewrites older tool_result parts
|
|
@@ -714,6 +714,18 @@ export function getProviderStallRetries() {
|
|
|
714
714
|
}
|
|
715
715
|
return 1;
|
|
716
716
|
}
|
|
717
|
+
/**
 * Reports whether live-queue steering is switched on.
 *
 * With steering on, a message typed while a turn is streaming is injected into
 * the running turn at the next prepareStep boundary as a `user` interjection.
 * With steering off, the legacy deferred queue applies and the message runs
 * only after the current turn completes.
 *
 * The knob is opt-out and is read from the MUONROI_STEER_INJECTION environment
 * variable on every call: only the exact string "0" disables it. An unset,
 * blank, or any other value leaves it enabled.
 *
 * @returns false when MUONROI_STEER_INJECTION is exactly "0", otherwise true.
 */
export function getSteerInjectionEnabled() {
    const flag = process.env.MUONROI_STEER_INJECTION;
    if (flag === "0") {
        return false;
    }
    return true;
}
|
|
717
729
|
/**
|
|
718
730
|
* Phase B3 — threshold (in chars of cumulative message content) above which
|
|
719
731
|
* the sub-agent `prepareStep` compactor rewrites older tool_result parts
|
|
@@ -188,4 +188,25 @@ describe("getProviderStallRetries", () => {
|
|
|
188
188
|
}
|
|
189
189
|
});
|
|
190
190
|
});
|
|
191
|
+
describe("getSteerInjectionEnabled", () => {
|
|
192
|
+
it("defaults to true when the env var is unset or blank", async () => {
|
|
193
|
+
vi.unstubAllEnvs();
|
|
194
|
+
const { getSteerInjectionEnabled } = await import("./settings.js");
|
|
195
|
+
expect(getSteerInjectionEnabled()).toBe(true);
|
|
196
|
+
vi.stubEnv("MUONROI_STEER_INJECTION", "");
|
|
197
|
+
expect(getSteerInjectionEnabled()).toBe(true);
|
|
198
|
+
});
|
|
199
|
+
it("returns false only for an explicit '0'", async () => {
|
|
200
|
+
const { getSteerInjectionEnabled } = await import("./settings.js");
|
|
201
|
+
vi.stubEnv("MUONROI_STEER_INJECTION", "0");
|
|
202
|
+
expect(getSteerInjectionEnabled()).toBe(false);
|
|
203
|
+
});
|
|
204
|
+
it("returns true for '1' and any other non-'0' value", async () => {
|
|
205
|
+
const { getSteerInjectionEnabled } = await import("./settings.js");
|
|
206
|
+
for (const v of ["1", "true", "yes", "on", "xyz"]) {
|
|
207
|
+
vi.stubEnv("MUONROI_STEER_INJECTION", v);
|
|
208
|
+
expect(getSteerInjectionEnabled()).toBe(true);
|
|
209
|
+
}
|
|
210
|
+
});
|
|
211
|
+
});
|
|
191
212
|
//# sourceMappingURL=settings.test.js.map
|