muonroi-cli 1.6.3 → 1.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/packages/agent-harness-core/src/event-filter.js +1 -0
- package/dist/packages/agent-harness-core/src/event-redact.js +7 -2
- package/dist/packages/agent-harness-core/src/protocol.d.ts +8 -0
- package/dist/src/generated/version.d.ts +1 -1
- package/dist/src/generated/version.js +1 -1
- package/dist/src/gsd/__tests__/directives.test.js +37 -0
- package/dist/src/gsd/directives.d.ts +18 -0
- package/dist/src/gsd/directives.js +23 -2
- package/dist/src/orchestrator/message-processor.d.ts +8 -0
- package/dist/src/orchestrator/message-processor.js +159 -9
- package/dist/src/orchestrator/orchestrator.d.ts +10 -0
- package/dist/src/orchestrator/orchestrator.js +11 -0
- package/dist/src/orchestrator/stall-rescue.d.ts +1 -0
- package/dist/src/orchestrator/stall-rescue.js +20 -1
- package/dist/src/orchestrator/stall-rescue.test.js +30 -1
- package/dist/src/orchestrator/stall-watchdog.d.ts +31 -0
- package/dist/src/orchestrator/stall-watchdog.js +24 -0
- package/dist/src/orchestrator/stall-watchdog.test.js +46 -1
- package/dist/src/orchestrator/steer-inbox.d.ts +32 -0
- package/dist/src/orchestrator/steer-inbox.js +20 -0
- package/dist/src/orchestrator/steer-inbox.test.d.ts +1 -0
- package/dist/src/orchestrator/steer-inbox.test.js +33 -0
- package/dist/src/orchestrator/tool-loop-askcard.d.ts +59 -0
- package/dist/src/orchestrator/tool-loop-askcard.js +86 -0
- package/dist/src/orchestrator/tool-loop-askcard.test.d.ts +1 -0
- package/dist/src/orchestrator/tool-loop-askcard.test.js +71 -0
- package/dist/src/pil/layer4-gsd.js +5 -1
- package/dist/src/ui/app.js +51 -35
- package/dist/src/utils/settings.d.ts +23 -0
- package/dist/src/utils/settings.js +33 -0
- package/dist/src/utils/settings.test.js +52 -0
- package/package.json +1 -1
|
@@ -85,7 +85,12 @@ const ALLOWED_FIELDS = {
|
|
|
85
85
|
forceCouncil: "pass",
|
|
86
86
|
runId: "pass",
|
|
87
87
|
},
|
|
88
|
-
"
|
|
88
|
+
"steer-inject": {
|
|
89
|
+
count: "pass",
|
|
90
|
+
atStep: "pass",
|
|
91
|
+
runId: "pass",
|
|
92
|
+
},
|
|
93
|
+
toast: {
|
|
89
94
|
level: "pass",
|
|
90
95
|
text: 500, // cap to 500 chars, then scrub
|
|
91
96
|
ttlMs: "pass",
|
|
@@ -94,7 +99,7 @@ const ALLOWED_FIELDS = {
|
|
|
94
99
|
target: "pass",
|
|
95
100
|
text: 500, // cap to 500 chars
|
|
96
101
|
},
|
|
97
|
-
|
|
102
|
+
usage: {
|
|
98
103
|
source: "pass",
|
|
99
104
|
model: "pass",
|
|
100
105
|
inputTokens: "pass",
|
|
@@ -177,6 +177,14 @@ export type LiveEvent = {
|
|
|
177
177
|
/** Total number of unverified claims in this turn. */
|
|
178
178
|
count: number;
|
|
179
179
|
ts: number;
|
|
180
|
+
} | {
|
|
181
|
+
t: "event";
|
|
182
|
+
kind: "steer-inject";
|
|
183
|
+
/** How many queued messages were injected at this boundary. */
|
|
184
|
+
count: number;
|
|
185
|
+
/** The prepareStep step number at which injection occurred (>= 1). */
|
|
186
|
+
atStep: number;
|
|
187
|
+
runId: string;
|
|
180
188
|
} | {
|
|
181
189
|
t: "idle";
|
|
182
190
|
};
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const PACKAGE_VERSION = "1.6.3";
|
|
1
|
+
export declare const PACKAGE_VERSION = "1.6.5";
|
|
2
2
|
export declare const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/sync-version.cjs. DO NOT EDIT BY HAND.
|
|
2
2
|
// Sourced from package.json at build time so it survives bun --compile bundling.
|
|
3
|
-
export const PACKAGE_VERSION = "1.6.3";
|
|
3
|
+
export const PACKAGE_VERSION = "1.6.5";
|
|
4
4
|
export const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
|
|
5
5
|
//# sourceMappingURL=version.js.map
|
|
@@ -98,5 +98,42 @@ describe("buildDirective", () => {
|
|
|
98
98
|
expect(out.text).toMatch(/\[recommended\]/);
|
|
99
99
|
}
|
|
100
100
|
});
|
|
101
|
+
// Language nudge — re-anchors the "reply in user's language" rule INSIDE the
|
|
102
|
+
// directive so layered brevity / FIX-FIRST directives can't drown it (live
|
|
103
|
+
// miss: storyflow_ui session 22661c8de9f2).
|
|
104
|
+
describe("language nudge", () => {
|
|
105
|
+
it("appends the nudge when replyLanguage is set", () => {
|
|
106
|
+
const out = buildDirective({
|
|
107
|
+
complexity: scoreComplexity("fix CI fail"),
|
|
108
|
+
phase: "debug",
|
|
109
|
+
grayAreas: [],
|
|
110
|
+
replyLanguage: "Vietnamese",
|
|
111
|
+
});
|
|
112
|
+
expect(out.text).toMatch(/LANGUAGE — the user wrote in Vietnamese/);
|
|
113
|
+
expect(out.text).toMatch(/Reply in Vietnamese/);
|
|
114
|
+
expect(out.text).toMatch(/OVERRIDES any brevity/);
|
|
115
|
+
});
|
|
116
|
+
it("omits the nudge when replyLanguage is undefined", () => {
|
|
117
|
+
const out = buildDirective({
|
|
118
|
+
complexity: scoreComplexity("fix CI fail"),
|
|
119
|
+
phase: "debug",
|
|
120
|
+
grayAreas: [],
|
|
121
|
+
});
|
|
122
|
+
expect(out.text).not.toMatch(/LANGUAGE —/);
|
|
123
|
+
});
|
|
124
|
+
it("stacks with the ecosystem nudge when both apply", () => {
|
|
125
|
+
const out = buildDirective({
|
|
126
|
+
complexity: scoreComplexity("how does the muonroi ecosystem work"),
|
|
127
|
+
phase: null,
|
|
128
|
+
grayAreas: [],
|
|
129
|
+
ecosystem: true,
|
|
130
|
+
replyLanguage: "Vietnamese",
|
|
131
|
+
});
|
|
132
|
+
expect(out.text).toMatch(/ECOSYSTEM SCOPE/);
|
|
133
|
+
expect(out.text).toMatch(/LANGUAGE —/);
|
|
134
|
+
// ecosystem nudge precedes language nudge (deterministic order)
|
|
135
|
+
expect(out.text.indexOf("ECOSYSTEM SCOPE")).toBeLessThan(out.text.indexOf("LANGUAGE —"));
|
|
136
|
+
});
|
|
137
|
+
});
|
|
101
138
|
});
|
|
102
139
|
//# sourceMappingURL=directives.test.js.map
|
|
@@ -46,6 +46,15 @@ export interface DirectiveInput {
|
|
|
46
46
|
* the shipped authoritative source.
|
|
47
47
|
*/
|
|
48
48
|
ecosystem?: boolean;
|
|
49
|
+
/**
|
|
50
|
+
* User's reply language (heuristic — Vietnamese|undefined). When set, the
|
|
51
|
+
* directive appends an explicit language nudge so the rule survives the
|
|
52
|
+
* personality/GSD instructions stacking on top of it (storyflow_ui session
|
|
53
|
+
* 22661c8de9f2: user wrote Vietnamese, layered directives + a stalled
|
|
54
|
+
* forced-finalize drowned out the base "reply in user's language" rule and
|
|
55
|
+
* the agent answered in English).
|
|
56
|
+
*/
|
|
57
|
+
replyLanguage?: string;
|
|
49
58
|
}
|
|
50
59
|
export interface DirectiveOutput {
|
|
51
60
|
text: string;
|
|
@@ -60,4 +69,13 @@ export declare function mentionsEcosystemScope(message: string): boolean;
|
|
|
60
69
|
* configured — the model simply finds no such tool and falls back to local files.
|
|
61
70
|
*/
|
|
62
71
|
export declare const ECOSYSTEM_DOCS_NUDGE: string;
|
|
72
|
+
/**
|
|
73
|
+
* Appended to any directive when the user's reply language is non-English.
|
|
74
|
+
* The base system prompt's "reply in user's language" rule normally suffices,
|
|
75
|
+
* but `concise` / `FIX-FIRST` / GSD-debug directive bodies stack on top of it
|
|
76
|
+
* with strong "be terse / code over prose" language that crowds the rule out
|
|
77
|
+
* — observed live (storyflow_ui 22661c8de9f2). This NUDGE re-anchors the rule
|
|
78
|
+
* inside the directive itself so brevity preferences cannot override it.
|
|
79
|
+
*/
|
|
80
|
+
export declare function buildLanguageNudge(lang: string): string;
|
|
63
81
|
export declare function buildDirective(input: DirectiveInput): DirectiveOutput;
|
|
@@ -36,6 +36,20 @@ export const ECOSYSTEM_DOCS_NUDGE = [
|
|
|
36
36
|
`${HEADER} ECOSYSTEM SCOPE — this turn concerns the Muonroi ecosystem (platform overview, BB/.NET packages, building-block, open-core boundary, setup).`,
|
|
37
37
|
"If the muonroi-docs MCP is available, it is the AUTHORITATIVE source — call it FIRST (docs_search / setup_guide / bb_recipe_list / bb_package_describe), THEN ground with local files. Do NOT characterize the ecosystem from local repo files alone.",
|
|
38
38
|
].join("\n");
|
|
39
|
+
/**
|
|
40
|
+
* Appended to any directive when the user's reply language is non-English.
|
|
41
|
+
* The base system prompt's "reply in user's language" rule normally suffices,
|
|
42
|
+
* but `concise` / `FIX-FIRST` / GSD-debug directive bodies stack on top of it
|
|
43
|
+
* with strong "be terse / code over prose" language that crowds the rule out
|
|
44
|
+
* — observed live (storyflow_ui 22661c8de9f2). This NUDGE re-anchors the rule
|
|
45
|
+
* inside the directive itself so brevity preferences cannot override it.
|
|
46
|
+
*/
|
|
47
|
+
export function buildLanguageNudge(lang) {
|
|
48
|
+
return [
|
|
49
|
+
`${HEADER} LANGUAGE — the user wrote in ${lang}. Reply in ${lang}.`,
|
|
50
|
+
"This rule OVERRIDES any brevity / concise / code-over-prose directive: terseness is fine, but the response language stays the user's.",
|
|
51
|
+
].join("\n");
|
|
52
|
+
}
|
|
39
53
|
function renderGrayAreas(qs) {
|
|
40
54
|
if (qs.length === 0)
|
|
41
55
|
return " (no gray areas detected — confirm the request is fully specified before proceeding)";
|
|
@@ -125,9 +139,16 @@ export function buildDirective(input) {
|
|
|
125
139
|
// Ecosystem-scoped turns get a docs-first nudge regardless of tier (question
|
|
126
140
|
// OR task): muonroi-docs is the authoritative source and must not be skipped
|
|
127
141
|
// in favour of guessing from local files (session 41ccfeb2ceee turn 1).
|
|
142
|
+
let text = base.text;
|
|
128
143
|
if (input.ecosystem) {
|
|
129
|
-
|
|
144
|
+
text = `${text}\n${ECOSYSTEM_DOCS_NUDGE}`;
|
|
145
|
+
}
|
|
146
|
+
// Language nudge: re-anchor the "reply in user's language" rule INSIDE the
|
|
147
|
+
// directive when the user wrote in a non-English language, so layered
|
|
148
|
+
// brevity/concise directives can't drown it (storyflow_ui 22661c8de9f2).
|
|
149
|
+
if (input.replyLanguage) {
|
|
150
|
+
text = `${text}\n${buildLanguageNudge(input.replyLanguage)}`;
|
|
130
151
|
}
|
|
131
|
-
return base;
|
|
152
|
+
return { ...base, text };
|
|
132
153
|
}
|
|
133
154
|
//# sourceMappingURL=directives.js.map
|
|
@@ -100,6 +100,14 @@ export interface MessageProcessorDeps extends TurnRunnerDepsBase {
|
|
|
100
100
|
* — preserves backward compat for batch / headless paths that have no UI to
|
|
101
101
|
* surface the askcard.
|
|
102
102
|
*/
|
|
103
|
+
/**
|
|
104
|
+
* Live-queue steering drain (UI-provided). Returns and CLEARS any messages
|
|
105
|
+
* the user typed while this turn is streaming, so prepareStep can inject them
|
|
106
|
+
* mid-turn. Undefined / returns [] → no steering (legacy deferred queue).
|
|
107
|
+
*/
|
|
108
|
+
drainSteerMessages?: () => {
|
|
109
|
+
text: string;
|
|
110
|
+
}[];
|
|
103
111
|
askToolLoopContinue?: ToolLoopCapAsk;
|
|
104
112
|
runCouncilV2(userMessage: string, opts: {
|
|
105
113
|
skipClarification: boolean;
|
|
@@ -89,7 +89,7 @@ import { statusBarStore } from "../ui/status-bar/store.js";
|
|
|
89
89
|
import { appendDecisionLog } from "../usage/decision-log.js";
|
|
90
90
|
import { openUrl } from "../utils/open-url.js";
|
|
91
91
|
import { appendAudit, toolNeedsApproval } from "../utils/permission-mode.js";
|
|
92
|
-
import { getAutoCouncilConfidence, getAutoCouncilMinRoles, getProviderStallTimeoutMs, getRoleModels, getTopLevelCompactKeepLast, getTopLevelCompactThresholdChars, getTopLevelToolBudgetChars, isAutoCouncilEnabled, isProviderDisabled, loadMcpServers, loadValidSubAgents, } from "../utils/settings.js";
|
|
92
|
+
import { getAutoCouncilConfidence, getAutoCouncilMinRoles, getProviderStallRetries, getProviderStallTimeoutMs, getRoleModels, getSteerInjectionEnabled, getTopLevelCompactKeepLast, getTopLevelCompactThresholdChars, getTopLevelToolBudgetChars, isAutoCouncilEnabled, isProviderDisabled, loadMcpServers, loadValidSubAgents, } from "../utils/settings.js";
|
|
93
93
|
import { resolveShell } from "../utils/shell.js";
|
|
94
94
|
import { relaxCompactionSettings } from "./compaction.js";
|
|
95
95
|
import { wrapToolSetWithDedup } from "./cross-turn-dedup.js";
|
|
@@ -108,7 +108,8 @@ import { forcedFinalize, getSessionLastTask, incSessionStep, parseBudgetOverride
|
|
|
108
108
|
import { attachReminderToMessages, buildCheckpointReminder, buildScopeReminder, cadenceForSize, shouldInjectCeilingCrossing, shouldInjectReminder, shouldInjectSoftWarn, shouldPreWarnCompaction, } from "./scope-reminder.js";
|
|
109
109
|
import { formatElisionManifest, getSessionExperienceCounts, recordCompaction, recordElision, } from "./session-experience.js";
|
|
110
110
|
import { attemptStallRescue, pushStallToolResult } from "./stall-rescue.js";
|
|
111
|
-
import { createStallWatchdog, STALL_ERROR_MESSAGE } from "./stall-watchdog.js";
|
|
111
|
+
import { createStallWatchdog, STALL_ERROR_MESSAGE, shouldRepromptStall, stallRepromptBackoffMs, } from "./stall-watchdog.js";
|
|
112
|
+
import { planSteerInjection } from "./steer-inbox.js";
|
|
112
113
|
import { wrapToolSetWithCap } from "./sub-agent-cap.js";
|
|
113
114
|
import { compactSubAgentMessages, cumulativeMessageChars } from "./subagent-compactor.js";
|
|
114
115
|
import { detectTextEmittedToolCall, parseDsmlToolCalls } from "./text-tool-call-detector.js";
|
|
@@ -813,6 +814,24 @@ export class MessageProcessor {
|
|
|
813
814
|
// clear toast and SKIP the transient-retry (a stalled provider just stalls
|
|
814
815
|
// again, wasting another full timeout of silence).
|
|
815
816
|
let stallTriggered = false;
|
|
817
|
+
// Time-to-first-byte stall RE-PROMPT: some providers (observed:
|
|
818
|
+
// xai/grok-build-0.1) accept the request then never send the first byte —
|
|
819
|
+
// a single wedged socket, not a down backend, so a fresh request usually
|
|
820
|
+
// goes through. When the watchdog fires with ZERO chunks received this
|
|
821
|
+
// attempt, we re-issue the SAME request up to `maxStallRetries` times
|
|
822
|
+
// (loop-persistent counter). Gated on zero-chunks so it can NEVER restart a
|
|
823
|
+
// turn that already ran tools or emitted text — those go to the partial-
|
|
824
|
+
// answer rescue path instead. maxStallRetries = 0 restores legacy behaviour.
|
|
825
|
+
let stallRetryCount = 0;
|
|
826
|
+
const maxStallRetries = getProviderStallRetries();
|
|
827
|
+
// Live-queue steering: messages the user typed mid-turn are drained at a
|
|
828
|
+
// prepareStep boundary and accumulated here, then re-appended (deduped) to
|
|
829
|
+
// the messages returned for each subsequent step. Loop-persistent so they
|
|
830
|
+
// survive a stall-reprompt restart of streamText. NOT pushed into
|
|
831
|
+
// deps.messages in v1 — model-context only; the assistant response captures
|
|
832
|
+
// the steering effect and is persisted via appendCompletedTurn.
|
|
833
|
+
const pendingSteers = [];
|
|
834
|
+
const steerEnabled = getSteerInjectionEnabled();
|
|
816
835
|
// Auto-council: route to multi-model debate when EITHER
|
|
817
836
|
// (a) PIL classified taskType=plan|analyze with high confidence AND the
|
|
818
837
|
// prompt is complex enough to justify the debate cost, OR
|
|
@@ -928,7 +947,7 @@ export class MessageProcessor {
|
|
|
928
947
|
return;
|
|
929
948
|
}
|
|
930
949
|
try {
|
|
931
|
-
while (true) {
|
|
950
|
+
streamAttempt: while (true) {
|
|
932
951
|
// SAMR Phase 2: switch to fast model for tool-execution steps
|
|
933
952
|
if (stepRouterPhase === "phase2" && phase2Runtime) {
|
|
934
953
|
runtime = phase2Runtime;
|
|
@@ -936,6 +955,65 @@ export class MessageProcessor {
|
|
|
936
955
|
}
|
|
937
956
|
deps.setCompactedThisTurn(false);
|
|
938
957
|
let assistantText = "";
|
|
958
|
+
// Count of stream parts received in THIS attempt. Stays 0 only when the
|
|
959
|
+
// provider never sent a first byte → the safe-to-re-prompt stall case.
|
|
960
|
+
let chunksThisAttempt = 0;
|
|
961
|
+
// Decide whether a fired stall watchdog should re-prompt (re-issue the
|
|
962
|
+
// same request) instead of falling through to rescue/error. Returns the
|
|
963
|
+
// backoff ms to wait before re-issuing, or null to NOT re-prompt. Reads
|
|
964
|
+
// the live per-attempt locals; safe to call only when stallTriggered.
|
|
965
|
+
const planStallReprompt = () => {
|
|
966
|
+
if (!shouldRepromptStall({
|
|
967
|
+
stallTriggered,
|
|
968
|
+
stallRetryCount,
|
|
969
|
+
maxStallRetries,
|
|
970
|
+
chunksThisAttempt,
|
|
971
|
+
assistantTextEmpty: assistantText.trim() === "",
|
|
972
|
+
aborted: signal.aborted,
|
|
973
|
+
})) {
|
|
974
|
+
return null;
|
|
975
|
+
}
|
|
976
|
+
stallRetryCount++;
|
|
977
|
+
const backoffMs = stallRepromptBackoffMs(stallRetryCount);
|
|
978
|
+
try {
|
|
979
|
+
const _ar = globalThis.__muonroiAgentRuntime;
|
|
980
|
+
_ar?.emitEvent({
|
|
981
|
+
t: "event",
|
|
982
|
+
kind: "stream-retry",
|
|
983
|
+
attempt: stallRetryCount,
|
|
984
|
+
maxAttempts: maxStallRetries + 1,
|
|
985
|
+
errorName: "TimeoutError",
|
|
986
|
+
errorMessage: "provider-stall (no first byte) — re-prompting",
|
|
987
|
+
nextDelayMs: backoffMs,
|
|
988
|
+
});
|
|
989
|
+
_ar?.emitEvent({
|
|
990
|
+
t: "event",
|
|
991
|
+
kind: "toast",
|
|
992
|
+
level: "warning",
|
|
993
|
+
text: `Model stalled — re-prompting (attempt ${stallRetryCount}/${maxStallRetries})…`,
|
|
994
|
+
});
|
|
995
|
+
}
|
|
996
|
+
catch (emitErr) {
|
|
997
|
+
console.error(`[message-processor] stall-reprompt telemetry failed: ${emitErr?.message}`);
|
|
998
|
+
}
|
|
999
|
+
try {
|
|
1000
|
+
if (deps.session) {
|
|
1001
|
+
logInteraction(deps.session.id, "stream_retry", {
|
|
1002
|
+
data: {
|
|
1003
|
+
attempt: stallRetryCount,
|
|
1004
|
+
maxAttempts: maxStallRetries + 1,
|
|
1005
|
+
errorName: "provider-stall",
|
|
1006
|
+
errorMessage: "no first byte within stall timeout — re-prompted",
|
|
1007
|
+
nextDelayMs: backoffMs,
|
|
1008
|
+
},
|
|
1009
|
+
});
|
|
1010
|
+
}
|
|
1011
|
+
}
|
|
1012
|
+
catch (logErr) {
|
|
1013
|
+
console.error(`[message-processor] stall-reprompt log failed: ${logErr?.message}`);
|
|
1014
|
+
}
|
|
1015
|
+
return backoffMs;
|
|
1016
|
+
};
|
|
939
1017
|
// Tracks where `assistantText` was at the previous step boundary so
|
|
940
1018
|
// `onStepFinish` can compute the text emitted within the just-finished
|
|
941
1019
|
// step (input to the self-repetition detector).
|
|
@@ -1469,6 +1547,46 @@ export class MessageProcessor {
|
|
|
1469
1547
|
prepareStep: ({ stepNumber: sn, messages: stepMessages }) => {
|
|
1470
1548
|
if (sn < 1)
|
|
1471
1549
|
return {};
|
|
1550
|
+
// --- Live-queue steering injection ---------------------------
|
|
1551
|
+
// Drain the UI steer queue ONCE per prepareStep call (sn >= 1),
|
|
1552
|
+
// accumulate into pendingSteers, and graft pendingSteers onto the
|
|
1553
|
+
// messages this step returns. Dedup-by-content makes re-appending
|
|
1554
|
+
// idempotent even if a stall-reprompt restart re-reads history.
|
|
1555
|
+
const withSteers = (r) => {
|
|
1556
|
+
// Guard the drain on !signal.aborted too: planSteerInjection
|
|
1557
|
+
// already refuses to inject on abort, but draining still CLEARS
|
|
1558
|
+
// the UI queue — so on a (programmatic) abort we must not drain,
|
|
1559
|
+
// or a queued-but-uninjected message is lost (spec §143).
|
|
1560
|
+
const _drained = steerEnabled && !signal.aborted ? (deps.drainSteerMessages?.() ?? []) : [];
|
|
1561
|
+
const _newSteers = planSteerInjection({
|
|
1562
|
+
drained: _drained,
|
|
1563
|
+
aborted: signal.aborted,
|
|
1564
|
+
enabled: steerEnabled,
|
|
1565
|
+
});
|
|
1566
|
+
if (_newSteers.length > 0) {
|
|
1567
|
+
pendingSteers.push(..._newSteers);
|
|
1568
|
+
try {
|
|
1569
|
+
const _ar = globalThis.__muonroiAgentRuntime;
|
|
1570
|
+
_ar?.emitEvent({
|
|
1571
|
+
t: "event",
|
|
1572
|
+
kind: "steer-inject",
|
|
1573
|
+
count: _newSteers.length,
|
|
1574
|
+
atStep: sn,
|
|
1575
|
+
runId: deps.getActiveRunId() ?? "",
|
|
1576
|
+
});
|
|
1577
|
+
}
|
|
1578
|
+
catch (emitErr) {
|
|
1579
|
+
console.error(`[message-processor] steer-inject telemetry failed: ${emitErr?.message}`);
|
|
1580
|
+
}
|
|
1581
|
+
}
|
|
1582
|
+
if (pendingSteers.length === 0)
|
|
1583
|
+
return r;
|
|
1584
|
+
const _base = r.messages ?? stepMessages;
|
|
1585
|
+
const _steerContents = new Set(pendingSteers.map((s) => (typeof s.content === "string" ? s.content : JSON.stringify(s.content))));
|
|
1586
|
+
const _deduped = _base.filter((m) => !(m.role === "user" &&
|
|
1587
|
+
_steerContents.has(typeof m.content === "string" ? m.content : JSON.stringify(m.content))));
|
|
1588
|
+
return { ...r, messages: [..._deduped, ...pendingSteers] };
|
|
1589
|
+
};
|
|
1472
1590
|
const stripped = turnCaps.sanitizeHistory(stepMessages);
|
|
1473
1591
|
// Agent-controlled veto (PRESERVE) or lighter selective keep (KEEP_TOOL_IDS) for this turn's B4 compaction.
|
|
1474
1592
|
// PRESERVE_FULL_CONTEXT skips the compactor entirely (full history).
|
|
@@ -1499,7 +1617,7 @@ export class MessageProcessor {
|
|
|
1499
1617
|
return false;
|
|
1500
1618
|
});
|
|
1501
1619
|
if (hasPreserve) {
|
|
1502
|
-
return { messages: stripped };
|
|
1620
|
+
return withSteers({ messages: stripped });
|
|
1503
1621
|
}
|
|
1504
1622
|
// F2 — envelope = system prompt + JSON-Schema of every tool
|
|
1505
1623
|
// re-sent on every step. Without this the threshold check
|
|
@@ -1573,7 +1691,7 @@ export class MessageProcessor {
|
|
|
1573
1691
|
if (compacted === stripped && shouldPreWarnCompaction(_preWarnChars, topLevelCompactThreshold)) {
|
|
1574
1692
|
const _cp = buildCheckpointReminder(sn, true);
|
|
1575
1693
|
const _pre = `[pre-compaction warning at step ${sn} — next step(s) will likely rewrite older tool results to stubs (threshold ${topLevelCompactThreshold}, keepLast=${topLevelCompactKeepLast}). ${_cp} Summarize or finish if possible.]`;
|
|
1576
|
-
return { messages: attachReminderToMessages(stripped, _pre) };
|
|
1694
|
+
return withSteers({ messages: attachReminderToMessages(stripped, _pre) });
|
|
1577
1695
|
}
|
|
1578
1696
|
// Phase 4A — scope reminder injection (REQ-005).
|
|
1579
1697
|
// Cadence K = 3/5/8 for small/medium/large. Soft-warn fires
|
|
@@ -1635,10 +1753,10 @@ export class MessageProcessor {
|
|
|
1635
1753
|
: buildRepetitionReminder(_ceilingSessionId)
|
|
1636
1754
|
: _scopePart;
|
|
1637
1755
|
const withReminder = attachReminderToMessages(compacted, _reminder);
|
|
1638
|
-
return { messages: withReminder };
|
|
1756
|
+
return withSteers({ messages: withReminder });
|
|
1639
1757
|
}
|
|
1640
1758
|
if (compacted === stripped && stripped === stepMessages)
|
|
1641
|
-
return {};
|
|
1759
|
+
return withSteers({});
|
|
1642
1760
|
// Self-awareness note: tell the model compaction happened so it
|
|
1643
1761
|
// knows earlier context was elided and can adjust its behavior.
|
|
1644
1762
|
// Enhanced per EE anti-mù plan (docs/ee-anti-mu-compaction-plan.md Phase 2): include proactive
|
|
@@ -1656,9 +1774,9 @@ export class MessageProcessor {
|
|
|
1656
1774
|
})()
|
|
1657
1775
|
: null;
|
|
1658
1776
|
if (_compactNote) {
|
|
1659
|
-
return { messages: attachReminderToMessages(compacted, _compactNote) };
|
|
1777
|
+
return withSteers({ messages: attachReminderToMessages(compacted, _compactNote) });
|
|
1660
1778
|
}
|
|
1661
|
-
return { messages: compacted };
|
|
1779
|
+
return withSteers({ messages: compacted });
|
|
1662
1780
|
},
|
|
1663
1781
|
...(dropParam("temperature") ? {} : { temperature: 0.7 }),
|
|
1664
1782
|
...(dropParam("maxOutputTokens") ? {} : { maxOutputTokens: taskTypeToMaxTokens(pilCtx.taskType) }),
|
|
@@ -1730,6 +1848,11 @@ export class MessageProcessor {
|
|
|
1730
1848
|
const _wireProviderIdTop = runtime.modelInfo?.provider ?? "unknown";
|
|
1731
1849
|
for await (const part of result.fullStream) {
|
|
1732
1850
|
stall.pet(); // chunk arrived — reset the stall watchdog
|
|
1851
|
+
// Count only real content parts. The watchdog abort itself surfaces
|
|
1852
|
+
// as an "abort" part — counting it would defeat the TTFB-stall gate
|
|
1853
|
+
// (a frozen-before-first-byte stall yields ONLY the abort part).
|
|
1854
|
+
if (part.type !== "abort")
|
|
1855
|
+
chunksThisAttempt++;
|
|
1733
1856
|
if (signal.aborted) {
|
|
1734
1857
|
yield { type: "content", content: "\n\n[Cancelled]" };
|
|
1735
1858
|
break;
|
|
@@ -2283,6 +2406,19 @@ export class MessageProcessor {
|
|
|
2283
2406
|
// instead of a benign "[Cancelled]" so a hung provider no longer
|
|
2284
2407
|
// looks like a silent freeze.
|
|
2285
2408
|
if (stallTriggered) {
|
|
2409
|
+
// Time-to-first-byte stall (no real chunk this attempt): the
|
|
2410
|
+
// socket wedged before any output — re-issue the SAME request
|
|
2411
|
+
// rather than giving up. Bounded by maxStallRetries; never
|
|
2412
|
+
// fires once tools ran or text flowed (planStallReprompt gate).
|
|
2413
|
+
const _stallBackoff = planStallReprompt();
|
|
2414
|
+
if (_stallBackoff != null) {
|
|
2415
|
+
stall.dispose();
|
|
2416
|
+
await new Promise((r) => setTimeout(r, _stallBackoff));
|
|
2417
|
+
if (!signal.aborted) {
|
|
2418
|
+
stallTriggered = false;
|
|
2419
|
+
continue streamAttempt;
|
|
2420
|
+
}
|
|
2421
|
+
}
|
|
2286
2422
|
stall.dispose();
|
|
2287
2423
|
// A response tool already produced the terminal structured
|
|
2288
2424
|
// answer (buffered from its call args) before the provider
|
|
@@ -2913,6 +3049,20 @@ export class MessageProcessor {
|
|
|
2913
3049
|
attemptedOverflowRecovery = true;
|
|
2914
3050
|
continue;
|
|
2915
3051
|
}
|
|
3052
|
+
// Stall surfaced as a throw (rather than an "abort" stream part):
|
|
3053
|
+
// apply the SAME time-to-first-byte re-prompt as the abort-part path.
|
|
3054
|
+
// The watchdog already fired (stallTriggered) so its timer is spent —
|
|
3055
|
+
// no dispose needed; the next attempt arms a fresh watchdog.
|
|
3056
|
+
if (stallTriggered) {
|
|
3057
|
+
const _stallBackoff = planStallReprompt();
|
|
3058
|
+
if (_stallBackoff != null) {
|
|
3059
|
+
await new Promise((r) => setTimeout(r, _stallBackoff));
|
|
3060
|
+
if (!signal.aborted) {
|
|
3061
|
+
stallTriggered = false;
|
|
3062
|
+
continue;
|
|
3063
|
+
}
|
|
3064
|
+
}
|
|
3065
|
+
}
|
|
2916
3066
|
// Transient network/server error retry — up to MAX_STREAM_RETRIES extra attempts.
|
|
2917
3067
|
// Only retry when no content has flowed yet (assistantText empty) to avoid
|
|
2918
3068
|
// partial-output corruption. Honour the abort signal between retries.
|
|
@@ -20,6 +20,8 @@ export declare class Agent {
|
|
|
20
20
|
private messages;
|
|
21
21
|
private messageSeqs;
|
|
22
22
|
private abortController;
|
|
23
|
+
/** UI-registered live-queue steer drain; see Agent.setSteerDrain. */
|
|
24
|
+
private steerDrain;
|
|
23
25
|
private maxToolRounds;
|
|
24
26
|
private mode;
|
|
25
27
|
private modelId;
|
|
@@ -97,6 +99,14 @@ export declare class Agent {
|
|
|
97
99
|
*/
|
|
98
100
|
private _initFlow;
|
|
99
101
|
getModel(): string;
|
|
102
|
+
/**
|
|
103
|
+
* Register (or clear with null) the UI callback that drains messages typed
|
|
104
|
+
* while a turn is streaming, for mid-turn steering injection. Called from the
|
|
105
|
+
* TUI when MUONROI_STEER_INJECTION is enabled.
|
|
106
|
+
*/
|
|
107
|
+
setSteerDrain(fn: (() => {
|
|
108
|
+
text: string;
|
|
109
|
+
}[]) | null): void;
|
|
100
110
|
getActiveRunId(): string | null;
|
|
101
111
|
setModel(model: string): void;
|
|
102
112
|
getMode(): AgentMode;
|
|
@@ -130,6 +130,8 @@ export class Agent {
|
|
|
130
130
|
messages = [];
|
|
131
131
|
messageSeqs = [];
|
|
132
132
|
abortController = null;
|
|
133
|
+
/** UI-registered live-queue steer drain; see Agent.setSteerDrain. */
|
|
134
|
+
steerDrain = null;
|
|
133
135
|
maxToolRounds;
|
|
134
136
|
mode = "agent";
|
|
135
137
|
modelId;
|
|
@@ -309,6 +311,14 @@ export class Agent {
|
|
|
309
311
|
getModel() {
|
|
310
312
|
return this.modelId;
|
|
311
313
|
}
|
|
314
|
+
/**
|
|
315
|
+
* Register (or clear with null) the UI callback that drains messages typed
|
|
316
|
+
* while a turn is streaming, for mid-turn steering injection. Called from the
|
|
317
|
+
* TUI when MUONROI_STEER_INJECTION is enabled.
|
|
318
|
+
*/
|
|
319
|
+
setSteerDrain(fn) {
|
|
320
|
+
this.steerDrain = fn;
|
|
321
|
+
}
|
|
312
322
|
getActiveRunId() {
|
|
313
323
|
return this._activeRunId;
|
|
314
324
|
}
|
|
@@ -2123,6 +2133,7 @@ export class Agent {
|
|
|
2123
2133
|
runDelegation: (request, signal) => self.runDelegation(request, signal),
|
|
2124
2134
|
readDelegation: (id) => self.readDelegation(id),
|
|
2125
2135
|
listDelegations: () => self.listDelegations(),
|
|
2136
|
+
drainSteerMessages: () => self.steerDrain?.() ?? [],
|
|
2126
2137
|
appendCompletedTurn: (user, asst) => self.appendCompletedTurn(user, asst),
|
|
2127
2138
|
discardAbortedTurn: (user) => self.discardAbortedTurn(user),
|
|
2128
2139
|
recordUsage: (usage, source, model, shape) => self.recordUsage(usage, source, model, shape),
|
|
@@ -31,6 +31,7 @@ export interface StallToolResult {
|
|
|
31
31
|
export declare const STALL_RESCUE_MAX_RESULTS = 8;
|
|
32
32
|
/** Max chars kept per tool output in the digest. */
|
|
33
33
|
export declare const STALL_RESCUE_MAX_CHARS_PER_RESULT = 1500;
|
|
34
|
+
export declare function detectReplyLanguageHint(userText: string): string | null;
|
|
34
35
|
/**
|
|
35
36
|
* Capture a tool result into a capped ring buffer (mutates `buffer`). Keeps the
|
|
36
37
|
* buffer bounded in BOTH count and per-entry size so a long turn can't blow
|
|
@@ -26,6 +26,22 @@
|
|
|
26
26
|
export const STALL_RESCUE_MAX_RESULTS = 8;
|
|
27
27
|
/** Max chars kept per tool output in the digest. */
|
|
28
28
|
export const STALL_RESCUE_MAX_CHARS_PER_RESULT = 1500;
|
|
29
|
+
/**
|
|
30
|
+
* Heuristic: detect the user's reply language from the original request so the
|
|
31
|
+
* synthesis prompt can carry an EXPLICIT language directive. The base system
|
|
32
|
+
* prompt's "reply in user's language" rule is normally enough, but after a
|
|
33
|
+
* stall the rescue path issues a NEW synthesis call whose extra directives
|
|
34
|
+
* ("use ONLY tool outputs", "no more tools") can crowd out the language rule
|
|
35
|
+
* — observed live (storyflow_ui session 22661c8de9f2): user wrote Vietnamese,
|
|
36
|
+
* the rescued answer came back in English. Returns a language name or null
|
|
37
|
+
* (skip the hint = treat as English-default).
|
|
38
|
+
*/
|
|
39
|
+
const VI_DIACRITIC_RE = /[à-ỹÀ-Ỹ]/;
|
|
40
|
+
export function detectReplyLanguageHint(userText) {
|
|
41
|
+
if (VI_DIACRITIC_RE.test(userText))
|
|
42
|
+
return "Vietnamese";
|
|
43
|
+
return null;
|
|
44
|
+
}
|
|
29
45
|
/**
|
|
30
46
|
* Capture a tool result into a capped ring buffer (mutates `buffer`). Keeps the
|
|
31
47
|
* buffer bounded in BOTH count and per-entry size so a long turn can't blow
|
|
@@ -44,7 +60,10 @@ export function pushStallToolResult(buffer, tool, rawText) {
|
|
|
44
60
|
*/
|
|
45
61
|
export function buildStallSynthesisMessages(baseMessages, userText, toolResults) {
|
|
46
62
|
const digest = toolResults.map((r, i) => `[${i + 1}] ${r.tool}:\n${r.text}`).join("\n\n");
|
|
47
|
-
const
|
|
63
|
+
const lang = detectReplyLanguageHint(userText);
|
|
64
|
+
const langLine = lang ? `Reply in ${lang} (the user wrote in ${lang}).\n\n` : "";
|
|
65
|
+
const content = langLine +
|
|
66
|
+
"The connection to the model stalled before it could finish its answer. " +
|
|
48
67
|
"You already ran the tools below this turn — use ONLY their outputs to give " +
|
|
49
68
|
"your best final answer now. Do NOT call any more tools.\n\n" +
|
|
50
69
|
`Original request:\n${userText}\n\n` +
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
|
-
import { attemptStallRescue, buildStallSynthesisMessages, pushStallToolResult, STALL_RESCUE_MAX_CHARS_PER_RESULT, STALL_RESCUE_MAX_RESULTS, } from "./stall-rescue.js";
|
|
2
|
+
import { attemptStallRescue, buildStallSynthesisMessages, detectReplyLanguageHint, pushStallToolResult, STALL_RESCUE_MAX_CHARS_PER_RESULT, STALL_RESCUE_MAX_RESULTS, } from "./stall-rescue.js";
|
|
3
3
|
describe("pushStallToolResult", () => {
|
|
4
4
|
it("caps the buffer count, keeping the most recent results", () => {
|
|
5
5
|
const buf = [];
|
|
@@ -34,6 +34,35 @@ describe("buildStallSynthesisMessages", () => {
|
|
|
34
34
|
// does not mutate the base array
|
|
35
35
|
expect(base.length).toBe(1);
|
|
36
36
|
});
|
|
37
|
+
it("prepends a Vietnamese reply directive when user wrote in Vietnamese", () => {
    // A request written with Vietnamese diacritics must produce a final
    // message that opens with the explicit language directive.
    const toolResults = [{ tool: "grep", text: "found" }];
    const messages = buildStallSynthesisMessages([], "sửa thêm popup reload không xoá login", toolResults);
    const { content } = messages[messages.length - 1];
    expect(content).toMatch(/^Reply in Vietnamese/);
    expect(content).toContain("the user wrote in Vietnamese");
});
|
|
45
|
+
it("omits the language directive for plain English input", () => {
    // ASCII-only requests get no language hint, so the final message must
    // not start with a "Reply in …" line.
    const toolResults = [{ tool: "grep", text: "found" }];
    const messages = buildStallSynthesisMessages([], "find a bug please", toolResults);
    const { content } = messages[messages.length - 1];
    expect(content).not.toMatch(/^Reply in /);
});
|
|
50
|
+
});
|
|
51
|
+
describe("detectReplyLanguageHint", () => {
    it("detects Vietnamese from any diacritic", () => {
        // One accented letter anywhere in the text is enough.
        const accentedSamples = ["tìm", "sửa thêm cho tôi", "Cập nhật"];
        for (const sample of accentedSamples) {
            expect(detectReplyLanguageHint(sample)).toBe("Vietnamese");
        }
    });
    it("returns null for ASCII / English text", () => {
        for (const sample of ["commit and push please", ""]) {
            expect(detectReplyLanguageHint(sample)).toBeNull();
        }
    });
    it("returns null for a VN-only word with NO diacritics (the heuristic is conservative)", () => {
        // Vietnamese typed without tone marks is plain ASCII, so the
        // heuristic cannot tell it apart from English. The missed detection
        // is accepted: the caller simply omits the language directive.
        const unaccented = "commit push de auto ci/cd len prod nhe";
        expect(detectReplyLanguageHint(unaccented)).toBeNull();
    });
});
|
|
38
67
|
describe("attemptStallRescue", () => {
|
|
39
68
|
it("returns null when there are no tool results (nothing to synthesize)", async () => {
|
|
@@ -31,4 +31,35 @@ export interface StallWatchdog {
|
|
|
31
31
|
export declare const STALL_ABORT_REASON = "provider-stall";
|
|
32
32
|
/** User-facing message surfaced when the stall watchdog fires. */
|
|
33
33
|
export declare const STALL_ERROR_MESSAGE: string;
|
|
34
|
+
/**
 * Snapshot of one streaming attempt, passed to {@link shouldRepromptStall}
 * to decide whether a fired stall watchdog should lead to a re-prompt.
 */
export interface StallRepromptState {
    /** Whether the stall watchdog fired during this attempt. */
    stallTriggered: boolean;
    /** Number of stall re-prompts already performed in the current turn. */
    stallRetryCount: number;
    /** Upper bound on re-prompts (from getProviderStallRetries); 0 turns re-prompting off. */
    maxStallRetries: number;
    /** Count of real content parts received this attempt; the abort part is excluded. */
    chunksThisAttempt: number;
    /** True while no assistant text has been received in this attempt. */
    assistantTextEmpty: boolean;
    /** True for a genuine user cancel, which must never be answered with a re-prompt. */
    aborted: boolean;
}
|
|
49
|
+
/**
 * Pure predicate: should a fired stall watchdog cause the same request to be
 * re-issued (a re-prompt) rather than the stall being surfaced?
 *
 * Only a time-to-first-byte stall qualifies, i.e. all of the following hold
 * for the attempt:
 *  - zero real chunks were received and no assistant text has flowed,
 *  - the retry cap has not been reached,
 *  - the turn was not cancelled by the user.
 *
 * Once tools have run or text has streamed, re-issuing would corrupt or
 * duplicate output, so those cases are left to the partial-answer rescue
 * path. The function has no side effects, which keeps it unit-testable
 * without the orchestrator loop.
 */
export declare function shouldRepromptStall(s: StallRepromptState): boolean;
|
|
60
|
+
/**
 * Delay in milliseconds to wait before the Nth stall re-prompt, where
 * `attempt` is 1-based. The delay grows exponentially and is capped at 4s:
 * 500 → 1000 → 2000 → 4000 → 4000.
 */
export declare function stallRepromptBackoffMs(attempt: number): number;
|
|
34
65
|
export declare function createStallWatchdog(timeoutMs: number, onFire?: () => void): StallWatchdog;
|