muonroi-cli 1.6.0 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/cli/cost-forensics.d.ts +3 -0
- package/dist/src/cli/cost-forensics.js +11 -0
- package/dist/src/cli/cost-forensics.test.js +1 -0
- package/dist/src/cli/experience-report.d.ts +20 -0
- package/dist/src/cli/experience-report.js +76 -0
- package/dist/src/cli/experience-report.test.d.ts +5 -0
- package/dist/src/cli/experience-report.test.js +63 -0
- package/dist/src/generated/version.d.ts +1 -1
- package/dist/src/generated/version.js +1 -1
- package/dist/src/gsd/__tests__/directives.test.js +24 -1
- package/dist/src/gsd/directives.d.ts +22 -0
- package/dist/src/gsd/directives.js +34 -10
- package/dist/src/index.js +9 -0
- package/dist/src/mcp/__tests__/client-pool.spec.js +54 -4
- package/dist/src/mcp/__tests__/forensics-tools.test.js +1 -0
- package/dist/src/mcp/client-pool.d.ts +9 -2
- package/dist/src/mcp/client-pool.js +60 -21
- package/dist/src/orchestrator/message-processor.js +34 -2
- package/dist/src/orchestrator/session-experience.d.ts +89 -0
- package/dist/src/orchestrator/session-experience.js +169 -0
- package/dist/src/orchestrator/session-experience.test.d.ts +6 -0
- package/dist/src/orchestrator/session-experience.test.js +72 -0
- package/dist/src/orchestrator/stream-runner.js +4 -0
- package/dist/src/orchestrator/subagent-compactor.d.ts +10 -0
- package/dist/src/orchestrator/subagent-compactor.js +14 -0
- package/dist/src/orchestrator/subagent-compactor.spec.js +54 -0
- package/dist/src/pil/__tests__/layer3-ee-injection.test.js +5 -3
- package/dist/src/pil/__tests__/layer3-injected-chunk.test.js +31 -0
- package/dist/src/pil/__tests__/pipeline.test.js +17 -0
- package/dist/src/pil/layer3-ee-injection.d.ts +9 -0
- package/dist/src/pil/layer3-ee-injection.js +29 -0
- package/dist/src/pil/layer4-gsd.js +3 -2
- package/dist/src/pil/pipeline.js +11 -0
- package/dist/src/pil/session-experience-injection.d.ts +34 -0
- package/dist/src/pil/session-experience-injection.js +54 -0
- package/dist/src/pil/session-experience-injection.test.d.ts +6 -0
- package/dist/src/pil/session-experience-injection.test.js +79 -0
- package/dist/src/storage/interaction-log.d.ts +1 -1
- package/dist/src/storage/interaction-log.js +17 -4
- package/dist/src/storage/session-experience-store.d.ts +63 -0
- package/dist/src/storage/session-experience-store.js +164 -0
- package/dist/src/storage/session-experience-store.test.d.ts +5 -0
- package/dist/src/storage/session-experience-store.test.js +86 -0
- package/dist/src/storage/tool-results.js +23 -0
- package/dist/src/storage/tool-results.test.d.ts +1 -0
- package/dist/src/storage/tool-results.test.js +48 -0
- package/dist/src/storage/ui-interaction-log.js +4 -2
- package/dist/src/tools/registry-ee-query.test.js +7 -1
- package/dist/src/tools/registry.js +7 -0
- package/dist/src/types/index.d.ts +6 -0
- package/dist/src/ui/__tests__/markdown-render.test.js +17 -0
- package/dist/src/ui/app.js +0 -0
- package/dist/src/ui/markdown-render.js +12 -0
- package/package.json +1 -1
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
* confirm that sub-agent context no longer balloons past 80k input
|
|
9
9
|
* after the cumulative cap kicks in.
|
|
10
10
|
*/
|
|
11
|
+
import type { SessionExperienceCounts } from "../orchestrator/session-experience.js";
|
|
11
12
|
export interface CostForensicsRow {
|
|
12
13
|
id: number;
|
|
13
14
|
source: string;
|
|
@@ -35,6 +36,8 @@ export interface CostForensicsSummary {
|
|
|
35
36
|
cacheHitRatio: number;
|
|
36
37
|
peakSingleCallInput: number;
|
|
37
38
|
events: CostForensicsRow[];
|
|
39
|
+
/** Anti-mù counters for this session (null when none recorded). */
|
|
40
|
+
experience: SessionExperienceCounts | null;
|
|
38
41
|
}
|
|
39
42
|
/**
|
|
40
43
|
* Return ALL session ids matching a prefix (newest first, capped at 5).
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
import { getProviderCapabilities } from "../providers/capabilities.js";
|
|
12
12
|
import { detectProviderForModel } from "../providers/runtime.js";
|
|
13
13
|
import { getDatabase } from "../storage/db.js";
|
|
14
|
+
import { selectSessionExperience } from "../storage/session-experience-store.js";
|
|
14
15
|
function resolveSessionId(prefix) {
|
|
15
16
|
const rows = getDatabase()
|
|
16
17
|
.prepare(`SELECT id FROM sessions WHERE id LIKE ? ORDER BY created_at DESC LIMIT 5`)
|
|
@@ -103,6 +104,7 @@ export function collectCostForensics(sessionId) {
|
|
|
103
104
|
cacheHitRatio,
|
|
104
105
|
peakSingleCallInput,
|
|
105
106
|
events,
|
|
107
|
+
experience: selectSessionExperience(sessionId),
|
|
106
108
|
};
|
|
107
109
|
}
|
|
108
110
|
export function computeCacheCadence(events) {
|
|
@@ -164,6 +166,15 @@ export function printCostForensics(summary, opts = {}) {
|
|
|
164
166
|
`(~${formatNum(cadence.estReBilledTokens)} tok re-billed). ` +
|
|
165
167
|
`Likely fast tool-loop latency — fewer/batched tool rounds recover this.`);
|
|
166
168
|
}
|
|
169
|
+
// Anti-mù counters for this session (rec #1 persisted forensics).
|
|
170
|
+
if (summary.experience) {
|
|
171
|
+
const x = summary.experience;
|
|
172
|
+
const rehydrated = x.rehydratedCache + x.rehydratedDisk + x.rehydratedEe;
|
|
173
|
+
w(`Anti-mù: ${x.compactions} compaction(s), ${x.elided} tool output(s) elided` +
|
|
174
|
+
`${x.elided > 0 ? ` (${formatNum(x.totalElidedChars)} chars)` : ""}, ` +
|
|
175
|
+
`${rehydrated} rehydrated (cache=${x.rehydratedCache} disk=${x.rehydratedDisk} ee=${x.rehydratedEe}), ` +
|
|
176
|
+
`${x.unavailable} needed-but-unavailable.`);
|
|
177
|
+
}
|
|
167
178
|
w(``);
|
|
168
179
|
w(`Per-event breakdown:`);
|
|
169
180
|
w(`${"seq".padEnd(5)}${"src".padEnd(10)}${"input".padStart(9)}${"out".padStart(7)}${"cacheR".padStart(9)}${"cacheC".padStart(8)} ts`);
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/cli/experience-report.ts
|
|
3
|
+
*
|
|
4
|
+
* `muonroi-cli usage experience` — cross-session anti-mù telemetry. Aggregates
|
|
5
|
+
* the per-session session_experience snapshots to answer the measure-before-
|
|
6
|
+
* re-architecting question: how often does compaction actually elide a tool
|
|
7
|
+
* output, and when the agent goes back for one, can it recover it?
|
|
8
|
+
*
|
|
9
|
+
* This is the data gate for the deferred anti-mù re-architecture (auto-protect /
|
|
10
|
+
* auto-rehydrate). Low elision rate or high recovery rate ⇒ the friction is rare
|
|
11
|
+
* or cognitive, not data-loss ⇒ defer. High unavailable / low recovery ⇒ real
|
|
12
|
+
* loss ⇒ justified.
|
|
13
|
+
*/
|
|
14
|
+
import { type ExperienceAggregate } from "../storage/session-experience-store.js";
|
|
15
|
+
/** Pure renderer — returns the report lines so it is unit-testable without a DB. */
|
|
16
|
+
export declare function renderExperienceAggregate(agg: ExperienceAggregate, limit: number): string[];
|
|
17
|
+
export declare function runExperienceReport(opts?: {
|
|
18
|
+
limit?: number;
|
|
19
|
+
json?: boolean;
|
|
20
|
+
}): Promise<void>;
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* src/cli/experience-report.ts
|
|
3
|
+
*
|
|
4
|
+
* `muonroi-cli usage experience` — cross-session anti-mù telemetry. Aggregates
|
|
5
|
+
* the per-session session_experience snapshots to answer the measure-before-
|
|
6
|
+
* re-architecting question: how often does compaction actually elide a tool
|
|
7
|
+
* output, and when the agent goes back for one, can it recover it?
|
|
8
|
+
*
|
|
9
|
+
* This is the data gate for the deferred anti-mù re-architecture (auto-protect /
|
|
10
|
+
* auto-rehydrate). Low elision rate or high recovery rate ⇒ the friction is rare
|
|
11
|
+
* or cognitive, not data-loss ⇒ defer. High unavailable / low recovery ⇒ real
|
|
12
|
+
* loss ⇒ justified.
|
|
13
|
+
*/
|
|
14
|
+
import { aggregateSessionExperience } from "../storage/session-experience-store.js";
|
|
15
|
+
/**
 * Format `n / d` as a whole-number percentage string.
 * Returns an em dash when the denominator is not positive, so a ratio with
 * nothing to measure is never printed as a number.
 */
function pct(n, d) {
    return d > 0 ? `${((n / d) * 100).toFixed(0)}%` : "—";
}
/** Format a number with en-US grouping separators (e.g. 12,345). */
function num(n) {
    return n.toLocaleString("en-US");
}
/**
 * Pure renderer — returns the report lines so it is unit-testable without a DB.
 *
 * @param agg   Aggregate to render. Reads `sessionCount`, `sessionsWithElision`,
 *              `sessionsWithUnavailable`, `rehydrateRecoveryRate` and the
 *              `totals` counters.
 * @param limit Session cap that was requested; only echoed in the heading.
 * @returns The report as an array of lines (no trailing newlines).
 */
export function renderExperienceAggregate(agg, limit) {
    const t = agg.totals;
    const rehydrated = t.rehydratedCache + t.rehydratedDisk + t.rehydratedEe;
    // Denominator of the recovery rate as described in the report itself:
    // rehydrated / (rehydrated + unavailable).
    const recoveryAttempts = rehydrated + t.unavailable;
    const out = [];
    out.push("");
    out.push(`Session-experience aggregate — latest ${agg.sessionCount} session(s) with a snapshot (cap ${limit})`);
    out.push("─".repeat(72));
    if (agg.sessionCount === 0) {
        out.push("No session_experience snapshots recorded yet.");
        out.push("Run some real (non-meta) sessions, then re-check — compaction only");
        out.push("persists a snapshot once it actually elides / rehydrates something.");
        return out;
    }
    out.push(`Sessions with compaction elision: ${agg.sessionsWithElision} (${pct(agg.sessionsWithElision, agg.sessionCount)})`);
    out.push(`Sessions hitting needed-but-unavail: ${agg.sessionsWithUnavailable} (${pct(agg.sessionsWithUnavailable, agg.sessionCount)})`);
    out.push("");
    out.push("Totals across those sessions:");
    out.push(` Compactions fired: ${num(t.compactions)}`);
    out.push(` Tool outputs elided: ${num(t.elided)} (${num(t.totalElidedChars)} chars)`);
    out.push(` Rehydrated via ee_query: ${num(rehydrated)} (cache=${t.rehydratedCache} disk=${t.rehydratedDisk} ee=${t.rehydratedEe})`);
    out.push(` Needed-but-unavailable: ${num(t.unavailable)}`);
    out.push(` EE timeouts / errors: ${num(t.eeTimeouts)} / ${num(t.eeErrors)}`);
    out.push("");
    // With zero rehydrate attempts the rate is undefined. Print an em dash (the
    // same convention pct() uses) instead of a percentage that would read as a
    // measured result.
    const recoveryText = recoveryAttempts > 0
        ? `${(agg.rehydrateRecoveryRate * 100).toFixed(0)}%`
        : "—";
    out.push(`Rehydrate recovery rate: ${recoveryText} (rehydrated / (rehydrated + unavailable))`);
    out.push("");
    // Decision signal for the deferred re-architecture.
    out.push("Re-architecture decision signal:");
    if (t.elided === 0) {
        out.push(" • Compaction has not elided anything — friction is not occurring. DEFER.");
    }
    else {
        const elisionRate = agg.sessionsWithElision / agg.sessionCount;
        if (elisionRate < 0.2) {
            out.push(` • Elision bites in only ${pct(agg.sessionsWithElision, agg.sessionCount)} of sessions — rare. Likely DEFER.`);
        }
        if (agg.rehydrateRecoveryRate >= 0.9 || t.unavailable === 0) {
            out.push(" • Recovery rate high / no unavailable — manual rehydrate works; friction is cognitive, not data-loss. Manifest+keepLast likely enough.");
        }
        else {
            out.push(` • Recovery rate ${(agg.rehydrateRecoveryRate * 100).toFixed(0)}% with ${num(t.unavailable)} unrecoverable — real data loss. Auto-protect/auto-rehydrate JUSTIFIED.`);
        }
    }
    return out;
}
|
|
66
|
+
/**
 * CLI entry point for the experience report.
 *
 * Aggregates up to `opts.limit` sessions (falls back to 100 when the limit is
 * missing, zero, negative or NaN) and writes the result to stdout: pretty
 * JSON when `opts.json` is set, otherwise the rendered text report, one line
 * per write.
 */
export async function runExperienceReport(opts = {}) {
    const requested = opts.limit;
    const limit = requested && requested > 0 ? requested : 100;
    const agg = aggregateSessionExperience(limit);
    if (!opts.json) {
        const lines = renderExperienceAggregate(agg, limit);
        for (let i = 0; i < lines.length; i++) {
            process.stdout.write(`${lines[i]}\n`);
        }
        return;
    }
    process.stdout.write(`${JSON.stringify(agg, null, 2)}\n`);
}
|
|
76
|
+
//# sourceMappingURL=experience-report.js.map
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* experience-report renderer — the cross-session decision signal that gates the
|
|
3
|
+
* deferred anti-mù auto-protect/auto-rehydrate re-architecture.
|
|
4
|
+
*/
|
|
5
|
+
import { describe, expect, it } from "vitest";
|
|
6
|
+
import { renderExperienceAggregate } from "./experience-report.js";
|
|
7
|
+
/**
 * Test fixture: a counters object with every field zeroed, overlaid with the
 * fields supplied in `p`.
 */
function counts(p = {}) {
    const zeroed = {
        compactions: 0,
        elided: 0,
        totalElidedChars: 0,
        rehydratedCache: 0,
        rehydratedDisk: 0,
        rehydratedEe: 0,
        unavailable: 0,
        eeTimeouts: 0,
        eeErrors: 0,
    };
    return Object.assign(zeroed, p);
}
|
|
21
|
+
/**
 * Test fixture: an aggregate with defaults (one session, no elision, no
 * unavailable, recovery rate 1, no per-session rows) that `p` can override.
 * `p.totals` is passed through `counts` so partial totals are zero-filled.
 */
function agg(p = {}) {
    const sessionCount = p.sessionCount ?? 1;
    const sessionsWithElision = p.sessionsWithElision ?? 0;
    const sessionsWithUnavailable = p.sessionsWithUnavailable ?? 0;
    const totals = counts(p.totals);
    const rehydrateRecoveryRate = p.rehydrateRecoveryRate ?? 1;
    const perSession = p.perSession ?? [];
    return {
        sessionCount,
        sessionsWithElision,
        sessionsWithUnavailable,
        totals,
        rehydrateRecoveryRate,
        perSession,
    };
}
|
|
31
|
+
// Covers the four outcomes of the renderer: no data, nothing elided (DEFER),
// high recovery (cognitive friction only), and low recovery (JUSTIFIED).
describe("renderExperienceAggregate", () => {
    // Render with the cap used throughout these tests and flatten to one string.
    const renderText = (aggregate) => renderExperienceAggregate(aggregate, 100).join("\n");
    it("reports the no-data case clearly", () => {
        const rendered = renderText(agg({ sessionCount: 0 }));
        expect(rendered).toContain("No session_experience snapshots recorded yet");
    });
    it("signals DEFER when nothing was ever elided", () => {
        const rendered = renderText(agg({ sessionCount: 5, totals: { compactions: 3 } }));
        expect(rendered).toContain("has not elided anything");
        expect(rendered).toContain("DEFER");
    });
    it("signals cognitive-not-data-loss when recovery is high / no unavailable", () => {
        const fixture = agg({
            sessionCount: 4,
            sessionsWithElision: 3,
            rehydrateRecoveryRate: 1,
            totals: { compactions: 5, elided: 20, rehydratedCache: 8 },
        });
        const rendered = renderText(fixture);
        expect(rendered).toMatch(/cognitive, not data-loss/);
        expect(rendered).not.toMatch(/JUSTIFIED/);
    });
    it("signals re-architecture JUSTIFIED when recovery is low with unrecoverable artifacts", () => {
        const fixture = agg({
            sessionCount: 6,
            sessionsWithElision: 5,
            sessionsWithUnavailable: 4,
            rehydrateRecoveryRate: 0.3,
            totals: { compactions: 10, elided: 40, rehydratedEe: 3, unavailable: 7 },
        });
        const rendered = renderText(fixture);
        expect(rendered).toContain("real data loss");
        expect(rendered).toContain("JUSTIFIED");
    });
});
|
|
63
|
+
//# sourceMappingURL=experience-report.test.js.map
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const PACKAGE_VERSION = "1.6.0";
|
|
1
|
+
export declare const PACKAGE_VERSION = "1.6.2";
|
|
2
2
|
export declare const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/sync-version.cjs. DO NOT EDIT BY HAND.
|
|
2
2
|
// Sourced from package.json at build time so it survives bun --compile bundling.
|
|
3
|
-
export const PACKAGE_VERSION = "1.6.0";
|
|
3
|
+
export const PACKAGE_VERSION = "1.6.2";
|
|
4
4
|
export const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
|
|
5
5
|
//# sourceMappingURL=version.js.map
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
2
|
import { scoreComplexity } from "../complexity.js";
|
|
3
|
-
import { buildDirective } from "../directives.js";
|
|
3
|
+
import { buildDirective, mentionsEcosystemScope } from "../directives.js";
|
|
4
4
|
import { detectGrayAreas } from "../gray-areas.js";
|
|
5
5
|
describe("buildDirective", () => {
|
|
6
6
|
it("emits a blocking heavy directive with mandatory steps", () => {
|
|
@@ -66,6 +66,29 @@ describe("buildDirective", () => {
|
|
|
66
66
|
expect(out.blocking).toBe(false);
|
|
67
67
|
expect(out.text.length).toBeLessThan(300);
|
|
68
68
|
});
|
|
69
|
+
it("appends the muonroi-docs nudge for an ecosystem question (session 41ccfeb2ceee turn 1)", () => {
|
|
70
|
+
const complexity = scoreComplexity("bạn hiểu thế nào về ecosystem muonroi nói chung");
|
|
71
|
+
const out = buildDirective({ complexity, phase: null, grayAreas: [], informational: true, ecosystem: true });
|
|
72
|
+
expect(out.text).toMatch(/QUESTION \/ explanatory/); // still the human-facing question directive
|
|
73
|
+
expect(out.text).toMatch(/ECOSYSTEM SCOPE/);
|
|
74
|
+
expect(out.text).toMatch(/muonroi-docs MCP is the AUTHORITATIVE source|AUTHORITATIVE source/);
|
|
75
|
+
expect(out.text).toMatch(/call it FIRST/i);
|
|
76
|
+
});
|
|
77
|
+
it("does NOT append the ecosystem nudge for a plain question", () => {
|
|
78
|
+
const complexity = scoreComplexity("how does this CLI affect you?");
|
|
79
|
+
const out = buildDirective({ complexity, phase: null, grayAreas: [], informational: true });
|
|
80
|
+
expect(out.text).not.toMatch(/ECOSYSTEM SCOPE/);
|
|
81
|
+
});
|
|
82
|
+
it("mentionsEcosystemScope is tight: ecosystem/BB wording yes, bare CLI-internals no", () => {
|
|
83
|
+
// Fires on genuine ecosystem scope (the case muonroi-docs exists to serve)…
|
|
84
|
+
expect(mentionsEcosystemScope("ecosystem muonroi nói chung và muonroi-cli nói riêng")).toBe(true);
|
|
85
|
+
expect(mentionsEcosystemScope("hệ sinh thái muonroi gồm những gì")).toBe(true);
|
|
86
|
+
expect(mentionsEcosystemScope("how does the building-block rule engine work")).toBe(true);
|
|
87
|
+
// …but NOT on a muonroi-cli internals question that merely names the product,
|
|
88
|
+
// which would wrongly steer toward .NET package docs.
|
|
89
|
+
expect(mentionsEcosystemScope("how does muonroi-cli compaction work")).toBe(false);
|
|
90
|
+
expect(mentionsEcosystemScope("fix the off-by-one in the router")).toBe(false);
|
|
91
|
+
});
|
|
69
92
|
it("renders the recommended option first in gray-area block", () => {
|
|
70
93
|
const prompt = "redo everything from scratch";
|
|
71
94
|
const complexity = scoreComplexity(prompt);
|
|
@@ -31,6 +31,21 @@ export interface DirectiveInput {
|
|
|
31
31
|
* buildDirective emits a human-facing question directive instead.
|
|
32
32
|
*/
|
|
33
33
|
informational?: boolean;
|
|
34
|
+
/**
|
|
35
|
+
* True when the turn is about the Muonroi ECOSYSTEM (the whole platform, BB/
|
|
36
|
+
* .NET packages, building-block, open-core boundary, setup/install) rather than
|
|
37
|
+
* muonroi-cli's own TS internals. When set, buildDirective appends a nudge to
|
|
38
|
+
* consult the authoritative muonroi-docs MCP first. Computed by the caller via
|
|
39
|
+
* mentionsEcosystemScope so a CLI-internals question (which merely contains the
|
|
40
|
+
* word "muonroi") does NOT misfire toward .NET docs.
|
|
41
|
+
*
|
|
42
|
+
* Live miss (session 41ccfeb2ceee turn 1): "bạn hiểu thế nào về ecosystem
|
|
43
|
+
* muonroi…" — muonroi-docs WAS in the toolset (smart-filter kept it) but the
|
|
44
|
+
* question directive steered the agent to read/grep local files, so it answered
|
|
45
|
+
* "no comprehensive ecosystem description in the files read" instead of querying
|
|
46
|
+
* the shipped authoritative source.
|
|
47
|
+
*/
|
|
48
|
+
ecosystem?: boolean;
|
|
34
49
|
}
|
|
35
50
|
export interface DirectiveOutput {
|
|
36
51
|
text: string;
|
|
@@ -38,4 +53,11 @@ export interface DirectiveOutput {
|
|
|
38
53
|
/** True when the directive forbids the agent from acting before clarifying. */
|
|
39
54
|
blocking: boolean;
|
|
40
55
|
}
|
|
56
|
+
export declare function mentionsEcosystemScope(message: string): boolean;
|
|
57
|
+
/**
|
|
58
|
+
* Appended to any directive when the turn is ecosystem-scoped. Phrased
|
|
59
|
+
* conditionally ("if … available") so it is harmless when muonroi-docs is not
|
|
60
|
+
* configured — the model simply finds no such tool and falls back to local files.
|
|
61
|
+
*/
|
|
62
|
+
export declare const ECOSYSTEM_DOCS_NUDGE: string;
|
|
41
63
|
export declare function buildDirective(input: DirectiveInput): DirectiveOutput;
|
|
@@ -15,6 +15,27 @@
|
|
|
15
15
|
* user-facing prompts into the user's language at render time.
|
|
16
16
|
*/
|
|
17
17
|
const HEADER = "[gsd-native]";
|
|
18
|
+
/**
 * High-precision predicate: is this turn about the Muonroi ECOSYSTEM (where the
 * muonroi-docs MCP is the right source), as opposed to muonroi-cli internals?
 * Deliberately TIGHTER than smart-filter's hasEcosystemSignal — that one keeps
 * the server (over-keeping costs only tokens), but a behavioural "call docs
 * FIRST" nudge must not fire on every "muonroi" mention or it misdirects
 * CLI-internals questions toward .NET package docs. EN + VI.
 *
 * The accented Vietnamese alternative ("hệ sinh thái") is written with
 * code-point escapes so the pattern is always in precomposed (NFC) form,
 * whatever encoding this file is saved or bundled with.
 */
const ECOSYSTEM_SCOPE_RE = /\becosystem\b|h\u1EC7\s*sinh\s*th\u00E1i|he\s*sinh\s*thai|building[-\s]?block|open[-\s]?core|rule\s*engine|decision\s*table|\bnuget\b/i;
/**
 * Test a user message against the ecosystem-scope pattern.
 *
 * The message is normalised to NFC first: the same Vietnamese text can arrive
 * precomposed or decomposed (base letter + combining marks), and without
 * normalisation the decomposed form would not match the accented alternative.
 *
 * @param message User message; coerced to string the same way `RegExp.test` would.
 * @returns True when the message mentions ecosystem-scope wording.
 */
export function mentionsEcosystemScope(message) {
    return ECOSYSTEM_SCOPE_RE.test(String(message).normalize("NFC"));
}
|
|
30
|
+
/**
 * Two-line nudge that buildDirective appends to a directive on
 * ecosystem-scoped turns. The wording is conditional ("If the muonroi-docs MCP
 * is available"), so it stays harmless when that server is not configured.
 */
export const ECOSYSTEM_DOCS_NUDGE =
    `${HEADER} ECOSYSTEM SCOPE — this turn concerns the Muonroi ecosystem (platform overview, BB/.NET packages, building-block, open-core boundary, setup).` +
    "\n" +
    "If the muonroi-docs MCP is available, it is the AUTHORITATIVE source — call it FIRST (docs_search / setup_guide / bb_recipe_list / bb_package_describe), THEN ground with local files. Do NOT characterize the ecosystem from local repo files alone.";
|
|
18
39
|
function renderGrayAreas(qs) {
|
|
19
40
|
if (qs.length === 0)
|
|
20
41
|
return " (no gray areas detected — confirm the request is fully specified before proceeding)";
|
|
@@ -94,16 +115,19 @@ function buildQuick(input) {
|
|
|
94
115
|
/**
 * Build the directive for a turn.
 *
 * Informational/meta prompts always get the human-facing question directive
 * (non-blocking, tier taken from the complexity score). Otherwise the tier
 * picks the builder: "heavy" is blocking, "standard" and everything else
 * ("quick") are not. When `input.ecosystem` is truthy the docs-first nudge is
 * appended on a new line, whichever directive was chosen.
 */
export function buildDirective(input) {
    let base;
    if (input.informational) {
        // Answering a human: never apply the implement/verify scaffold.
        base = { text: buildQuestion(), tier: input.complexity.tier, blocking: false };
    }
    else if (input.complexity.tier === "heavy") {
        base = { text: buildHeavy(input), tier: "heavy", blocking: true };
    }
    else if (input.complexity.tier === "standard") {
        base = { text: buildStandard(input), tier: "standard", blocking: false };
    }
    else {
        base = { text: buildQuick(input), tier: "quick", blocking: false };
    }
    if (!input.ecosystem) {
        return base;
    }
    // Ecosystem-scoped turn (question OR task): append the docs-first nudge.
    return { ...base, text: `${base.text}\n${ECOSYSTEM_DOCS_NUDGE}` };
}
|
|
109
133
|
//# sourceMappingURL=directives.js.map
|
package/dist/src/index.js
CHANGED
|
@@ -1319,6 +1319,15 @@ usage
|
|
|
1319
1319
|
const { runCostForensics } = await import("./cli/cost-forensics.js");
|
|
1320
1320
|
await runCostForensics({ prefix: sessionPrefix, json: opts.json });
|
|
1321
1321
|
});
|
|
1322
|
+
usage
|
|
1323
|
+
.command("experience")
|
|
1324
|
+
.description("Cross-session anti-mù telemetry: how often compaction elides tool outputs and whether the agent recovers them (gates the deferred auto-protect re-architecture).")
|
|
1325
|
+
.option("--limit <n>", "Number of most-recent sessions to aggregate", "100")
|
|
1326
|
+
.option("--json", "Emit aggregate as JSON")
|
|
1327
|
+
.action(async (opts) => {
|
|
1328
|
+
const { runExperienceReport } = await import("./cli/experience-report.js");
|
|
1329
|
+
await runExperienceReport({ limit: parseInt(opts.limit, 10) || 100, json: opts.json });
|
|
1330
|
+
});
|
|
1322
1331
|
usage
|
|
1323
1332
|
.command("security-audit")
|
|
1324
1333
|
.description("Security posture: yolo/permission overrides, high-risk cmds, shuru audits + cost (from decision-log events)")
|
|
@@ -42,7 +42,7 @@ describe("acquireMcpTools — cross-turn client pool", () => {
|
|
|
42
42
|
expect(Object.keys(b2.tools)).toContain("mcp_fs__ping");
|
|
43
43
|
expect(connectOneServer).toHaveBeenCalledTimes(2); // retried after eviction
|
|
44
44
|
});
|
|
45
|
-
it("self-heals: a
|
|
45
|
+
it("self-heals: a connection error reconnects ONCE in-turn; a permanently-dead server surfaces the error (no loop)", async () => {
|
|
46
46
|
connectOneServer.mockImplementation(async (s) => ({
|
|
47
47
|
tools: {
|
|
48
48
|
[`mcp_${s.id}__boom`]: {
|
|
@@ -55,9 +55,59 @@ describe("acquireMcpTools — cross-turn client pool", () => {
|
|
|
55
55
|
}));
|
|
56
56
|
const b1 = await acquireMcpTools([srv("fs")]);
|
|
57
57
|
await expect(b1.tools["mcp_fs__boom"].execute({}, {})).rejects.toThrow(/transport closed/);
|
|
58
|
-
|
|
59
|
-
expect(
|
|
60
|
-
|
|
58
|
+
// Initial connect + exactly ONE in-turn reconnect — the retry is not looped.
|
|
59
|
+
expect(connectOneServer).toHaveBeenCalledTimes(2);
|
|
60
|
+
});
|
|
61
|
+
it("in-turn reconnect: a mid-turn transport drop is reconnected and the call retried once — succeeds", async () => {
|
|
62
|
+
let gen = 0;
|
|
63
|
+
connectOneServer.mockImplementation(async (s) => {
|
|
64
|
+
gen += 1;
|
|
65
|
+
const dead = gen === 1; // first connect drops mid-call; the reconnect is healthy
|
|
66
|
+
return {
|
|
67
|
+
tools: {
|
|
68
|
+
[`mcp_${s.id}__ping`]: {
|
|
69
|
+
execute: async () => {
|
|
70
|
+
if (dead)
|
|
71
|
+
throw new Error("Attempted to send a request from a closed client");
|
|
72
|
+
return "pong";
|
|
73
|
+
},
|
|
74
|
+
},
|
|
75
|
+
},
|
|
76
|
+
client: { close: async () => { } },
|
|
77
|
+
};
|
|
78
|
+
});
|
|
79
|
+
const b = await acquireMcpTools([srv("docs")]);
|
|
80
|
+
const result = await b.tools["mcp_docs__ping"].execute({}, {});
|
|
81
|
+
expect(result).toBe("pong"); // recovered within the SAME turn
|
|
82
|
+
expect(connectOneServer).toHaveBeenCalledTimes(2); // drop + one reconnect
|
|
83
|
+
});
|
|
84
|
+
it("a parallel burst on a dropped client shares ONE reconnect; every call retries and succeeds", async () => {
|
|
85
|
+
// Repro of session 41ccfeb2ceee: a 14-call burst at muonroi-docs dropped the
|
|
86
|
+
// HTTP socket after the first calls; previously the rest all threw
|
|
87
|
+
// "Attempted to send a request from a closed client". They must now share a
|
|
88
|
+
// single reconnect and all recover.
|
|
89
|
+
let gen = 0;
|
|
90
|
+
connectOneServer.mockImplementation(async (s) => {
|
|
91
|
+
gen += 1;
|
|
92
|
+
const dead = gen === 1;
|
|
93
|
+
return {
|
|
94
|
+
tools: {
|
|
95
|
+
[`mcp_${s.id}__ping`]: {
|
|
96
|
+
execute: async () => {
|
|
97
|
+
if (dead)
|
|
98
|
+
throw new Error("The socket connection was closed unexpectedly");
|
|
99
|
+
return "pong";
|
|
100
|
+
},
|
|
101
|
+
},
|
|
102
|
+
},
|
|
103
|
+
client: { close: async () => { } },
|
|
104
|
+
};
|
|
105
|
+
});
|
|
106
|
+
const b = await acquireMcpTools([srv("docs")]);
|
|
107
|
+
const tool = b.tools["mcp_docs__ping"];
|
|
108
|
+
const results = await Promise.all(Array.from({ length: 14 }, () => tool.execute({}, {})));
|
|
109
|
+
expect(results.every((r) => r === "pong")).toBe(true);
|
|
110
|
+
expect(connectOneServer).toHaveBeenCalledTimes(2); // 14 failures → exactly ONE shared reconnect
|
|
61
111
|
});
|
|
62
112
|
it("keys by cwd/config — a different command reconnects rather than reusing", async () => {
|
|
63
113
|
connectOneServer.mockImplementation(async (s) => connected(s.id));
|
|
@@ -15,8 +15,15 @@
|
|
|
15
15
|
*
|
|
16
16
|
* Self-healing: a server that fails to connect is evicted (not cached as a
|
|
17
17
|
* rejection), so a later turn retries. A live client whose child process dies
|
|
18
|
-
* later is evicted when one of its tool calls hits a transport/connection error
|
|
19
|
-
*
|
|
18
|
+
* later is evicted when one of its tool calls hits a transport/connection error.
|
|
19
|
+
*
|
|
20
|
+
* In-turn reconnect: a transport that drops MID-TURN (live: muonroi-docs HTTP
|
|
21
|
+
* socket closed after 2 of a 14-call parallel burst, session 41ccfeb2ceee —
|
|
22
|
+
* every remaining call then threw "Attempted to send a request from a closed
|
|
23
|
+
* client") is reconnected and the failing call is retried ONCE against the fresh
|
|
24
|
+
* client, instead of only reconnecting on the NEXT turn. Concurrent failures in
|
|
25
|
+
* the same burst share one reconnect (the pool dedupes by key); eviction is
|
|
26
|
+
* race-safe so a fresh reconnect is never torn down by a sibling's late failure.
|
|
20
27
|
*/
|
|
21
28
|
import type { McpServerConfig } from "../utils/settings.js";
|
|
22
29
|
import { type McpBuildOptions, type McpToolBundle } from "./runtime.js";
|
|
@@ -15,8 +15,15 @@
|
|
|
15
15
|
*
|
|
16
16
|
* Self-healing: a server that fails to connect is evicted (not cached as a
|
|
17
17
|
* rejection), so a later turn retries. A live client whose child process dies
|
|
18
|
-
* later is evicted when one of its tool calls hits a transport/connection error
|
|
19
|
-
*
|
|
18
|
+
* later is evicted when one of its tool calls hits a transport/connection error.
|
|
19
|
+
*
|
|
20
|
+
* In-turn reconnect: a transport that drops MID-TURN (live: muonroi-docs HTTP
|
|
21
|
+
* socket closed after 2 of a 14-call parallel burst, session 41ccfeb2ceee —
|
|
22
|
+
* every remaining call then threw "Attempted to send a request from a closed
|
|
23
|
+
* client") is reconnected and the failing call is retried ONCE against the fresh
|
|
24
|
+
* client, instead of only reconnecting on the NEXT turn. Concurrent failures in
|
|
25
|
+
* the same burst share one reconnect (the pool dedupes by key); eviction is
|
|
26
|
+
* race-safe so a fresh reconnect is never torn down by a sibling's late failure.
|
|
20
27
|
*/
|
|
21
28
|
import { connectOneServer, getMcpBuildDeadlineMs, } from "./runtime.js";
|
|
22
29
|
import { validateMcpServerConfig } from "./validate.js";
|
|
@@ -38,16 +45,20 @@ function serverKey(s) {
|
|
|
38
45
|
cwd: s.cwd ?? process.cwd(),
|
|
39
46
|
});
|
|
40
47
|
}
|
|
41
|
-
/**
|
|
42
|
-
|
|
48
|
+
/**
 * Remove the pooled entry for `key`, but only while that entry still holds
 * `dead` — the exact connected server a failing tool call was bound to.
 *
 * If the entry is missing or already holds a different connection (for
 * example a sibling call has reconnected), nothing is touched. Otherwise the
 * entry is deleted, the optional `cleanup` hook is invoked, and the client is
 * closed with any close error ignored.
 */
function evictDeadServer(key, dead) {
    const current = pool.get(key);
    const stillHoldsDead = Boolean(current) && current.connected === dead;
    if (!stillHoldsDead) {
        return;
    }
    pool.delete(key);
    dead.cleanup?.();
    // Best-effort close; a failure here must not mask the original error.
    void dead.client.close().catch(() => { });
}
|
|
52
63
|
/** Heuristic: does this error mean the MCP transport/child is gone? */
|
|
53
64
|
function isConnectionError(e) {
|
|
@@ -69,22 +80,35 @@ function getOrConnect(server, opts) {
|
|
|
69
80
|
const promise = connectOneServer(server, opts);
|
|
70
81
|
const entry = { key, promise };
|
|
71
82
|
pool.set(key, entry);
|
|
83
|
+
promise.then(
|
|
84
|
+
// Record the resolved server so evictDeadServer can match by identity.
|
|
85
|
+
(cs) => {
|
|
86
|
+
entry.connected = cs;
|
|
87
|
+
},
|
|
72
88
|
// Cache a rejection only transiently: evict so the next turn retries rather
|
|
73
89
|
// than returning the same failed promise forever.
|
|
74
|
-
|
|
90
|
+
() => {
|
|
75
91
|
if (pool.get(key) === entry)
|
|
76
92
|
pool.delete(key);
|
|
77
93
|
});
|
|
78
94
|
return promise;
|
|
79
95
|
}
|
|
80
96
|
/**
|
|
81
|
-
* Wrap each tool's execute so a transport/connection failure
|
|
82
|
-
*
|
|
83
|
-
*
|
|
97
|
+
* Wrap each tool's execute so a transport/connection failure is recovered
|
|
98
|
+
* in-turn: evict the dead pooled client (race-safe), reconnect once, and retry
|
|
99
|
+
* the SAME call against the fresh client. Before this, a mid-turn drop only
|
|
100
|
+
* reconnected on the NEXT turn, so the rest of the current turn's batch all
|
|
101
|
+
* failed with "Attempted to send a request from a closed client". The MCP child
|
|
102
|
+
* may also die after a successful connect; the eviction keeps the pool clean for
|
|
103
|
+
* later turns either way.
|
|
104
|
+
*
|
|
105
|
+
* The retry is fired at most ONCE per call (no loop): if the fresh client also
|
|
106
|
+
* drops, or the reconnect itself fails, the original transport error propagates
|
|
107
|
+
* so the model sees a real failure rather than hanging.
|
|
84
108
|
*/
|
|
85
|
-
function wrapForSelfHeal(tools, key) {
|
|
109
|
+
function wrapForSelfHeal(cs, key, server, opts) {
|
|
86
110
|
const out = {};
|
|
87
|
-
for (const [name, tool] of Object.entries(tools)) {
|
|
111
|
+
for (const [name, tool] of Object.entries(cs.tools)) {
|
|
88
112
|
const base = tool.execute;
|
|
89
113
|
if (typeof base !== "function") {
|
|
90
114
|
out[name] = tool;
|
|
@@ -97,11 +121,25 @@ function wrapForSelfHeal(tools, key) {
|
|
|
97
121
|
return await base(args, options);
|
|
98
122
|
}
|
|
99
123
|
catch (e) {
|
|
100
|
-
if (isConnectionError(e))
|
|
101
|
-
|
|
102
|
-
|
|
124
|
+
if (!isConnectionError(e))
|
|
125
|
+
throw e;
|
|
126
|
+
console.error(`[mcp:pool] '${name}' hit a connection error — reconnecting '${server.id}' in-turn and retrying once: ${e instanceof Error ? e.message : String(e)}`);
|
|
127
|
+
// Evict THIS dead client (no-op if a sibling already reconnected), then
|
|
128
|
+
// reconnect. getOrConnect dedupes by key, so a burst shares one reconnect.
|
|
129
|
+
evictDeadServer(key, cs);
|
|
130
|
+
let fresh;
|
|
131
|
+
try {
|
|
132
|
+
fresh = await getOrConnect(server, opts);
|
|
133
|
+
}
|
|
134
|
+
catch (reconnectErr) {
|
|
135
|
+
console.error(`[mcp:pool] in-turn reconnect for '${server.id}' failed; surfacing original error: ${reconnectErr instanceof Error ? reconnectErr.message : String(reconnectErr)}`);
|
|
136
|
+
throw e;
|
|
103
137
|
}
|
|
104
|
-
|
|
138
|
+
const freshTools = fresh.tools;
|
|
139
|
+
const freshExec = freshTools[name]?.execute;
|
|
140
|
+
if (typeof freshExec !== "function")
|
|
141
|
+
throw e;
|
|
142
|
+
return await freshExec(args, options);
|
|
105
143
|
}
|
|
106
144
|
},
|
|
107
145
|
};
|
|
@@ -141,13 +179,14 @@ export async function acquireMcpTools(servers, opts) {
|
|
|
141
179
|
await Promise.race([Promise.allSettled(attempts), deadline]);
|
|
142
180
|
if (deadlineTimer)
|
|
143
181
|
clearTimeout(deadlineTimer);
|
|
144
|
-
for (
|
|
182
|
+
for (let i = 0; i < slots.length; i++) {
|
|
183
|
+
const slot = slots[i];
|
|
145
184
|
if (slot.done) {
|
|
146
185
|
if (slot.error) {
|
|
147
186
|
errors.push(`${slot.label}: ${slot.error}`);
|
|
148
187
|
}
|
|
149
188
|
else if (slot.result) {
|
|
150
|
-
Object.assign(tools, wrapForSelfHeal(slot.result
|
|
189
|
+
Object.assign(tools, wrapForSelfHeal(slot.result, slot.key, enabled[i], opts));
|
|
151
190
|
}
|
|
152
191
|
}
|
|
153
192
|
else {
|