muonroi-cli 1.4.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +122 -122
- package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
- package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
- package/dist/src/agent-harness/mock-model.d.ts +11 -0
- package/dist/src/agent-harness/mock-model.js +21 -0
- package/dist/src/cli/cost-forensics.js +12 -12
- package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
- package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
- package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
- package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
- package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
- package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
- package/dist/src/council/clarifier.js +9 -1
- package/dist/src/council/debate.js +5 -1
- package/dist/src/council/decisions-lock.js +3 -3
- package/dist/src/council/index.js +12 -5
- package/dist/src/council/leader.d.ts +0 -17
- package/dist/src/council/leader.js +22 -15
- package/dist/src/council/planner.js +1 -1
- package/dist/src/council/prompts.js +63 -57
- package/dist/src/council/types.d.ts +7 -0
- package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
- package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
- package/dist/src/ee/artifact-cache.d.ts +56 -0
- package/dist/src/ee/artifact-cache.js +155 -0
- package/dist/src/ee/artifact-cache.test.d.ts +1 -0
- package/dist/src/ee/artifact-cache.test.js +69 -0
- package/dist/src/ee/auth.d.ts +9 -0
- package/dist/src/ee/auth.js +19 -0
- package/dist/src/ee/ee-onboarding.d.ts +5 -0
- package/dist/src/ee/ee-onboarding.js +76 -0
- package/dist/src/ee/search.js +7 -5
- package/dist/src/ee/search.test.d.ts +1 -0
- package/dist/src/ee/search.test.js +23 -0
- package/dist/src/generated/version.d.ts +1 -1
- package/dist/src/generated/version.js +1 -1
- package/dist/src/headless/output.js +6 -4
- package/dist/src/headless/output.test.js +4 -3
- package/dist/src/index.js +20 -1
- package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
- package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
- package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
- package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
- package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
- package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
- package/dist/src/mcp/auto-setup.js +56 -2
- package/dist/src/mcp/client-pool.d.ts +46 -0
- package/dist/src/mcp/client-pool.js +212 -0
- package/dist/src/mcp/oauth-callback.js +2 -2
- package/dist/src/mcp/parse-headers.test.js +14 -14
- package/dist/src/mcp/runtime.d.ts +28 -0
- package/dist/src/mcp/runtime.js +117 -51
- package/dist/src/mcp/self-verify-runner.d.ts +14 -0
- package/dist/src/mcp/self-verify-runner.js +38 -0
- package/dist/src/mcp/setup-guide-text.d.ts +9 -0
- package/dist/src/mcp/setup-guide-text.js +84 -0
- package/dist/src/mcp/smart-filter.js +49 -0
- package/dist/src/mcp/smoke.test.js +43 -43
- package/dist/src/mcp/tools-server.d.ts +7 -0
- package/dist/src/mcp/tools-server.js +19 -22
- package/dist/src/models/catalog.json +349 -349
- package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
- package/dist/src/ops/doctor.d.ts +3 -2
- package/dist/src/ops/doctor.js +47 -11
- package/dist/src/ops/doctor.test.js +4 -3
- package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
- package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
- package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
- package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
- package/dist/src/orchestrator/batch-turn-runner.js +7 -11
- package/dist/src/orchestrator/compaction.d.ts +2 -0
- package/dist/src/orchestrator/compaction.js +14 -1
- package/dist/src/orchestrator/compaction.test.js +25 -1
- package/dist/src/orchestrator/message-processor.js +72 -32
- package/dist/src/orchestrator/orchestrator.js +26 -0
- package/dist/src/orchestrator/prompts.d.ts +51 -0
- package/dist/src/orchestrator/prompts.js +257 -134
- package/dist/src/orchestrator/scope-ceiling.js +6 -1
- package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
- package/dist/src/orchestrator/scope-reminder.js +16 -0
- package/dist/src/orchestrator/scope-reminder.test.js +22 -1
- package/dist/src/orchestrator/stream-runner.js +23 -15
- package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
- package/dist/src/orchestrator/subagent-compactor.js +30 -8
- package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
- package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
- package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
- package/dist/src/pil/__tests__/config.test.js +1 -17
- package/dist/src/pil/__tests__/discovery.test.js +144 -11
- package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
- package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
- package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
- package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
- package/dist/src/pil/__tests__/layer6-output.test.js +158 -18
- package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
- package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
- package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
- package/dist/src/pil/agent-operating-contract.d.ts +1 -1
- package/dist/src/pil/agent-operating-contract.js +2 -0
- package/dist/src/pil/agent-operating-contract.test.js +7 -2
- package/dist/src/pil/cheap-model-playbook.js +35 -35
- package/dist/src/pil/cheap-model-workbooks.js +16 -13
- package/dist/src/pil/clarity-gate.d.ts +21 -19
- package/dist/src/pil/clarity-gate.js +26 -153
- package/dist/src/pil/config.d.ts +9 -1
- package/dist/src/pil/config.js +15 -4
- package/dist/src/pil/discovery.js +211 -136
- package/dist/src/pil/layer1-intent.d.ts +12 -0
- package/dist/src/pil/layer1-intent.js +283 -38
- package/dist/src/pil/layer1-intent.test.js +210 -4
- package/dist/src/pil/layer16-clarity.d.ts +25 -11
- package/dist/src/pil/layer16-clarity.js +19 -306
- package/dist/src/pil/layer3-ee-injection.d.ts +19 -0
- package/dist/src/pil/layer3-ee-injection.js +96 -4
- package/dist/src/pil/layer4-gsd.js +18 -6
- package/dist/src/pil/layer6-output.d.ts +2 -0
- package/dist/src/pil/layer6-output.js +151 -25
- package/dist/src/pil/llm-classify.d.ts +26 -0
- package/dist/src/pil/llm-classify.js +34 -5
- package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
- package/dist/src/pil/native-capabilities-workbook.js +82 -76
- package/dist/src/pil/pipeline.js +15 -9
- package/dist/src/pil/schema.d.ts +8 -0
- package/dist/src/pil/schema.js +12 -1
- package/dist/src/pil/task-tier-map.js +4 -0
- package/dist/src/pil/types.d.ts +11 -1
- package/dist/src/product-loop/done-gate.js +3 -3
- package/dist/src/product-loop/loop-driver.js +18 -18
- package/dist/src/product-loop/progress-snapshot.js +4 -4
- package/dist/src/providers/auth/gemini-oauth.js +6 -15
- package/dist/src/providers/auth/grok-oauth.js +6 -15
- package/dist/src/providers/auth/openai-oauth.js +6 -15
- package/dist/src/providers/mcp-vision-bridge.js +48 -48
- package/dist/src/reporter/index.js +1 -1
- package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
- package/dist/src/scaffold/bb-quality-gate.js +5 -5
- package/dist/src/scaffold/continuation-prompt.js +60 -60
- package/dist/src/scaffold/init-new.js +453 -453
- package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
- package/dist/src/self-qa/agentic-loop.js +24 -19
- package/dist/src/self-qa/spec-emitter.js +26 -23
- package/dist/src/storage/__tests__/migrations.test.js +2 -2
- package/dist/src/storage/interaction-log.js +5 -5
- package/dist/src/storage/migrations.js +122 -122
- package/dist/src/storage/sessions.js +42 -42
- package/dist/src/storage/transcript.js +91 -84
- package/dist/src/storage/usage.js +14 -14
- package/dist/src/storage/workspaces.js +12 -12
- package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
- package/dist/src/tools/__tests__/native-tools.test.js +53 -0
- package/dist/src/tools/git-safety.d.ts +61 -0
- package/dist/src/tools/git-safety.js +141 -0
- package/dist/src/tools/git-safety.test.d.ts +1 -0
- package/dist/src/tools/git-safety.test.js +111 -0
- package/dist/src/tools/native-tools.d.ts +31 -0
- package/dist/src/tools/native-tools.js +273 -0
- package/dist/src/tools/registry-ee-query.test.js +18 -1
- package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
- package/dist/src/tools/registry-git-safety.test.js +92 -0
- package/dist/src/tools/registry.js +52 -6
- package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
- package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
- package/dist/src/ui/app.js +0 -0
- package/dist/src/ui/components/message-view.js +4 -1
- package/dist/src/ui/components/structured-response-view.js +7 -3
- package/dist/src/ui/components/tool-group.js +7 -1
- package/dist/src/ui/markdown-render.d.ts +41 -0
- package/dist/src/ui/markdown-render.js +223 -0
- package/dist/src/ui/markdown.d.ts +10 -0
- package/dist/src/ui/markdown.js +12 -35
- package/dist/src/ui/slash/council-inspect.js +4 -4
- package/dist/src/ui/slash/export.js +4 -4
- package/dist/src/ui/utils/text.d.ts +8 -0
- package/dist/src/ui/utils/text.js +16 -0
- package/dist/src/ui/utils/text.test.d.ts +1 -0
- package/dist/src/ui/utils/text.test.js +23 -0
- package/dist/src/usage/ledger.js +48 -15
- package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
- package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
- package/dist/src/utils/clipboard-image.js +23 -23
- package/dist/src/utils/open-url.d.ts +56 -0
- package/dist/src/utils/open-url.js +58 -0
- package/dist/src/utils/open-url.test.d.ts +1 -0
- package/dist/src/utils/open-url.test.js +86 -0
- package/dist/src/utils/settings.d.ts +12 -0
- package/dist/src/utils/settings.js +48 -0
- package/dist/src/utils/side-question.js +2 -2
- package/dist/src/utils/skills.js +3 -3
- package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
- package/dist/src/verify/environment.js +2 -1
- package/package.json +1 -1
- package/dist/src/pil/layer16-clarity.test.js +0 -31
- /package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0
|
@@ -10,7 +10,14 @@ const mockHandler = {
|
|
|
10
10
|
showAcceptance: vi.fn().mockResolvedValue("accept"),
|
|
11
11
|
};
|
|
12
12
|
describe("runDiscovery()", () => {
|
|
13
|
-
it("
|
|
13
|
+
it("proceeds without interview when the model proposes no questions", async () => {
|
|
14
|
+
// Phase 2: the model is the sole ask-decider. An empty proposer result means
|
|
15
|
+
// "no gray area" → no interview, no fabricated [Discovery] outcome.
|
|
16
|
+
const handler = {
|
|
17
|
+
askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text: "x", kind: "choice" }),
|
|
18
|
+
showAcceptance: vi.fn().mockResolvedValue("accept"),
|
|
19
|
+
};
|
|
20
|
+
const proposer = vi.fn().mockResolvedValue([]);
|
|
14
21
|
const result = await runDiscovery("fix TypeError in src/auth/login.ts:42", {
|
|
15
22
|
taskType: "debug",
|
|
16
23
|
confidence: 0.9,
|
|
@@ -18,15 +25,86 @@ describe("runDiscovery()", () => {
|
|
|
18
25
|
domain: "typescript",
|
|
19
26
|
outputStyle: "balanced",
|
|
20
27
|
intentKind: "task",
|
|
21
|
-
}, process.cwd(), null);
|
|
28
|
+
}, process.cwd(), handler, null, proposer);
|
|
29
|
+
expect(proposer).toHaveBeenCalled();
|
|
22
30
|
expect(result.interviewed).toBe(false);
|
|
23
31
|
expect(result.accepted).toBe(true);
|
|
32
|
+
expect(handler.askQuestion).not.toHaveBeenCalled();
|
|
24
33
|
});
|
|
25
|
-
it("
|
|
34
|
+
it("does NOT interview (and never fabricates regex questions) when no proposer is wired", async () => {
|
|
35
|
+
// Phase 2 fail-loud: an interactive turn missing a proposer logs and proceeds
|
|
36
|
+
// WITHOUT an interview — it must never fall back to keyword-generated gaps.
|
|
37
|
+
const handler = {
|
|
38
|
+
askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text: "x", kind: "choice" }),
|
|
39
|
+
showAcceptance: vi.fn().mockResolvedValue("accept"),
|
|
40
|
+
};
|
|
41
|
+
const result = await runDiscovery("fix auth", // vague — old regex gate would have asked a scope question
|
|
42
|
+
{
|
|
43
|
+
taskType: "debug",
|
|
44
|
+
confidence: 0.6,
|
|
45
|
+
complexity: "low",
|
|
46
|
+
domain: "typescript",
|
|
47
|
+
outputStyle: null,
|
|
48
|
+
intentKind: "task",
|
|
49
|
+
}, process.cwd(), handler, null, null);
|
|
50
|
+
expect(result.interviewed).toBe(false);
|
|
51
|
+
expect(result.accepted).toBe(true);
|
|
52
|
+
expect(handler.askQuestion).not.toHaveBeenCalled();
|
|
53
|
+
});
|
|
54
|
+
it("surfaces the model's reason + recommends in the interview askcard", async () => {
|
|
55
|
+
const askQuestion = vi.fn().mockResolvedValue({ questionId: "q1", text: "OAuth", kind: "choice" });
|
|
56
|
+
const handler = {
|
|
57
|
+
askQuestion,
|
|
58
|
+
showAcceptance: vi.fn().mockResolvedValue("accept"),
|
|
59
|
+
};
|
|
60
|
+
const proposer = vi
|
|
61
|
+
.fn()
|
|
62
|
+
.mockResolvedValue(["Which auth method? [MODEL RECS: OAuth | API keys] [WHY: changes the whole token flow]"]);
|
|
63
|
+
await runDiscovery("add authentication", {
|
|
64
|
+
taskType: "generate",
|
|
65
|
+
confidence: 0.6,
|
|
66
|
+
complexity: "low",
|
|
67
|
+
domain: null,
|
|
68
|
+
outputStyle: null,
|
|
69
|
+
intentKind: "task",
|
|
70
|
+
}, process.cwd(), handler, null, proposer);
|
|
71
|
+
expect(askQuestion).toHaveBeenCalled();
|
|
72
|
+
const card = askQuestion.mock.calls[0][0];
|
|
73
|
+
// Model's WHY drives the askcard context; recommends drive the options.
|
|
74
|
+
expect(card.context).toBe("changes the whole token flow");
|
|
75
|
+
expect(card.question).toBe("Which auth method?");
|
|
76
|
+
const labels = (card.options ?? []).map((o) => o.label);
|
|
77
|
+
expect(labels).toContain("OAuth");
|
|
78
|
+
expect(labels).toContain("API keys");
|
|
79
|
+
expect(card.defaultIndex).toBe(0); // first recommend = recommended default
|
|
80
|
+
});
|
|
81
|
+
it("skips all discovery when the user explicitly says don't ask (EN + VI)", async () => {
|
|
82
|
+
const handler = {
|
|
83
|
+
askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text: "x", kind: "choice" }),
|
|
84
|
+
showAcceptance: vi.fn().mockResolvedValue("accept"),
|
|
85
|
+
};
|
|
86
|
+
const l1 = {
|
|
87
|
+
taskType: "analyze",
|
|
88
|
+
confidence: 0.6, // low enough that discovery would normally interview
|
|
89
|
+
complexity: "low",
|
|
90
|
+
domain: null,
|
|
91
|
+
outputStyle: null,
|
|
92
|
+
intentKind: "task",
|
|
93
|
+
};
|
|
94
|
+
const enResult = await runDiscovery("analyze the orchestrator, just answer, don't ask", l1, process.cwd(), handler);
|
|
95
|
+
expect(enResult.interviewed).toBe(false);
|
|
96
|
+
expect(enResult.accepted).toBe(true);
|
|
97
|
+
const viResult = await runDiscovery("phân tích orchestrator, đừng hỏi, trả lời thẳng", l1, process.cwd(), handler);
|
|
98
|
+
expect(viResult.interviewed).toBe(false);
|
|
99
|
+
expect(viResult.accepted).toBe(true);
|
|
100
|
+
expect(handler.askQuestion).not.toHaveBeenCalled();
|
|
101
|
+
});
|
|
102
|
+
it("interviews user when the model proposes a question", async () => {
|
|
26
103
|
const handler = {
|
|
27
104
|
askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text: "Error disappears", kind: "choice" }),
|
|
28
105
|
showAcceptance: vi.fn().mockResolvedValue("accept"),
|
|
29
106
|
};
|
|
107
|
+
const proposer = vi.fn().mockResolvedValue(["What's the expected fix outcome? [MODEL RECS: Error disappears]"]);
|
|
30
108
|
const result = await runDiscovery("fix auth", {
|
|
31
109
|
taskType: "debug",
|
|
32
110
|
confidence: 0.6,
|
|
@@ -34,7 +112,7 @@ describe("runDiscovery()", () => {
|
|
|
34
112
|
domain: "typescript",
|
|
35
113
|
outputStyle: null,
|
|
36
114
|
intentKind: "task",
|
|
37
|
-
}, process.cwd(), handler);
|
|
115
|
+
}, process.cwd(), handler, null, proposer);
|
|
38
116
|
expect(result.interviewed).toBe(true);
|
|
39
117
|
expect(result.accepted).toBe(true);
|
|
40
118
|
expect(handler.askQuestion).toHaveBeenCalled();
|
|
@@ -53,14 +131,14 @@ describe("runDiscovery()", () => {
|
|
|
53
131
|
});
|
|
54
132
|
it("sets accepted=false when user cancels", async () => {
|
|
55
133
|
const handler = {
|
|
56
|
-
//
|
|
57
|
-
// asked. First call = scope gap, second call = acceptance card.
|
|
134
|
+
// First askQuestion = the model's interview question, second = acceptance card.
|
|
58
135
|
askQuestion: vi
|
|
59
136
|
.fn()
|
|
60
137
|
.mockResolvedValueOnce({ questionId: "q1", text: "done", kind: "choice" })
|
|
61
138
|
.mockResolvedValue({ questionId: "q-acc", text: "cancel", kind: "choice" }),
|
|
62
139
|
showAcceptance: vi.fn().mockResolvedValue("cancel"),
|
|
63
140
|
};
|
|
141
|
+
const proposer = vi.fn().mockResolvedValue(["What's the expected outcome? [MODEL RECS: Error disappears]"]);
|
|
64
142
|
const result = await runDiscovery("fix auth", {
|
|
65
143
|
taskType: "debug",
|
|
66
144
|
confidence: 0.6,
|
|
@@ -68,17 +146,18 @@ describe("runDiscovery()", () => {
|
|
|
68
146
|
domain: "typescript",
|
|
69
147
|
outputStyle: null,
|
|
70
148
|
intentKind: "task",
|
|
71
|
-
}, process.cwd(), handler);
|
|
149
|
+
}, process.cwd(), handler, null, proposer);
|
|
72
150
|
expect(result.accepted).toBe(false);
|
|
73
151
|
});
|
|
74
152
|
it("does not swallow the original request into a generic outcome for a general prompt (B2)", async () => {
|
|
75
|
-
// B2 —
|
|
76
|
-
//
|
|
77
|
-
//
|
|
153
|
+
// B2 — the old generic outcome askcard collapsed intent to "general: Task
|
|
154
|
+
// completed", discarding the user's prompt. With the model proposing no
|
|
155
|
+
// questions, the outcome must derive from the raw text (no fabrication).
|
|
78
156
|
const handler = {
|
|
79
157
|
askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text: "Task completed", kind: "choice" }),
|
|
80
158
|
showAcceptance: vi.fn().mockResolvedValue("accept"),
|
|
81
159
|
};
|
|
160
|
+
const proposer = vi.fn().mockResolvedValue([]);
|
|
82
161
|
const result = await runDiscovery("make the dashboard feel less cluttered", {
|
|
83
162
|
taskType: "general",
|
|
84
163
|
confidence: 0.6,
|
|
@@ -86,7 +165,7 @@ describe("runDiscovery()", () => {
|
|
|
86
165
|
domain: null,
|
|
87
166
|
outputStyle: null,
|
|
88
167
|
intentKind: "task",
|
|
89
|
-
}, process.cwd(), handler);
|
|
168
|
+
}, process.cwd(), handler, null, proposer);
|
|
90
169
|
expect(result.intentStatement).not.toBe("general: Task completed");
|
|
91
170
|
expect(result.outcome).not.toBe("Task completed");
|
|
92
171
|
// The original request must survive into the resolved outcome.
|
|
@@ -98,4 +177,58 @@ describe("runDiscovery()", () => {
|
|
|
98
177
|
expect(result.accepted).toBe(true);
|
|
99
178
|
});
|
|
100
179
|
});
|
|
180
|
+
describe("runDiscovery() — outcome autofill override (path-leak vs legit slash)", () => {
|
|
181
|
+
const analyzeL1 = {
|
|
182
|
+
taskType: "analyze",
|
|
183
|
+
confidence: 0.6,
|
|
184
|
+
complexity: "low",
|
|
185
|
+
domain: null,
|
|
186
|
+
outputStyle: null,
|
|
187
|
+
intentKind: "task",
|
|
188
|
+
};
|
|
189
|
+
// A handler that always picks `text` for both the interview answer and the
|
|
190
|
+
// acceptance card (any non-"cancel"/"adjust" text accepts).
|
|
191
|
+
const pickAnswer = (text) => ({
|
|
192
|
+
askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text, kind: "choice" }),
|
|
193
|
+
showAcceptance: vi.fn().mockResolvedValue("accept"),
|
|
194
|
+
});
|
|
195
|
+
it("preserves a user outcome answer containing '/' (does not clobber with the autofilled default)", async () => {
|
|
196
|
+
// Regression: the override matched ANY '/' (bare `\/` regex alt +
|
|
197
|
+
// `.includes("/")`), silently replacing a legit answer like
|
|
198
|
+
// "support both REST/GraphQL endpoints" with the analyze default.
|
|
199
|
+
const userAnswer = "support both REST/GraphQL endpoints";
|
|
200
|
+
const proposer = vi
|
|
201
|
+
.fn()
|
|
202
|
+
.mockResolvedValue([
|
|
203
|
+
"Which API surface should the analysis target? [MODEL RECS: support both REST/GraphQL endpoints | REST only]",
|
|
204
|
+
]);
|
|
205
|
+
const result = await runDiscovery("review the API layer", analyzeL1, process.cwd(), pickAnswer(userAnswer), null, proposer);
|
|
206
|
+
expect(result.outcome).toBe(userAnswer);
|
|
207
|
+
expect(result.outcome).not.toBe("Detailed analysis with concrete improvement recommendations");
|
|
208
|
+
});
|
|
209
|
+
it("preserves another 'or'-style slash answer (validate input/output schemas)", async () => {
|
|
210
|
+
const userAnswer = "validate input/output schemas";
|
|
211
|
+
const proposer = vi.fn().mockResolvedValue(["What should the analysis verify?"]);
|
|
212
|
+
const result = await runDiscovery("review the API layer", analyzeL1, process.cwd(), pickAnswer(userAnswer), null, proposer);
|
|
213
|
+
expect(result.outcome).toBe(userAnswer);
|
|
214
|
+
});
|
|
215
|
+
it("still overwrites a genuinely path-leaked outcome with the autofilled default", async () => {
|
|
216
|
+
// Guard against over-correction: a real filesystem-path leak (scope-option
|
|
217
|
+
// shape "src/cli (cli)") must STILL be replaced by the inferred outcome.
|
|
218
|
+
const proposer = vi.fn().mockResolvedValue(["What scope? [MODEL RECS: src/cli (cli)]"]);
|
|
219
|
+
const result = await runDiscovery("review the API layer", analyzeL1, process.cwd(), pickAnswer("src/cli (cli)"), null, proposer);
|
|
220
|
+
expect(result.outcome).toBe("Detailed analysis with concrete improvement recommendations");
|
|
221
|
+
});
|
|
222
|
+
it("treats the 'provide my own details' meta-option as no-answer, not a literal outcome", async () => {
|
|
223
|
+
// The default meta-option ("I will provide my own details / constraints")
|
|
224
|
+
// is a 'no specific answer' sentinel — it must not survive verbatim as the
|
|
225
|
+
// outcome. With no inferred default available (generate), it falls back to
|
|
226
|
+
// the raw-derived intent rather than the sentinel string.
|
|
227
|
+
const sentinel = "I will provide my own details / constraints";
|
|
228
|
+
const proposer = vi.fn().mockResolvedValue(["What outcome do you expect?"]);
|
|
229
|
+
const result = await runDiscovery("build the user dashboard widget", { ...analyzeL1, taskType: "generate" }, process.cwd(), pickAnswer(sentinel), null, proposer);
|
|
230
|
+
expect(result.outcome).not.toBe(sentinel);
|
|
231
|
+
expect(result.outcome.toLowerCase()).toContain("dashboard");
|
|
232
|
+
});
|
|
233
|
+
});
|
|
101
234
|
//# sourceMappingURL=discovery.test.js.map
|
|
@@ -4,18 +4,23 @@
|
|
|
4
4
|
* pil-report command can answer "which pass actually decided the outcome".
|
|
5
5
|
*/
|
|
6
6
|
import { beforeEach, describe, expect, it, vi } from "vitest";
|
|
7
|
-
const { mockClassify, mockClassifyViaBrain, mockPilContext, mockIsUnifiedPilEnabled } = vi.hoisted(() => ({
|
|
7
|
+
const { mockClassify, mockClassifyViaBrain, mockPilContext, mockIsUnifiedPilEnabled, mockIsLlmFirstClassifyEnabled } = vi.hoisted(() => ({
|
|
8
8
|
mockClassify: vi.fn(),
|
|
9
9
|
mockClassifyViaBrain: vi.fn(),
|
|
10
10
|
mockPilContext: vi.fn(),
|
|
11
11
|
mockIsUnifiedPilEnabled: vi.fn(),
|
|
12
|
+
// OFF so these trace tests exercise the regex cascade passes.
|
|
13
|
+
mockIsLlmFirstClassifyEnabled: vi.fn(() => false),
|
|
12
14
|
}));
|
|
13
15
|
vi.mock("../../router/classifier/index.js", () => ({ classify: mockClassify }));
|
|
14
16
|
vi.mock("../../ee/bridge.js", () => ({
|
|
15
17
|
classifyViaBrain: mockClassifyViaBrain,
|
|
16
18
|
pilContext: mockPilContext,
|
|
17
19
|
}));
|
|
18
|
-
vi.mock("../config.js", () => ({
|
|
20
|
+
vi.mock("../config.js", () => ({
|
|
21
|
+
isUnifiedPilEnabled: mockIsUnifiedPilEnabled,
|
|
22
|
+
isLlmFirstClassifyEnabled: mockIsLlmFirstClassifyEnabled,
|
|
23
|
+
}));
|
|
19
24
|
import { layer1Intent } from "../layer1-intent.js";
|
|
20
25
|
function makeCtx(raw) {
|
|
21
26
|
return {
|
|
@@ -4,6 +4,9 @@ vi.mock("../../router/classifier/index.js", () => ({
|
|
|
4
4
|
}));
|
|
5
5
|
vi.mock("../config.js", () => ({
|
|
6
6
|
isUnifiedPilEnabled: vi.fn(() => false),
|
|
7
|
+
// OFF here so these cascade tests exercise the regex passes (the model-first
|
|
8
|
+
// gate is covered in src/pil/layer1-intent.test.ts).
|
|
9
|
+
isLlmFirstClassifyEnabled: vi.fn(() => false),
|
|
7
10
|
}));
|
|
8
11
|
vi.mock("../../ee/bridge.js", () => ({
|
|
9
12
|
classifyViaBrain: vi.fn().mockResolvedValue(null),
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
|
-
import { buildInterviewQuestion,
|
|
2
|
+
import { buildInterviewQuestion, resolveGapsNonInteractive } from "../layer16-clarity.js";
|
|
3
|
+
// Phase 2 (2026-06-16): detectClarityGaps + its keyword option-builders were
|
|
4
|
+
// removed (the model now generates every clarification). The surviving helpers
|
|
5
|
+
// — buildInterviewQuestion (render) and resolveGapsNonInteractive (headless
|
|
6
|
+
// default-answer resolution) — are exercised here with model-shaped gaps.
|
|
3
7
|
const EMPTY_PROJECT = {
|
|
4
8
|
language: "typescript",
|
|
5
9
|
framework: null,
|
|
@@ -10,122 +14,10 @@ const EMPTY_PROJECT = {
|
|
|
10
14
|
{ path: "src/billing/", name: "billing", entryFiles: [], exportedSymbols: [] },
|
|
11
15
|
],
|
|
12
16
|
eePatterns: [],
|
|
13
|
-
relevantModules: [],
|
|
17
|
+
relevantModules: [{ path: "src/auth/", relevance: "named in prompt", exists: true }],
|
|
14
18
|
scannedAt: Date.now(),
|
|
15
19
|
cwd: "/proj",
|
|
16
20
|
};
|
|
17
|
-
describe("detectClarityGaps()", () => {
|
|
18
|
-
it("detects outcome gap for vague non-debug prompt", () => {
|
|
19
|
-
// PIL-L6 fix — debug now joins the autofill set, so vague debug prompts
|
|
20
|
-
// ("fix auth") no longer trigger an outcome question. Use a generate
|
|
21
|
-
// prompt instead to still cover the gap-detection path.
|
|
22
|
-
const gaps = detectClarityGaps("build something", "generate", 0.7, EMPTY_PROJECT);
|
|
23
|
-
const outcomeGap = gaps.find((g) => g.dimension === "outcome");
|
|
24
|
-
expect(outcomeGap).toBeDefined();
|
|
25
|
-
});
|
|
26
|
-
it("does NOT detect outcome gap for vague debug prompt (autofilled)", () => {
|
|
27
|
-
const gaps = detectClarityGaps("fix auth", "debug", 0.7, EMPTY_PROJECT);
|
|
28
|
-
const outcomeGap = gaps.find((g) => g.dimension === "outcome");
|
|
29
|
-
expect(outcomeGap).toBeUndefined();
|
|
30
|
-
});
|
|
31
|
-
it("does NOT detect an outcome gap for a vague general prompt (B2 intent-swallow guard)", () => {
|
|
32
|
-
// B2 — a `general` prompt's only outcome options are tautological
|
|
33
|
-
// ("Task completed" / "Issue resolved"). Asking them lets the default
|
|
34
|
-
// answer overwrite the user's real request, so the intent collapses to
|
|
35
|
-
// "general: Task completed" and the original prompt is lost. Skip the
|
|
36
|
-
// askcard so the outcome falls back to the raw request downstream.
|
|
37
|
-
const gaps = detectClarityGaps("the project feels messy", "general", 0.7, EMPTY_PROJECT);
|
|
38
|
-
const outcomeGap = gaps.find((g) => g.dimension === "outcome");
|
|
39
|
-
expect(outcomeGap).toBeUndefined();
|
|
40
|
-
});
|
|
41
|
-
it("detects scope gap when no file reference", () => {
|
|
42
|
-
const gaps = detectClarityGaps("fix auth", "debug", 0.7, EMPTY_PROJECT);
|
|
43
|
-
const scopeGap = gaps.find((g) => g.dimension === "scope");
|
|
44
|
-
expect(scopeGap).toBeDefined();
|
|
45
|
-
});
|
|
46
|
-
it("returns no gaps for specific prompt", () => {
|
|
47
|
-
const gaps = detectClarityGaps("fix TypeError in src/auth/login.ts:42", "debug", 0.9, EMPTY_PROJECT);
|
|
48
|
-
expect(gaps).toHaveLength(0);
|
|
49
|
-
});
|
|
50
|
-
it("scope options include matching bounded contexts", () => {
|
|
51
|
-
const gaps = detectClarityGaps("fix auth", "debug", 0.7, EMPTY_PROJECT);
|
|
52
|
-
const scopeGap = gaps.find((g) => g.dimension === "scope");
|
|
53
|
-
expect(scopeGap?.options.some((o) => o.includes("auth"))).toBe(true);
|
|
54
|
-
});
|
|
55
|
-
it("does NOT detect a scope gap for a general prompt with no codebase signal (B2-symmetric scope guard)", () => {
|
|
56
|
-
// Live drive (session 8a87aa060c6a): the pure non-codebase prompt "Reply
|
|
57
|
-
// with exactly one word: PONG" fired the scope askcard "Which part of the
|
|
58
|
-
// codebase should this target?" because countFileReferences /
|
|
59
|
-
// hasExplicitScope / hasOperationalScope were all empty — the detector
|
|
60
|
-
// assumes every prompt is a codebase task. A general/unclassified prompt
|
|
61
|
-
// has no codebase dimension to scope, so the question is nonsensical (and
|
|
62
|
-
// its acceptance card is downstream noise). Skip it, symmetric to the B2
|
|
63
|
-
// outcome guard; scope falls back to project-root downstream.
|
|
64
|
-
const gaps = detectClarityGaps("Reply with exactly one word: PONG", "general", 0.6, EMPTY_PROJECT);
|
|
65
|
-
expect(gaps.find((g) => g.dimension === "scope")).toBeUndefined();
|
|
66
|
-
// The only candidate gap was scope → general prompt now yields zero gaps,
|
|
67
|
-
// so discovery never marks interviewed=true and shows no acceptance card.
|
|
68
|
-
expect(gaps).toHaveLength(0);
|
|
69
|
-
});
|
|
70
|
-
it("STILL detects a scope gap for a classified (non-general) task with no file reference", () => {
|
|
71
|
-
// Guard must stay narrow: a real code task that simply omitted a path still
|
|
72
|
-
// benefits from the scope-narrowing askcard. Only general/null is skipped.
|
|
73
|
-
const gaps = detectClarityGaps("implement the search feature", "generate", 0.7, EMPTY_PROJECT);
|
|
74
|
-
expect(gaps.find((g) => g.dimension === "scope")).toBeDefined();
|
|
75
|
-
});
|
|
76
|
-
it("does NOT detect a scope gap for an image-analysis prompt (image is the scope)", () => {
|
|
77
|
-
// Live drive (PR#34 probe): "Take a screenshot of the homepage and analyze
|
|
78
|
-
// the diagram.png image to describe its layout" fired the codebase-scope
|
|
79
|
-
// askcard "Which part of the codebase should this target?" — nonsensical for
|
|
80
|
-
// an image-analysis task. The image (screenshot / diagram.png) IS the scope,
|
|
81
|
-
// symmetric to how operational (CI/build) prompts are scoped to the pipeline.
|
|
82
|
-
const gaps = detectClarityGaps("Take a screenshot of the homepage and analyze the diagram.png image to describe its layout", "analyze", 0.7, EMPTY_PROJECT);
|
|
83
|
-
expect(gaps.find((g) => g.dimension === "scope")).toBeUndefined();
|
|
84
|
-
// analyze autofills outcome, so with scope suppressed there are zero gaps →
|
|
85
|
-
// no interview, no acceptance card.
|
|
86
|
-
expect(gaps).toHaveLength(0);
|
|
87
|
-
});
|
|
88
|
-
it("STILL detects a scope gap for a code task that mentions an ambiguous non-image word", () => {
|
|
89
|
-
// Narrowness guard: image-scope suppression must not swallow real codebase
|
|
90
|
-
// tasks. "add a logo to the header" carries no concrete image signal (no
|
|
91
|
-
// file extension / screenshot / photo), so the scope askcard stays.
|
|
92
|
-
const gaps = detectClarityGaps("add a logo to the header", "generate", 0.7, EMPTY_PROJECT);
|
|
93
|
-
expect(gaps.find((g) => g.dimension === "scope")).toBeDefined();
|
|
94
|
-
});
|
|
95
|
-
it("does NOT detect a scope gap for a web-search / external-info prompt", () => {
|
|
96
|
-
// Live drive (tavily probe, session d7a45a2dba30): "search the web for the
|
|
97
|
-
// latest vitest release notes" classified taskType=analyze fired the
|
|
98
|
-
// codebase-scope askcard and recorded a wrong scope ("src/mcp"). A
|
|
99
|
-
// web-search task is scoped to the web, not the codebase — symmetric to the
|
|
100
|
-
// image-scope and operational-scope guards.
|
|
101
|
-
const gaps = detectClarityGaps("search the web for the latest vitest release notes", "analyze", 0.7, EMPTY_PROJECT);
|
|
102
|
-
expect(gaps.find((g) => g.dimension === "scope")).toBeUndefined();
|
|
103
|
-
expect(gaps).toHaveLength(0);
|
|
104
|
-
});
|
|
105
|
-
it("does NOT detect a scope gap for a self-contained computation prompt (data is inline)", () => {
|
|
106
|
-
// Live drive (deepseek-vs-grok A/B, session 17fc23f0): "Compute f([3,1,2])
|
|
107
|
-
// where f sorts the list ascending then returns the sum of the first two
|
|
108
|
-
// elements." classified taskType=analyze (regex:read matched the bare word
|
|
109
|
-
// "list", conf 0.80 → skipped the brain) fired BOTH the pil-interview scope
|
|
110
|
-
// askcard ("Which part of the codebase should this target?" → auto "Entire
|
|
111
|
-
// project") AND the pil-acceptance card. The operand [3,1,2] is supplied
|
|
112
|
-
// inline — the task has no codebase dimension to scope. Symmetric to the
|
|
113
|
-
// image / web / operational scope guards.
|
|
114
|
-
const gaps = detectClarityGaps("Compute f([3,1,2]) where f sorts the list ascending then returns the sum of the first two elements.", "analyze", 0.8, EMPTY_PROJECT);
|
|
115
|
-
expect(gaps.find((g) => g.dimension === "scope")).toBeUndefined();
|
|
116
|
-
// analyze autofills outcome, so with scope suppressed there are zero gaps →
|
|
117
|
-
// no interview, no acceptance card.
|
|
118
|
-
expect(gaps).toHaveLength(0);
|
|
119
|
-
});
|
|
120
|
-
it("STILL detects a scope gap for a code task that embeds a literal but no compute framing", () => {
|
|
121
|
-
// Narrowness guard: the inline-literal suppression must not swallow real
|
|
122
|
-
// codebase tasks. "set the default retry delays to [100, 200, 400] in the
|
|
123
|
-
// config" carries a literal but is scoped to the codebase (no compute verb),
|
|
124
|
-
// so the scope askcard stays.
|
|
125
|
-
const gaps = detectClarityGaps("set the default retry delays to [100, 200, 400] in the config", "generate", 0.7, EMPTY_PROJECT);
|
|
126
|
-
expect(gaps.find((g) => g.dimension === "scope")).toBeDefined();
|
|
127
|
-
});
|
|
128
|
-
});
|
|
129
21
|
describe("buildInterviewQuestion()", () => {
|
|
130
22
|
it("builds a CouncilQuestionData with pil-interview phase", () => {
|
|
131
23
|
const gap = {
|
|
@@ -141,11 +33,35 @@ describe("buildInterviewQuestion()", () => {
|
|
|
141
33
|
expect(q.options).toBeDefined();
|
|
142
34
|
expect(q.options.some((o) => o.kind === "freetext")).toBe(true);
|
|
143
35
|
});
|
|
36
|
+
it("surfaces the model's reason (gap.description) as the askcard context", () => {
|
|
37
|
+
const gap = {
|
|
38
|
+
dimension: "outcome",
|
|
39
|
+
description: "answering this changes whether we add OAuth or just API keys",
|
|
40
|
+
suggestedQuestion: "Which auth method?",
|
|
41
|
+
options: ["OAuth", "API keys"],
|
|
42
|
+
defaultIndex: 0,
|
|
43
|
+
};
|
|
44
|
+
const q = buildInterviewQuestion(gap, "q-2");
|
|
45
|
+
expect(q.context).toBe("answering this changes whether we add OAuth or just API keys");
|
|
46
|
+
});
|
|
144
47
|
});
|
|
145
48
|
describe("resolveGapsNonInteractive()", () => {
|
|
146
|
-
it("fills gaps with best-effort from project context", () => {
|
|
147
|
-
const gaps =
|
|
49
|
+
it("fills gaps with best-effort defaults from the model options + project context", () => {
|
|
50
|
+
const gaps = [
|
|
51
|
+
{
|
|
52
|
+
dimension: "outcome",
|
|
53
|
+
description: "Model-generated clarification #1",
|
|
54
|
+
suggestedQuestion: "What outcome do you expect?",
|
|
55
|
+
options: ["Error resolved", "Other (type free answer)"],
|
|
56
|
+
defaultIndex: 0,
|
|
57
|
+
},
|
|
58
|
+
];
|
|
148
59
|
const resolved = resolveGapsNonInteractive(gaps, EMPTY_PROJECT, "fix auth");
|
|
60
|
+
expect(resolved.outcome).toBe("Error resolved");
|
|
61
|
+
expect(resolved.scope.length).toBeGreaterThan(0);
|
|
62
|
+
});
|
|
63
|
+
it("falls back to the raw-derived outcome when there is no outcome gap", () => {
|
|
64
|
+
const resolved = resolveGapsNonInteractive([], EMPTY_PROJECT, "fix the login bug");
|
|
149
65
|
expect(resolved.outcome).toBeTruthy();
|
|
150
66
|
expect(resolved.scope.length).toBeGreaterThan(0);
|
|
151
67
|
});
|
|
@@ -75,6 +75,43 @@ describe("layer4Gsd (gsd-native)", () => {
|
|
|
75
75
|
const result = await layer4Gsd(makeCtx({ raw: "review the pull request" }));
|
|
76
76
|
expect(["review", "discuss", "execute"]).toContain(result.gsdPhase);
|
|
77
77
|
});
|
|
78
|
+
it("routes a question-shaped analyze/debug prompt to the QUESTION directive (no 'state a plan')", async () => {
|
|
79
|
+
// De-robotizing: a plain question must not get the STANDARD "state a 2-3 line
|
|
80
|
+
// plan" scaffold even when L1 classifies it analyze/debug (not "general").
|
|
81
|
+
const q = "why does the build fail intermittently?";
|
|
82
|
+
const result = await layer4Gsd(makeCtx({ raw: q, enriched: q, taskType: "debug", intentKind: "task" }));
|
|
83
|
+
expect(result.enriched).toContain("QUESTION / explanatory");
|
|
84
|
+
expect(result.enriched).not.toContain("State a 2-3 line plan");
|
|
85
|
+
});
|
|
86
|
+
it("treats a genuine general question (general + task) as informational", async () => {
|
|
87
|
+
const q = "what does the enrichment layer do?";
|
|
88
|
+
const result = await layer4Gsd(makeCtx({ raw: q, enriched: q, taskType: "general", intentKind: "task" }));
|
|
89
|
+
expect(result.enriched).toContain("QUESTION / explanatory");
|
|
90
|
+
});
|
|
91
|
+
it("does NOT treat an implementation request as informational even if phrased as a question", async () => {
|
|
92
|
+
// isImplementationIntent guards the question clause: "can you refactor … and
|
|
93
|
+
// wire up …" is a real edit task → STANDARD scaffold, not the QUESTION directive.
|
|
94
|
+
const q = "can you refactor the dropdown and wire up the keyboard handlers?";
|
|
95
|
+
const result = await layer4Gsd(makeCtx({ raw: q, enriched: q, taskType: "refactor", intentKind: "task" }));
|
|
96
|
+
expect(result.enriched).not.toContain("QUESTION / explanatory");
|
|
97
|
+
});
|
|
98
|
+
it("Phase 2b: deliverableKind='answer' is informational even for an imperative (no '?') prompt", async () => {
|
|
99
|
+
// The raw text is a plain imperative — the legacy regex (isQuestionLike /
|
|
100
|
+
// isMetaAnalysisPrompt) would NOT mark it informational. The model's
|
|
101
|
+
// deliverableKind='answer' must override that and route to the QUESTION
|
|
102
|
+
// directive — proving L4 consumes the model signal, not the regex.
|
|
103
|
+
const raw = "go over the auth module and tell me what it does";
|
|
104
|
+
const result = await layer4Gsd(makeCtx({ raw, enriched: raw, taskType: "analyze", intentKind: "task", deliverableKind: "answer" }));
|
|
105
|
+
expect(result.enriched).toContain("QUESTION / explanatory");
|
|
106
|
+
});
|
|
107
|
+
it("Phase 2b: deliverableKind='code' is NOT informational even for a question-shaped prompt", async () => {
|
|
108
|
+
// The raw text reads as a question — the legacy regex would mark it
|
|
109
|
+
// informational. The model's deliverableKind='code' must override that so
|
|
110
|
+
// the STANDARD implement scaffold is used (the deliverable is file edits).
|
|
111
|
+
const raw = "why not just refactor the dropdown and wire the keyboard handlers?";
|
|
112
|
+
const result = await layer4Gsd(makeCtx({ raw, enriched: raw, taskType: "refactor", intentKind: "task", deliverableKind: "code" }));
|
|
113
|
+
expect(result.enriched).not.toContain("QUESTION / explanatory");
|
|
114
|
+
});
|
|
78
115
|
it("uses ctx.gsdPhase from L1 (unified path) without calling routeTask", async () => {
|
|
79
116
|
const { routeTask } = await import("../../ee/bridge.js");
|
|
80
117
|
vi.mocked(routeTask).mockClear();
|