muonroi-cli 1.4.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. package/LICENSE +21 -21
  2. package/README.md +122 -122
  3. package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
  4. package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
  5. package/dist/src/agent-harness/mock-model.d.ts +11 -0
  6. package/dist/src/agent-harness/mock-model.js +21 -0
  7. package/dist/src/cli/cost-forensics.js +12 -12
  8. package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
  9. package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
  10. package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
  11. package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
  12. package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
  13. package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
  14. package/dist/src/council/clarifier.js +9 -1
  15. package/dist/src/council/debate.js +5 -1
  16. package/dist/src/council/decisions-lock.js +3 -3
  17. package/dist/src/council/index.js +12 -5
  18. package/dist/src/council/leader.d.ts +0 -17
  19. package/dist/src/council/leader.js +22 -15
  20. package/dist/src/council/planner.js +1 -1
  21. package/dist/src/council/prompts.js +63 -57
  22. package/dist/src/council/types.d.ts +7 -0
  23. package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
  24. package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
  25. package/dist/src/ee/artifact-cache.d.ts +56 -0
  26. package/dist/src/ee/artifact-cache.js +155 -0
  27. package/dist/src/ee/artifact-cache.test.d.ts +1 -0
  28. package/dist/src/ee/artifact-cache.test.js +69 -0
  29. package/dist/src/ee/auth.d.ts +9 -0
  30. package/dist/src/ee/auth.js +19 -0
  31. package/dist/src/ee/ee-onboarding.d.ts +5 -0
  32. package/dist/src/ee/ee-onboarding.js +76 -0
  33. package/dist/src/ee/search.js +7 -5
  34. package/dist/src/ee/search.test.d.ts +1 -0
  35. package/dist/src/ee/search.test.js +23 -0
  36. package/dist/src/generated/version.d.ts +1 -1
  37. package/dist/src/generated/version.js +1 -1
  38. package/dist/src/headless/output.js +6 -4
  39. package/dist/src/headless/output.test.js +4 -3
  40. package/dist/src/index.js +20 -1
  41. package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
  42. package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
  43. package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
  44. package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
  45. package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
  46. package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
  47. package/dist/src/mcp/auto-setup.js +56 -2
  48. package/dist/src/mcp/client-pool.d.ts +46 -0
  49. package/dist/src/mcp/client-pool.js +212 -0
  50. package/dist/src/mcp/oauth-callback.js +2 -2
  51. package/dist/src/mcp/parse-headers.test.js +14 -14
  52. package/dist/src/mcp/runtime.d.ts +28 -0
  53. package/dist/src/mcp/runtime.js +117 -51
  54. package/dist/src/mcp/self-verify-runner.d.ts +14 -0
  55. package/dist/src/mcp/self-verify-runner.js +38 -0
  56. package/dist/src/mcp/setup-guide-text.d.ts +9 -0
  57. package/dist/src/mcp/setup-guide-text.js +84 -0
  58. package/dist/src/mcp/smart-filter.js +49 -0
  59. package/dist/src/mcp/smoke.test.js +43 -43
  60. package/dist/src/mcp/tools-server.d.ts +7 -0
  61. package/dist/src/mcp/tools-server.js +19 -22
  62. package/dist/src/models/catalog.json +349 -349
  63. package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
  64. package/dist/src/ops/doctor.d.ts +3 -2
  65. package/dist/src/ops/doctor.js +47 -11
  66. package/dist/src/ops/doctor.test.js +4 -3
  67. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
  68. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
  69. package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
  70. package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
  71. package/dist/src/orchestrator/batch-turn-runner.js +7 -11
  72. package/dist/src/orchestrator/compaction.d.ts +2 -0
  73. package/dist/src/orchestrator/compaction.js +14 -1
  74. package/dist/src/orchestrator/compaction.test.js +25 -1
  75. package/dist/src/orchestrator/message-processor.js +72 -32
  76. package/dist/src/orchestrator/orchestrator.js +26 -0
  77. package/dist/src/orchestrator/prompts.d.ts +51 -0
  78. package/dist/src/orchestrator/prompts.js +257 -134
  79. package/dist/src/orchestrator/scope-ceiling.js +6 -1
  80. package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
  81. package/dist/src/orchestrator/scope-reminder.js +16 -0
  82. package/dist/src/orchestrator/scope-reminder.test.js +22 -1
  83. package/dist/src/orchestrator/stream-runner.js +23 -15
  84. package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
  85. package/dist/src/orchestrator/subagent-compactor.js +30 -8
  86. package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
  87. package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
  88. package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
  89. package/dist/src/pil/__tests__/config.test.js +1 -17
  90. package/dist/src/pil/__tests__/discovery.test.js +144 -11
  91. package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
  92. package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
  93. package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
  94. package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
  95. package/dist/src/pil/__tests__/layer6-output.test.js +158 -18
  96. package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
  97. package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
  98. package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
  99. package/dist/src/pil/agent-operating-contract.d.ts +1 -1
  100. package/dist/src/pil/agent-operating-contract.js +2 -0
  101. package/dist/src/pil/agent-operating-contract.test.js +7 -2
  102. package/dist/src/pil/cheap-model-playbook.js +35 -35
  103. package/dist/src/pil/cheap-model-workbooks.js +16 -13
  104. package/dist/src/pil/clarity-gate.d.ts +21 -19
  105. package/dist/src/pil/clarity-gate.js +26 -153
  106. package/dist/src/pil/config.d.ts +9 -1
  107. package/dist/src/pil/config.js +15 -4
  108. package/dist/src/pil/discovery.js +211 -136
  109. package/dist/src/pil/layer1-intent.d.ts +12 -0
  110. package/dist/src/pil/layer1-intent.js +283 -38
  111. package/dist/src/pil/layer1-intent.test.js +210 -4
  112. package/dist/src/pil/layer16-clarity.d.ts +25 -11
  113. package/dist/src/pil/layer16-clarity.js +19 -306
  114. package/dist/src/pil/layer3-ee-injection.d.ts +19 -0
  115. package/dist/src/pil/layer3-ee-injection.js +96 -4
  116. package/dist/src/pil/layer4-gsd.js +18 -6
  117. package/dist/src/pil/layer6-output.d.ts +2 -0
  118. package/dist/src/pil/layer6-output.js +151 -25
  119. package/dist/src/pil/llm-classify.d.ts +26 -0
  120. package/dist/src/pil/llm-classify.js +34 -5
  121. package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
  122. package/dist/src/pil/native-capabilities-workbook.js +82 -76
  123. package/dist/src/pil/pipeline.js +15 -9
  124. package/dist/src/pil/schema.d.ts +8 -0
  125. package/dist/src/pil/schema.js +12 -1
  126. package/dist/src/pil/task-tier-map.js +4 -0
  127. package/dist/src/pil/types.d.ts +11 -1
  128. package/dist/src/product-loop/done-gate.js +3 -3
  129. package/dist/src/product-loop/loop-driver.js +18 -18
  130. package/dist/src/product-loop/progress-snapshot.js +4 -4
  131. package/dist/src/providers/auth/gemini-oauth.js +6 -15
  132. package/dist/src/providers/auth/grok-oauth.js +6 -15
  133. package/dist/src/providers/auth/openai-oauth.js +6 -15
  134. package/dist/src/providers/mcp-vision-bridge.js +48 -48
  135. package/dist/src/reporter/index.js +1 -1
  136. package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
  137. package/dist/src/scaffold/bb-quality-gate.js +5 -5
  138. package/dist/src/scaffold/continuation-prompt.js +60 -60
  139. package/dist/src/scaffold/init-new.js +453 -453
  140. package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
  141. package/dist/src/self-qa/agentic-loop.js +24 -19
  142. package/dist/src/self-qa/spec-emitter.js +26 -23
  143. package/dist/src/storage/__tests__/migrations.test.js +2 -2
  144. package/dist/src/storage/interaction-log.js +5 -5
  145. package/dist/src/storage/migrations.js +122 -122
  146. package/dist/src/storage/sessions.js +42 -42
  147. package/dist/src/storage/transcript.js +91 -84
  148. package/dist/src/storage/usage.js +14 -14
  149. package/dist/src/storage/workspaces.js +12 -12
  150. package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
  151. package/dist/src/tools/__tests__/native-tools.test.js +53 -0
  152. package/dist/src/tools/git-safety.d.ts +61 -0
  153. package/dist/src/tools/git-safety.js +141 -0
  154. package/dist/src/tools/git-safety.test.d.ts +1 -0
  155. package/dist/src/tools/git-safety.test.js +111 -0
  156. package/dist/src/tools/native-tools.d.ts +31 -0
  157. package/dist/src/tools/native-tools.js +273 -0
  158. package/dist/src/tools/registry-ee-query.test.js +18 -1
  159. package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
  160. package/dist/src/tools/registry-git-safety.test.js +92 -0
  161. package/dist/src/tools/registry.js +52 -6
  162. package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
  163. package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
  164. package/dist/src/ui/app.js +0 -0
  165. package/dist/src/ui/components/message-view.js +4 -1
  166. package/dist/src/ui/components/structured-response-view.js +7 -3
  167. package/dist/src/ui/components/tool-group.js +7 -1
  168. package/dist/src/ui/markdown-render.d.ts +41 -0
  169. package/dist/src/ui/markdown-render.js +223 -0
  170. package/dist/src/ui/markdown.d.ts +10 -0
  171. package/dist/src/ui/markdown.js +12 -35
  172. package/dist/src/ui/slash/council-inspect.js +4 -4
  173. package/dist/src/ui/slash/export.js +4 -4
  174. package/dist/src/ui/utils/text.d.ts +8 -0
  175. package/dist/src/ui/utils/text.js +16 -0
  176. package/dist/src/ui/utils/text.test.d.ts +1 -0
  177. package/dist/src/ui/utils/text.test.js +23 -0
  178. package/dist/src/usage/ledger.js +48 -15
  179. package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
  180. package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
  181. package/dist/src/utils/clipboard-image.js +23 -23
  182. package/dist/src/utils/open-url.d.ts +56 -0
  183. package/dist/src/utils/open-url.js +58 -0
  184. package/dist/src/utils/open-url.test.d.ts +1 -0
  185. package/dist/src/utils/open-url.test.js +86 -0
  186. package/dist/src/utils/settings.d.ts +12 -0
  187. package/dist/src/utils/settings.js +48 -0
  188. package/dist/src/utils/side-question.js +2 -2
  189. package/dist/src/utils/skills.js +3 -3
  190. package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
  191. package/dist/src/verify/environment.js +2 -1
  192. package/package.json +1 -1
  193. package/dist/src/pil/layer16-clarity.test.js +0 -31
  194. /package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0
@@ -10,7 +10,14 @@ const mockHandler = {
10
10
  showAcceptance: vi.fn().mockResolvedValue("accept"),
11
11
  };
12
12
  describe("runDiscovery()", () => {
13
- it("auto-passes on high-confidence specific prompt", async () => {
13
+ it("proceeds without interview when the model proposes no questions", async () => {
14
+ // Phase 2: the model is the sole ask-decider. An empty proposer result means
15
+ // "no gray area" → no interview, no fabricated [Discovery] outcome.
16
+ const handler = {
17
+ askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text: "x", kind: "choice" }),
18
+ showAcceptance: vi.fn().mockResolvedValue("accept"),
19
+ };
20
+ const proposer = vi.fn().mockResolvedValue([]);
14
21
  const result = await runDiscovery("fix TypeError in src/auth/login.ts:42", {
15
22
  taskType: "debug",
16
23
  confidence: 0.9,
@@ -18,15 +25,86 @@ describe("runDiscovery()", () => {
18
25
  domain: "typescript",
19
26
  outputStyle: "balanced",
20
27
  intentKind: "task",
21
- }, process.cwd(), null);
28
+ }, process.cwd(), handler, null, proposer);
29
+ expect(proposer).toHaveBeenCalled();
22
30
  expect(result.interviewed).toBe(false);
23
31
  expect(result.accepted).toBe(true);
32
+ expect(handler.askQuestion).not.toHaveBeenCalled();
24
33
  });
25
- it("interviews user on vague prompt with handler", async () => {
34
+ it("does NOT interview (and never fabricates regex questions) when no proposer is wired", async () => {
35
+ // Phase 2 fail-loud: an interactive turn missing a proposer logs and proceeds
36
+ // WITHOUT an interview — it must never fall back to keyword-generated gaps.
37
+ const handler = {
38
+ askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text: "x", kind: "choice" }),
39
+ showAcceptance: vi.fn().mockResolvedValue("accept"),
40
+ };
41
+ const result = await runDiscovery("fix auth", // vague — old regex gate would have asked a scope question
42
+ {
43
+ taskType: "debug",
44
+ confidence: 0.6,
45
+ complexity: "low",
46
+ domain: "typescript",
47
+ outputStyle: null,
48
+ intentKind: "task",
49
+ }, process.cwd(), handler, null, null);
50
+ expect(result.interviewed).toBe(false);
51
+ expect(result.accepted).toBe(true);
52
+ expect(handler.askQuestion).not.toHaveBeenCalled();
53
+ });
54
+ it("surfaces the model's reason + recommends in the interview askcard", async () => {
55
+ const askQuestion = vi.fn().mockResolvedValue({ questionId: "q1", text: "OAuth", kind: "choice" });
56
+ const handler = {
57
+ askQuestion,
58
+ showAcceptance: vi.fn().mockResolvedValue("accept"),
59
+ };
60
+ const proposer = vi
61
+ .fn()
62
+ .mockResolvedValue(["Which auth method? [MODEL RECS: OAuth | API keys] [WHY: changes the whole token flow]"]);
63
+ await runDiscovery("add authentication", {
64
+ taskType: "generate",
65
+ confidence: 0.6,
66
+ complexity: "low",
67
+ domain: null,
68
+ outputStyle: null,
69
+ intentKind: "task",
70
+ }, process.cwd(), handler, null, proposer);
71
+ expect(askQuestion).toHaveBeenCalled();
72
+ const card = askQuestion.mock.calls[0][0];
73
+ // Model's WHY drives the askcard context; recommends drive the options.
74
+ expect(card.context).toBe("changes the whole token flow");
75
+ expect(card.question).toBe("Which auth method?");
76
+ const labels = (card.options ?? []).map((o) => o.label);
77
+ expect(labels).toContain("OAuth");
78
+ expect(labels).toContain("API keys");
79
+ expect(card.defaultIndex).toBe(0); // first recommend = recommended default
80
+ });
81
+ it("skips all discovery when the user explicitly says don't ask (EN + VI)", async () => {
82
+ const handler = {
83
+ askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text: "x", kind: "choice" }),
84
+ showAcceptance: vi.fn().mockResolvedValue("accept"),
85
+ };
86
+ const l1 = {
87
+ taskType: "analyze",
88
+ confidence: 0.6, // low enough that discovery would normally interview
89
+ complexity: "low",
90
+ domain: null,
91
+ outputStyle: null,
92
+ intentKind: "task",
93
+ };
94
+ const enResult = await runDiscovery("analyze the orchestrator, just answer, don't ask", l1, process.cwd(), handler);
95
+ expect(enResult.interviewed).toBe(false);
96
+ expect(enResult.accepted).toBe(true);
97
+ const viResult = await runDiscovery("phân tích orchestrator, đừng hỏi, trả lời thẳng", l1, process.cwd(), handler);
98
+ expect(viResult.interviewed).toBe(false);
99
+ expect(viResult.accepted).toBe(true);
100
+ expect(handler.askQuestion).not.toHaveBeenCalled();
101
+ });
102
+ it("interviews user when the model proposes a question", async () => {
26
103
  const handler = {
27
104
  askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text: "Error disappears", kind: "choice" }),
28
105
  showAcceptance: vi.fn().mockResolvedValue("accept"),
29
106
  };
107
+ const proposer = vi.fn().mockResolvedValue(["What's the expected fix outcome? [MODEL RECS: Error disappears]"]);
30
108
  const result = await runDiscovery("fix auth", {
31
109
  taskType: "debug",
32
110
  confidence: 0.6,
@@ -34,7 +112,7 @@ describe("runDiscovery()", () => {
34
112
  domain: "typescript",
35
113
  outputStyle: null,
36
114
  intentKind: "task",
37
- }, process.cwd(), handler);
115
+ }, process.cwd(), handler, null, proposer);
38
116
  expect(result.interviewed).toBe(true);
39
117
  expect(result.accepted).toBe(true);
40
118
  expect(handler.askQuestion).toHaveBeenCalled();
@@ -53,14 +131,14 @@ describe("runDiscovery()", () => {
53
131
  });
54
132
  it("sets accepted=false when user cancels", async () => {
55
133
  const handler = {
56
- // PIL-L6 fix debug now autofills outcome, so only the scope gap is
57
- // asked. First call = scope gap, second call = acceptance card.
134
+ // First askQuestion = the model's interview question, second = acceptance card.
58
135
  askQuestion: vi
59
136
  .fn()
60
137
  .mockResolvedValueOnce({ questionId: "q1", text: "done", kind: "choice" })
61
138
  .mockResolvedValue({ questionId: "q-acc", text: "cancel", kind: "choice" }),
62
139
  showAcceptance: vi.fn().mockResolvedValue("cancel"),
63
140
  };
141
+ const proposer = vi.fn().mockResolvedValue(["What's the expected outcome? [MODEL RECS: Error disappears]"]);
64
142
  const result = await runDiscovery("fix auth", {
65
143
  taskType: "debug",
66
144
  confidence: 0.6,
@@ -68,17 +146,18 @@ describe("runDiscovery()", () => {
68
146
  domain: "typescript",
69
147
  outputStyle: null,
70
148
  intentKind: "task",
71
- }, process.cwd(), handler);
149
+ }, process.cwd(), handler, null, proposer);
72
150
  expect(result.accepted).toBe(false);
73
151
  });
74
152
  it("does not swallow the original request into a generic outcome for a general prompt (B2)", async () => {
75
- // B2 — answering the (now-skipped) generic outcome askcard used to collapse
76
- // the intent to "general: Task completed", discarding the user's prompt.
77
- // The scope gap may still fire; the outcome must derive from the raw text.
153
+ // B2 — the old generic outcome askcard collapsed intent to "general: Task
154
+ // completed", discarding the user's prompt. With the model proposing no
155
+ // questions, the outcome must derive from the raw text (no fabrication).
78
156
  const handler = {
79
157
  askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text: "Task completed", kind: "choice" }),
80
158
  showAcceptance: vi.fn().mockResolvedValue("accept"),
81
159
  };
160
+ const proposer = vi.fn().mockResolvedValue([]);
82
161
  const result = await runDiscovery("make the dashboard feel less cluttered", {
83
162
  taskType: "general",
84
163
  confidence: 0.6,
@@ -86,7 +165,7 @@ describe("runDiscovery()", () => {
86
165
  domain: null,
87
166
  outputStyle: null,
88
167
  intentKind: "task",
89
- }, process.cwd(), handler);
168
+ }, process.cwd(), handler, null, proposer);
90
169
  expect(result.intentStatement).not.toBe("general: Task completed");
91
170
  expect(result.outcome).not.toBe("Task completed");
92
171
  // The original request must survive into the resolved outcome.
@@ -98,4 +177,58 @@ describe("runDiscovery()", () => {
98
177
  expect(result.accepted).toBe(true);
99
178
  });
100
179
  });
180
+ describe("runDiscovery() — outcome autofill override (path-leak vs legit slash)", () => {
181
+ const analyzeL1 = {
182
+ taskType: "analyze",
183
+ confidence: 0.6,
184
+ complexity: "low",
185
+ domain: null,
186
+ outputStyle: null,
187
+ intentKind: "task",
188
+ };
189
+ // A handler that always picks `text` for both the interview answer and the
190
+ // acceptance card (any non-"cancel"/"adjust" text accepts).
191
+ const pickAnswer = (text) => ({
192
+ askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text, kind: "choice" }),
193
+ showAcceptance: vi.fn().mockResolvedValue("accept"),
194
+ });
195
+ it("preserves a user outcome answer containing '/' (does not clobber with the autofilled default)", async () => {
196
+ // Regression: the override matched ANY '/' (bare `\/` regex alt +
197
+ // `.includes("/")`), silently replacing a legit answer like
198
+ // "support both REST/GraphQL endpoints" with the analyze default.
199
+ const userAnswer = "support both REST/GraphQL endpoints";
200
+ const proposer = vi
201
+ .fn()
202
+ .mockResolvedValue([
203
+ "Which API surface should the analysis target? [MODEL RECS: support both REST/GraphQL endpoints | REST only]",
204
+ ]);
205
+ const result = await runDiscovery("review the API layer", analyzeL1, process.cwd(), pickAnswer(userAnswer), null, proposer);
206
+ expect(result.outcome).toBe(userAnswer);
207
+ expect(result.outcome).not.toBe("Detailed analysis with concrete improvement recommendations");
208
+ });
209
+ it("preserves another 'or'-style slash answer (validate input/output schemas)", async () => {
210
+ const userAnswer = "validate input/output schemas";
211
+ const proposer = vi.fn().mockResolvedValue(["What should the analysis verify?"]);
212
+ const result = await runDiscovery("review the API layer", analyzeL1, process.cwd(), pickAnswer(userAnswer), null, proposer);
213
+ expect(result.outcome).toBe(userAnswer);
214
+ });
215
+ it("still overwrites a genuinely path-leaked outcome with the autofilled default", async () => {
216
+ // Guard against over-correction: a real filesystem-path leak (scope-option
217
+ // shape "src/cli (cli)") must STILL be replaced by the inferred outcome.
218
+ const proposer = vi.fn().mockResolvedValue(["What scope? [MODEL RECS: src/cli (cli)]"]);
219
+ const result = await runDiscovery("review the API layer", analyzeL1, process.cwd(), pickAnswer("src/cli (cli)"), null, proposer);
220
+ expect(result.outcome).toBe("Detailed analysis with concrete improvement recommendations");
221
+ });
222
+ it("treats the 'provide my own details' meta-option as no-answer, not a literal outcome", async () => {
223
+ // The default meta-option ("I will provide my own details / constraints")
224
+ // is a 'no specific answer' sentinel — it must not survive verbatim as the
225
+ // outcome. With no inferred default available (generate), it falls back to
226
+ // the raw-derived intent rather than the sentinel string.
227
+ const sentinel = "I will provide my own details / constraints";
228
+ const proposer = vi.fn().mockResolvedValue(["What outcome do you expect?"]);
229
+ const result = await runDiscovery("build the user dashboard widget", { ...analyzeL1, taskType: "generate" }, process.cwd(), pickAnswer(sentinel), null, proposer);
230
+ expect(result.outcome).not.toBe(sentinel);
231
+ expect(result.outcome.toLowerCase()).toContain("dashboard");
232
+ });
233
+ });
101
234
  //# sourceMappingURL=discovery.test.js.map
@@ -4,18 +4,23 @@
4
4
  * pil-report command can answer "which pass actually decided the outcome".
5
5
  */
6
6
  import { beforeEach, describe, expect, it, vi } from "vitest";
7
- const { mockClassify, mockClassifyViaBrain, mockPilContext, mockIsUnifiedPilEnabled } = vi.hoisted(() => ({
7
+ const { mockClassify, mockClassifyViaBrain, mockPilContext, mockIsUnifiedPilEnabled, mockIsLlmFirstClassifyEnabled } = vi.hoisted(() => ({
8
8
  mockClassify: vi.fn(),
9
9
  mockClassifyViaBrain: vi.fn(),
10
10
  mockPilContext: vi.fn(),
11
11
  mockIsUnifiedPilEnabled: vi.fn(),
12
+ // OFF so these trace tests exercise the regex cascade passes.
13
+ mockIsLlmFirstClassifyEnabled: vi.fn(() => false),
12
14
  }));
13
15
  vi.mock("../../router/classifier/index.js", () => ({ classify: mockClassify }));
14
16
  vi.mock("../../ee/bridge.js", () => ({
15
17
  classifyViaBrain: mockClassifyViaBrain,
16
18
  pilContext: mockPilContext,
17
19
  }));
18
- vi.mock("../config.js", () => ({ isUnifiedPilEnabled: mockIsUnifiedPilEnabled }));
20
+ vi.mock("../config.js", () => ({
21
+ isUnifiedPilEnabled: mockIsUnifiedPilEnabled,
22
+ isLlmFirstClassifyEnabled: mockIsLlmFirstClassifyEnabled,
23
+ }));
19
24
  import { layer1Intent } from "../layer1-intent.js";
20
25
  function makeCtx(raw) {
21
26
  return {
@@ -4,6 +4,9 @@ vi.mock("../../router/classifier/index.js", () => ({
4
4
  }));
5
5
  vi.mock("../config.js", () => ({
6
6
  isUnifiedPilEnabled: vi.fn(() => false),
7
+ // OFF here so these cascade tests exercise the regex passes (the model-first
8
+ // gate is covered in src/pil/layer1-intent.test.ts).
9
+ isLlmFirstClassifyEnabled: vi.fn(() => false),
7
10
  }));
8
11
  vi.mock("../../ee/bridge.js", () => ({
9
12
  classifyViaBrain: vi.fn().mockResolvedValue(null),
@@ -1,5 +1,9 @@
1
1
  import { describe, expect, it } from "vitest";
2
- import { buildInterviewQuestion, detectClarityGaps, resolveGapsNonInteractive } from "../layer16-clarity.js";
2
+ import { buildInterviewQuestion, resolveGapsNonInteractive } from "../layer16-clarity.js";
3
+ // Phase 2 (2026-06-16): detectClarityGaps + its keyword option-builders were
4
+ // removed (the model now generates every clarification). The surviving helpers
5
+ // — buildInterviewQuestion (render) and resolveGapsNonInteractive (headless
6
+ // default-answer resolution) — are exercised here with model-shaped gaps.
3
7
  const EMPTY_PROJECT = {
4
8
  language: "typescript",
5
9
  framework: null,
@@ -10,122 +14,10 @@ const EMPTY_PROJECT = {
10
14
  { path: "src/billing/", name: "billing", entryFiles: [], exportedSymbols: [] },
11
15
  ],
12
16
  eePatterns: [],
13
- relevantModules: [],
17
+ relevantModules: [{ path: "src/auth/", relevance: "named in prompt", exists: true }],
14
18
  scannedAt: Date.now(),
15
19
  cwd: "/proj",
16
20
  };
17
- describe("detectClarityGaps()", () => {
18
- it("detects outcome gap for vague non-debug prompt", () => {
19
- // PIL-L6 fix — debug now joins the autofill set, so vague debug prompts
20
- // ("fix auth") no longer trigger an outcome question. Use a generate
21
- // prompt instead to still cover the gap-detection path.
22
- const gaps = detectClarityGaps("build something", "generate", 0.7, EMPTY_PROJECT);
23
- const outcomeGap = gaps.find((g) => g.dimension === "outcome");
24
- expect(outcomeGap).toBeDefined();
25
- });
26
- it("does NOT detect outcome gap for vague debug prompt (autofilled)", () => {
27
- const gaps = detectClarityGaps("fix auth", "debug", 0.7, EMPTY_PROJECT);
28
- const outcomeGap = gaps.find((g) => g.dimension === "outcome");
29
- expect(outcomeGap).toBeUndefined();
30
- });
31
- it("does NOT detect an outcome gap for a vague general prompt (B2 intent-swallow guard)", () => {
32
- // B2 — a `general` prompt's only outcome options are tautological
33
- // ("Task completed" / "Issue resolved"). Asking them lets the default
34
- // answer overwrite the user's real request, so the intent collapses to
35
- // "general: Task completed" and the original prompt is lost. Skip the
36
- // askcard so the outcome falls back to the raw request downstream.
37
- const gaps = detectClarityGaps("the project feels messy", "general", 0.7, EMPTY_PROJECT);
38
- const outcomeGap = gaps.find((g) => g.dimension === "outcome");
39
- expect(outcomeGap).toBeUndefined();
40
- });
41
- it("detects scope gap when no file reference", () => {
42
- const gaps = detectClarityGaps("fix auth", "debug", 0.7, EMPTY_PROJECT);
43
- const scopeGap = gaps.find((g) => g.dimension === "scope");
44
- expect(scopeGap).toBeDefined();
45
- });
46
- it("returns no gaps for specific prompt", () => {
47
- const gaps = detectClarityGaps("fix TypeError in src/auth/login.ts:42", "debug", 0.9, EMPTY_PROJECT);
48
- expect(gaps).toHaveLength(0);
49
- });
50
- it("scope options include matching bounded contexts", () => {
51
- const gaps = detectClarityGaps("fix auth", "debug", 0.7, EMPTY_PROJECT);
52
- const scopeGap = gaps.find((g) => g.dimension === "scope");
53
- expect(scopeGap?.options.some((o) => o.includes("auth"))).toBe(true);
54
- });
55
- it("does NOT detect a scope gap for a general prompt with no codebase signal (B2-symmetric scope guard)", () => {
56
- // Live drive (session 8a87aa060c6a): the pure non-codebase prompt "Reply
57
- // with exactly one word: PONG" fired the scope askcard "Which part of the
58
- // codebase should this target?" because countFileReferences /
59
- // hasExplicitScope / hasOperationalScope were all empty — the detector
60
- // assumes every prompt is a codebase task. A general/unclassified prompt
61
- // has no codebase dimension to scope, so the question is nonsensical (and
62
- // its acceptance card is downstream noise). Skip it, symmetric to the B2
63
- // outcome guard; scope falls back to project-root downstream.
64
- const gaps = detectClarityGaps("Reply with exactly one word: PONG", "general", 0.6, EMPTY_PROJECT);
65
- expect(gaps.find((g) => g.dimension === "scope")).toBeUndefined();
66
- // The only candidate gap was scope → general prompt now yields zero gaps,
67
- // so discovery never marks interviewed=true and shows no acceptance card.
68
- expect(gaps).toHaveLength(0);
69
- });
70
- it("STILL detects a scope gap for a classified (non-general) task with no file reference", () => {
71
- // Guard must stay narrow: a real code task that simply omitted a path still
72
- // benefits from the scope-narrowing askcard. Only general/null is skipped.
73
- const gaps = detectClarityGaps("implement the search feature", "generate", 0.7, EMPTY_PROJECT);
74
- expect(gaps.find((g) => g.dimension === "scope")).toBeDefined();
75
- });
76
- it("does NOT detect a scope gap for an image-analysis prompt (image is the scope)", () => {
77
- // Live drive (PR#34 probe): "Take a screenshot of the homepage and analyze
78
- // the diagram.png image to describe its layout" fired the codebase-scope
79
- // askcard "Which part of the codebase should this target?" — nonsensical for
80
- // an image-analysis task. The image (screenshot / diagram.png) IS the scope,
81
- // symmetric to how operational (CI/build) prompts are scoped to the pipeline.
82
- const gaps = detectClarityGaps("Take a screenshot of the homepage and analyze the diagram.png image to describe its layout", "analyze", 0.7, EMPTY_PROJECT);
83
- expect(gaps.find((g) => g.dimension === "scope")).toBeUndefined();
84
- // analyze autofills outcome, so with scope suppressed there are zero gaps →
85
- // no interview, no acceptance card.
86
- expect(gaps).toHaveLength(0);
87
- });
88
- it("STILL detects a scope gap for a code task that mentions an ambiguous non-image word", () => {
89
- // Narrowness guard: image-scope suppression must not swallow real codebase
90
- // tasks. "add a logo to the header" carries no concrete image signal (no
91
- // file extension / screenshot / photo), so the scope askcard stays.
92
- const gaps = detectClarityGaps("add a logo to the header", "generate", 0.7, EMPTY_PROJECT);
93
- expect(gaps.find((g) => g.dimension === "scope")).toBeDefined();
94
- });
95
- it("does NOT detect a scope gap for a web-search / external-info prompt", () => {
96
- // Live drive (tavily probe, session d7a45a2dba30): "search the web for the
97
- // latest vitest release notes" classified taskType=analyze fired the
98
- // codebase-scope askcard and recorded a wrong scope ("src/mcp"). A
99
- // web-search task is scoped to the web, not the codebase — symmetric to the
100
- // image-scope and operational-scope guards.
101
- const gaps = detectClarityGaps("search the web for the latest vitest release notes", "analyze", 0.7, EMPTY_PROJECT);
102
- expect(gaps.find((g) => g.dimension === "scope")).toBeUndefined();
103
- expect(gaps).toHaveLength(0);
104
- });
105
- it("does NOT detect a scope gap for a self-contained computation prompt (data is inline)", () => {
106
- // Live drive (deepseek-vs-grok A/B, session 17fc23f0): "Compute f([3,1,2])
107
- // where f sorts the list ascending then returns the sum of the first two
108
- // elements." classified taskType=analyze (regex:read matched the bare word
109
- // "list", conf 0.80 → skipped the brain) fired BOTH the pil-interview scope
110
- // askcard ("Which part of the codebase should this target?" → auto "Entire
111
- // project") AND the pil-acceptance card. The operand [3,1,2] is supplied
112
- // inline — the task has no codebase dimension to scope. Symmetric to the
113
- // image / web / operational scope guards.
114
- const gaps = detectClarityGaps("Compute f([3,1,2]) where f sorts the list ascending then returns the sum of the first two elements.", "analyze", 0.8, EMPTY_PROJECT);
115
- expect(gaps.find((g) => g.dimension === "scope")).toBeUndefined();
116
- // analyze autofills outcome, so with scope suppressed there are zero gaps →
117
- // no interview, no acceptance card.
118
- expect(gaps).toHaveLength(0);
119
- });
120
- it("STILL detects a scope gap for a code task that embeds a literal but no compute framing", () => {
121
- // Narrowness guard: the inline-literal suppression must not swallow real
122
- // codebase tasks. "set the default retry delays to [100, 200, 400] in the
123
- // config" carries a literal but is scoped to the codebase (no compute verb),
124
- // so the scope askcard stays.
125
- const gaps = detectClarityGaps("set the default retry delays to [100, 200, 400] in the config", "generate", 0.7, EMPTY_PROJECT);
126
- expect(gaps.find((g) => g.dimension === "scope")).toBeDefined();
127
- });
128
- });
129
21
  describe("buildInterviewQuestion()", () => {
130
22
  it("builds a CouncilQuestionData with pil-interview phase", () => {
131
23
  const gap = {
@@ -141,11 +33,35 @@ describe("buildInterviewQuestion()", () => {
141
33
  expect(q.options).toBeDefined();
142
34
  expect(q.options.some((o) => o.kind === "freetext")).toBe(true);
143
35
  });
36
+ it("surfaces the model's reason (gap.description) as the askcard context", () => {
37
+ const gap = {
38
+ dimension: "outcome",
39
+ description: "answering this changes whether we add OAuth or just API keys",
40
+ suggestedQuestion: "Which auth method?",
41
+ options: ["OAuth", "API keys"],
42
+ defaultIndex: 0,
43
+ };
44
+ const q = buildInterviewQuestion(gap, "q-2");
45
+ expect(q.context).toBe("answering this changes whether we add OAuth or just API keys");
46
+ });
144
47
  });
145
48
  describe("resolveGapsNonInteractive()", () => {
146
- it("fills gaps with best-effort from project context", () => {
147
- const gaps = detectClarityGaps("fix auth", "debug", 0.7, EMPTY_PROJECT);
49
+ it("fills gaps with best-effort defaults from the model options + project context", () => {
50
+ const gaps = [
51
+ {
52
+ dimension: "outcome",
53
+ description: "Model-generated clarification #1",
54
+ suggestedQuestion: "What outcome do you expect?",
55
+ options: ["Error resolved", "Other (type free answer)"],
56
+ defaultIndex: 0,
57
+ },
58
+ ];
148
59
  const resolved = resolveGapsNonInteractive(gaps, EMPTY_PROJECT, "fix auth");
60
+ expect(resolved.outcome).toBe("Error resolved");
61
+ expect(resolved.scope.length).toBeGreaterThan(0);
62
+ });
63
+ it("falls back to the raw-derived outcome when there is no outcome gap", () => {
64
+ const resolved = resolveGapsNonInteractive([], EMPTY_PROJECT, "fix the login bug");
149
65
  expect(resolved.outcome).toBeTruthy();
150
66
  expect(resolved.scope.length).toBeGreaterThan(0);
151
67
  });
@@ -75,6 +75,43 @@ describe("layer4Gsd (gsd-native)", () => {
75
75
  const result = await layer4Gsd(makeCtx({ raw: "review the pull request" }));
76
76
  expect(["review", "discuss", "execute"]).toContain(result.gsdPhase);
77
77
  });
78
+ it("routes a question-shaped analyze/debug prompt to the QUESTION directive (no 'state a plan')", async () => {
79
+ // De-robotizing: a plain question must not get the STANDARD "state a 2-3 line
80
+ // plan" scaffold even when L1 classifies it analyze/debug (not "general").
81
+ const q = "why does the build fail intermittently?";
82
+ const result = await layer4Gsd(makeCtx({ raw: q, enriched: q, taskType: "debug", intentKind: "task" }));
83
+ expect(result.enriched).toContain("QUESTION / explanatory");
84
+ expect(result.enriched).not.toContain("State a 2-3 line plan");
85
+ });
86
+ it("treats a genuine general question (general + task) as informational", async () => {
87
+ const q = "what does the enrichment layer do?";
88
+ const result = await layer4Gsd(makeCtx({ raw: q, enriched: q, taskType: "general", intentKind: "task" }));
89
+ expect(result.enriched).toContain("QUESTION / explanatory");
90
+ });
91
+ it("does NOT treat an implementation request as informational even if phrased as a question", async () => {
92
+ // isImplementationIntent guards the question clause: "can you refactor … and
93
+ // wire up …" is a real edit task → STANDARD scaffold, not the QUESTION directive.
94
+ const q = "can you refactor the dropdown and wire up the keyboard handlers?";
95
+ const result = await layer4Gsd(makeCtx({ raw: q, enriched: q, taskType: "refactor", intentKind: "task" }));
96
+ expect(result.enriched).not.toContain("QUESTION / explanatory");
97
+ });
98
+ it("Phase 2b: deliverableKind='answer' is informational even for an imperative (no '?') prompt", async () => {
99
+ // The raw text is a plain imperative — the legacy regex (isQuestionLike /
100
+ // isMetaAnalysisPrompt) would NOT mark it informational. The model's
101
+ // deliverableKind='answer' must override that and route to the QUESTION
102
+ // directive — proving L4 consumes the model signal, not the regex.
103
+ const raw = "go over the auth module and tell me what it does";
104
+ const result = await layer4Gsd(makeCtx({ raw, enriched: raw, taskType: "analyze", intentKind: "task", deliverableKind: "answer" }));
105
+ expect(result.enriched).toContain("QUESTION / explanatory");
106
+ });
107
+ it("Phase 2b: deliverableKind='code' is NOT informational even for a question-shaped prompt", async () => {
108
+ // The raw text reads as a question — the legacy regex would mark it
109
+ // informational. The model's deliverableKind='code' must override that so
110
+ // the STANDARD implement scaffold is used (the deliverable is file edits).
111
+ const raw = "why not just refactor the dropdown and wire the keyboard handlers?";
112
+ const result = await layer4Gsd(makeCtx({ raw, enriched: raw, taskType: "refactor", intentKind: "task", deliverableKind: "code" }));
113
+ expect(result.enriched).not.toContain("QUESTION / explanatory");
114
+ });
78
115
  it("uses ctx.gsdPhase from L1 (unified path) without calling routeTask", async () => {
79
116
  const { routeTask } = await import("../../ee/bridge.js");
80
117
  vi.mocked(routeTask).mockClear();