muonroi-cli 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +122 -122
  3. package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
  4. package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
  5. package/dist/src/agent-harness/mock-model.d.ts +11 -0
  6. package/dist/src/agent-harness/mock-model.js +21 -0
  7. package/dist/src/cli/cost-forensics.js +12 -12
  8. package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
  9. package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
  10. package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
  11. package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
  12. package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
  13. package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
  14. package/dist/src/council/clarifier.js +9 -1
  15. package/dist/src/council/debate.js +5 -1
  16. package/dist/src/council/decisions-lock.js +3 -3
  17. package/dist/src/council/index.js +12 -5
  18. package/dist/src/council/leader.d.ts +0 -17
  19. package/dist/src/council/leader.js +22 -15
  20. package/dist/src/council/planner.js +1 -1
  21. package/dist/src/council/prompts.js +63 -57
  22. package/dist/src/council/types.d.ts +7 -0
  23. package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
  24. package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
  25. package/dist/src/ee/auth.d.ts +9 -0
  26. package/dist/src/ee/auth.js +19 -0
  27. package/dist/src/ee/ee-onboarding.d.ts +5 -0
  28. package/dist/src/ee/ee-onboarding.js +76 -0
  29. package/dist/src/generated/version.d.ts +1 -1
  30. package/dist/src/generated/version.js +1 -1
  31. package/dist/src/headless/output.js +6 -4
  32. package/dist/src/headless/output.test.js +4 -3
  33. package/dist/src/index.js +20 -1
  34. package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
  35. package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
  36. package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
  37. package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
  38. package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
  39. package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
  40. package/dist/src/mcp/auto-setup.js +56 -2
  41. package/dist/src/mcp/client-pool.d.ts +46 -0
  42. package/dist/src/mcp/client-pool.js +212 -0
  43. package/dist/src/mcp/oauth-callback.js +2 -2
  44. package/dist/src/mcp/parse-headers.test.js +14 -14
  45. package/dist/src/mcp/runtime.d.ts +28 -0
  46. package/dist/src/mcp/runtime.js +117 -51
  47. package/dist/src/mcp/self-verify-runner.d.ts +14 -0
  48. package/dist/src/mcp/self-verify-runner.js +38 -0
  49. package/dist/src/mcp/setup-guide-text.d.ts +9 -0
  50. package/dist/src/mcp/setup-guide-text.js +84 -0
  51. package/dist/src/mcp/smart-filter.js +49 -0
  52. package/dist/src/mcp/smoke.test.js +43 -43
  53. package/dist/src/mcp/tools-server.d.ts +7 -0
  54. package/dist/src/mcp/tools-server.js +19 -22
  55. package/dist/src/models/catalog.json +349 -349
  56. package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
  57. package/dist/src/ops/doctor.d.ts +3 -2
  58. package/dist/src/ops/doctor.js +47 -11
  59. package/dist/src/ops/doctor.test.js +4 -3
  60. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
  61. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
  62. package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
  63. package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
  64. package/dist/src/orchestrator/batch-turn-runner.js +7 -11
  65. package/dist/src/orchestrator/message-processor.js +57 -27
  66. package/dist/src/orchestrator/orchestrator.js +26 -0
  67. package/dist/src/orchestrator/prompts.d.ts +51 -0
  68. package/dist/src/orchestrator/prompts.js +257 -134
  69. package/dist/src/orchestrator/scope-ceiling.js +6 -1
  70. package/dist/src/orchestrator/stream-runner.js +20 -15
  71. package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
  72. package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
  73. package/dist/src/pil/__tests__/config.test.js +1 -17
  74. package/dist/src/pil/__tests__/discovery.test.js +144 -11
  75. package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
  76. package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
  77. package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
  78. package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
  79. package/dist/src/pil/__tests__/layer6-output.test.js +137 -18
  80. package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
  81. package/dist/src/pil/agent-operating-contract.d.ts +1 -1
  82. package/dist/src/pil/agent-operating-contract.js +2 -0
  83. package/dist/src/pil/agent-operating-contract.test.js +7 -2
  84. package/dist/src/pil/cheap-model-playbook.js +35 -35
  85. package/dist/src/pil/cheap-model-workbooks.js +16 -13
  86. package/dist/src/pil/clarity-gate.d.ts +21 -19
  87. package/dist/src/pil/clarity-gate.js +26 -153
  88. package/dist/src/pil/config.d.ts +9 -1
  89. package/dist/src/pil/config.js +15 -4
  90. package/dist/src/pil/discovery.js +211 -136
  91. package/dist/src/pil/layer1-intent.d.ts +12 -0
  92. package/dist/src/pil/layer1-intent.js +283 -38
  93. package/dist/src/pil/layer1-intent.test.js +210 -4
  94. package/dist/src/pil/layer16-clarity.d.ts +25 -11
  95. package/dist/src/pil/layer16-clarity.js +19 -306
  96. package/dist/src/pil/layer4-gsd.js +18 -6
  97. package/dist/src/pil/layer6-output.d.ts +2 -0
  98. package/dist/src/pil/layer6-output.js +137 -22
  99. package/dist/src/pil/llm-classify.d.ts +26 -0
  100. package/dist/src/pil/llm-classify.js +34 -5
  101. package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
  102. package/dist/src/pil/native-capabilities-workbook.js +82 -76
  103. package/dist/src/pil/schema.d.ts +8 -0
  104. package/dist/src/pil/schema.js +12 -1
  105. package/dist/src/pil/task-tier-map.js +4 -0
  106. package/dist/src/pil/types.d.ts +11 -1
  107. package/dist/src/product-loop/done-gate.js +3 -3
  108. package/dist/src/product-loop/loop-driver.js +18 -18
  109. package/dist/src/product-loop/progress-snapshot.js +4 -4
  110. package/dist/src/providers/auth/gemini-oauth.js +6 -15
  111. package/dist/src/providers/auth/grok-oauth.js +6 -15
  112. package/dist/src/providers/auth/openai-oauth.js +6 -15
  113. package/dist/src/providers/mcp-vision-bridge.js +48 -48
  114. package/dist/src/reporter/index.js +1 -1
  115. package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
  116. package/dist/src/scaffold/bb-quality-gate.js +5 -5
  117. package/dist/src/scaffold/continuation-prompt.js +60 -60
  118. package/dist/src/scaffold/init-new.js +453 -453
  119. package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
  120. package/dist/src/self-qa/agentic-loop.js +24 -19
  121. package/dist/src/self-qa/spec-emitter.js +26 -23
  122. package/dist/src/storage/__tests__/migrations.test.js +2 -2
  123. package/dist/src/storage/interaction-log.js +5 -5
  124. package/dist/src/storage/migrations.js +122 -122
  125. package/dist/src/storage/sessions.js +42 -42
  126. package/dist/src/storage/transcript.js +91 -84
  127. package/dist/src/storage/usage.js +14 -14
  128. package/dist/src/storage/workspaces.js +12 -12
  129. package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
  130. package/dist/src/tools/__tests__/native-tools.test.js +53 -0
  131. package/dist/src/tools/git-safety.d.ts +61 -0
  132. package/dist/src/tools/git-safety.js +141 -0
  133. package/dist/src/tools/git-safety.test.d.ts +1 -0
  134. package/dist/src/tools/git-safety.test.js +111 -0
  135. package/dist/src/tools/native-tools.d.ts +31 -0
  136. package/dist/src/tools/native-tools.js +273 -0
  137. package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
  138. package/dist/src/tools/registry-git-safety.test.js +92 -0
  139. package/dist/src/tools/registry.js +39 -4
  140. package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
  141. package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
  142. package/dist/src/ui/app.js +0 -0
  143. package/dist/src/ui/components/message-view.js +4 -1
  144. package/dist/src/ui/components/structured-response-view.js +7 -3
  145. package/dist/src/ui/components/tool-group.js +7 -1
  146. package/dist/src/ui/markdown-render.d.ts +41 -0
  147. package/dist/src/ui/markdown-render.js +223 -0
  148. package/dist/src/ui/markdown.d.ts +10 -0
  149. package/dist/src/ui/markdown.js +12 -35
  150. package/dist/src/ui/slash/council-inspect.js +4 -4
  151. package/dist/src/ui/slash/export.js +4 -4
  152. package/dist/src/ui/utils/text.d.ts +8 -0
  153. package/dist/src/ui/utils/text.js +16 -0
  154. package/dist/src/ui/utils/text.test.d.ts +1 -0
  155. package/dist/src/ui/utils/text.test.js +23 -0
  156. package/dist/src/usage/ledger.js +48 -15
  157. package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
  158. package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
  159. package/dist/src/utils/clipboard-image.js +23 -23
  160. package/dist/src/utils/open-url.d.ts +56 -0
  161. package/dist/src/utils/open-url.js +58 -0
  162. package/dist/src/utils/open-url.test.d.ts +1 -0
  163. package/dist/src/utils/open-url.test.js +86 -0
  164. package/dist/src/utils/settings.d.ts +12 -0
  165. package/dist/src/utils/settings.js +48 -0
  166. package/dist/src/utils/side-question.js +2 -2
  167. package/dist/src/utils/skills.js +3 -3
  168. package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
  169. package/dist/src/verify/environment.js +2 -1
  170. package/package.json +1 -1
  171. package/dist/src/pil/layer16-clarity.test.js +0 -31
  172. /package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0
@@ -1,213 +1,28 @@
1
1
  import { describe, expect, it } from "vitest";
2
- import { canInferOutcome, countFileReferences, hasExplicitScope, hasExternalInfoScope, hasImageScope, hasOperationalScope, hasSelfContainedComputationScope, hasWholeRepoScope, shouldAutoPass, } from "../clarity-gate.js";
3
- describe("hasWholeRepoScope()", () => {
4
- it("detects whole-repo / whole-project intent (EN + VI)", () => {
5
- // The repo-eval prompt that fired a nonsensical "which part?" askcard.
6
- expect(hasWholeRepoScope("đánh giá repo muonroi-cli này: điểm mạnh, điểm yếu")).toBe(true);
7
- expect(hasWholeRepoScope("evaluate the repo: strengths and weaknesses")).toBe(true);
8
- expect(hasWholeRepoScope("review the whole codebase")).toBe(true);
9
- expect(hasWholeRepoScope("audit the entire project")).toBe(true);
10
- expect(hasWholeRepoScope("phân tích toàn bộ dự án")).toBe(true);
11
- expect(hasWholeRepoScope("give me an overview of the repository")).toBe(true);
12
- // summarize/overview verbs (gap found in the deepseek session probe: "tóm tắt
13
- // repo này" still fired the scope askcard because the verb list lacked it).
14
- expect(hasWholeRepoScope("tóm tắt nhanh repo này")).toBe(true);
15
- expect(hasWholeRepoScope("summarize the repository")).toBe(true);
16
- expect(hasWholeRepoScope("give me a summary of the project")).toBe(true);
17
- });
18
- it("does NOT fire on summarize/review of a narrow target", () => {
19
- expect(hasWholeRepoScope("summarize the login function")).toBe(false);
20
- expect(hasWholeRepoScope("tóm tắt hàm xử auth")).toBe(false);
21
- });
22
- it("does NOT fire on narrow tasks that merely mention a repo/project", () => {
23
- // "this repo" without a wholeness/eval signal must still be scoped.
24
- expect(hasWholeRepoScope("add a logout button to this repo")).toBe(false);
25
- expect(hasWholeRepoScope("fix the login bug in the project")).toBe(false);
26
- expect(hasWholeRepoScope("implement the search feature")).toBe(false);
27
- expect(hasWholeRepoScope("refactor the auth module")).toBe(false);
28
- });
29
- it("whole-repo scope no longer blocks auto-pass (was: scope-gap → false)", () => {
30
- // With an inferable outcome (explicit goal), the ONLY remaining blocker for a
31
- // repo-wide prompt was the scope gap. hasWholeRepoScope clears it.
32
- const prompt = "review the entire codebase — goal: a report of strengths and weaknesses";
33
- expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, prompt)).toBe(true);
34
- // Control: same shape but NOT repo-wide still fails on the scope gap.
35
- const narrow = "review the system — goal: a report of strengths and weaknesses";
36
- expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, narrow)).toBe(false);
37
- });
38
- });
39
- describe("hasSelfContainedComputationScope()", () => {
40
- it("detects an inline-data computation prompt (the operand is in the prompt, not the codebase)", () => {
41
- // Live drive (deepseek-vs-grok A/B, session probe 2026-06-05): "Compute
42
- // f([3,1,2]) where f sorts the list ascending then returns the sum of the
43
- // first two elements." classified taskType=analyze (regex:read matched the
44
- // bare word "list") and fired the codebase-scope askcard "Which part of the
45
- // codebase should this target?" — nonsensical for a self-contained math
46
- // problem whose input data is supplied inline. Symmetric to image/web/
47
- // operational scope guards.
48
- expect(hasSelfContainedComputationScope("Compute f([3,1,2]) where f sorts the list ascending then returns the sum of the first two elements.")).toBe(true);
49
- expect(hasSelfContainedComputationScope("Given the array [5, 2, 8, 1, 9], what is the second largest element?")).toBe(true);
50
- expect(hasSelfContainedComputationScope("What is the median of [10, 4, 7]?")).toBe(true);
51
- expect(hasSelfContainedComputationScope('Reverse the list ["a", "b", "c"] and return it.')).toBe(true);
52
- });
53
- it("does NOT fire without an inline data literal", () => {
54
- // The framing verb alone is not enough — a codebase task can say "compute"
55
- // ("compute the hash in the auth module"). Only an inline operand qualifies.
56
- expect(hasSelfContainedComputationScope("compute the cache key in the auth module")).toBe(false);
57
- expect(hasSelfContainedComputationScope("sort the users table by created_at")).toBe(false);
58
- expect(hasSelfContainedComputationScope("what is the second largest element of the array")).toBe(false);
59
- });
60
- it("does NOT fire on a real codebase task that merely contains an array literal (no compute framing)", () => {
61
- // Narrowness guard: the literal alone is not enough. A feature/debug task
62
- // that embeds a literal but is scoped to the codebase must KEEP its scope
63
- // askcard. Requires BOTH an inline literal AND computation framing.
64
- expect(hasSelfContainedComputationScope("add the items [1, 2, 3] to the cart in the checkout flow")).toBe(false);
65
- expect(hasSelfContainedComputationScope("fix the bug where parseRange([1, 5]) returns the wrong values")).toBe(false);
66
- expect(hasSelfContainedComputationScope("set the default retry delays to [100, 200, 400] in the config")).toBe(false);
67
- });
68
- it("does NOT fire on bracketed file-name lists (those are codebase-scoped)", () => {
69
- // [a.ts, b.ts] is a list of files, not data — must stay codebase-scoped.
70
- expect(hasSelfContainedComputationScope("compare the exports of [auth.ts, session.ts]")).toBe(false);
71
- });
72
- it("self-contained computation no longer blocks auto-pass (was: scope-gap → false)", () => {
73
- // With an inferable outcome ("return the result"), the ONLY remaining blocker
74
- // for an inline-data computation prompt was the scope gap.
75
- // hasSelfContainedComputationScope clears it.
76
- const prompt = "Compute the sum of the first two sorted elements of [3, 1, 2] and return the result.";
77
- expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, prompt)).toBe(true);
78
- // Control: same outcome-inferable shape but NO inline literal still fails on
79
- // the scope gap (a real codebase computation must still be scoped).
80
- const codeTask = "Compute the largest element of the users array and return it.";
81
- expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, codeTask)).toBe(false);
82
- });
83
- });
84
- describe("canInferOutcome()", () => {
85
- it("returns false for null taskType", () => {
86
- expect(canInferOutcome(null, "do something")).toBe(false);
87
- });
88
- it("returns false for general taskType", () => {
89
- expect(canInferOutcome("general", "fix stuff")).toBe(false);
90
- });
91
- it("returns true for a general taskType that is a direct imperative command", () => {
92
- // A direct command has a self-evident outcome (it runs / it shows), so it
93
- // should auto-pass instead of triggering an outcome-clarification askcard.
94
- expect(canInferOutcome("general", "run the test suite")).toBe(true);
95
- expect(canInferOutcome("general", "echo harness-ok")).toBe(true);
96
- expect(canInferOutcome("general", "show the package.json scripts")).toBe(true);
97
- expect(canInferOutcome("general", "list the open ports")).toBe(true);
98
- });
99
- it("returns false for a general imperative verb with no object", () => {
100
- expect(canInferOutcome("general", "run")).toBe(false);
101
- expect(canInferOutcome("general", "execute ")).toBe(false);
102
- });
103
- it("returns false for a general non-imperative prompt", () => {
104
- expect(canInferOutcome("general", "the build is slow")).toBe(false);
105
- });
106
- it("returns true when prompt has error reference", () => {
107
- expect(canInferOutcome("debug", "fix the TypeError in login")).toBe(true);
108
- });
109
- it("returns true when prompt has file:line reference", () => {
110
- expect(canInferOutcome("debug", "fix auth.ts:42")).toBe(true);
111
- });
112
- it("returns true when prompt has target state verb", () => {
113
- expect(canInferOutcome("refactor", "should return a Promise")).toBe(true);
114
- });
115
- it("returns true when prompt has add pattern", () => {
116
- expect(canInferOutcome("generate", "add validation to login form")).toBe(true);
117
- });
118
- it("returns false for vague prompt with valid taskType", () => {
119
- expect(canInferOutcome("debug", "fix auth")).toBe(false);
120
- });
121
- });
122
- describe("countFileReferences()", () => {
123
- it("counts .ts and .tsx files", () => {
124
- expect(countFileReferences("fix login.ts and dashboard.tsx")).toBe(2);
125
- });
126
- it("returns 0 for no file refs", () => {
127
- expect(countFileReferences("fix the auth module")).toBe(0);
128
- });
129
- it("ignores non-code extensions", () => {
130
- expect(countFileReferences("see report.pdf")).toBe(0);
131
- });
132
- });
133
- describe("hasExplicitScope()", () => {
134
- it("detects src/ paths", () => {
135
- expect(hasExplicitScope("refactor src/auth/jwt.ts")).toBe(true);
136
- });
137
- it("detects lib/ paths", () => {
138
- expect(hasExplicitScope("update lib/utils")).toBe(true);
139
- });
140
- it("returns false for no path", () => {
141
- expect(hasExplicitScope("refactor the code")).toBe(false);
142
- });
143
- });
144
- describe("shouldAutoPass()", () => {
145
- it("auto-passes high-confidence + specific file + inferrable outcome", () => {
146
- expect(shouldAutoPass({ confidence: 0.9, taskType: "debug", complexity: "low" }, "fix TypeError in src/auth/login.ts:42")).toBe(true);
147
- });
148
- it("rejects low confidence", () => {
149
- expect(shouldAutoPass({ confidence: 0.6, taskType: "debug", complexity: "low" }, "fix TypeError in login.ts:42")).toBe(false);
150
- });
151
- it("rejects vague prompt despite high confidence", () => {
152
- expect(shouldAutoPass({ confidence: 0.9, taskType: "debug", complexity: "low" }, "fix auth")).toBe(false);
153
- });
154
- it("rejects high complexity", () => {
155
- expect(shouldAutoPass({ confidence: 0.9, taskType: "refactor", complexity: "high" }, "refactor src/auth/login.ts should return Promise")).toBe(false);
156
- });
157
- it("auto-passes with explicit scope path even without file extension", () => {
158
- expect(shouldAutoPass({ confidence: 0.9, taskType: "refactor", complexity: "medium" }, "refactor src/auth/ module to return Promises")).toBe(true);
159
- });
160
- // PIL-L6 fix
161
- it("auto-passes CI/build debug task even without file path (operational scope)", () => {
162
- expect(shouldAutoPass({ confidence: 0.9, taskType: "debug", complexity: "low" }, "fix the ci fail — goal: green pipeline")).toBe(true);
163
- });
164
- // Image-scope fix — an image-analysis task is scoped to the image, not a file
165
- // path, so it should auto-pass when its outcome is inferrable.
166
- it("auto-passes an image-analysis task even without file path (image scope)", () => {
167
- expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, "analyze screenshot.png — goal: describe the layout")).toBe(true);
168
- });
169
- // External-info fix — a web-search task is scoped to the web, not a file path.
170
- it("auto-passes a web-search task even without file path (external-info scope)", () => {
171
- expect(shouldAutoPass({ confidence: 0.9, taskType: "analyze", complexity: "low" }, "search the web for the vitest release date — goal: find the version")).toBe(true);
172
- });
173
- });
174
- describe("hasExternalInfoScope()", () => {
175
- it("detects web-search / external-info intent", () => {
176
- expect(hasExternalInfoScope("search the web for the latest vitest release notes")).toBe(true);
177
- expect(hasExternalInfoScope("google the error message")).toBe(true);
178
- expect(hasExternalInfoScope("what's the latest news on the framework")).toBe(true);
179
- expect(hasExternalInfoScope("summarize https://example.com/post")).toBe(true);
180
- });
181
- it("returns false for codebase tasks, including in-repo 'search'", () => {
182
- // Narrow: must NOT swallow a real code task. "search the codebase" and
183
- // "search feature" are codebase work and still deserve a scope askcard.
184
- expect(hasExternalInfoScope("search the codebase for usages of foo")).toBe(false);
185
- expect(hasExternalInfoScope("implement the search feature")).toBe(false);
186
- expect(hasExternalInfoScope("add the zod library to the auth module")).toBe(false);
187
- expect(hasExternalInfoScope("refactor the login flow")).toBe(false);
188
- });
189
- });
190
- describe("hasImageScope()", () => {
191
- it("detects an image file extension", () => {
192
- expect(hasImageScope("analyze diagram.png")).toBe(true);
193
- expect(hasImageScope("describe the layout of mock.jpg")).toBe(true);
194
- expect(hasImageScope("read chart.svg")).toBe(true);
195
- });
196
- it("detects a data:image URI and screenshot/photo nouns", () => {
197
- expect(hasImageScope("here is data:image/png;base64,AAAA")).toBe(true);
198
- expect(hasImageScope("take a screenshot and analyze it")).toBe(true);
199
- expect(hasImageScope("look at the photo")).toBe(true);
200
- });
201
- it("returns false for codebase tasks and ambiguous/overloaded words", () => {
202
- // Narrow on purpose: a false positive SUPPRESSES a legitimate scope
203
- // question, so overloaded words must NOT match.
204
- expect(hasImageScope("refactor the login flow")).toBe(false);
205
- expect(hasImageScope("add a logo to the header")).toBe(false); // "logo" excluded
206
- expect(hasImageScope("rebuild the docker image")).toBe(false); // bare "image" excluded
207
- expect(hasImageScope("look at the bigger picture")).toBe(false); // "picture" excluded
208
- });
209
- });
210
- describe("hasOperationalScope() — PIL-L6", () => {
2
+ import { detectNoClarifySignal, hasOperationalScope } from "../clarity-gate.js";
3
+ // Phase 2 (2026-06-16): the regex ASK gate (shouldAutoPass + canInferOutcome +
4
+ // the per-modality scope detectors) was removed — the model now decides every
5
+ // clarification. Only two non-gating helpers survive: detectNoClarifySignal
6
+ // (explicit user consent) and hasOperationalScope (outcome-label polish).
7
+ describe("detectNoClarifySignal()", () => {
8
+ it("detects explicit no-clarify directives (EN)", () => {
9
+ expect(detectNoClarifySignal("just answer, don't ask me anything")).toBe(true);
10
+ expect(detectNoClarifySignal("answer directly without asking")).toBe(true);
11
+ expect(detectNoClarifySignal("no questions please, just do it")).toBe(true);
12
+ expect(detectNoClarifySignal("stop asking and give me the result")).toBe(true);
13
+ });
14
+ it("detects explicit no-clarify directives (VI + transliteration)", () => {
15
+ expect(detectNoClarifySignal("Đừng hỏi lại. Trả lời thẳng 3 câu hỏi.")).toBe(true);
16
+ expect(detectNoClarifySignal("không cần hỏi, trả lời luôn")).toBe(true);
17
+ expect(detectNoClarifySignal("tra loi thang dung hoi")).toBe(true);
18
+ });
19
+ it("does NOT match the explanation idiom 'don't ask me why'", () => {
20
+ expect(detectNoClarifySignal("it just works, don't ask me why")).toBe(false);
21
+ expect(detectNoClarifySignal("explain the auth flow")).toBe(false);
22
+ expect(detectNoClarifySignal("which part of the code should I read?")).toBe(false);
23
+ });
24
+ });
25
+ describe("hasOperationalScope()", () => {
211
26
  it("detects ci/build/test/action keywords", () => {
212
27
  expect(hasOperationalScope("fix ci fail")).toBe(true);
213
28
  expect(hasOperationalScope("the build is broken")).toBe(true);
@@ -219,10 +34,4 @@ describe("hasOperationalScope() — PIL-L6", () => {
219
34
  expect(hasOperationalScope("explain hooks")).toBe(false);
220
35
  });
221
36
  });
222
- describe("canInferOutcome() — explicit goal (PIL-L6)", () => {
223
- it("returns true when prompt names an explicit goal", () => {
224
- expect(canInferOutcome("debug", "goal: pipeline green")).toBe(true);
225
- expect(canInferOutcome("debug", "mong muốn: tests passing")).toBe(true);
226
- });
227
- });
228
37
  //# sourceMappingURL=clarity-gate.test.js.map
@@ -1,5 +1,5 @@
1
1
  import { afterEach, beforeEach, describe, expect, it } from "vitest";
2
- import { getAutoPassThreshold, getMaxInterviewQuestions, isDiscoveryEnabled, isUnifiedPilEnabled } from "../config.js";
2
+ import { getMaxInterviewQuestions, isDiscoveryEnabled, isUnifiedPilEnabled } from "../config.js";
3
3
  describe("isUnifiedPilEnabled", () => {
4
4
  const orig = process.env.MUONROI_PIL_UNIFIED;
5
5
  beforeEach(() => {
@@ -43,22 +43,6 @@ describe("isDiscoveryEnabled()", () => {
43
43
  delete process.env.MUONROI_PIL_DISCOVERY;
44
44
  });
45
45
  });
46
- describe("getAutoPassThreshold()", () => {
47
- it("returns 0.85 by default", () => {
48
- delete process.env.MUONROI_PIL_AUTOPASS_THRESHOLD;
49
- expect(getAutoPassThreshold()).toBe(0.85);
50
- });
51
- it("respects env override in range", () => {
52
- process.env.MUONROI_PIL_AUTOPASS_THRESHOLD = "0.7";
53
- expect(getAutoPassThreshold()).toBe(0.7);
54
- delete process.env.MUONROI_PIL_AUTOPASS_THRESHOLD;
55
- });
56
- it("clamps out-of-range to default", () => {
57
- process.env.MUONROI_PIL_AUTOPASS_THRESHOLD = "1.5";
58
- expect(getAutoPassThreshold()).toBe(0.85);
59
- delete process.env.MUONROI_PIL_AUTOPASS_THRESHOLD;
60
- });
61
- });
62
46
  describe("getMaxInterviewQuestions()", () => {
63
47
  it("returns 3 by default", () => {
64
48
  delete process.env.MUONROI_PIL_MAX_QUESTIONS;
@@ -10,7 +10,14 @@ const mockHandler = {
10
10
  showAcceptance: vi.fn().mockResolvedValue("accept"),
11
11
  };
12
12
  describe("runDiscovery()", () => {
13
- it("auto-passes on high-confidence specific prompt", async () => {
13
+ it("proceeds without interview when the model proposes no questions", async () => {
14
+ // Phase 2: the model is the sole ask-decider. An empty proposer result means
15
+ // "no gray area" → no interview, no fabricated [Discovery] outcome.
16
+ const handler = {
17
+ askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text: "x", kind: "choice" }),
18
+ showAcceptance: vi.fn().mockResolvedValue("accept"),
19
+ };
20
+ const proposer = vi.fn().mockResolvedValue([]);
14
21
  const result = await runDiscovery("fix TypeError in src/auth/login.ts:42", {
15
22
  taskType: "debug",
16
23
  confidence: 0.9,
@@ -18,15 +25,86 @@ describe("runDiscovery()", () => {
18
25
  domain: "typescript",
19
26
  outputStyle: "balanced",
20
27
  intentKind: "task",
21
- }, process.cwd(), null);
28
+ }, process.cwd(), handler, null, proposer);
29
+ expect(proposer).toHaveBeenCalled();
22
30
  expect(result.interviewed).toBe(false);
23
31
  expect(result.accepted).toBe(true);
32
+ expect(handler.askQuestion).not.toHaveBeenCalled();
24
33
  });
25
- it("interviews user on vague prompt with handler", async () => {
34
+ it("does NOT interview (and never fabricates regex questions) when no proposer is wired", async () => {
35
+ // Phase 2 fail-loud: an interactive turn missing a proposer logs and proceeds
36
+ // WITHOUT an interview — it must never fall back to keyword-generated gaps.
37
+ const handler = {
38
+ askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text: "x", kind: "choice" }),
39
+ showAcceptance: vi.fn().mockResolvedValue("accept"),
40
+ };
41
+ const result = await runDiscovery("fix auth", // vague — old regex gate would have asked a scope question
42
+ {
43
+ taskType: "debug",
44
+ confidence: 0.6,
45
+ complexity: "low",
46
+ domain: "typescript",
47
+ outputStyle: null,
48
+ intentKind: "task",
49
+ }, process.cwd(), handler, null, null);
50
+ expect(result.interviewed).toBe(false);
51
+ expect(result.accepted).toBe(true);
52
+ expect(handler.askQuestion).not.toHaveBeenCalled();
53
+ });
54
+ it("surfaces the model's reason + recommends in the interview askcard", async () => {
55
+ const askQuestion = vi.fn().mockResolvedValue({ questionId: "q1", text: "OAuth", kind: "choice" });
56
+ const handler = {
57
+ askQuestion,
58
+ showAcceptance: vi.fn().mockResolvedValue("accept"),
59
+ };
60
+ const proposer = vi
61
+ .fn()
62
+ .mockResolvedValue(["Which auth method? [MODEL RECS: OAuth | API keys] [WHY: changes the whole token flow]"]);
63
+ await runDiscovery("add authentication", {
64
+ taskType: "generate",
65
+ confidence: 0.6,
66
+ complexity: "low",
67
+ domain: null,
68
+ outputStyle: null,
69
+ intentKind: "task",
70
+ }, process.cwd(), handler, null, proposer);
71
+ expect(askQuestion).toHaveBeenCalled();
72
+ const card = askQuestion.mock.calls[0][0];
73
+ // Model's WHY drives the askcard context; recommends drive the options.
74
+ expect(card.context).toBe("changes the whole token flow");
75
+ expect(card.question).toBe("Which auth method?");
76
+ const labels = (card.options ?? []).map((o) => o.label);
77
+ expect(labels).toContain("OAuth");
78
+ expect(labels).toContain("API keys");
79
+ expect(card.defaultIndex).toBe(0); // first recommend = recommended default
80
+ });
81
+ it("skips all discovery when the user explicitly says don't ask (EN + VI)", async () => {
82
+ const handler = {
83
+ askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text: "x", kind: "choice" }),
84
+ showAcceptance: vi.fn().mockResolvedValue("accept"),
85
+ };
86
+ const l1 = {
87
+ taskType: "analyze",
88
+ confidence: 0.6, // low enough that discovery would normally interview
89
+ complexity: "low",
90
+ domain: null,
91
+ outputStyle: null,
92
+ intentKind: "task",
93
+ };
94
+ const enResult = await runDiscovery("analyze the orchestrator, just answer, don't ask", l1, process.cwd(), handler);
95
+ expect(enResult.interviewed).toBe(false);
96
+ expect(enResult.accepted).toBe(true);
97
+ const viResult = await runDiscovery("phân tích orchestrator, đừng hỏi, trả lời thẳng", l1, process.cwd(), handler);
98
+ expect(viResult.interviewed).toBe(false);
99
+ expect(viResult.accepted).toBe(true);
100
+ expect(handler.askQuestion).not.toHaveBeenCalled();
101
+ });
102
+ it("interviews user when the model proposes a question", async () => {
26
103
  const handler = {
27
104
  askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text: "Error disappears", kind: "choice" }),
28
105
  showAcceptance: vi.fn().mockResolvedValue("accept"),
29
106
  };
107
+ const proposer = vi.fn().mockResolvedValue(["What's the expected fix outcome? [MODEL RECS: Error disappears]"]);
30
108
  const result = await runDiscovery("fix auth", {
31
109
  taskType: "debug",
32
110
  confidence: 0.6,
@@ -34,7 +112,7 @@ describe("runDiscovery()", () => {
34
112
  domain: "typescript",
35
113
  outputStyle: null,
36
114
  intentKind: "task",
37
- }, process.cwd(), handler);
115
+ }, process.cwd(), handler, null, proposer);
38
116
  expect(result.interviewed).toBe(true);
39
117
  expect(result.accepted).toBe(true);
40
118
  expect(handler.askQuestion).toHaveBeenCalled();
@@ -53,14 +131,14 @@ describe("runDiscovery()", () => {
53
131
  });
54
132
  it("sets accepted=false when user cancels", async () => {
55
133
  const handler = {
56
- // PIL-L6 fix debug now autofills outcome, so only the scope gap is
57
- // asked. First call = scope gap, second call = acceptance card.
134
+ // First askQuestion = the model's interview question, second = acceptance card.
58
135
  askQuestion: vi
59
136
  .fn()
60
137
  .mockResolvedValueOnce({ questionId: "q1", text: "done", kind: "choice" })
61
138
  .mockResolvedValue({ questionId: "q-acc", text: "cancel", kind: "choice" }),
62
139
  showAcceptance: vi.fn().mockResolvedValue("cancel"),
63
140
  };
141
+ const proposer = vi.fn().mockResolvedValue(["What's the expected outcome? [MODEL RECS: Error disappears]"]);
64
142
  const result = await runDiscovery("fix auth", {
65
143
  taskType: "debug",
66
144
  confidence: 0.6,
@@ -68,17 +146,18 @@ describe("runDiscovery()", () => {
68
146
  domain: "typescript",
69
147
  outputStyle: null,
70
148
  intentKind: "task",
71
- }, process.cwd(), handler);
149
+ }, process.cwd(), handler, null, proposer);
72
150
  expect(result.accepted).toBe(false);
73
151
  });
74
152
  it("does not swallow the original request into a generic outcome for a general prompt (B2)", async () => {
75
- // B2 — answering the (now-skipped) generic outcome askcard used to collapse
76
- // the intent to "general: Task completed", discarding the user's prompt.
77
- // The scope gap may still fire; the outcome must derive from the raw text.
153
+ // B2 — the old generic outcome askcard collapsed intent to "general: Task
154
+ // completed", discarding the user's prompt. With the model proposing no
155
+ // questions, the outcome must derive from the raw text (no fabrication).
78
156
  const handler = {
79
157
  askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text: "Task completed", kind: "choice" }),
80
158
  showAcceptance: vi.fn().mockResolvedValue("accept"),
81
159
  };
160
+ const proposer = vi.fn().mockResolvedValue([]);
82
161
  const result = await runDiscovery("make the dashboard feel less cluttered", {
83
162
  taskType: "general",
84
163
  confidence: 0.6,
@@ -86,7 +165,7 @@ describe("runDiscovery()", () => {
86
165
  domain: null,
87
166
  outputStyle: null,
88
167
  intentKind: "task",
89
- }, process.cwd(), handler);
168
+ }, process.cwd(), handler, null, proposer);
90
169
  expect(result.intentStatement).not.toBe("general: Task completed");
91
170
  expect(result.outcome).not.toBe("Task completed");
92
171
  // The original request must survive into the resolved outcome.
@@ -98,4 +177,58 @@ describe("runDiscovery()", () => {
98
177
  expect(result.accepted).toBe(true);
99
178
  });
100
179
  });
180
+ describe("runDiscovery() — outcome autofill override (path-leak vs legit slash)", () => {
181
+ const analyzeL1 = {
182
+ taskType: "analyze",
183
+ confidence: 0.6,
184
+ complexity: "low",
185
+ domain: null,
186
+ outputStyle: null,
187
+ intentKind: "task",
188
+ };
189
+ // A handler that always picks `text` for both the interview answer and the
190
+ // acceptance card (any non-"cancel"/"adjust" text accepts).
191
+ const pickAnswer = (text) => ({
192
+ askQuestion: vi.fn().mockResolvedValue({ questionId: "q1", text, kind: "choice" }),
193
+ showAcceptance: vi.fn().mockResolvedValue("accept"),
194
+ });
195
+ it("preserves a user outcome answer containing '/' (does not clobber with the autofilled default)", async () => {
196
+ // Regression: the override matched ANY '/' (bare `\/` regex alt +
197
+ // `.includes("/")`), silently replacing a legit answer like
198
+ // "support both REST/GraphQL endpoints" with the analyze default.
199
+ const userAnswer = "support both REST/GraphQL endpoints";
200
+ const proposer = vi
201
+ .fn()
202
+ .mockResolvedValue([
203
+ "Which API surface should the analysis target? [MODEL RECS: support both REST/GraphQL endpoints | REST only]",
204
+ ]);
205
+ const result = await runDiscovery("review the API layer", analyzeL1, process.cwd(), pickAnswer(userAnswer), null, proposer);
206
+ expect(result.outcome).toBe(userAnswer);
207
+ expect(result.outcome).not.toBe("Detailed analysis with concrete improvement recommendations");
208
+ });
209
+ it("preserves another 'or'-style slash answer (validate input/output schemas)", async () => {
210
+ const userAnswer = "validate input/output schemas";
211
+ const proposer = vi.fn().mockResolvedValue(["What should the analysis verify?"]);
212
+ const result = await runDiscovery("review the API layer", analyzeL1, process.cwd(), pickAnswer(userAnswer), null, proposer);
213
+ expect(result.outcome).toBe(userAnswer);
214
+ });
215
+ it("still overwrites a genuinely path-leaked outcome with the autofilled default", async () => {
216
+ // Guard against over-correction: a real filesystem-path leak (scope-option
217
+ // shape "src/cli (cli)") must STILL be replaced by the inferred outcome.
218
+ const proposer = vi.fn().mockResolvedValue(["What scope? [MODEL RECS: src/cli (cli)]"]);
219
+ const result = await runDiscovery("review the API layer", analyzeL1, process.cwd(), pickAnswer("src/cli (cli)"), null, proposer);
220
+ expect(result.outcome).toBe("Detailed analysis with concrete improvement recommendations");
221
+ });
222
+ it("treats the 'provide my own details' meta-option as no-answer, not a literal outcome", async () => {
223
+ // The default meta-option ("I will provide my own details / constraints")
224
+ // is a 'no specific answer' sentinel — it must not survive verbatim as the
225
+ // outcome. With no inferred default available (generate), it falls back to
226
+ // the raw-derived intent rather than the sentinel string.
227
+ const sentinel = "I will provide my own details / constraints";
228
+ const proposer = vi.fn().mockResolvedValue(["What outcome do you expect?"]);
229
+ const result = await runDiscovery("build the user dashboard widget", { ...analyzeL1, taskType: "generate" }, process.cwd(), pickAnswer(sentinel), null, proposer);
230
+ expect(result.outcome).not.toBe(sentinel);
231
+ expect(result.outcome.toLowerCase()).toContain("dashboard");
232
+ });
233
+ });
101
234
  //# sourceMappingURL=discovery.test.js.map
@@ -4,18 +4,23 @@
4
4
  * pil-report command can answer "which pass actually decided the outcome".
5
5
  */
6
6
  import { beforeEach, describe, expect, it, vi } from "vitest";
7
- const { mockClassify, mockClassifyViaBrain, mockPilContext, mockIsUnifiedPilEnabled } = vi.hoisted(() => ({
7
+ const { mockClassify, mockClassifyViaBrain, mockPilContext, mockIsUnifiedPilEnabled, mockIsLlmFirstClassifyEnabled } = vi.hoisted(() => ({
8
8
  mockClassify: vi.fn(),
9
9
  mockClassifyViaBrain: vi.fn(),
10
10
  mockPilContext: vi.fn(),
11
11
  mockIsUnifiedPilEnabled: vi.fn(),
12
+ // OFF so these trace tests exercise the regex cascade passes.
13
+ mockIsLlmFirstClassifyEnabled: vi.fn(() => false),
12
14
  }));
13
15
  vi.mock("../../router/classifier/index.js", () => ({ classify: mockClassify }));
14
16
  vi.mock("../../ee/bridge.js", () => ({
15
17
  classifyViaBrain: mockClassifyViaBrain,
16
18
  pilContext: mockPilContext,
17
19
  }));
18
- vi.mock("../config.js", () => ({ isUnifiedPilEnabled: mockIsUnifiedPilEnabled }));
20
+ vi.mock("../config.js", () => ({
21
+ isUnifiedPilEnabled: mockIsUnifiedPilEnabled,
22
+ isLlmFirstClassifyEnabled: mockIsLlmFirstClassifyEnabled,
23
+ }));
19
24
  import { layer1Intent } from "../layer1-intent.js";
20
25
  function makeCtx(raw) {
21
26
  return {
@@ -4,6 +4,9 @@ vi.mock("../../router/classifier/index.js", () => ({
4
4
  }));
5
5
  vi.mock("../config.js", () => ({
6
6
  isUnifiedPilEnabled: vi.fn(() => false),
7
+ // OFF here so these cascade tests exercise the regex passes (the model-first
8
+ // gate is covered in src/pil/layer1-intent.test.ts).
9
+ isLlmFirstClassifyEnabled: vi.fn(() => false),
7
10
  }));
8
11
  vi.mock("../../ee/bridge.js", () => ({
9
12
  classifyViaBrain: vi.fn().mockResolvedValue(null),