muonroi-cli 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. package/LICENSE +21 -21
  2. package/README.md +122 -122
  3. package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
  4. package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
  5. package/dist/src/agent-harness/mock-model.d.ts +11 -0
  6. package/dist/src/agent-harness/mock-model.js +21 -0
  7. package/dist/src/cli/cost-forensics.js +12 -12
  8. package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
  9. package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
  10. package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
  11. package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
  12. package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
  13. package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
  14. package/dist/src/council/clarifier.js +9 -1
  15. package/dist/src/council/debate.js +5 -1
  16. package/dist/src/council/decisions-lock.js +3 -3
  17. package/dist/src/council/index.js +12 -5
  18. package/dist/src/council/leader.d.ts +0 -17
  19. package/dist/src/council/leader.js +22 -15
  20. package/dist/src/council/planner.js +1 -1
  21. package/dist/src/council/prompts.js +63 -57
  22. package/dist/src/council/types.d.ts +7 -0
  23. package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
  24. package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
  25. package/dist/src/ee/auth.d.ts +9 -0
  26. package/dist/src/ee/auth.js +19 -0
  27. package/dist/src/ee/ee-onboarding.d.ts +5 -0
  28. package/dist/src/ee/ee-onboarding.js +76 -0
  29. package/dist/src/generated/version.d.ts +1 -1
  30. package/dist/src/generated/version.js +1 -1
  31. package/dist/src/headless/output.js +6 -4
  32. package/dist/src/headless/output.test.js +4 -3
  33. package/dist/src/index.js +20 -1
  34. package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
  35. package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
  36. package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
  37. package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
  38. package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
  39. package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
  40. package/dist/src/mcp/auto-setup.js +56 -2
  41. package/dist/src/mcp/client-pool.d.ts +46 -0
  42. package/dist/src/mcp/client-pool.js +212 -0
  43. package/dist/src/mcp/oauth-callback.js +2 -2
  44. package/dist/src/mcp/parse-headers.test.js +14 -14
  45. package/dist/src/mcp/runtime.d.ts +28 -0
  46. package/dist/src/mcp/runtime.js +117 -51
  47. package/dist/src/mcp/self-verify-runner.d.ts +14 -0
  48. package/dist/src/mcp/self-verify-runner.js +38 -0
  49. package/dist/src/mcp/setup-guide-text.d.ts +9 -0
  50. package/dist/src/mcp/setup-guide-text.js +84 -0
  51. package/dist/src/mcp/smart-filter.js +49 -0
  52. package/dist/src/mcp/smoke.test.js +43 -43
  53. package/dist/src/mcp/tools-server.d.ts +7 -0
  54. package/dist/src/mcp/tools-server.js +19 -22
  55. package/dist/src/models/catalog.json +349 -349
  56. package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
  57. package/dist/src/ops/doctor.d.ts +3 -2
  58. package/dist/src/ops/doctor.js +47 -11
  59. package/dist/src/ops/doctor.test.js +4 -3
  60. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
  61. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
  62. package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
  63. package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
  64. package/dist/src/orchestrator/batch-turn-runner.js +7 -11
  65. package/dist/src/orchestrator/message-processor.js +57 -27
  66. package/dist/src/orchestrator/orchestrator.js +26 -0
  67. package/dist/src/orchestrator/prompts.d.ts +51 -0
  68. package/dist/src/orchestrator/prompts.js +257 -134
  69. package/dist/src/orchestrator/scope-ceiling.js +6 -1
  70. package/dist/src/orchestrator/stream-runner.js +20 -15
  71. package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
  72. package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
  73. package/dist/src/pil/__tests__/config.test.js +1 -17
  74. package/dist/src/pil/__tests__/discovery.test.js +144 -11
  75. package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
  76. package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
  77. package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
  78. package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
  79. package/dist/src/pil/__tests__/layer6-output.test.js +137 -18
  80. package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
  81. package/dist/src/pil/agent-operating-contract.d.ts +1 -1
  82. package/dist/src/pil/agent-operating-contract.js +2 -0
  83. package/dist/src/pil/agent-operating-contract.test.js +7 -2
  84. package/dist/src/pil/cheap-model-playbook.js +35 -35
  85. package/dist/src/pil/cheap-model-workbooks.js +16 -13
  86. package/dist/src/pil/clarity-gate.d.ts +21 -19
  87. package/dist/src/pil/clarity-gate.js +26 -153
  88. package/dist/src/pil/config.d.ts +9 -1
  89. package/dist/src/pil/config.js +15 -4
  90. package/dist/src/pil/discovery.js +211 -136
  91. package/dist/src/pil/layer1-intent.d.ts +12 -0
  92. package/dist/src/pil/layer1-intent.js +283 -38
  93. package/dist/src/pil/layer1-intent.test.js +210 -4
  94. package/dist/src/pil/layer16-clarity.d.ts +25 -11
  95. package/dist/src/pil/layer16-clarity.js +19 -306
  96. package/dist/src/pil/layer4-gsd.js +18 -6
  97. package/dist/src/pil/layer6-output.d.ts +2 -0
  98. package/dist/src/pil/layer6-output.js +137 -22
  99. package/dist/src/pil/llm-classify.d.ts +26 -0
  100. package/dist/src/pil/llm-classify.js +34 -5
  101. package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
  102. package/dist/src/pil/native-capabilities-workbook.js +82 -76
  103. package/dist/src/pil/schema.d.ts +8 -0
  104. package/dist/src/pil/schema.js +12 -1
  105. package/dist/src/pil/task-tier-map.js +4 -0
  106. package/dist/src/pil/types.d.ts +11 -1
  107. package/dist/src/product-loop/done-gate.js +3 -3
  108. package/dist/src/product-loop/loop-driver.js +18 -18
  109. package/dist/src/product-loop/progress-snapshot.js +4 -4
  110. package/dist/src/providers/auth/gemini-oauth.js +6 -15
  111. package/dist/src/providers/auth/grok-oauth.js +6 -15
  112. package/dist/src/providers/auth/openai-oauth.js +6 -15
  113. package/dist/src/providers/mcp-vision-bridge.js +48 -48
  114. package/dist/src/reporter/index.js +1 -1
  115. package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
  116. package/dist/src/scaffold/bb-quality-gate.js +5 -5
  117. package/dist/src/scaffold/continuation-prompt.js +60 -60
  118. package/dist/src/scaffold/init-new.js +453 -453
  119. package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
  120. package/dist/src/self-qa/agentic-loop.js +24 -19
  121. package/dist/src/self-qa/spec-emitter.js +26 -23
  122. package/dist/src/storage/__tests__/migrations.test.js +2 -2
  123. package/dist/src/storage/interaction-log.js +5 -5
  124. package/dist/src/storage/migrations.js +122 -122
  125. package/dist/src/storage/sessions.js +42 -42
  126. package/dist/src/storage/transcript.js +91 -84
  127. package/dist/src/storage/usage.js +14 -14
  128. package/dist/src/storage/workspaces.js +12 -12
  129. package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
  130. package/dist/src/tools/__tests__/native-tools.test.js +53 -0
  131. package/dist/src/tools/git-safety.d.ts +61 -0
  132. package/dist/src/tools/git-safety.js +141 -0
  133. package/dist/src/tools/git-safety.test.d.ts +1 -0
  134. package/dist/src/tools/git-safety.test.js +111 -0
  135. package/dist/src/tools/native-tools.d.ts +31 -0
  136. package/dist/src/tools/native-tools.js +273 -0
  137. package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
  138. package/dist/src/tools/registry-git-safety.test.js +92 -0
  139. package/dist/src/tools/registry.js +39 -4
  140. package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
  141. package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
  142. package/dist/src/ui/app.js +0 -0
  143. package/dist/src/ui/components/message-view.js +4 -1
  144. package/dist/src/ui/components/structured-response-view.js +7 -3
  145. package/dist/src/ui/components/tool-group.js +7 -1
  146. package/dist/src/ui/markdown-render.d.ts +41 -0
  147. package/dist/src/ui/markdown-render.js +223 -0
  148. package/dist/src/ui/markdown.d.ts +10 -0
  149. package/dist/src/ui/markdown.js +12 -35
  150. package/dist/src/ui/slash/council-inspect.js +4 -4
  151. package/dist/src/ui/slash/export.js +4 -4
  152. package/dist/src/ui/utils/text.d.ts +8 -0
  153. package/dist/src/ui/utils/text.js +16 -0
  154. package/dist/src/ui/utils/text.test.d.ts +1 -0
  155. package/dist/src/ui/utils/text.test.js +23 -0
  156. package/dist/src/usage/ledger.js +48 -15
  157. package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
  158. package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
  159. package/dist/src/utils/clipboard-image.js +23 -23
  160. package/dist/src/utils/open-url.d.ts +56 -0
  161. package/dist/src/utils/open-url.js +58 -0
  162. package/dist/src/utils/open-url.test.d.ts +1 -0
  163. package/dist/src/utils/open-url.test.js +86 -0
  164. package/dist/src/utils/settings.d.ts +12 -0
  165. package/dist/src/utils/settings.js +48 -0
  166. package/dist/src/utils/side-question.js +2 -2
  167. package/dist/src/utils/skills.js +3 -3
  168. package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
  169. package/dist/src/verify/environment.js +2 -1
  170. package/package.json +1 -1
  171. package/dist/src/pil/layer16-clarity.test.js +0 -31
  172. package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0
@@ -18,10 +18,11 @@ export async function judgeReadiness(spec, topic, qa, llm, leaderModelId, costAw
18
18
  try {
19
19
  raw = await llm.generate(judgeModel, system, prompt, 512);
20
20
  }
21
- catch {
21
+ catch (err) {
22
22
  // On LLM failure, default to "not ready" with an empty gaps list so the
23
23
  // loop continues rather than breaking on transient errors. Worst case it
24
24
  // runs up to MAX_CLARIFY_ROUNDS and exits with ready=false.
25
+ console.error(`[council/clarifier] readiness judge LLM call failed: ${err?.message}`);
25
26
  return { ready: false, confidence: 0, gaps: [] };
26
27
  }
27
28
  try {
@@ -182,6 +183,13 @@ costAware = false) {
182
183
  }
183
184
  }
184
185
  if (questions.length === 0) {
186
+ // The clarifier asking nothing IS the readiness signal — the leader already
187
+ // decided no gaps remain. Mark the spec ready directly rather than leaving the
188
+ // gate at its not-ready default (wrong signal on the cleanest topics) or paying
189
+ // for a redundant readiness-judge LLM call on this break path.
190
+ gateReady = true;
191
+ gateConfidence = 1;
192
+ gateGaps = [];
185
193
  yield phaseDone({
186
194
  phaseId: roundId,
187
195
  kind: "clarification_round",
@@ -216,6 +216,7 @@ async function debateWithRetry(llm, model, system, prompt, signal, traceCb, tool
216
216
  export async function* runDebate(spec, config, llm) {
217
217
  const { leaderModelId, participants, conversationContext, signal, debatePlan } = config;
218
218
  const researchSkipOverride = config.researchSkipOverride === true;
219
+ const leaderNeedsResearch = config.leaderNeedsResearch;
219
220
  const internetFirst = config.internetFirst === true;
220
221
  const costAware = config.costAware === true;
221
222
  const active = [];
@@ -232,9 +233,12 @@ export async function* runDebate(spec, config, llm) {
232
233
  // emit the same "circuit breaker tripped" message every round.
233
234
  const announcedDisabled = new Set();
234
235
  // ── Leader decides: research needed? (skipped if user overrode upstream) ──
236
+ // Reuse the leader's upstream research decision (computed once in runCouncil)
237
+ // when available; only run the classifier here for direct callers that did not
238
+ // pre-compute it. Avoids a duplicate leader-tier LLM call per council run.
235
239
  const needsResearch = researchSkipOverride
236
240
  ? false
237
- : yield* evaluateResearchNeed(spec, leaderModelId, conversationContext, llm, costAware);
241
+ : (leaderNeedsResearch ?? (yield* evaluateResearchNeed(spec, leaderModelId, conversationContext, llm, costAware)));
238
242
  if (researchSkipOverride) {
239
243
  yield {
240
244
  type: "content",
@@ -35,8 +35,7 @@ export function extractStackFromSpec(spec) {
35
35
  all.includes("muonroi basetemplate") ||
36
36
  all.includes("basetemplate") ||
37
37
  all.includes("building-block") ||
38
- all.includes("mediatр") ||
39
- all.includes("mediatр")
38
+ all.includes("mediatr")
40
39
  ? "Muonroi.BaseTemplate (.NET 9, CQRS/MediatR, MEntity/MRepository pattern)"
41
40
  : null;
42
41
  const frontendMatch = all.includes("react") && (all.includes("vite") || all.includes("css module"))
@@ -203,7 +202,8 @@ export async function writeDecisionsLock(input) {
203
202
  await atomicWriteText(filePath, content);
204
203
  return true;
205
204
  }
206
- catch {
205
+ catch (err) {
206
+ console.error(`[council/decisions-lock] failed to write decisions.lock.md to ${input.runDir}: ${err?.message}`, { stack: err?.stack?.split("\n").slice(0, 3) });
207
207
  return false;
208
208
  }
209
209
  }
@@ -110,10 +110,13 @@ export async function* runCouncil(topic, sessionModelId, messages, sessionId, ll
110
110
  // to skip — research is the slowest part of council and trivial questions
111
111
  // (e.g. "what did we just decide?") should not pay that cost.
112
112
  let researchSkipOverride = false;
113
+ // Hoisted so the leader's research decision can be reused by runDebate instead
114
+ // of re-running the classifier LLM call (see CouncilConfig.leaderNeedsResearch).
115
+ // Stays undefined if the classifier throws — fail-open: runDebate re-evaluates.
116
+ let leaderNeedsResearch;
113
117
  try {
114
118
  const needGen = evaluateResearchNeed(spec, leaderModelId, conversationContext, llm, costAware);
115
119
  let needStep;
116
- let leaderNeedsResearch = true;
117
120
  do {
118
121
  needStep = await needGen.next();
119
122
  if (!needStep.done && needStep.value)
@@ -156,8 +159,9 @@ export async function* runCouncil(topic, sessionModelId, messages, sessionId, ll
156
159
  };
157
160
  }
158
161
  }
159
- catch {
160
- /* fail-open — fall through to default behavior in runDebate */
162
+ catch (err) {
163
+ // fail-open — leaderNeedsResearch stays undefined so runDebate re-evaluates.
164
+ console.error(`[council] research-need pre-check failed (fail-open): ${err?.message}`);
161
165
  }
162
166
  // Await EE pre-fetch (started in parallel with clarifier — latency already hidden)
163
167
  const eeResult = await eePromise;
@@ -228,6 +232,7 @@ export async function* runCouncil(topic, sessionModelId, messages, sessionId, ll
228
232
  debatePlan,
229
233
  signal: options?.signal,
230
234
  researchSkipOverride,
235
+ leaderNeedsResearch,
231
236
  internetFirst,
232
237
  costAware,
233
238
  }, llm);
@@ -642,8 +647,10 @@ export async function* runCouncil(topic, sessionModelId, messages, sessionId, ll
642
647
  })),
643
648
  synthesisExcerpt: synthesisText.slice(0, 2000),
644
649
  rejectedProposals: rejectedProposals.length > 0 ? rejectedProposals : undefined,
645
- }).catch(() => {
646
- /* non-critical lock file write failure must never break the council */
650
+ }).catch((err) => {
651
+ // writeDecisionsLock logs its own errors and returns false; this guard
652
+ // only fires on an unexpected throw — log it (No-Silent-Catch), never break council.
653
+ console.error(`[council] decisions.lock write guard caught: ${err?.message}`);
647
654
  });
648
655
  }
649
656
  }
@@ -33,23 +33,6 @@ export interface LeaderResolution {
33
33
  /** Set when no configured leader existed and one was picked by tier. */
34
34
  defaulted?: boolean;
35
35
  }
36
- /**
37
- * Resolve the leader model with quality-aware promotion.
38
- *
39
- * Hard rule: stay within the SESSION model's provider — don't switch providers
40
- * (different billing, surprise cost). We only upgrade tier within the same
41
- * provider that the user is already running.
42
- *
43
- * Priority:
44
- * 1. Find the highest-tier reachable model from the session provider's
45
- * catalog (registry + any configured role-models on that provider).
46
- * 2. If a configured `roleModels.leader` exists AND is on the session
47
- * provider, use it unless a strictly higher-tier model exists on the
48
- * same provider — then auto-promote with a note.
49
- * 3. If configured leader is on a DIFFERENT provider, ignore it and pick
50
- * from the session provider.
51
- * 4. Fall back to the session model itself.
52
- */
53
36
  export declare function resolveLeaderModelDetailed(sessionModelId: string): Promise<LeaderResolution>;
54
37
  /** Back-compat sync wrapper. Returns the modelId only; no reachability check. */
55
38
  export declare function resolveLeaderModel(sessionModelId: string): string;
@@ -1,5 +1,5 @@
1
1
  import { getModelByTier, getModelInfo, getModelsForProvider } from "../models/registry.js";
2
- import { loadKeyForProvider } from "../providers/keychain.js";
2
+ import { getConfiguredProviders } from "../providers/keychain.js";
3
3
  import { detectProviderForModel } from "../providers/runtime.js";
4
4
  import { getRoleModel, getRoleModels, isProviderDisabled } from "../utils/settings.js";
5
5
  const TIER_RANK = { fast: 1, balanced: 2, premium: 3 };
@@ -77,16 +77,30 @@ export function pickCouncilTaskModel(task, leaderModelId, costAware) {
77
77
  * from the session provider.
78
78
  * 4. Fall back to the session model itself.
79
79
  */
80
+ /**
81
+ * A provider is reachable when it has an API key OR a stored OAuth token.
82
+ * `loadKeyForProvider` only knows API keys (it throws for OAuth-only
83
+ * providers), so without the OAuth fallback an OAuth-authed provider — e.g.
84
+ * grok via xAI OAuth, or OpenAI/Google OAuth without an API key — was wrongly
85
+ * treated as unreachable, making council bail "No reachable provider" even
86
+ * though the model answers fine. VERIFY F15.
87
+ */
88
+ async function isProviderReachable(provider) {
89
+ // getConfiguredProviders() is the authoritative cred check — it unifies API
90
+ // keys (keychain/env/settings) AND stored OAuth tokens across every provider
91
+ // in the OAuth registry. The old loadKeyForProvider-only check saw API keys
92
+ // but not OAuth, so an OAuth-only provider (e.g. grok via xAI OAuth) was
93
+ // wrongly unreachable and council bailed "No reachable provider". VERIFY F15.
94
+ const configured = await getConfiguredProviders();
95
+ return configured.includes(provider);
96
+ }
80
97
  export async function resolveLeaderModelDetailed(sessionModelId) {
81
98
  const sessionProviderId = detectProviderForModel(sessionModelId);
82
99
  const configured = getRoleModel("leader");
83
100
  const configuredProvider = configured ? detectProviderForModel(configured) : undefined;
84
101
  const configuredTier = configured ? tierOf(configured) : undefined;
85
102
  const sessionDisabled = isProviderDisabled(sessionProviderId);
86
- const sessionReachable = !sessionDisabled &&
87
- (await loadKeyForProvider(sessionProviderId)
88
- .then(() => true)
89
- .catch(() => false));
103
+ const sessionReachable = !sessionDisabled && (await isProviderReachable(sessionProviderId));
90
104
  if (!sessionReachable) {
91
105
  return { modelId: configured ?? sessionModelId };
92
106
  }
@@ -169,9 +183,7 @@ export async function resolveParticipants(sessionModelId, preferMultiProvider) {
169
183
  const provider = detectProviderForModel(modelId);
170
184
  if (isProviderDisabled(provider))
171
185
  continue;
172
- const canReach = await loadKeyForProvider(provider)
173
- .then(() => true)
174
- .catch(() => false);
186
+ const canReach = await isProviderReachable(provider);
175
187
  if (canReach)
176
188
  candidates.push({ role, model: modelId });
177
189
  }
@@ -186,10 +198,7 @@ export async function resolveParticipants(sessionModelId, preferMultiProvider) {
186
198
  return sameCandidates;
187
199
  }
188
200
  const providerDisabled = isProviderDisabled(detectProviderForModel(sessionModelId));
189
- const canReach = !providerDisabled &&
190
- (await loadKeyForProvider(detectProviderForModel(sessionModelId))
191
- .then(() => true)
192
- .catch(() => false));
201
+ const canReach = !providerDisabled && (await isProviderReachable(detectProviderForModel(sessionModelId)));
193
202
  if (canReach) {
194
203
  return ALL_ROLES.map((role) => ({ role, model: sessionModelId }));
195
204
  }
@@ -198,9 +207,7 @@ export async function resolveParticipants(sessionModelId, preferMultiProvider) {
198
207
  async function resolveSameProviderCandidates(providerId, sessionModelId, roles) {
199
208
  if (isProviderDisabled(providerId))
200
209
  return [];
201
- const canReach = await loadKeyForProvider(providerId)
202
- .then(() => true)
203
- .catch(() => false);
210
+ const canReach = await isProviderReachable(providerId);
204
211
  if (!canReach)
205
212
  return [];
206
213
  const providerModels = getModelsForProvider(providerId);
@@ -186,7 +186,7 @@ function shapeFallback(synthesisText, debatePlan) {
186
186
  let found = false;
187
187
  for (const line of synthesisText.split("\n")) {
188
188
  const trimmed = line.trim();
189
- if (trimmed.match(new RegExp(`^#{1,3}s+${heading.replace(/\s+/g, "s+")}`, "i"))) {
189
+ if (trimmed.match(new RegExp(`^#{1,3}\\s+${heading.replace(/\s+/g, "\\s+")}`, "i"))) {
190
190
  found = true;
191
191
  continue;
192
192
  }
@@ -6,36 +6,35 @@ export function buildClarificationPrompt(topic, conversationContext, previousQA)
6
6
  : "";
7
7
  return {
8
8
  system: `You are a senior technical lead preparing for a multi-expert discussion. ` +
9
- `Your job is to identify AMBIGUITIES in the topic that would cause experts to talk past each other or go off-topic.\n\n` +
10
- `Analyze the topic and conversation context carefully. Generate targeted clarification questions.\n` +
11
- `Focus on:\n` +
12
- `- SCOPE: What exactly is in/out of scope?\n` +
13
- `- CONSTRAINTS: Technical, time, resource, or business constraints?\n` +
14
- `- SUCCESS CRITERIA: How will we know the discussion produced a good result?\n` +
15
- `- CONTEXT: What existing decisions, code, or patterns are relevant?\n\n` +
16
- `## Minimum-question rule\n` +
17
- `Return [] ONLY for topics that are already a precise technical question with a single ` +
18
- `expected outcome (e.g. "What does X function return?", "Fix typo in README"). ` +
19
- `For ANY topic that describes a feature, project, idea, or design — even if the user ` +
20
- `gave several sentences you MUST ask AT LEAST 2 questions, typically about:\n` +
21
- `- Scope boundaries (what's in/out of v1)\n` +
22
- `- Success metric (how is "done" measured)\n` +
23
- `- Hard constraint (timeline, platform, must-include / must-avoid)\n` +
24
- `A 1-paragraph "build me X" topic is NEVER specific enough there are always implicit ` +
25
- `scope, criteria, and constraint gaps. Ask them.\n\n` +
26
- `If the topic is already specific enough (single technical Q&A only), return an empty array.\n\n` +
9
+ `Your job is to surface the FEW genuine ambiguities that would make experts talk past each other NOT to run a questionnaire.\n\n` +
10
+ `Read the topic and the conversation context especially any "## Current Project" section — carefully. ` +
11
+ `Ask ONLY about things you genuinely cannot infer and that would actually change the plan:\n` +
12
+ `- SCOPE: what is in/out of scope for THIS change?\n` +
13
+ `- CONSTRAINTS: hard technical/time/business constraints not already implied by the context.\n` +
14
+ `- SUCCESS CRITERIA: how "done" is judged, when it isn't already obvious.\n\n` +
15
+ `## How many questions\n` +
16
+ `Ask the minimum that unblocks a focused discussion — typically 0-2. A well-scoped topic, or one ` +
17
+ `whose context already answers the gaps, needs ZERO questions: return []. Do NOT pad to a quota, ` +
18
+ `and never ask a question whose answer is already in the topic or the project context.\n\n` +
19
+ `## Existing-repo grounding (IMPORTANT)\n` +
20
+ `If a "## Current Project" section is present you are working in an EXISTING repository NOT a ` +
21
+ `greenfield project. Ground every question and every option in what that snapshot actually shows ` +
22
+ `(its language, framework, modules, conventions). Do NOT ask generic greenfield questions — product ` +
23
+ `type, target audience, which language/framework, which database, hosting when the repo already ` +
24
+ `answers them; asking those signals you ignored the context and wastes the user's time. Ask only ` +
25
+ `about intent/scope decisions specific to THIS change, phrased in terms of the real codebase.\n\n` +
27
26
  `IMPORTANT — defaults from the workspace:\n` +
28
27
  `- If the topic refers to "this project", "current project", "repo này", "dự án hiện tại" or similar, ` +
29
28
  `the project IS the one described in the "## Current Project" section of the context. DO NOT ask which project.\n` +
30
29
  `- Only ask about project identity when the topic mentions multiple distinct projects or external products.\n` +
31
- `- Prefer using the project's package.json name and description as implicit context for follow-up questions.\n\n` +
30
+ `- Use the project's package.json name and description as implicit context for follow-up questions.\n\n` +
32
31
  `Output ONLY a JSON array (no markdown, no preamble):\n` +
33
32
  `[{"question": "...", "why": "why this matters for a focused discussion", "suggestions": ["option A", "option B"], "recommended": "option A", "isRequired": true}]\n\n` +
34
- `Rules for "recommended":\n` +
35
- `- Only include "recommended" when, given the topic + context, ONE option is clearly the best default.\n` +
33
+ `Rules for "recommended" (be decisive — the user should never face an unranked list):\n` +
34
+ `- ALWAYS include "recommended" the single option you would choose if the user said "you decide", given the topic + project context.\n` +
36
35
  `- Its value MUST be exactly equal to one of the entries in "suggestions".\n` +
37
- `- Pick at most ONE recommended option per question. If you cannot confidently single one out, OMIT the field entirely do not guess.\n` +
38
- `Return [] if no clarification needed.`,
36
+ `- Omit it ONLY in a genuine 50/50 tie where recommending either option would be misleading. A missing recommendation must be the rare exception, not the default.\n` +
37
+ `Return [] if no clarification is needed.`,
39
38
  prompt: `## Topic\n${topic}\n\n` +
40
39
  (conversationContext ? `## Conversation Context\n${conversationContext}\n` : "") +
41
40
  qaSection,
@@ -84,8 +83,7 @@ export function buildReadinessJudgePrompt(topic, qa, spec) {
84
83
  `- "gaps" MUST be empty when "ready" is true.\n` +
85
84
  `- Each gap is a single sentence starting with a noun: what info is missing (not a question).\n` +
86
85
  ` Example: "Target platform (web, mobile, or both) not specified."\n` +
87
- `- "confidence" reflects how sure you are; a ready=true with confidence=0.6 means "probably " +\n` +
88
- ` "ready but some ambiguity remains". confidence=1.0 means zero remaining blind spots.\n` +
86
+ `- "confidence" reflects how sure you are; a ready=true with confidence=0.6 means "probably ready but some ambiguity remains". confidence=1.0 means zero remaining blind spots.\n` +
89
87
  `- When the topic is a simple one-answer technical question (no design/scope), set ready=true, ` +
90
88
  ` confidence=1.0, gaps=[].`,
91
89
  prompt: `## Topic\n${topic}\n\n` +
@@ -124,15 +122,14 @@ const ENGLISH_ONLY_RULE = `\n## Language Rule (mandatory)\n` +
124
122
  * and burn the step budget without producing analytical content (the bug
125
123
  * that caused session a7a5690d2049 to fail with 4/4 empty turns).
126
124
  */
125
+ // Opening turns run tool-free (openingWithRetry → llm.generate, no verification
126
+ // tools wired). The rule must NOT advertise tools the model cannot call, or it
127
+ // hallucinates `[CONFIRMED via grep:...]` tags for searches it never ran.
127
128
  const EVIDENCE_RULE_OPENING = `\n## Evidence Rule\n` +
128
- `Stay analytical. You may optionally call AT MOST ONE verification tool ` +
129
- `(grep / read_file / web_fetch / context7) ONLY to verify a SPECIFIC ` +
130
- `numerical or factual claim you would otherwise have to invent.\n` +
131
- `- Do NOT call tools for exploration or to gather background context.\n` +
132
- `- Do NOT chain multiple tool calls — you have one shot, then must produce your full response.\n` +
133
- `- If no claim needs verification, skip tool use entirely and answer directly.\n` +
134
- `Tag verified facts: \`[CONFIRMED via <tool>:<evidence>]\` or \`[REFUTED via <tool>:<evidence>]\`.\n` +
135
- `For uncited numbers / library specs that you cannot verify, mark them \`[UNVERIFIED: <claim>]\`.\n`;
129
+ `Stay analytical and ground every claim in the brief + context you were given. ` +
130
+ `You have NO tools in this opening turn — do not claim to have run grep / read_file / web searches.\n` +
131
+ `- For any number or library spec you cannot support from the provided context, mark it \`[UNVERIFIED: <claim>]\` instead of asserting it.\n` +
132
+ `- A later round can verify disputed claims; your job now is a clear, honest analysis.\n`;
136
133
  const EVIDENCE_RULE_RESPONSE = `\n## Evidence Rule\n` +
137
134
  `Stay analytical. You may optionally call AT MOST ONE verification tool ` +
138
135
  `(grep / read_file / web_fetch / context7) ONLY to verify a SPECIFIC ` +
@@ -277,9 +274,7 @@ export function buildLeaderEvaluationPrompt(ctx) {
277
274
  ` "researchQuery": null,\n` +
278
275
  ` "shouldContinue": true/false,\n` +
279
276
  ` "reason": "one sentence explaining your decision",\n` +
280
- ` "evidenceDensity": 0.0, // citations / total claims ratio (0.0–1.0)\n` +
281
- ` "disagreementResolved": 0, // count of [REFUTED] + [CONFIRMED] tags and explicit concessions\n` +
282
- ` "extendRounds": 0 // set to 1-3 ONLY when this is the last planned round AND one critical point is genuinely close to resolving but not yet there. 0 otherwise.\n` +
277
+ ` "extendRounds": 0 // set to 1-3 ONLY when one critical point is genuinely close to resolving but not yet there; 0 otherwise. The orchestrator applies this only if rounds remain — do not try to track the round count yourself.\n` +
283
278
  (stackLock
284
279
  ? ` ,\n "consensusQuality": "full", // "full" when all positions stay within locked stack; "partial" when out-of-stack violations found\n` +
285
280
  ` "outOfStackViolations": [] // list of out-of-stack tech names cited by participants (empty when none)\n`
@@ -297,7 +292,8 @@ export function buildRoundSummaryPrompt(allExchanges, topic, round) {
297
292
  `1. Points where participants AGREE\n` +
298
293
  `2. Points still in DISPUTE (with each side's core argument)\n` +
299
294
  `3. New EVIDENCE or perspectives raised this round\n` +
300
- `Be concise — one line per bullet. No preamble.`,
295
+ `Be concise — one line per bullet. No preamble. ` +
296
+ `Do NOT write "Round N" or any round-number counter in your bullets — this summary is fed into later turns, where round labels read as robotic noise. Refer to points by their content.`,
301
297
  prompt: `Round ${round} discussion on: ${topic}\n\n${allExchanges}`,
302
298
  };
303
299
  }
@@ -468,6 +464,15 @@ export function buildSynthesisPrompt(ctx) {
468
464
  : "Balance clarity with completeness.") // balanced (default)
469
465
  : "";
470
466
  const stackLockForSynth = buildStackLockSection(ctx.spec);
467
+ // De-robotize: for choice/plan outputs, force a single decisive recommendation
468
+ // (mirrors the clarifier's mandatory-default rule). Scoped to decision/plan kinds
469
+ // so evaluation/investigation/exploration shapes keep their neutral analytical tone.
470
+ const decisiveness = finalShape.kind === "decision" || finalShape.kind === "implementation_plan"
471
+ ? `\n## Decisiveness (recommendation/verdict)\n` +
472
+ `Lead with the single choice you would make if the user said "you decide" — name it in the first sentence of the recommendation. ` +
473
+ `Do NOT hedge with "it depends", "both have merits", or an unranked list of options. ` +
474
+ `If the debate genuinely did not converge, say so in one sentence and STILL give your best single recommendation plus the one condition that would change it.\n`
475
+ : "";
471
476
  let system = `You are the team lead synthesizing a multi-specialist discussion.\n\n` +
472
477
  `## Original Brief\n` +
473
478
  `Problem: ${ctx.spec.problemStatement}\n` +
@@ -476,6 +481,7 @@ export function buildSynthesisPrompt(ctx) {
476
481
  intent +
477
482
  (stackLockForSynth ? `\n${stackLockForSynth}\n` : "") +
478
483
  guardrailBlock +
484
+ decisiveness +
479
485
  `\nProduce the answer the user requested — do NOT default to an implementation plan ` +
480
486
  `unless the output shape explicitly asks for actionItems/plan. ` +
481
487
  `Stay grounded in the discussion; do not invent facts; mark unverified claims explicitly.\n\n` +
@@ -507,33 +513,33 @@ export function buildSynthesisPrompt(ctx) {
507
513
  }
508
514
  let extraContext = "";
509
515
  if (ctx.refineContext) {
510
- extraContext += `
511
- ## User Refinements
512
- ${ctx.refineContext}
516
+ extraContext += `
517
+ ## User Refinements
518
+ ${ctx.refineContext}
513
519
  `;
514
520
  }
515
521
  if (ctx.planEmphasis) {
516
- extraContext += `
517
- ## Additional Instruction
518
- The user has requested a concrete action plan with executable steps. Each action item MUST be an object with these fields:
519
- {
520
- "step": "<imperative action>",
521
- "owner_lens": "<which stance owns this — frontend / backend / architecture / etc>",
522
- "time_estimate": "<rough — e.g. '2h', '1d', '~30min'>",
523
- "depends_on": ["<step keys this requires>"] or [],
524
- "acceptance_criteria": "<how we know it's done>"
525
- }
526
- Order action items by dependency: predecessors first, dependents after.
527
- Risks MUST be objects with: {"description", "severity": "High|Medium|Low", "mitigation"}.
528
- Do NOT emit loose strings for these fields — the user needs structured plan output.
522
+ extraContext += `
523
+ ## Additional Instruction
524
+ The user has requested a concrete action plan with executable steps. Each action item MUST be an object with these fields:
525
+ {
526
+ "step": "<imperative action>",
527
+ "owner_lens": "<which stance owns this — frontend / backend / architecture / etc>",
528
+ "time_estimate": "<rough — e.g. '2h', '1d', '~30min'>",
529
+ "depends_on": ["<step keys this requires>"] or [],
530
+ "acceptance_criteria": "<how we know it's done>"
531
+ }
532
+ Order action items by dependency: predecessors first, dependents after.
533
+ Risks MUST be objects with: {"description", "severity": "High|Medium|Low", "mitigation"}.
534
+ Do NOT emit loose strings for these fields — the user needs structured plan output.
529
535
  `;
530
536
  }
531
537
  return {
532
538
  system,
533
- prompt: `Final positions:
534
- ${ctx.finalPositions}
535
-
536
- Full discussion:
539
+ prompt: `Final positions:
540
+ ${ctx.finalPositions}
541
+
542
+ Full discussion:
537
543
  ${ctx.allExchanges}${extraContext}`,
538
544
  };
539
545
  }
@@ -200,6 +200,13 @@ export interface CouncilConfig {
200
200
  userModelMessage?: ModelMessage;
201
201
  /** When true, runDebate skips the research phase even if the leader requested it (user override). */
202
202
  researchSkipOverride?: boolean;
203
+ /**
204
+ * Leader's pre-computed "is research needed?" decision from runCouncil. When set,
205
+ * runDebate reuses it instead of re-running the classifier LLM call — avoids a
206
+ * duplicate leader-tier call per run plus a possible contradiction with the
207
+ * user-facing skip card. Undefined for direct runDebate callers/tests (they re-evaluate).
208
+ */
209
+ leaderNeedsResearch?: boolean;
203
210
  /** When true, the working directory has no source code yet — research prompt prefers internet sources. */
204
211
  internetFirst?: boolean;
205
212
  /** When true, leader sub-tasks downshift to cheaper tier models on the same provider. */
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,32 @@
1
+ import { mkdtempSync, readFileSync, rmSync } from "node:fs";
2
+ import { tmpdir } from "node:os";
3
+ import { join } from "node:path";
4
+ import { afterEach, beforeEach, describe, expect, it } from "vitest";
5
+ import { writeExperienceConfig } from "../auth.js";
// Exercises writeExperienceConfig against a throwaway home directory that is
// created before, and removed after, every test case.
describe("writeExperienceConfig", () => {
    let sandboxHome;
    // Parse whatever the function under test wrote into the sandbox.
    function loadWrittenConfig() {
        const configFile = join(sandboxHome, ".experience", "config.json");
        return JSON.parse(readFileSync(configFile, "utf8"));
    }
    beforeEach(() => {
        sandboxHome = mkdtempSync(join(tmpdir(), "ee-cfg-"));
    });
    afterEach(() => {
        rmSync(sandboxHome, { recursive: true, force: true });
    });
    it("creates ~/.experience/config.json (and dir) when none exists", async () => {
        const patch = { serverBaseUrl: "https://ee.example.com", serverAuthToken: "tok" };
        await writeExperienceConfig(patch, { home: sandboxHome });
        const written = loadWrittenConfig();
        expect(written.serverBaseUrl).toBe("https://ee.example.com");
        expect(written.serverAuthToken).toBe("tok");
    });
    it("merges into an existing config, preserving unrelated fields", async () => {
        const target = { home: sandboxHome };
        // First write stands in for a config the EE installer left behind.
        const seeded = { embeddingModelVersion: "v9", serverAuthToken: "old" };
        await writeExperienceConfig(seeded, target);
        // Second write adds a URL and replaces the token; the seeded
        // embeddingModelVersion is not part of this patch and must survive.
        const update = { serverBaseUrl: "https://ee2.example.com", serverAuthToken: "new" };
        await writeExperienceConfig(update, target);
        const written = loadWrittenConfig();
        expect(written.embeddingModelVersion).toBe("v9"); // untouched by the second write
        expect(written.serverBaseUrl).toBe("https://ee2.example.com"); // newly added
        expect(written.serverAuthToken).toBe("new"); // replaced
    });
});
32
+ //# sourceMappingURL=ee-onboarding.test.js.map
@@ -15,6 +15,15 @@ export declare function loadEEAuthToken(opts?: {
15
15
  export declare function refreshAuthToken(opts?: {
16
16
  home?: string;
17
17
  }): Promise<string | null>;
18
+ /**
19
+ * Merge a partial config into ~/.experience/config.json (creating the file +
20
+ * directory if absent), preserving any fields the EE installer or the user
21
+ * already wrote. Used by the first-run EE setup step. Throws on write failure so
22
+ * the caller can surface it (never silently swallow — the user asked to set this up).
23
+ */
24
+ export declare function writeExperienceConfig(patch: Partial<ExperienceConfig>, opts?: {
25
+ home?: string;
26
+ }): Promise<void>;
18
27
  export declare function getCachedAuthToken(): string | null;
19
28
  export declare function getEmbeddingModelVersion(): string;
20
29
  export declare function getCachedServerBaseUrl(): string | null;
@@ -39,6 +39,25 @@ export async function refreshAuthToken(opts = {}) {
39
39
  _token = null;
40
40
  return await loadEEAuthToken(opts);
41
41
  }
/**
 * Merge a partial config into ~/.experience/config.json (creating the file +
 * directory if absent), preserving any fields the EE installer or the user
 * already wrote. Used by the first-run EE setup step. Throws on write failure so
 * the caller can surface it (never silently swallow — the user asked to set this up).
 *
 * @param {object} patch - Fields to set; a key in `patch` replaces the same key on disk.
 * @param {{ home?: string }} [opts] - `home` is forwarded to `configPath` to locate the file.
 * @returns {Promise<void>} Resolves once the merged config has been written.
 */
export async function writeExperienceConfig(patch, opts = {}) {
    const p = configPath(opts.home);
    let existing = {};
    try {
        const parsed = JSON.parse(await fs.readFile(p, "utf8"));
        // Only a plain JSON object can be merged key-by-key. Spreading a string or
        // an array would inject index keys ("0", "1", …) into the saved config.
        if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
            existing = parsed;
        }
    }
    catch {
        // No existing config (or unreadable) — start fresh.
    }
    const merged = { ...existing, ...patch };
    await fs.mkdir(path.dirname(p), { recursive: true });
    // The file may hold serverAuthToken, so create it readable by the owner only.
    // `mode` is honoured only when the file is created; the permissions of an
    // already-existing file are left as they are.
    await fs.writeFile(p, `${JSON.stringify(merged, null, 2)}\n`, { encoding: "utf8", mode: 0o600 });
}
42
61
  export function getCachedAuthToken() {
43
62
  return _token;
44
63
  }
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Returns true when a config was written (so the caller can reload EE auth).
3
+ * Returns false when skipped or invalid.
4
+ */
5
+ export declare function firstRunEESetup(): Promise<boolean>;
@@ -0,0 +1,76 @@
1
+ /**
2
+ * First-run Experience Engine setup (interactive, readline — runs BEFORE any TUI
3
+ * code, same pattern as the credential wizard). Offers to connect an EE server
4
+ * and writes ~/.experience/config.json so the agent's record/recall/feedback
5
+ * loop (ee_query / ee_feedback via muonroi-tools) has a brain to talk to.
6
+ *
7
+ * Optional + skippable: a blank URL skips. No hardcoded fallback — a failed
8
+ * health probe is reported, not hidden, and never blocks setup.
9
+ */
10
+ import { createInterface } from "node:readline";
11
+ import { writeExperienceConfig } from "./auth.js";
/**
 * Best-effort reachability probe — returns true/false, never throws.
 *
 * Requests the `/health` path under `baseUrl` and aborts after 4 seconds.
 *
 * @param {string} baseUrl - Server base URL; a value that cannot be parsed yields `false`.
 * @param {string} [token] - When truthy, sent as an `authorization: Bearer …` header.
 * @returns {Promise<boolean>} The response's `ok` flag, or `false` on any failure.
 */
async function probeHealth(baseUrl, token) {
    const ac = new AbortController();
    const timer = setTimeout(() => ac.abort(), 4000);
    try {
        // Append to the path component rather than to the raw string, so a base
        // URL that carries a query string or extra trailing slashes still
        // resolves to ".../health".
        const target = new URL(baseUrl);
        target.pathname = `${target.pathname.replace(/\/+$/, "")}/health`;
        const res = await fetch(target, {
            signal: ac.signal,
            headers: token ? { authorization: `Bearer ${token}` } : undefined,
        });
        const { ok } = res;
        // Only the status matters. Cancel the unread body so the connection is
        // released; a failure here is cleanup-only and must not flip the result.
        await res.body?.cancel().catch(() => { });
        return ok;
    }
    catch {
        return false;
    }
    finally {
        clearTimeout(timer);
    }
}
/**
 * Turn a user-typed server address into the value stored as `serverBaseUrl`.
 * Returns the parsed URL as a string with one trailing slash removed, or `null`
 * when the input is not an absolute http(s) URL.
 */
function normalizeServerUrl(input) {
    let parsed;
    try {
        parsed = new URL(input);
    }
    catch {
        return null;
    }
    // `new URL` accepts any scheme: "localhost:8080" parses with protocol
    // "localhost:". Only http(s) endpoints can be reached by the health probe.
    if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
        return null;
    }
    return parsed.toString().replace(/\/$/, "");
}
/**
 * Returns true when a config was written (so the caller can reload EE auth).
 * Returns false when skipped or invalid.
 *
 * Prompts on stderr for a server URL and an optional auth token, writes them via
 * `writeExperienceConfig`, then reports whether the server answered a health
 * probe. Any error raised along the way is reported on stderr and turned into
 * a `false` result.
 *
 * @returns {Promise<boolean>}
 */
export async function firstRunEESetup() {
    const rl = createInterface({ input: process.stdin, output: process.stderr });
    const ask = (q) => new Promise((resolve) => {
        // If stdin reaches EOF the interface closes and the question callback is
        // never invoked; treat that as a blank answer so the prompt cannot hang.
        const onClose = () => resolve("");
        rl.once("close", onClose);
        rl.question(q, (a) => {
            rl.off("close", onClose);
            resolve(a);
        });
    });
    try {
        process.stderr.write("\nExperience Engine (optional) — a shared brain that recalls past decisions, gotchas,\n" +
            "and recipes so the agent works like a senior on your stack. You can set this up later\n" +
            "by editing ~/.experience/config.json or setting MUONROI_EE_BASE_URL.\n\n");
        const url = (await ask("EE server URL (blank to skip): ")).trim();
        if (!url) {
            process.stderr.write("Skipped Experience Engine setup.\n");
            return false;
        }
        const normalized = normalizeServerUrl(url);
        if (normalized === null) {
            process.stderr.write("That doesn't look like a valid URL — skipped EE setup.\n");
            return false;
        }
        const token = (await ask("EE auth token (blank if the server needs none): ")).trim();
        await writeExperienceConfig({
            serverBaseUrl: normalized,
            // Omit the key entirely when no token was given, so an existing
            // token on disk is not overwritten with an empty string.
            ...(token ? { serverAuthToken: token } : {}),
        });
        process.stderr.write(`Wrote Experience Engine config → ~/.experience/config.json (serverBaseUrl=${normalized}).\n`);
        // The probe runs after the write: an unreachable server is reported but
        // does not undo or block the saved config.
        const reachable = await probeHealth(normalized, token || undefined);
        process.stderr.write(reachable
            ? "  ✓ EE server reachable.\n"
            : "  ⚠ Could not reach the EE server right now (saved anyway — run 'muonroi-cli doctor' to recheck).\n");
        return true;
    }
    catch (err) {
        process.stderr.write(`\nEE setup failed: ${err?.message ?? String(err)} — skipped.\n`);
        return false;
    }
    finally {
        rl.close();
    }
}
76
+ //# sourceMappingURL=ee-onboarding.js.map
@@ -1,2 +1,2 @@
1
- export declare const PACKAGE_VERSION = "1.4.1";
1
+ export declare const PACKAGE_VERSION = "1.5.0";
2
2
  export declare const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
@@ -1,5 +1,5 @@
1
1
  // AUTO-GENERATED by scripts/sync-version.cjs. DO NOT EDIT BY HAND.
2
2
  // Sourced from package.json at build time so it survives bun --compile bundling.
3
- export const PACKAGE_VERSION = "1.4.1";
3
+ export const PACKAGE_VERSION = "1.5.0";
4
4
  export const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
5
5
  //# sourceMappingURL=version.js.map