npm - muonroi-cli - Versions diffs - 1.4.1 → 1.5.0 - Mend

muonroi-cli 1.4.1 → 1.5.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (172)

package/LICENSE +21 -21
package/README.md +122 -122
package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
package/dist/src/agent-harness/mock-model.d.ts +11 -0
package/dist/src/agent-harness/mock-model.js +21 -0
package/dist/src/cli/cost-forensics.js +12 -12
package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
package/dist/src/council/clarifier.js +9 -1
package/dist/src/council/debate.js +5 -1
package/dist/src/council/decisions-lock.js +3 -3
package/dist/src/council/index.js +12 -5
package/dist/src/council/leader.d.ts +0 -17
package/dist/src/council/leader.js +22 -15
package/dist/src/council/planner.js +1 -1
package/dist/src/council/prompts.js +63 -57
package/dist/src/council/types.d.ts +7 -0
package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
package/dist/src/ee/auth.d.ts +9 -0
package/dist/src/ee/auth.js +19 -0
package/dist/src/ee/ee-onboarding.d.ts +5 -0
package/dist/src/ee/ee-onboarding.js +76 -0
package/dist/src/generated/version.d.ts +1 -1
package/dist/src/generated/version.js +1 -1
package/dist/src/headless/output.js +6 -4
package/dist/src/headless/output.test.js +4 -3
package/dist/src/index.js +20 -1
package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
package/dist/src/mcp/auto-setup.js +56 -2
package/dist/src/mcp/client-pool.d.ts +46 -0
package/dist/src/mcp/client-pool.js +212 -0
package/dist/src/mcp/oauth-callback.js +2 -2
package/dist/src/mcp/parse-headers.test.js +14 -14
package/dist/src/mcp/runtime.d.ts +28 -0
package/dist/src/mcp/runtime.js +117 -51
package/dist/src/mcp/self-verify-runner.d.ts +14 -0
package/dist/src/mcp/self-verify-runner.js +38 -0
package/dist/src/mcp/setup-guide-text.d.ts +9 -0
package/dist/src/mcp/setup-guide-text.js +84 -0
package/dist/src/mcp/smart-filter.js +49 -0
package/dist/src/mcp/smoke.test.js +43 -43
package/dist/src/mcp/tools-server.d.ts +7 -0
package/dist/src/mcp/tools-server.js +19 -22
package/dist/src/models/catalog.json +349 -349
package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
package/dist/src/ops/doctor.d.ts +3 -2
package/dist/src/ops/doctor.js +47 -11
package/dist/src/ops/doctor.test.js +4 -3
package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
package/dist/src/orchestrator/batch-turn-runner.js +7 -11
package/dist/src/orchestrator/message-processor.js +57 -27
package/dist/src/orchestrator/orchestrator.js +26 -0
package/dist/src/orchestrator/prompts.d.ts +51 -0
package/dist/src/orchestrator/prompts.js +257 -134
package/dist/src/orchestrator/scope-ceiling.js +6 -1
package/dist/src/orchestrator/stream-runner.js +20 -15
package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
package/dist/src/pil/__tests__/config.test.js +1 -17
package/dist/src/pil/__tests__/discovery.test.js +144 -11
package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
package/dist/src/pil/__tests__/layer6-output.test.js +137 -18
package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
package/dist/src/pil/agent-operating-contract.d.ts +1 -1
package/dist/src/pil/agent-operating-contract.js +2 -0
package/dist/src/pil/agent-operating-contract.test.js +7 -2
package/dist/src/pil/cheap-model-playbook.js +35 -35
package/dist/src/pil/cheap-model-workbooks.js +16 -13
package/dist/src/pil/clarity-gate.d.ts +21 -19
package/dist/src/pil/clarity-gate.js +26 -153
package/dist/src/pil/config.d.ts +9 -1
package/dist/src/pil/config.js +15 -4
package/dist/src/pil/discovery.js +211 -136
package/dist/src/pil/layer1-intent.d.ts +12 -0
package/dist/src/pil/layer1-intent.js +283 -38
package/dist/src/pil/layer1-intent.test.js +210 -4
package/dist/src/pil/layer16-clarity.d.ts +25 -11
package/dist/src/pil/layer16-clarity.js +19 -306
package/dist/src/pil/layer4-gsd.js +18 -6
package/dist/src/pil/layer6-output.d.ts +2 -0
package/dist/src/pil/layer6-output.js +137 -22
package/dist/src/pil/llm-classify.d.ts +26 -0
package/dist/src/pil/llm-classify.js +34 -5
package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
package/dist/src/pil/native-capabilities-workbook.js +82 -76
package/dist/src/pil/schema.d.ts +8 -0
package/dist/src/pil/schema.js +12 -1
package/dist/src/pil/task-tier-map.js +4 -0
package/dist/src/pil/types.d.ts +11 -1
package/dist/src/product-loop/done-gate.js +3 -3
package/dist/src/product-loop/loop-driver.js +18 -18
package/dist/src/product-loop/progress-snapshot.js +4 -4
package/dist/src/providers/auth/gemini-oauth.js +6 -15
package/dist/src/providers/auth/grok-oauth.js +6 -15
package/dist/src/providers/auth/openai-oauth.js +6 -15
package/dist/src/providers/mcp-vision-bridge.js +48 -48
package/dist/src/reporter/index.js +1 -1
package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
package/dist/src/scaffold/bb-quality-gate.js +5 -5
package/dist/src/scaffold/continuation-prompt.js +60 -60
package/dist/src/scaffold/init-new.js +453 -453
package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
package/dist/src/self-qa/agentic-loop.js +24 -19
package/dist/src/self-qa/spec-emitter.js +26 -23
package/dist/src/storage/__tests__/migrations.test.js +2 -2
package/dist/src/storage/interaction-log.js +5 -5
package/dist/src/storage/migrations.js +122 -122
package/dist/src/storage/sessions.js +42 -42
package/dist/src/storage/transcript.js +91 -84
package/dist/src/storage/usage.js +14 -14
package/dist/src/storage/workspaces.js +12 -12
package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
package/dist/src/tools/__tests__/native-tools.test.js +53 -0
package/dist/src/tools/git-safety.d.ts +61 -0
package/dist/src/tools/git-safety.js +141 -0
package/dist/src/tools/git-safety.test.d.ts +1 -0
package/dist/src/tools/git-safety.test.js +111 -0
package/dist/src/tools/native-tools.d.ts +31 -0
package/dist/src/tools/native-tools.js +273 -0
package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
package/dist/src/tools/registry-git-safety.test.js +92 -0
package/dist/src/tools/registry.js +39 -4
package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
package/dist/src/ui/app.js +0 -0
package/dist/src/ui/components/message-view.js +4 -1
package/dist/src/ui/components/structured-response-view.js +7 -3
package/dist/src/ui/components/tool-group.js +7 -1
package/dist/src/ui/markdown-render.d.ts +41 -0
package/dist/src/ui/markdown-render.js +223 -0
package/dist/src/ui/markdown.d.ts +10 -0
package/dist/src/ui/markdown.js +12 -35
package/dist/src/ui/slash/council-inspect.js +4 -4
package/dist/src/ui/slash/export.js +4 -4
package/dist/src/ui/utils/text.d.ts +8 -0
package/dist/src/ui/utils/text.js +16 -0
package/dist/src/ui/utils/text.test.d.ts +1 -0
package/dist/src/ui/utils/text.test.js +23 -0
package/dist/src/usage/ledger.js +48 -15
package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
package/dist/src/utils/clipboard-image.js +23 -23
package/dist/src/utils/open-url.d.ts +56 -0
package/dist/src/utils/open-url.js +58 -0
package/dist/src/utils/open-url.test.d.ts +1 -0
package/dist/src/utils/open-url.test.js +86 -0
package/dist/src/utils/settings.d.ts +12 -0
package/dist/src/utils/settings.js +48 -0
package/dist/src/utils/side-question.js +2 -2
package/dist/src/utils/skills.js +3 -3
package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
package/dist/src/verify/environment.js +2 -1
package/package.json +1 -1
package/dist/src/pil/layer16-clarity.test.js +0 -31
/package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0

package/dist/src/council/clarifier.js CHANGED Viewed

@@ -18,10 +18,11 @@ export async function judgeReadiness(spec, topic, qa, llm, leaderModelId, costAw
     try {
         raw = await llm.generate(judgeModel, system, prompt, 512);
     }
-    catch {
+    catch (err) {
         // On LLM failure, default to "not ready" with an empty gaps list so the
         // loop continues rather than breaking on transient errors. Worst case it
         // runs up to MAX_CLARIFY_ROUNDS and exits with ready=false.
+        console.error(`[council/clarifier] readiness judge LLM call failed: ${err?.message}`);
         return { ready: false, confidence: 0, gaps: [] };
     }
     try {
@@ -182,6 +183,13 @@ costAware = false) {
             }
         }
         if (questions.length === 0) {
+            // The clarifier asking nothing IS the readiness signal — the leader already
+            // decided no gaps remain. Mark the spec ready directly rather than leaving the
+            // gate at its not-ready default (wrong signal on the cleanest topics) or paying
+            // for a redundant readiness-judge LLM call on this break path.
+            gateReady = true;
+            gateConfidence = 1;
+            gateGaps = [];
             yield phaseDone({
                 phaseId: roundId,
                 kind: "clarification_round",

package/dist/src/council/debate.js CHANGED Viewed

@@ -216,6 +216,7 @@ async function debateWithRetry(llm, model, system, prompt, signal, traceCb, tool
 export async function* runDebate(spec, config, llm) {
     const { leaderModelId, participants, conversationContext, signal, debatePlan } = config;
     const researchSkipOverride = config.researchSkipOverride === true;
+    const leaderNeedsResearch = config.leaderNeedsResearch;
     const internetFirst = config.internetFirst === true;
     const costAware = config.costAware === true;
     const active = [];
@@ -232,9 +233,12 @@ export async function* runDebate(spec, config, llm) {
     // emit the same "circuit breaker tripped" message every round.
     const announcedDisabled = new Set();
     // ── Leader decides: research needed? (skipped if user overrode upstream) ──
+    // Reuse the leader's upstream research decision (computed once in runCouncil)
+    // when available; only run the classifier here for direct callers that did not
+    // pre-compute it. Avoids a duplicate leader-tier LLM call per council run.
     const needsResearch = researchSkipOverride
         ? false
-        : yield* evaluateResearchNeed(spec, leaderModelId, conversationContext, llm, costAware);
+        : (leaderNeedsResearch ?? (yield* evaluateResearchNeed(spec, leaderModelId, conversationContext, llm, costAware)));
     if (researchSkipOverride) {
         yield {
             type: "content",

package/dist/src/council/decisions-lock.js CHANGED Viewed

@@ -35,8 +35,7 @@ export function extractStackFromSpec(spec) {
         all.includes("muonroi basetemplate") ||
         all.includes("basetemplate") ||
         all.includes("building-block") ||
-        all.includes("mediatр") ||
-        all.includes("mediatр")
+        all.includes("mediatr")
         ? "Muonroi.BaseTemplate (.NET 9, CQRS/MediatR, MEntity/MRepository pattern)"
         : null;
     const frontendMatch = all.includes("react") && (all.includes("vite") || all.includes("css module"))
@@ -203,7 +202,8 @@ export async function writeDecisionsLock(input) {
         await atomicWriteText(filePath, content);
         return true;
     }
-    catch {
+    catch (err) {
+        console.error(`[council/decisions-lock] failed to write decisions.lock.md to ${input.runDir}: ${err?.message}`, { stack: err?.stack?.split("\n").slice(0, 3) });
         return false;
     }
 }

package/dist/src/council/index.js CHANGED Viewed

@@ -110,10 +110,13 @@ export async function* runCouncil(topic, sessionModelId, messages, sessionId, ll
     // to skip — research is the slowest part of council and trivial questions
     // (e.g. "what did we just decide?") should not pay that cost.
     let researchSkipOverride = false;
+    // Hoisted so the leader's research decision can be reused by runDebate instead
+    // of re-running the classifier LLM call (see CouncilConfig.leaderNeedsResearch).
+    // Stays undefined if the classifier throws — fail-open: runDebate re-evaluates.
+    let leaderNeedsResearch;
     try {
         const needGen = evaluateResearchNeed(spec, leaderModelId, conversationContext, llm, costAware);
         let needStep;
-        let leaderNeedsResearch = true;
         do {
             needStep = await needGen.next();
             if (!needStep.done && needStep.value)
@@ -156,8 +159,9 @@ export async function* runCouncil(topic, sessionModelId, messages, sessionId, ll
             };
         }
     }
-    catch {
-        /* fail-open — fall through to default behavior in runDebate */
+    catch (err) {
+        // fail-open — leaderNeedsResearch stays undefined so runDebate re-evaluates.
+        console.error(`[council] research-need pre-check failed (fail-open): ${err?.message}`);
     }
     // Await EE pre-fetch (started in parallel with clarifier — latency already hidden)
     const eeResult = await eePromise;
@@ -228,6 +232,7 @@ export async function* runCouncil(topic, sessionModelId, messages, sessionId, ll
         debatePlan,
         signal: options?.signal,
         researchSkipOverride,
+        leaderNeedsResearch,
         internetFirst,
         costAware,
     }, llm);
@@ -642,8 +647,10 @@ export async function* runCouncil(topic, sessionModelId, messages, sessionId, ll
                     })),
                     synthesisExcerpt: synthesisText.slice(0, 2000),
                     rejectedProposals: rejectedProposals.length > 0 ? rejectedProposals : undefined,
-                }).catch(() => {
-                    /* non-critical — lock file write failure must never break the council */
+                }).catch((err) => {
+                    // writeDecisionsLock logs its own errors and returns false; this guard
+                    // only fires on an unexpected throw — log it (No-Silent-Catch), never break council.
+                    console.error(`[council] decisions.lock write guard caught: ${err?.message}`);
                 });
             }
         }

package/dist/src/council/leader.d.ts CHANGED Viewed

@@ -33,23 +33,6 @@ export interface LeaderResolution {
     /** Set when no configured leader existed and one was picked by tier. */
     defaulted?: boolean;
 }
-/**
- * Resolve the leader model with quality-aware promotion.
- *
- * Hard rule: stay within the SESSION model's provider — don't switch providers
- * (different billing, surprise cost). We only upgrade tier within the same
- * provider that the user is already running.
- *
- * Priority:
- *   1. Find the highest-tier reachable model from the session provider's
- *      catalog (registry + any configured role-models on that provider).
- *   2. If a configured `roleModels.leader` exists AND is on the session
- *      provider, use it unless a strictly higher-tier model exists on the
- *      same provider — then auto-promote with a note.
- *   3. If configured leader is on a DIFFERENT provider, ignore it and pick
- *      from the session provider.
- *   4. Fall back to the session model itself.
- */
 export declare function resolveLeaderModelDetailed(sessionModelId: string): Promise<LeaderResolution>;
 /** Back-compat sync wrapper. Returns the modelId only; no reachability check. */
 export declare function resolveLeaderModel(sessionModelId: string): string;

package/dist/src/council/leader.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { getModelByTier, getModelInfo, getModelsForProvider } from "../models/registry.js";
-import { loadKeyForProvider } from "../providers/keychain.js";
+import { getConfiguredProviders } from "../providers/keychain.js";
 import { detectProviderForModel } from "../providers/runtime.js";
 import { getRoleModel, getRoleModels, isProviderDisabled } from "../utils/settings.js";
 const TIER_RANK = { fast: 1, balanced: 2, premium: 3 };
@@ -77,16 +77,30 @@ export function pickCouncilTaskModel(task, leaderModelId, costAware) {
  *      from the session provider.
  *   4. Fall back to the session model itself.
  */
+/**
+ * A provider is reachable when it has an API key OR a stored OAuth token.
+ * `loadKeyForProvider` only knows API keys (it throws for OAuth-only
+ * providers), so without the OAuth fallback an OAuth-authed provider — e.g.
+ * grok via xAI OAuth, or OpenAI/Google OAuth without an API key — was wrongly
+ * treated as unreachable, making council bail "No reachable provider" even
+ * though the model answers fine. VERIFY F15.
+ */
+async function isProviderReachable(provider) {
+    // getConfiguredProviders() is the authoritative cred check — it unifies API
+    // keys (keychain/env/settings) AND stored OAuth tokens across every provider
+    // in the OAuth registry. The old loadKeyForProvider-only check saw API keys
+    // but not OAuth, so an OAuth-only provider (e.g. grok via xAI OAuth) was
+    // wrongly unreachable and council bailed "No reachable provider". VERIFY F15.
+    const configured = await getConfiguredProviders();
+    return configured.includes(provider);
+}
 export async function resolveLeaderModelDetailed(sessionModelId) {
     const sessionProviderId = detectProviderForModel(sessionModelId);
     const configured = getRoleModel("leader");
     const configuredProvider = configured ? detectProviderForModel(configured) : undefined;
     const configuredTier = configured ? tierOf(configured) : undefined;
     const sessionDisabled = isProviderDisabled(sessionProviderId);
-    const sessionReachable = !sessionDisabled &&
-        (await loadKeyForProvider(sessionProviderId)
-            .then(() => true)
-            .catch(() => false));
+    const sessionReachable = !sessionDisabled && (await isProviderReachable(sessionProviderId));
     if (!sessionReachable) {
         return { modelId: configured ?? sessionModelId };
     }
@@ -169,9 +183,7 @@ export async function resolveParticipants(sessionModelId, preferMultiProvider) {
             const provider = detectProviderForModel(modelId);
             if (isProviderDisabled(provider))
                 continue;
-            const canReach = await loadKeyForProvider(provider)
-                .then(() => true)
-                .catch(() => false);
+            const canReach = await isProviderReachable(provider);
             if (canReach)
                 candidates.push({ role, model: modelId });
         }
@@ -186,10 +198,7 @@ export async function resolveParticipants(sessionModelId, preferMultiProvider) {
             return sameCandidates;
     }
     const providerDisabled = isProviderDisabled(detectProviderForModel(sessionModelId));
-    const canReach = !providerDisabled &&
-        (await loadKeyForProvider(detectProviderForModel(sessionModelId))
-            .then(() => true)
-            .catch(() => false));
+    const canReach = !providerDisabled && (await isProviderReachable(detectProviderForModel(sessionModelId)));
     if (canReach) {
         return ALL_ROLES.map((role) => ({ role, model: sessionModelId }));
     }
@@ -198,9 +207,7 @@ export async function resolveParticipants(sessionModelId, preferMultiProvider) {
 async function resolveSameProviderCandidates(providerId, sessionModelId, roles) {
     if (isProviderDisabled(providerId))
         return [];
-    const canReach = await loadKeyForProvider(providerId)
-        .then(() => true)
-        .catch(() => false);
+    const canReach = await isProviderReachable(providerId);
     if (!canReach)
         return [];
     const providerModels = getModelsForProvider(providerId);

package/dist/src/council/planner.js CHANGED Viewed

@@ -186,7 +186,7 @@ function shapeFallback(synthesisText, debatePlan) {
         let found = false;
         for (const line of synthesisText.split("\n")) {
             const trimmed = line.trim();
-            if (trimmed.match(new RegExp(`^#{1,3}s+${heading.replace(/\s+/g, "s+")}`, "i"))) {
+            if (trimmed.match(new RegExp(`^#{1,3}\\s+${heading.replace(/\s+/g, "\\s+")}`, "i"))) {
                 found = true;
                 continue;
             }

package/dist/src/council/prompts.js CHANGED Viewed

@@ -6,36 +6,35 @@ export function buildClarificationPrompt(topic, conversationContext, previousQA)
         : "";
     return {
         system: `You are a senior technical lead preparing for a multi-expert discussion. ` +
-            `Your job is to identify AMBIGUITIES in the topic that would cause experts to talk past each other or go off-topic.\n\n` +
-            `Analyze the topic and conversation context carefully. Generate targeted clarification questions.\n` +
-            `Focus on:\n` +
-            `- SCOPE: What exactly is in/out of scope?\n` +
-            `- CONSTRAINTS: Technical, time, resource, or business constraints?\n` +
-            `- SUCCESS CRITERIA: How will we know the discussion produced a good result?\n` +
-            `- CONTEXT: What existing decisions, code, or patterns are relevant?\n\n` +
-            `## Minimum-question rule\n` +
-            `Return [] ONLY for topics that are already a precise technical question with a single ` +
-            `expected outcome (e.g. "What does X function return?", "Fix typo in README"). ` +
-            `For ANY topic that describes a feature, project, idea, or design — even if the user ` +
-            `gave several sentences — you MUST ask AT LEAST 2 questions, typically about:\n` +
-            `- Scope boundaries (what's in/out of v1)\n` +
-            `- Success metric (how is "done" measured)\n` +
-            `- Hard constraint (timeline, platform, must-include / must-avoid)\n` +
-            `A 1-paragraph "build me X" topic is NEVER specific enough — there are always implicit ` +
-            `scope, criteria, and constraint gaps. Ask them.\n\n` +
-            `If the topic is already specific enough (single technical Q&A only), return an empty array.\n\n` +
+            `Your job is to surface the FEW genuine ambiguities that would make experts talk past each other — NOT to run a questionnaire.\n\n` +
+            `Read the topic and the conversation context — especially any "## Current Project" section — carefully. ` +
+            `Ask ONLY about things you genuinely cannot infer and that would actually change the plan:\n` +
+            `- SCOPE: what is in/out of scope for THIS change?\n` +
+            `- CONSTRAINTS: hard technical/time/business constraints not already implied by the context.\n` +
+            `- SUCCESS CRITERIA: how "done" is judged, when it isn't already obvious.\n\n` +
+            `## How many questions\n` +
+            `Ask the minimum that unblocks a focused discussion — typically 0-2. A well-scoped topic, or one ` +
+            `whose context already answers the gaps, needs ZERO questions: return []. Do NOT pad to a quota, ` +
+            `and never ask a question whose answer is already in the topic or the project context.\n\n` +
+            `## Existing-repo grounding (IMPORTANT)\n` +
+            `If a "## Current Project" section is present you are working in an EXISTING repository — NOT a ` +
+            `greenfield project. Ground every question and every option in what that snapshot actually shows ` +
+            `(its language, framework, modules, conventions). Do NOT ask generic greenfield questions — product ` +
+            `type, target audience, which language/framework, which database, hosting — when the repo already ` +
+            `answers them; asking those signals you ignored the context and wastes the user's time. Ask only ` +
+            `about intent/scope decisions specific to THIS change, phrased in terms of the real codebase.\n\n` +
             `IMPORTANT — defaults from the workspace:\n` +
             `- If the topic refers to "this project", "current project", "repo này", "dự án hiện tại" or similar, ` +
             `the project IS the one described in the "## Current Project" section of the context. DO NOT ask which project.\n` +
             `- Only ask about project identity when the topic mentions multiple distinct projects or external products.\n` +
-            `- Prefer using the project's package.json name and description as implicit context for follow-up questions.\n\n` +
+            `- Use the project's package.json name and description as implicit context for follow-up questions.\n\n` +
             `Output ONLY a JSON array (no markdown, no preamble):\n` +
             `[{"question": "...", "why": "why this matters for a focused discussion", "suggestions": ["option A", "option B"], "recommended": "option A", "isRequired": true}]\n\n` +
-            `Rules for "recommended":\n` +
-            `- Only include "recommended" when, given the topic + context, ONE option is clearly the best default.\n` +
+            `Rules for "recommended" (be decisive — the user should never face an unranked list):\n` +
+            `- ALWAYS include "recommended" — the single option you would choose if the user said "you decide", given the topic + project context.\n` +
             `- Its value MUST be exactly equal to one of the entries in "suggestions".\n` +
-            `- Pick at most ONE recommended option per question. If you cannot confidently single one out, OMIT the field entirely — do not guess.\n` +
-            `Return [] if no clarification needed.`,
+            `- Omit it ONLY in a genuine 50/50 tie where recommending either option would be misleading. A missing recommendation must be the rare exception, not the default.\n` +
+            `Return [] if no clarification is needed.`,
         prompt: `## Topic\n${topic}\n\n` +
             (conversationContext ? `## Conversation Context\n${conversationContext}\n` : "") +
             qaSection,
@@ -84,8 +83,7 @@ export function buildReadinessJudgePrompt(topic, qa, spec) {
             `- "gaps" MUST be empty when "ready" is true.\n` +
             `- Each gap is a single sentence starting with a noun: what info is missing (not a question).\n` +
             `  Example: "Target platform (web, mobile, or both) not specified."\n` +
-            `- "confidence" reflects how sure you are; a ready=true with confidence=0.6 means "probably " +\n` +
-            `  "ready but some ambiguity remains". confidence=1.0 means zero remaining blind spots.\n` +
+            `- "confidence" reflects how sure you are; a ready=true with confidence=0.6 means "probably ready but some ambiguity remains". confidence=1.0 means zero remaining blind spots.\n` +
             `- When the topic is a simple one-answer technical question (no design/scope), set ready=true, ` +
             `  confidence=1.0, gaps=[].`,
         prompt: `## Topic\n${topic}\n\n` +
@@ -124,15 +122,14 @@ const ENGLISH_ONLY_RULE = `\n## Language Rule (mandatory)\n` +
  * and burn the step budget without producing analytical content (the bug
  * that caused session a7a5690d2049 to fail with 4/4 empty turns).
  */
+// Opening turns run tool-free (openingWithRetry → llm.generate, no verification
+// tools wired). The rule must NOT advertise tools the model cannot call, or it
+// hallucinates `[CONFIRMED via grep:...]` tags for searches it never ran.
 const EVIDENCE_RULE_OPENING = `\n## Evidence Rule\n` +
-    `Stay analytical. You may optionally call AT MOST ONE verification tool ` +
-    `(grep / read_file / web_fetch / context7) ONLY to verify a SPECIFIC ` +
-    `numerical or factual claim you would otherwise have to invent.\n` +
-    `- Do NOT call tools for exploration or to gather background context.\n` +
-    `- Do NOT chain multiple tool calls — you have one shot, then must produce your full response.\n` +
-    `- If no claim needs verification, skip tool use entirely and answer directly.\n` +
-    `Tag verified facts: \`[CONFIRMED via <tool>:<evidence>]\` or \`[REFUTED via <tool>:<evidence>]\`.\n` +
-    `For uncited numbers / library specs that you cannot verify, mark them \`[UNVERIFIED: <claim>]\`.\n`;
+    `Stay analytical and ground every claim in the brief + context you were given. ` +
+    `You have NO tools in this opening turn — do not claim to have run grep / read_file / web searches.\n` +
+    `- For any number or library spec you cannot support from the provided context, mark it \`[UNVERIFIED: <claim>]\` instead of asserting it.\n` +
+    `- A later round can verify disputed claims; your job now is a clear, honest analysis.\n`;
 const EVIDENCE_RULE_RESPONSE = `\n## Evidence Rule\n` +
     `Stay analytical. You may optionally call AT MOST ONE verification tool ` +
     `(grep / read_file / web_fetch / context7) ONLY to verify a SPECIFIC ` +
@@ -277,9 +274,7 @@ export function buildLeaderEvaluationPrompt(ctx) {
             `  "researchQuery": null,\n` +
             `  "shouldContinue": true/false,\n` +
             `  "reason": "one sentence explaining your decision",\n` +
-            `  "evidenceDensity": 0.0,  // citations / total claims ratio (0.0–1.0)\n` +
-            `  "disagreementResolved": 0,  // count of [REFUTED] + [CONFIRMED] tags and explicit concessions\n` +
-            `  "extendRounds": 0  // set to 1-3 ONLY when this is the last planned round AND one critical point is genuinely close to resolving but not yet there. 0 otherwise.\n` +
+            `  "extendRounds": 0  // set to 1-3 ONLY when one critical point is genuinely close to resolving but not yet there; 0 otherwise. The orchestrator applies this only if rounds remain — do not try to track the round count yourself.\n` +
             (stackLock
                 ? `  ,\n  "consensusQuality": "full",  // "full" when all positions stay within locked stack; "partial" when out-of-stack violations found\n` +
                     `  "outOfStackViolations": []  // list of out-of-stack tech names cited by participants (empty when none)\n`
@@ -297,7 +292,8 @@ export function buildRoundSummaryPrompt(allExchanges, topic, round) {
             `1. Points where participants AGREE\n` +
             `2. Points still in DISPUTE (with each side's core argument)\n` +
             `3. New EVIDENCE or perspectives raised this round\n` +
-            `Be concise — one line per bullet. No preamble.`,
+            `Be concise — one line per bullet. No preamble. ` +
+            `Do NOT write "Round N" or any round-number counter in your bullets — this summary is fed into later turns, where round labels read as robotic noise. Refer to points by their content.`,
         prompt: `Round ${round} discussion on: ${topic}\n\n${allExchanges}`,
     };
 }
@@ -468,6 +464,15 @@ export function buildSynthesisPrompt(ctx) {
                     : "Balance clarity with completeness.") // balanced (default)
         : "";
     const stackLockForSynth = buildStackLockSection(ctx.spec);
+    // De-robotize: for choice/plan outputs, force a single decisive recommendation
+    // (mirrors the clarifier's mandatory-default rule). Scoped to decision/plan kinds
+    // so evaluation/investigation/exploration shapes keep their neutral analytical tone.
+    const decisiveness = finalShape.kind === "decision" || finalShape.kind === "implementation_plan"
+        ? `\n## Decisiveness (recommendation/verdict)\n` +
+            `Lead with the single choice you would make if the user said "you decide" — name it in the first sentence of the recommendation. ` +
+            `Do NOT hedge with "it depends", "both have merits", or an unranked list of options. ` +
+            `If the debate genuinely did not converge, say so in one sentence and STILL give your best single recommendation plus the one condition that would change it.\n`
+        : "";
     let system = `You are the team lead synthesizing a multi-specialist discussion.\n\n` +
         `## Original Brief\n` +
         `Problem: ${ctx.spec.problemStatement}\n` +
@@ -476,6 +481,7 @@ export function buildSynthesisPrompt(ctx) {
         intent +
         (stackLockForSynth ? `\n${stackLockForSynth}\n` : "") +
         guardrailBlock +
+        decisiveness +
         `\nProduce the answer the user requested — do NOT default to an implementation plan ` +
         `unless the output shape explicitly asks for actionItems/plan. ` +
         `Stay grounded in the discussion; do not invent facts; mark unverified claims explicitly.\n\n` +
@@ -507,33 +513,33 @@ export function buildSynthesisPrompt(ctx) {
     }
     let extraContext = "";
     if (ctx.refineContext) {
-        extraContext += `
-## User Refinements
-${ctx.refineContext}
+        extraContext += `
+## User Refinements
+${ctx.refineContext}
 `;
     }
     if (ctx.planEmphasis) {
-        extraContext += `
-## Additional Instruction
-The user has requested a concrete action plan with executable steps. Each action item MUST be an object with these fields:
-  {
-    "step": "<imperative action>",
-    "owner_lens": "<which stance owns this — frontend / backend / architecture / etc>",
-    "time_estimate": "<rough — e.g. '2h', '1d', '~30min'>",
-    "depends_on": ["<step keys this requires>"] or [],
-    "acceptance_criteria": "<how we know it's done>"
-  }
-Order action items by dependency: predecessors first, dependents after.
-Risks MUST be objects with: {"description", "severity": "High|Medium|Low", "mitigation"}.
-Do NOT emit loose strings for these fields — the user needs structured plan output.
+        extraContext += `
+## Additional Instruction
+The user has requested a concrete action plan with executable steps. Each action item MUST be an object with these fields:
+  {
+    "step": "<imperative action>",
+    "owner_lens": "<which stance owns this — frontend / backend / architecture / etc>",
+    "time_estimate": "<rough — e.g. '2h', '1d', '~30min'>",
+    "depends_on": ["<step keys this requires>"] or [],
+    "acceptance_criteria": "<how we know it's done>"
+  }
+Order action items by dependency: predecessors first, dependents after.
+Risks MUST be objects with: {"description", "severity": "High|Medium|Low", "mitigation"}.
+Do NOT emit loose strings for these fields — the user needs structured plan output.
 `;
     }
     return {
         system,
-        prompt: `Final positions:
-${ctx.finalPositions}
-Full discussion:
+        prompt: `Final positions:
+${ctx.finalPositions}
+Full discussion:
 ${ctx.allExchanges}${extraContext}`,
     };
 }

package/dist/src/council/types.d.ts CHANGED Viewed

@@ -200,6 +200,13 @@ export interface CouncilConfig {
     userModelMessage?: ModelMessage;
     /** When true, runDebate skips the research phase even if the leader requested it (user override). */
     researchSkipOverride?: boolean;
+    /**
+     * Leader's pre-computed "is research needed?" decision from runCouncil. When set,
+     * runDebate reuses it instead of re-running the classifier LLM call — avoids a
+     * duplicate leader-tier call per run plus a possible contradiction with the
+     * user-facing skip card. Undefined for direct runDebate callers/tests (they re-evaluate).
+     */
+    leaderNeedsResearch?: boolean;
     /** When true, the working directory has no source code yet — research prompt prefers internet sources. */
     internetFirst?: boolean;
     /** When true, leader sub-tasks downshift to cheaper tier models on the same provider. */

package/dist/src/ee/__tests__/ee-onboarding.test.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/src/ee/__tests__/ee-onboarding.test.js ADDED Viewed

@@ -0,0 +1,32 @@
+import { mkdtempSync, readFileSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import { writeExperienceConfig } from "../auth.js";
+describe("writeExperienceConfig", () => {
+    let home;
+    beforeEach(() => {
+        home = mkdtempSync(join(tmpdir(), "ee-cfg-"));
+    });
+    afterEach(() => {
+        rmSync(home, { recursive: true, force: true });
+    });
+    const read = () => JSON.parse(readFileSync(join(home, ".experience", "config.json"), "utf8"));
+    it("creates ~/.experience/config.json (and dir) when none exists", async () => {
+        await writeExperienceConfig({ serverBaseUrl: "https://ee.example.com", serverAuthToken: "tok" }, { home });
+        const cfg = read();
+        expect(cfg.serverBaseUrl).toBe("https://ee.example.com");
+        expect(cfg.serverAuthToken).toBe("tok");
+    });
+    it("merges into an existing config, preserving unrelated fields", async () => {
+        // Seed an existing config with an unrelated field (as the EE installer would).
+        await writeExperienceConfig({ embeddingModelVersion: "v9", serverAuthToken: "old" }, { home });
+        // Now write a new serverBaseUrl + token — embeddingModelVersion must survive.
+        await writeExperienceConfig({ serverBaseUrl: "https://ee2.example.com", serverAuthToken: "new" }, { home });
+        const cfg = read();
+        expect(cfg.embeddingModelVersion).toBe("v9"); // preserved
+        expect(cfg.serverBaseUrl).toBe("https://ee2.example.com"); // added
+        expect(cfg.serverAuthToken).toBe("new"); // overwritten
+    });
+});
+//# sourceMappingURL=ee-onboarding.test.js.map

package/dist/src/ee/auth.d.ts CHANGED Viewed

@@ -15,6 +15,15 @@ export declare function loadEEAuthToken(opts?: {
 export declare function refreshAuthToken(opts?: {
     home?: string;
 }): Promise<string | null>;
+/**
+ * Merge a partial config into ~/.experience/config.json (creating the file +
+ * directory if absent), preserving any fields the EE installer or the user
+ * already wrote. Used by the first-run EE setup step. Throws on write failure so
+ * the caller can surface it (never silently swallow — the user asked to set this up).
+ */
+export declare function writeExperienceConfig(patch: Partial<ExperienceConfig>, opts?: {
+    home?: string;
+}): Promise<void>;
 export declare function getCachedAuthToken(): string | null;
 export declare function getEmbeddingModelVersion(): string;
 export declare function getCachedServerBaseUrl(): string | null;

package/dist/src/ee/auth.js CHANGED Viewed

@@ -39,6 +39,25 @@ export async function refreshAuthToken(opts = {}) {
     _token = null;
     return await loadEEAuthToken(opts);
 }
+/**
+ * Merge a partial config into ~/.experience/config.json (creating the file +
+ * directory if absent), preserving any fields the EE installer or the user
+ * already wrote. Used by the first-run EE setup step. Throws on write failure so
+ * the caller can surface it (never silently swallow — the user asked to set this up).
+ */
+export async function writeExperienceConfig(patch, opts = {}) {
+    const p = configPath(opts.home);
+    let existing = {};
+    try {
+        existing = JSON.parse(await fs.readFile(p, "utf8"));
+    }
+    catch {
+        // No existing config (or unreadable) — start fresh.
+    }
+    const merged = { ...existing, ...patch };
+    await fs.mkdir(path.dirname(p), { recursive: true });
+    await fs.writeFile(p, `${JSON.stringify(merged, null, 2)}\n`, "utf8");
+}
 export function getCachedAuthToken() {
     return _token;
 }

package/dist/src/ee/ee-onboarding.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+/**
+ * Returns true when a config was written (so the caller can reload EE auth).
+ * Returns false when skipped or invalid.
+ */
+export declare function firstRunEESetup(): Promise<boolean>;

package/dist/src/ee/ee-onboarding.js ADDED Viewed

@@ -0,0 +1,76 @@
+/**
+ * First-run Experience Engine setup (interactive, readline — runs BEFORE any TUI
+ * code, same pattern as the credential wizard). Offers to connect an EE server
+ * and writes ~/.experience/config.json so the agent's record/recall/feedback
+ * loop (ee_query / ee_feedback via muonroi-tools) has a brain to talk to.
+ *
+ * Optional + skippable: a blank URL skips. No hardcoded fallback — a failed
+ * health probe is reported, not hidden, and never blocks setup.
+ */
+import { createInterface } from "node:readline";
+import { writeExperienceConfig } from "./auth.js";
+/** Best-effort reachability probe — returns true/false, never throws. */
+async function probeHealth(baseUrl, token) {
+    try {
+        const ac = new AbortController();
+        const timer = setTimeout(() => ac.abort(), 4000);
+        try {
+            const res = await fetch(`${baseUrl}/health`, {
+                signal: ac.signal,
+                headers: token ? { authorization: `Bearer ${token}` } : undefined,
+            });
+            return res.ok;
+        }
+        finally {
+            clearTimeout(timer);
+        }
+    }
+    catch {
+        return false;
+    }
+}
+/**
+ * Returns true when a config was written (so the caller can reload EE auth).
+ * Returns false when skipped or invalid.
+ */
+export async function firstRunEESetup() {
+    const rl = createInterface({ input: process.stdin, output: process.stderr });
+    const ask = (q) => new Promise((resolve) => rl.question(q, (a) => resolve(a)));
+    try {
+        process.stderr.write("\nExperience Engine (optional) — a shared brain that recalls past decisions, gotchas,\n" +
+            "and recipes so the agent works like a senior on your stack. You can set this up later\n" +
+            "by editing ~/.experience/config.json or setting MUONROI_EE_BASE_URL.\n\n");
+        const url = (await ask("EE server URL (blank to skip): ")).trim();
+        if (!url) {
+            process.stderr.write("Skipped Experience Engine setup.\n");
+            return false;
+        }
+        let normalized;
+        try {
+            normalized = new URL(url).toString().replace(/\/$/, "");
+        }
+        catch {
+            process.stderr.write("That doesn't look like a valid URL — skipped EE setup.\n");
+            return false;
+        }
+        const token = (await ask("EE auth token (blank if the server needs none): ")).trim();
+        await writeExperienceConfig({
+            serverBaseUrl: normalized,
+            ...(token ? { serverAuthToken: token } : {}),
+        });
+        process.stderr.write(`Wrote Experience Engine config → ~/.experience/config.json (serverBaseUrl=${normalized}).\n`);
+        const reachable = await probeHealth(normalized, token || undefined);
+        process.stderr.write(reachable
+            ? "  ✓ EE server reachable.\n"
+            : "  ⚠ Could not reach the EE server right now (saved anyway — run 'muonroi-cli doctor' to recheck).\n");
+        return true;
+    }
+    catch (err) {
+        process.stderr.write(`\nEE setup failed: ${err?.message ?? String(err)} — skipped.\n`);
+        return false;
+    }
+    finally {
+        rl.close();
+    }
+}
+//# sourceMappingURL=ee-onboarding.js.map

package/dist/src/generated/version.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export declare const PACKAGE_VERSION = "1.4.1";
+export declare const PACKAGE_VERSION = "1.5.0";
 export declare const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";

package/dist/src/generated/version.js CHANGED Viewed

@@ -1,5 +1,5 @@
 // AUTO-GENERATED by scripts/sync-version.cjs. DO NOT EDIT BY HAND.
 // Sourced from package.json at build time so it survives bun --compile bundling.
-export const PACKAGE_VERSION = "1.4.1";
+export const PACKAGE_VERSION = "1.5.0";
 export const PACKAGE_DESCRIPTION = "BYOK AI coding agent with multi-model council debate, role-based routing, and auto-compact.";
 //# sourceMappingURL=version.js.map