npm - muonroi-cli - Versions diffs - 1.4.1 → 1.6.0 - Mend

muonroi-cli 1.4.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (194) hide show

package/LICENSE +21 -21
package/README.md +122 -122
package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
package/dist/src/agent-harness/mock-model.d.ts +11 -0
package/dist/src/agent-harness/mock-model.js +21 -0
package/dist/src/cli/cost-forensics.js +12 -12
package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
package/dist/src/council/clarifier.js +9 -1
package/dist/src/council/debate.js +5 -1
package/dist/src/council/decisions-lock.js +3 -3
package/dist/src/council/index.js +12 -5
package/dist/src/council/leader.d.ts +0 -17
package/dist/src/council/leader.js +22 -15
package/dist/src/council/planner.js +1 -1
package/dist/src/council/prompts.js +63 -57
package/dist/src/council/types.d.ts +7 -0
package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
package/dist/src/ee/artifact-cache.d.ts +56 -0
package/dist/src/ee/artifact-cache.js +155 -0
package/dist/src/ee/artifact-cache.test.d.ts +1 -0
package/dist/src/ee/artifact-cache.test.js +69 -0
package/dist/src/ee/auth.d.ts +9 -0
package/dist/src/ee/auth.js +19 -0
package/dist/src/ee/ee-onboarding.d.ts +5 -0
package/dist/src/ee/ee-onboarding.js +76 -0
package/dist/src/ee/search.js +7 -5
package/dist/src/ee/search.test.d.ts +1 -0
package/dist/src/ee/search.test.js +23 -0
package/dist/src/generated/version.d.ts +1 -1
package/dist/src/generated/version.js +1 -1
package/dist/src/headless/output.js +6 -4
package/dist/src/headless/output.test.js +4 -3
package/dist/src/index.js +20 -1
package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
package/dist/src/mcp/auto-setup.js +56 -2
package/dist/src/mcp/client-pool.d.ts +46 -0
package/dist/src/mcp/client-pool.js +212 -0
package/dist/src/mcp/oauth-callback.js +2 -2
package/dist/src/mcp/parse-headers.test.js +14 -14
package/dist/src/mcp/runtime.d.ts +28 -0
package/dist/src/mcp/runtime.js +117 -51
package/dist/src/mcp/self-verify-runner.d.ts +14 -0
package/dist/src/mcp/self-verify-runner.js +38 -0
package/dist/src/mcp/setup-guide-text.d.ts +9 -0
package/dist/src/mcp/setup-guide-text.js +84 -0
package/dist/src/mcp/smart-filter.js +49 -0
package/dist/src/mcp/smoke.test.js +43 -43
package/dist/src/mcp/tools-server.d.ts +7 -0
package/dist/src/mcp/tools-server.js +19 -22
package/dist/src/models/catalog.json +349 -349
package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
package/dist/src/ops/doctor.d.ts +3 -2
package/dist/src/ops/doctor.js +47 -11
package/dist/src/ops/doctor.test.js +4 -3
package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
package/dist/src/orchestrator/batch-turn-runner.js +7 -11
package/dist/src/orchestrator/compaction.d.ts +2 -0
package/dist/src/orchestrator/compaction.js +14 -1
package/dist/src/orchestrator/compaction.test.js +25 -1
package/dist/src/orchestrator/message-processor.js +72 -32
package/dist/src/orchestrator/orchestrator.js +26 -0
package/dist/src/orchestrator/prompts.d.ts +51 -0
package/dist/src/orchestrator/prompts.js +257 -134
package/dist/src/orchestrator/scope-ceiling.js +6 -1
package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
package/dist/src/orchestrator/scope-reminder.js +16 -0
package/dist/src/orchestrator/scope-reminder.test.js +22 -1
package/dist/src/orchestrator/stream-runner.js +23 -15
package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
package/dist/src/orchestrator/subagent-compactor.js +30 -8
package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
package/dist/src/pil/__tests__/config.test.js +1 -17
package/dist/src/pil/__tests__/discovery.test.js +144 -11
package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
package/dist/src/pil/__tests__/layer6-output.test.js +158 -18
package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
package/dist/src/pil/agent-operating-contract.d.ts +1 -1
package/dist/src/pil/agent-operating-contract.js +2 -0
package/dist/src/pil/agent-operating-contract.test.js +7 -2
package/dist/src/pil/cheap-model-playbook.js +35 -35
package/dist/src/pil/cheap-model-workbooks.js +16 -13
package/dist/src/pil/clarity-gate.d.ts +21 -19
package/dist/src/pil/clarity-gate.js +26 -153
package/dist/src/pil/config.d.ts +9 -1
package/dist/src/pil/config.js +15 -4
package/dist/src/pil/discovery.js +211 -136
package/dist/src/pil/layer1-intent.d.ts +12 -0
package/dist/src/pil/layer1-intent.js +283 -38
package/dist/src/pil/layer1-intent.test.js +210 -4
package/dist/src/pil/layer16-clarity.d.ts +25 -11
package/dist/src/pil/layer16-clarity.js +19 -306
package/dist/src/pil/layer3-ee-injection.d.ts +19 -0
package/dist/src/pil/layer3-ee-injection.js +96 -4
package/dist/src/pil/layer4-gsd.js +18 -6
package/dist/src/pil/layer6-output.d.ts +2 -0
package/dist/src/pil/layer6-output.js +151 -25
package/dist/src/pil/llm-classify.d.ts +26 -0
package/dist/src/pil/llm-classify.js +34 -5
package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
package/dist/src/pil/native-capabilities-workbook.js +82 -76
package/dist/src/pil/pipeline.js +15 -9
package/dist/src/pil/schema.d.ts +8 -0
package/dist/src/pil/schema.js +12 -1
package/dist/src/pil/task-tier-map.js +4 -0
package/dist/src/pil/types.d.ts +11 -1
package/dist/src/product-loop/done-gate.js +3 -3
package/dist/src/product-loop/loop-driver.js +18 -18
package/dist/src/product-loop/progress-snapshot.js +4 -4
package/dist/src/providers/auth/gemini-oauth.js +6 -15
package/dist/src/providers/auth/grok-oauth.js +6 -15
package/dist/src/providers/auth/openai-oauth.js +6 -15
package/dist/src/providers/mcp-vision-bridge.js +48 -48
package/dist/src/reporter/index.js +1 -1
package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
package/dist/src/scaffold/bb-quality-gate.js +5 -5
package/dist/src/scaffold/continuation-prompt.js +60 -60
package/dist/src/scaffold/init-new.js +453 -453
package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
package/dist/src/self-qa/agentic-loop.js +24 -19
package/dist/src/self-qa/spec-emitter.js +26 -23
package/dist/src/storage/__tests__/migrations.test.js +2 -2
package/dist/src/storage/interaction-log.js +5 -5
package/dist/src/storage/migrations.js +122 -122
package/dist/src/storage/sessions.js +42 -42
package/dist/src/storage/transcript.js +91 -84
package/dist/src/storage/usage.js +14 -14
package/dist/src/storage/workspaces.js +12 -12
package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
package/dist/src/tools/__tests__/native-tools.test.js +53 -0
package/dist/src/tools/git-safety.d.ts +61 -0
package/dist/src/tools/git-safety.js +141 -0
package/dist/src/tools/git-safety.test.d.ts +1 -0
package/dist/src/tools/git-safety.test.js +111 -0
package/dist/src/tools/native-tools.d.ts +31 -0
package/dist/src/tools/native-tools.js +273 -0
package/dist/src/tools/registry-ee-query.test.js +18 -1
package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
package/dist/src/tools/registry-git-safety.test.js +92 -0
package/dist/src/tools/registry.js +52 -6
package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
package/dist/src/ui/app.js +0 -0
package/dist/src/ui/components/message-view.js +4 -1
package/dist/src/ui/components/structured-response-view.js +7 -3
package/dist/src/ui/components/tool-group.js +7 -1
package/dist/src/ui/markdown-render.d.ts +41 -0
package/dist/src/ui/markdown-render.js +223 -0
package/dist/src/ui/markdown.d.ts +10 -0
package/dist/src/ui/markdown.js +12 -35
package/dist/src/ui/slash/council-inspect.js +4 -4
package/dist/src/ui/slash/export.js +4 -4
package/dist/src/ui/utils/text.d.ts +8 -0
package/dist/src/ui/utils/text.js +16 -0
package/dist/src/ui/utils/text.test.d.ts +1 -0
package/dist/src/ui/utils/text.test.js +23 -0
package/dist/src/usage/ledger.js +48 -15
package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
package/dist/src/utils/clipboard-image.js +23 -23
package/dist/src/utils/open-url.d.ts +56 -0
package/dist/src/utils/open-url.js +58 -0
package/dist/src/utils/open-url.test.d.ts +1 -0
package/dist/src/utils/open-url.test.js +86 -0
package/dist/src/utils/settings.d.ts +12 -0
package/dist/src/utils/settings.js +48 -0
package/dist/src/utils/side-question.js +2 -2
package/dist/src/utils/skills.js +3 -3
package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
package/dist/src/verify/environment.js +2 -1
package/package.json +1 -1
package/dist/src/pil/layer16-clarity.test.js +0 -31
/package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0

package/dist/src/pil/layer1-intent.js CHANGED Viewed

@@ -11,7 +11,7 @@
  */
 import { classifyViaBrain, pilContext } from "../ee/bridge.js";
 import { classify } from "../router/classifier/index.js";
-import { isUnifiedPilEnabled } from "./config.js";
+import { isLlmFirstClassifyEnabled, isUnifiedPilEnabled } from "./config.js";
 /** File/path reference regex — matches common source-file extensions. */
 const FILE_REF_RE = /[\w./-]+\.(ts|tsx|js|jsx|json|md|py|rs|go|cs)\b/gi;
 /** Keywords that force a "low" complexity signal (additive score -3). */
@@ -205,6 +205,58 @@ export function isPerformanceRefactor(raw) {
         return false;
     return true;
 }
+// Greenfield CREATE/BUILD intent → build.
+//
+// Live `/ideal` E2E verify (fix/council-oauth-reachable): greenfield BUILD
+// prompts were misclassified at the pil-acceptance card —
+//   "build a muonroi-building-block microservice …"           → refactor
+//   "build a Node TS ISO-4217 currency validator w/ vitest tests" → analyze
+// Root cause: the verb "build" (and bare "create X" where X is not one of the
+// literal nouns file/component/module/class/function) is recognized by NO
+// deterministic pass. Pass 1's create-file regex only fires on those literal
+// nouns; Pass 2's `generate` keyword only has generate/scaffold/bootstrap. So
+// greenfield "build/create/implement X" prompts fall through to the brain/LLM
+// — documented to bias toward `refactor` for any code touch (see Pass 3 legacy
+// prompt, 4P-2) — and worse, a build prompt that merely mentions "test(s)" is
+// hijacked by the Pass 2 `analyze` keyword. Pin greenfield creation to
+// `build` deterministically here, before the classifier + brain.
+//
+// VERB must be the LEADING action (after an optional polite/intent prefix) so
+// "explain how to build X", "the build is failing", "rename the build fn" never
+// match. A concrete software-artifact noun must be the object of creation, and
+// build-FAILURE / debug context vetoes the match (those are bug reports).
+const GREENFIELD_BUILD_PREFIX = String.raw `(?:please\s+|pls\s+|plz\s+|can\s+you\s+|could\s+you\s+|would\s+you\s+(?:please\s+)?|help\s+me\s+(?:to\s+)?|let'?s\s+|i\s+(?:want|need)\s+(?:you\s+)?to\s+|i'?d\s+like\s+(?:you\s+)?to\s+|go\s+ahead\s+and\s+|now\s+|then\s+|just\s+)*`; // Regex source for zero or more stacked polite/intent lead-ins (the trailing `*` lets e.g. "please can you" chain).
+const GREENFIELD_BUILD_VERB = String.raw `build|create|make|implement|develop|scaffold|bootstrap|generate|code\s+up|spin\s+up|stand\s+up|set\s+up|put\s+together`; // Regex source for the accepted creation verbs, single-word and two-word forms.
+const GREENFIELD_BUILD_LEAD_RE = new RegExp(`^\\s*${GREENFIELD_BUILD_PREFIX}(?:${GREENFIELD_BUILD_VERB})\\b`, "i"); // Anchored at the start of the prompt: optional whitespace, the optional prefixes, then one creation verb ending on a word boundary; case-insensitive.
+// Concrete software artifacts (the thing being created). Deliberately excludes
+// "test"/"branch"/"commit" — test-generation is handled by isTestGenerationTask
+// and git verbs route elsewhere — so "make the tests pass" / "create a branch"
+// do not trip this.
+const GREENFIELD_BUILD_TARGET_RE = /\b(app|application|web\s*app|webapp|service|micro[-\s]?service|api|endpoint|server|backend|frontend|cli|tool|utility|library|lib|sdk|package|module|component|widget|page|screen|view|dashboard|website|site|portal|platform|system|engine|parser|validator|formatter|serializer|converter|calculator|generator|linter|compiler|interpreter|middleware|pipeline|workflow|daemon|worker|queue|cache|store|database|schema|model|migration|script|bot|game|simulator|prototype|mvp|poc|demo|feature|function|class|hook|wrapper|adapter|plugin|extension|proxy|gateway|router|handler|controller|resolver|crawler|scraper|client)\b/i;
+// Failure / debug context — a "build" that is FAILING / BROKEN is a bug report,
+// not greenfield creation. Cascade to the debug classifier instead.
+const GREENFIELD_BUILD_FAILURE_GUARD_RE = /\b(fail(?:s|ed|ing|ure)?|broken|broke|crash(?:es|ed|ing)?|not\s+working|doesn'?t\s+work|won'?t\s+(?:build|compile|run)|hỏng)\b/i;
+/**
+ * Detect a greenfield CREATE/BUILD request whose correct taskType is `build`.
+ * Tight by construction: requires a LEADING creation verb + a software-artifact
+ * object, and vetoes build-failure/debug context. When unsure it returns false
+ * so the prompt cascades to the classifier + brain (no wrong deterministic pin).
+ *
+ * `build` is a first-class TaskType (greenfield project/feature creation) — it is
+ * the sole producer of that label. It mirrors `generate` for routing (tier/role/
+ * tokens/ceiling) but carries greenfield-specific outcome options + output rules.
+ * This replaces the F17 band-aid that pinned greenfield prompts to `generate`.
+ *
+ * The checks run in this order and the first failing one returns false:
+ *   1. the trimmed prompt is non-empty and at most 400 characters long;
+ *   2. GREENFIELD_BUILD_LEAD_RE matches (creation verb in leading position);
+ *   3. GREENFIELD_BUILD_FAILURE_GUARD_RE does NOT match anywhere in the prompt;
+ *   4. GREENFIELD_BUILD_TARGET_RE matches anywhere in the prompt.
+ *
+ * @param {string} raw - The user's prompt text; it is trimmed before matching.
+ * @returns {boolean} True only when all four checks above pass.
+ */
+export function isGreenfieldBuildTask(raw) {
+    const t = raw.trim();
+    if (!t || t.length > 400) // empty or over-long prompts are never pinned to `build`
+        return false;
+    if (!GREENFIELD_BUILD_LEAD_RE.test(t)) // the creation verb must lead the prompt
+        return false;
+    if (GREENFIELD_BUILD_FAILURE_GUARD_RE.test(t)) // failure/debug wording vetoes the match
+        return false;
+    return GREENFIELD_BUILD_TARGET_RE.test(t); // finally require a software-artifact noun
+}
 /** Detect short continuation prompts ("tiếp tục", "ok", "continue", …). */
 export function isContinuationPhrase(raw) {
     const t = raw.trim();
@@ -536,7 +588,151 @@ export function isSocialPleasantry(raw) {
 }
 export async function layer1Intent(ctx, opts = {}) {
     try {
+        // Pass −1 — MODEL-FIRST classification (MUONROI_LLM_FIRST_CLASSIFY, default ON).
+        //
+        // The configured model classifies taskType/intentKind/style at the very top
+        // of the turn. The keyword-regex cascade below runs only when the model
+        // classifier is not wired (opts.llmFallback absent) or the flag is off. A
+        // failed or empty model call does NOT fall back to regex: it is logged and
+        // returned as an UNKNOWN classification (taskType=null). This is the
+        // structural fix for "classifying tasks via keyword regex misses billions of
+        // natural-language cases" — regex no longer DECIDES intent when a model
+        // classifier is wired. The EE brain still enriches downstream (layer3
+        // retrieval) as before. Trivial turns ("ok", greetings) also go through the
+        // model so chitchat is a semantic decision, not a regex whitelist; the model
+        // returns intentKind="chitchat" for pure pleasantries.
+        if (isLlmFirstClassifyEnabled() && opts.llmFallback) {
+            let llmRes = null;
+            let classifyError = null;
+            try {
+                llmRes = await opts.llmFallback(ctx.raw);
+            }
+            catch (err) {
+                classifyError = err?.message ?? String(err);
+            }
+            if (llmRes) {
+                let intentKind = llmRes.intentKind;
+                // Safety net (never weakens the model): an explicit command/tool-exec
+                // request must never be chitchat — chitchat drops the whole toolset and
+                // breaks the turn. Only ever upgrades chitchat → task.
+                if (intentKind === "chitchat" && hasActionableToolIntent(ctx.raw))
+                    intentKind = "task";
+                const outputStyle = llmRes.outputStyle ?? detectStyleFromText(ctx.raw);
+                const domain = extractDomain("", ctx.raw);
+                const { complexity, score: complexityScore } = scoreComplexity({
+                    rawText: ctx.raw,
+                    taskType: llmRes.taskType,
+                    t0HitCount: 0,
+                    hasMaxSprintsOne: false,
+                });
+                const intentTrace = {
+                    pass1Reason: "llm-first",
+                    pass1Confidence: llmRes.confidence,
+                    pass1TaskType: llmRes.taskType,
+                    pass1Hit: false,
+                    pass2Hit: false,
+                    pass2Pattern: undefined,
+                    pass25ChitchatHit: false,
+                    pass3UnifiedAttempted: false,
+                    pass3UnifiedSucceeded: false,
+                    pass3LegacyTaskAttempted: false,
+                    pass3LegacyTaskSucceeded: false,
+                    pass3LegacyStyleAttempted: false,
+                    pass3LegacyStyleSucceeded: false,
+                    pass4LlmAttempted: true,
+                    pass4LlmSucceeded: true,
+                    styleSource: llmRes.outputStyle ? "brain-unified" : outputStyle ? "explicit-regex" : "none",
+                    finalTaskType: llmRes.taskType,
+                    finalConfidence: llmRes.confidence,
+                    complexity,
+                    complexityScore,
+                };
+                return {
+                    ...ctx,
+                    taskType: llmRes.taskType,
+                    domain,
+                    confidence: llmRes.confidence,
+                    outputStyle,
+                    intentKind,
+                    // Phase 2b: model-decided deliverable drives layer4/layer6 output
+                    // routing instead of keyword regex. null → those layers fall back to
+                    // their legacy regex predicates for this turn.
+                    deliverableKind: llmRes.deliverableKind,
+                    // null lets L6 run its cheap style-rescue if outputStyle is still null;
+                    // EE retrieval enrichment happens downstream in layer3 as usual.
+                    _brainData: null,
+                    _intentTrace: intentTrace,
+                    layers: [
+                        ...ctx.layers,
+                        {
+                            name: "intent-detection",
+                            applied: true,
+                            delta: `taskType=${llmRes.taskType},kind=${intentKind},deliverable=${llmRes.deliverableKind ?? "none"},conf=${llmRes.confidence.toFixed(2)},domain=${domain ?? "none"},style=${outputStyle ?? "none"},source=llm-first`,
+                        },
+                    ],
+                };
+            }
+            // NO fallback. The configured chat model is the SOLE classifier — it is
+            // the model the turn talks to, so it cannot be "offline". A null/failed
+            // result is a real problem: log it loudly and surface it, NEVER paper over
+            // it with a regex guess (which would be confidently wrong — the whole
+            // reason we moved off keyword regex). Return an UNKNOWN classification
+            // (taskType=null): no PIL scaffold is imposed and the chat model still
+            // answers the turn directly — but nothing pretends to know the intent.
+            console.error("[pil.layer1] model-first classify produced no usable result — NOT falling back to regex. " +
+                `reason=${classifyError ?? "null/unparseable model response"} ` +
+                `model-classifier=wired rawPreview=${JSON.stringify(ctx.raw.slice(0, 120))}`);
+            const { complexity: failComplexity, score: failComplexityScore } = scoreComplexity({
+                rawText: ctx.raw,
+                taskType: null,
+                t0HitCount: 0,
+                hasMaxSprintsOne: false,
+            });
+            return {
+                ...ctx,
+                taskType: null,
+                domain: null,
+                confidence: 0,
+                outputStyle: null,
+                // keep-tools: a classify failure must never strip the toolset.
+                intentKind: "task",
+                _brainData: null,
+                _intentTrace: {
+                    pass1Reason: "llm-first-failed",
+                    pass1Confidence: 0,
+                    pass1TaskType: null,
+                    pass1Hit: false,
+                    pass2Hit: false,
+                    pass2Pattern: undefined,
+                    pass25ChitchatHit: false,
+                    pass3UnifiedAttempted: false,
+                    pass3UnifiedSucceeded: false,
+                    pass3LegacyTaskAttempted: false,
+                    pass3LegacyTaskSucceeded: false,
+                    pass3LegacyStyleAttempted: false,
+                    pass3LegacyStyleSucceeded: false,
+                    pass4LlmAttempted: true,
+                    pass4LlmSucceeded: false,
+                    styleSource: "none",
+                    finalTaskType: null,
+                    finalConfidence: 0,
+                    complexity: failComplexity,
+                    complexityScore: failComplexityScore,
+                },
+                layers: [
+                    ...ctx.layers,
+                    {
+                        name: "intent-detection",
+                        applied: false,
+                        delta: `llm-first=FAIL (${classifyError ?? "no-result"}) — surfaced, NO regex fallback`,
+                    },
+                ],
+            };
+        }
         // Pass 0 — deterministic full-prompt overrides (Phase 5 BUG-B / BUG-D).
+        // LEGACY regex cascade — reached ONLY when no model classifier is wired
+        // (opts.llmFallback absent) or the model-first flag is off. On the main chat
+        // path the model classifier is always wired, so this never decides intent in
+        // production. It is NOT a runtime fallback for a failed model call (that path
+        // returns above with a logged failure).
         // Two narrow patterns short-circuit the whole pipeline:
         //  - continuation phrase → general/chitchat
         //  - performance/optimization verbs → refactor/task
@@ -738,6 +934,55 @@ export async function layer1Intent(ctx, opts = {}) {
                 ],
             };
         }
+        if (isGreenfieldBuildTask(ctx.raw)) {
+            const domainPass0 = extractDomain("", ctx.raw);
+            const styleFromText = detectStyleFromText(ctx.raw) ?? "balanced";
+            const { complexity, score: complexityScore } = scoreComplexity({
+                rawText: ctx.raw,
+                taskType: "build",
+                t0HitCount: 0,
+                hasMaxSprintsOne: false,
+            });
+            const intentTrace = {
+                pass1Reason: "pass0:greenfield-build",
+                pass1Confidence: 0.85,
+                pass1TaskType: "build",
+                pass1Hit: true,
+                pass2Hit: false,
+                pass25ChitchatHit: false,
+                pass3UnifiedAttempted: false,
+                pass3UnifiedSucceeded: false,
+                pass3LegacyTaskAttempted: false,
+                pass3LegacyTaskSucceeded: false,
+                pass3LegacyStyleAttempted: false,
+                pass3LegacyStyleSucceeded: false,
+                pass4LlmAttempted: false,
+                pass4LlmSucceeded: false,
+                styleSource: detectStyleFromText(ctx.raw) ? "explicit-regex" : "classifier-default",
+                finalTaskType: "build",
+                finalConfidence: 0.85,
+                complexity,
+                complexityScore,
+            };
+            return {
+                ...ctx,
+                taskType: "build",
+                domain: domainPass0,
+                confidence: 0.85,
+                outputStyle: styleFromText,
+                intentKind: "task",
+                _brainData: null,
+                _intentTrace: intentTrace,
+                layers: [
+                    ...ctx.layers,
+                    {
+                        name: "intent-detection",
+                        applied: true,
+                        delta: `taskType=build,kind=task,conf=0.85,domain=${domainPass0 ?? "none"},style=${styleFromText},pass0=greenfield-build`,
+                    },
+                ],
+            };
+        }
         // Pass 1: local classifier.
         const result = classify(ctx.raw);
         const pass1TaskType = REASON_TO_TASK_TYPE[result.reason] ?? null;
@@ -942,40 +1187,40 @@ export async function layer1Intent(ctx, opts = {}) {
                 //     touch existing files.
                 // 0.7 confidence threshold for Pass 2 keyword override remains
                 // unchanged (HIGH_CONF_THRESHOLD_PASS2 above).
-                const brainRaw = await classifyViaBrain(`You are a multilingual prompt classifier. The user's prompt may be in English, Vietnamese, or a mix of both.
-Classify the prompt's INTENT (not its language). Reply with TWO lowercase words separated by a comma: <category>,<style>
-Category — pick ONE (listed in neutral order, no precedence):
-  analyze       — explain / inspect / review existing code (giải thích, phân tích, review)
-  debug         — fix a bug or investigate failure (sửa lỗi, fix bug, lỗi, traceback)
-  generate      — create new code/file or add new behavior (tạo, sinh code, viết function mới, thêm)
-  refactor      — restructure existing code (tái cấu trúc, refactor)
-  plan          — design / roadmap / architecture (kế hoạch, thiết kế, kiến trúc)
-  documentation — write docs/comments (viết docs, comment, jsdoc)
-  general       — chitchat OR unclear / ambiguous coding intent
-Rules (Phase 4 4P-2 disambiguation):
-- Only return refactor when the user EXPLICITLY uses one of: rename, restructure, reorganize, extract, inline, move, migrate, reshape — applied to EXISTING code WITHOUT adding new behavior.
-- Feature additions ('add flag', 'thêm', 'create endpoint', 'thêm option'), changing a DEFAULT value, adding tests, or improving coverage are 'generate' — NOT refactor.
-- 'improve', 'change', 'update', 'modify', 'đổi', 'cải thiện' alone do NOT imply refactor — pick the specific category by what the change actually does.
-- When the request is ambiguous, prefer 'general' over guessing refactor.
-Negative examples (NOT refactor):
-- "đổi default --max-tool-rounds 8 sang 12" → generate
-- "improve test coverage" → generate
-- "tại sao X trả empty" → analyze
-- "fix CI failing" → debug
-Style — pick ONE:
-  concise (ngắn gọn) | balanced (cân bằng) | detailed (chi tiết)
-Examples:
-  "Refactor this function" → refactor,balanced
-  "tại sao test fail" → debug,balanced
-  "thiết kế hệ thống auth" → plan,detailed
-  "thêm flag --foo" → generate,concise
-  "hi" → general,concise
+                const brainRaw = await classifyViaBrain(`You are a multilingual prompt classifier. The user's prompt may be in English, Vietnamese, or a mix of both.
+Classify the prompt's INTENT (not its language). Reply with TWO lowercase words separated by a comma: <category>,<style>
+Category — pick ONE (listed in neutral order, no precedence):
+  analyze       — explain / inspect / review existing code (giải thích, phân tích, review)
+  debug         — fix a bug or investigate failure (sửa lỗi, fix bug, lỗi, traceback)
+  generate      — create new code/file or add new behavior (tạo, sinh code, viết function mới, thêm)
+  refactor      — restructure existing code (tái cấu trúc, refactor)
+  plan          — design / roadmap / architecture (kế hoạch, thiết kế, kiến trúc)
+  documentation — write docs/comments (viết docs, comment, jsdoc)
+  general       — chitchat OR unclear / ambiguous coding intent
+Rules (Phase 4 4P-2 disambiguation):
+- Only return refactor when the user EXPLICITLY uses one of: rename, restructure, reorganize, extract, inline, move, migrate, reshape — applied to EXISTING code WITHOUT adding new behavior.
+- Feature additions ('add flag', 'thêm', 'create endpoint', 'thêm option'), changing a DEFAULT value, adding tests, or improving coverage are 'generate' — NOT refactor.
+- 'improve', 'change', 'update', 'modify', 'đổi', 'cải thiện' alone do NOT imply refactor — pick the specific category by what the change actually does.
+- When the request is ambiguous, prefer 'general' over guessing refactor.
+Negative examples (NOT refactor):
+- "đổi default --max-tool-rounds 8 sang 12" → generate
+- "improve test coverage" → generate
+- "tại sao X trả empty" → analyze
+- "fix CI failing" → debug
+Style — pick ONE:
+  concise (ngắn gọn) | balanced (cân bằng) | detailed (chi tiết)
+Examples:
+  "Refactor this function" → refactor,balanced
+  "tại sao test fail" → debug,balanced
+  "thiết kế hệ thống auth" → plan,detailed
+  "thêm flag --foo" → generate,concise
+  "hi" → general,concise
 Prompt: "${ctx.raw.slice(0, 500)}"`, 1500);
                 if (brainRaw) {
                     pass3LegacyTaskSucceeded = true;
@@ -1026,9 +1271,9 @@ Prompt: "${ctx.raw.slice(0, 500)}"`, 1500);
                 if (pass3LegacyTaskAttempted) {
                     legacyBrainAttempted = true;
                     pass3LegacyStyleAttempted = true;
-                    const brainRawStyle = await classifyViaBrain(`Detect the user's preferred output style. The prompt may be EN or VN.
-Reply with ONE word: concise (ngắn gọn) | balanced (bình thường) | detailed (chi tiết).
+                    const brainRawStyle = await classifyViaBrain(`Detect the user's preferred output style. The prompt may be EN or VN.
+Reply with ONE word: concise (ngắn gọn) | balanced (bình thường) | detailed (chi tiết).
 Prompt: "${ctx.raw.slice(0, 300)}"`, 800);
                     if (brainRawStyle) {
                         pass3LegacyStyleSucceeded = true;

package/dist/src/pil/layer1-intent.test.js CHANGED Viewed

@@ -9,12 +9,17 @@ vi.mock("../ee/bridge.js", () => ({
 }));
 vi.mock("./config.js", () => ({
     isUnifiedPilEnabled: vi.fn(() => false),
+    // Default OFF so the existing cascade tests below exercise the regex passes.
+    // The model-first gate has its own describe block that flips this to true.
+    isLlmFirstClassifyEnabled: vi.fn(() => false),
 }));
 import { classifyViaBrain } from "../ee/bridge.js";
 import { classify } from "../router/classifier/index.js";
-import { hasActionableToolIntent, isSocialPleasantry, isStatusCheckQuestion, layer1Intent } from "./layer1-intent.js";
+import { isLlmFirstClassifyEnabled } from "./config.js";
+import { hasActionableToolIntent, isGreenfieldBuildTask, isSocialPleasantry, isStatusCheckQuestion, layer1Intent, } from "./layer1-intent.js";
 const mockedClassify = vi.mocked(classify);
 const mockedClassifyViaBrain = vi.mocked(classifyViaBrain);
+const mockedLlmFirst = vi.mocked(isLlmFirstClassifyEnabled);
 function makeCtx(raw) {
     return {
         raw,
@@ -91,7 +96,11 @@ describe("layer1Intent", () => {
     it("invokes brain classification (Pass 3) when taskType is null after Pass 2", async () => {
         mockedClassify.mockReturnValue({ tier: "abstain", reason: "regex:no-match", confidence: 0.1 });
         mockedClassifyViaBrain.mockResolvedValue("generate, concise");
-        const result = await layer1Intent(makeCtx("make me a new service"));
+        // No leading creation verb + no artifact noun → misses Pass 0 greenfield-build
+        // and the Pass 2 keyword rules, so the brain (Pass 3) decides. (A prompt with
+        // an explicit creation verb like "make me a new service" is now pinned to
+        // `build` by Pass 0 and never reaches the brain.)
+        const result = await layer1Intent(makeCtx("work on the onboarding flow"));
         expect(mockedClassifyViaBrain).toHaveBeenCalled();
         expect(result.taskType).toBe("generate");
         expect(result.confidence).toBe(0.55);
@@ -285,6 +294,55 @@ describe("layer1Intent", () => {
             expect(mockedClassify).toHaveBeenCalled();
             expect(result.taskType).toBe("analyze");
         });
+        // Greenfield CREATE/BUILD intent → build (live `/ideal` verify regression).
+        // "build a … microservice …" fell through to the brain → refactor, and
+        // "build a … validator with vitest tests" was hijacked by the Pass 2
+        // `analyze` keyword (the word "tests"). The verb "build" is recognized by no
+        // deterministic pass (Pass 1 create-file regex only fires on the literal
+        // nouns file/component/module/class/function; Pass 2 generate keyword only
+        // has generate/scaffold/bootstrap). `build` is now a first-class TaskType
+        // (greenfield project/feature creation); Pass 0 pins it deterministically
+        // before the classifier + brain.
+        const greenfieldCases = [
+            "build a muonroi-building-block microservice with a fraud-detection rule engine, multi-tenancy, and auth",
+            "build a Node TypeScript ISO-4217 currency code validator with vitest tests",
+            "build a small Node TS lib",
+            "create a REST API in Express",
+            "make a React dashboard component",
+            "implement a rate limiter middleware",
+            "develop a chat application with websockets",
+            "i want to build a todo app",
+        ];
+        for (const phrase of greenfieldCases) {
+            it(`Pass 0 greenfield '${phrase.slice(0, 36)}…' → build/task, skips classifier`, async () => {
+                const result = await layer1Intent(makeCtx(phrase));
+                expect(result.taskType).toBe("build");
+                expect(result.intentKind).toBe("task");
+                expect(result.confidence).toBe(0.85);
+                expect(mockedClassify).not.toHaveBeenCalled();
+                expect(mockedClassifyViaBrain).not.toHaveBeenCalled();
+                expect(result._intentTrace?.pass1Reason).toBe("pass0:greenfield-build");
+            });
+        }
+        it("Pass 0 greenfield defers to cascade for build-FAILURE prompts (debug, not build)", async () => {
+            mockedClassify.mockReturnValue({ tier: "abstain", reason: "regex:no-match", confidence: 0.1 });
+            const result = await layer1Intent(makeCtx("the build is failing after the merge"));
+            expect(mockedClassify).toHaveBeenCalled();
+            expect(result.taskType).not.toBe("build");
+        });
+        it("Pass 0 greenfield defers to cascade for explanation prompts (analyze, not build)", async () => {
+            mockedClassify.mockReturnValue({ tier: "abstain", reason: "regex:no-match", confidence: 0.1 });
+            mockedClassifyViaBrain.mockResolvedValue("analyze,balanced");
+            const result = await layer1Intent(makeCtx("explain how to build a parser"));
+            expect(mockedClassify).toHaveBeenCalled();
+            expect(result.taskType).not.toBe("build");
+        });
+        it("Pass 0 greenfield does NOT fire on refactor of an existing artifact", async () => {
+            mockedClassify.mockReturnValue({ tier: "hot", reason: "regex:refactor", confidence: 0.75 });
+            const result = await layer1Intent(makeCtx("refactor the user service"));
+            expect(mockedClassify).toHaveBeenCalled();
+            expect(result.taskType).toBe("refactor");
+        });
     });
     it("fails open on error — returns ctx unchanged with applied=false", async () => {
         mockedClassify.mockImplementation(() => {
@@ -314,7 +372,13 @@ describe("hasActionableToolIntent — explicit run/tool requests are never chitc
     });
 });
 describe("intentKind guard — a tool/command request must never route as chitchat", () => {
-    const generalFallback = async () => ({ taskType: "general", outputStyle: null, confidence: 0.75 });
+    const generalFallback = async () => ({
+        taskType: "general",
+        outputStyle: null,
+        confidence: 0.75,
+        intentKind: "task",
+        deliverableKind: null,
+    });
     it("flips chitchat → task when the LLM fallback returns 'general' but the prompt is a command request", async () => {
         // Reproduces 817e508f57ee: classify abstains, LLM fallback returns
         // general → intentKind would be chitchat → message-processor drops the
@@ -347,6 +411,60 @@ describe("intentKind guard — a tool/command request must never route as chitch
         expect(result.intentKind).toBe("task");
     });
 });
+describe("isGreenfieldBuildTask — greenfield create/build intent (Pass 0 pin)", () => {
+    const positives = [
+        "build a muonroi-building-block microservice with a fraud-detection rule engine, multi-tenancy, and auth",
+        "build a Node TypeScript ISO-4217 currency code validator with vitest tests",
+        "build a small Node TS lib",
+        "create a REST API in Express",
+        "create a CLI tool for managing tasks",
+        "make a React dashboard component",
+        "implement a rate limiter middleware",
+        "develop a chat application with websockets",
+        "scaffold a new CLI tool",
+        "build me a currency converter",
+        "Build a GraphQL server",
+        "please create an authentication service",
+        "can you build a parser for ISO-8601 dates",
+        "set up a CI pipeline for the repo",
+        "build a faster JSON parser",
+        "i want to build a todo app",
+    ];
+    const negatives = [
+        "the build is failing",
+        "fix the build",
+        "build broke after the merge",
+        "why is the build red?",
+        "the CI pipeline is broken",
+        "explain how to build a parser",
+        "how would you build a microservice?",
+        "should I build this as a monolith or microservices?",
+        "review the auth service I built",
+        "refactor the user service",
+        "rename the build function",
+        "analyze the rule engine",
+        "make it faster",
+        "make the tests pass",
+        "create a branch and commit",
+        "update the readme",
+        "optimize the database queries",
+        "what does the validator do?",
+        "add a button to the form",
+        "the server crashed",
+    ];
+    it("matches greenfield creation requests", () => {
+        for (const p of positives)
+            expect(isGreenfieldBuildTask(p), p).toBe(true);
+    });
+    it("does NOT match debug / analyze / refactor / question prompts", () => {
+        for (const n of negatives)
+            expect(isGreenfieldBuildTask(n), n).toBe(false);
+    });
+    it("returns false on empty / whitespace input", () => {
+        expect(isGreenfieldBuildTask("")).toBe(false);
+        expect(isGreenfieldBuildTask("   ")).toBe(false);
+    });
+});
 describe("isStatusCheckQuestion — meta follow-ups about prior work (session c6387d2c6e1b)", () => {
     it("detects Vietnamese 'đã … chưa' status questions", () => {
         expect(isStatusCheckQuestion("bạn đã có plan chưa nhỉ")).toBe(true);
@@ -422,9 +540,97 @@ describe("Pass 2.6 — social pleasantries route to chitchat (drop the tool-sche
     it("does NOT route a thanks-then-task prompt to chitchat", async () => {
         mockedClassify.mockReturnValue({ tier: "abstain", reason: "regex:no-match", confidence: 0.1 });
         const result = await layer1Intent(makeCtx("thanks, now fix the bug in src/auth/login.ts"), {
-            llmFallback: async () => ({ taskType: "debug", outputStyle: null, confidence: 0.8 }),
+            llmFallback: async () => ({
+                taskType: "debug",
+                outputStyle: null,
+                confidence: 0.8,
+                intentKind: "task",
+                deliverableKind: "code",
+            }),
+        });
+        expect(result.intentKind).toBe("task");
+    });
+});
+describe("layer1Intent — model-first gate (MUONROI_LLM_FIRST_CLASSIFY)", () => {
+    beforeEach(() => {
+        mockedLlmFirst.mockReturnValue(true);
+        // Make the regex cascade obviously WRONG so passing tests prove the model won.
+        mockedClassify.mockReturnValue({ tier: "hot", reason: "regex:create-file", confidence: 0.9 });
+    });
+    it("uses the model's verdict and never runs the regex classifier", async () => {
+        const result = await layer1Intent(makeCtx("bạn thử call tool setup_guide xem được không"), {
+            llmFallback: async () => ({
+                taskType: "general",
+                outputStyle: "concise",
+                confidence: 0.9,
+                intentKind: "task",
+                deliverableKind: "answer",
+            }),
         });
+        expect(result.taskType).toBe("general"); // NOT the regex 'create-file' → generate
         expect(result.intentKind).toBe("task");
+        expect(result.deliverableKind).toBe("answer"); // Phase 2b: model deliverable threads onto ctx
+        expect(result._intentTrace?.pass1Reason).toBe("llm-first");
+        expect(mockedClassify).not.toHaveBeenCalled();
+    });
+    it("marks chitchat from the model for a pure greeting", async () => {
+        const result = await layer1Intent(makeCtx("cảm ơn bạn nhé"), {
+            llmFallback: async () => ({
+                taskType: "general",
+                outputStyle: "concise",
+                confidence: 0.9,
+                intentKind: "chitchat",
+                deliverableKind: "answer",
+            }),
+        });
+        expect(result.intentKind).toBe("chitchat");
+    });
+    it("safety net: an actionable command never routes to chitchat even if the model says chat", async () => {
+        const result = await layer1Intent(makeCtx("run the build: npm run build"), {
+            llmFallback: async () => ({
+                taskType: "general",
+                outputStyle: "concise",
+                confidence: 0.9,
+                intentKind: "chitchat",
+                deliverableKind: "answer",
+            }),
+        });
+        expect(result.intentKind).toBe("task");
+    });
+    it("does NOT fall back to regex when the model returns null — fails loud, no wrong guess", async () => {
+        mockedClassify.mockReturnValue({ tier: "hot", reason: "regex:debug", confidence: 0.85 });
+        const result = await layer1Intent(makeCtx("fix the failing build"), {
+            llmFallback: async () => null,
+        });
+        expect(mockedClassify).not.toHaveBeenCalled(); // regex cascade never runs
+        expect(result.taskType).toBeNull(); // unknown, not a confidently-wrong regex guess
+        expect(result.intentKind).toBe("task"); // keep-tools on failure
+        expect(result._intentTrace?.pass1Reason).toBe("llm-first-failed");
+    });
+    it("does NOT fall back to regex when the model call throws — same fail-loud path", async () => {
+        mockedClassify.mockReturnValue({ tier: "hot", reason: "regex:debug", confidence: 0.85 });
+        const result = await layer1Intent(makeCtx("fix the failing build"), {
+            llmFallback: async () => {
+                throw new Error("rate limited");
+            },
+        });
+        expect(mockedClassify).not.toHaveBeenCalled();
+        expect(result.taskType).toBeNull();
+        expect(result._intentTrace?.pass1Reason).toBe("llm-first-failed");
+    });
+    it("falls back to the cascade when the flag is OFF even with llmFallback wired", async () => {
+        mockedLlmFirst.mockReturnValue(false);
+        mockedClassify.mockReturnValue({ tier: "hot", reason: "regex:debug", confidence: 0.85 });
+        const llm = vi.fn(async () => ({
+            taskType: "general",
+            outputStyle: null,
+            confidence: 0.9,
+            intentKind: "task",
+            deliverableKind: null,
+        }));
+        const result = await layer1Intent(makeCtx("fix the failing build"), { llmFallback: llm });
+        expect(llm).not.toHaveBeenCalled();
+        expect(result.taskType).toBe("debug");
     });
 });
 //# sourceMappingURL=layer1-intent.test.js.map