muonroi-cli 1.4.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +122 -122
- package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
- package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
- package/dist/src/agent-harness/mock-model.d.ts +11 -0
- package/dist/src/agent-harness/mock-model.js +21 -0
- package/dist/src/cli/cost-forensics.js +12 -12
- package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
- package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
- package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
- package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
- package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
- package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
- package/dist/src/council/clarifier.js +9 -1
- package/dist/src/council/debate.js +5 -1
- package/dist/src/council/decisions-lock.js +3 -3
- package/dist/src/council/index.js +12 -5
- package/dist/src/council/leader.d.ts +0 -17
- package/dist/src/council/leader.js +22 -15
- package/dist/src/council/planner.js +1 -1
- package/dist/src/council/prompts.js +63 -57
- package/dist/src/council/types.d.ts +7 -0
- package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
- package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
- package/dist/src/ee/artifact-cache.d.ts +56 -0
- package/dist/src/ee/artifact-cache.js +155 -0
- package/dist/src/ee/artifact-cache.test.d.ts +1 -0
- package/dist/src/ee/artifact-cache.test.js +69 -0
- package/dist/src/ee/auth.d.ts +9 -0
- package/dist/src/ee/auth.js +19 -0
- package/dist/src/ee/ee-onboarding.d.ts +5 -0
- package/dist/src/ee/ee-onboarding.js +76 -0
- package/dist/src/ee/search.js +7 -5
- package/dist/src/ee/search.test.d.ts +1 -0
- package/dist/src/ee/search.test.js +23 -0
- package/dist/src/generated/version.d.ts +1 -1
- package/dist/src/generated/version.js +1 -1
- package/dist/src/headless/output.js +6 -4
- package/dist/src/headless/output.test.js +4 -3
- package/dist/src/index.js +20 -1
- package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
- package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
- package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
- package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
- package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
- package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
- package/dist/src/mcp/auto-setup.js +56 -2
- package/dist/src/mcp/client-pool.d.ts +46 -0
- package/dist/src/mcp/client-pool.js +212 -0
- package/dist/src/mcp/oauth-callback.js +2 -2
- package/dist/src/mcp/parse-headers.test.js +14 -14
- package/dist/src/mcp/runtime.d.ts +28 -0
- package/dist/src/mcp/runtime.js +117 -51
- package/dist/src/mcp/self-verify-runner.d.ts +14 -0
- package/dist/src/mcp/self-verify-runner.js +38 -0
- package/dist/src/mcp/setup-guide-text.d.ts +9 -0
- package/dist/src/mcp/setup-guide-text.js +84 -0
- package/dist/src/mcp/smart-filter.js +49 -0
- package/dist/src/mcp/smoke.test.js +43 -43
- package/dist/src/mcp/tools-server.d.ts +7 -0
- package/dist/src/mcp/tools-server.js +19 -22
- package/dist/src/models/catalog.json +349 -349
- package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
- package/dist/src/ops/doctor.d.ts +3 -2
- package/dist/src/ops/doctor.js +47 -11
- package/dist/src/ops/doctor.test.js +4 -3
- package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
- package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
- package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
- package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
- package/dist/src/orchestrator/batch-turn-runner.js +7 -11
- package/dist/src/orchestrator/compaction.d.ts +2 -0
- package/dist/src/orchestrator/compaction.js +14 -1
- package/dist/src/orchestrator/compaction.test.js +25 -1
- package/dist/src/orchestrator/message-processor.js +72 -32
- package/dist/src/orchestrator/orchestrator.js +26 -0
- package/dist/src/orchestrator/prompts.d.ts +51 -0
- package/dist/src/orchestrator/prompts.js +257 -134
- package/dist/src/orchestrator/scope-ceiling.js +6 -1
- package/dist/src/orchestrator/scope-reminder.d.ts +12 -0
- package/dist/src/orchestrator/scope-reminder.js +16 -0
- package/dist/src/orchestrator/scope-reminder.test.js +22 -1
- package/dist/src/orchestrator/stream-runner.js +23 -15
- package/dist/src/orchestrator/subagent-compactor.d.ts +14 -5
- package/dist/src/orchestrator/subagent-compactor.js +30 -8
- package/dist/src/orchestrator/subagent-compactor.spec.js +18 -0
- package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
- package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
- package/dist/src/pil/__tests__/config.test.js +1 -17
- package/dist/src/pil/__tests__/discovery.test.js +144 -11
- package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
- package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
- package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
- package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
- package/dist/src/pil/__tests__/layer6-output.test.js +158 -18
- package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
- package/dist/src/pil/__tests__/surface-compaction-artifacts.test.d.ts +1 -0
- package/dist/src/pil/__tests__/surface-compaction-artifacts.test.js +112 -0
- package/dist/src/pil/agent-operating-contract.d.ts +1 -1
- package/dist/src/pil/agent-operating-contract.js +2 -0
- package/dist/src/pil/agent-operating-contract.test.js +7 -2
- package/dist/src/pil/cheap-model-playbook.js +35 -35
- package/dist/src/pil/cheap-model-workbooks.js +16 -13
- package/dist/src/pil/clarity-gate.d.ts +21 -19
- package/dist/src/pil/clarity-gate.js +26 -153
- package/dist/src/pil/config.d.ts +9 -1
- package/dist/src/pil/config.js +15 -4
- package/dist/src/pil/discovery.js +211 -136
- package/dist/src/pil/layer1-intent.d.ts +12 -0
- package/dist/src/pil/layer1-intent.js +283 -38
- package/dist/src/pil/layer1-intent.test.js +210 -4
- package/dist/src/pil/layer16-clarity.d.ts +25 -11
- package/dist/src/pil/layer16-clarity.js +19 -306
- package/dist/src/pil/layer3-ee-injection.d.ts +19 -0
- package/dist/src/pil/layer3-ee-injection.js +96 -4
- package/dist/src/pil/layer4-gsd.js +18 -6
- package/dist/src/pil/layer6-output.d.ts +2 -0
- package/dist/src/pil/layer6-output.js +151 -25
- package/dist/src/pil/llm-classify.d.ts +26 -0
- package/dist/src/pil/llm-classify.js +34 -5
- package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
- package/dist/src/pil/native-capabilities-workbook.js +82 -76
- package/dist/src/pil/pipeline.js +15 -9
- package/dist/src/pil/schema.d.ts +8 -0
- package/dist/src/pil/schema.js +12 -1
- package/dist/src/pil/task-tier-map.js +4 -0
- package/dist/src/pil/types.d.ts +11 -1
- package/dist/src/product-loop/done-gate.js +3 -3
- package/dist/src/product-loop/loop-driver.js +18 -18
- package/dist/src/product-loop/progress-snapshot.js +4 -4
- package/dist/src/providers/auth/gemini-oauth.js +6 -15
- package/dist/src/providers/auth/grok-oauth.js +6 -15
- package/dist/src/providers/auth/openai-oauth.js +6 -15
- package/dist/src/providers/mcp-vision-bridge.js +48 -48
- package/dist/src/reporter/index.js +1 -1
- package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
- package/dist/src/scaffold/bb-quality-gate.js +5 -5
- package/dist/src/scaffold/continuation-prompt.js +60 -60
- package/dist/src/scaffold/init-new.js +453 -453
- package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
- package/dist/src/self-qa/agentic-loop.js +24 -19
- package/dist/src/self-qa/spec-emitter.js +26 -23
- package/dist/src/storage/__tests__/migrations.test.js +2 -2
- package/dist/src/storage/interaction-log.js +5 -5
- package/dist/src/storage/migrations.js +122 -122
- package/dist/src/storage/sessions.js +42 -42
- package/dist/src/storage/transcript.js +91 -84
- package/dist/src/storage/usage.js +14 -14
- package/dist/src/storage/workspaces.js +12 -12
- package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
- package/dist/src/tools/__tests__/native-tools.test.js +53 -0
- package/dist/src/tools/git-safety.d.ts +61 -0
- package/dist/src/tools/git-safety.js +141 -0
- package/dist/src/tools/git-safety.test.d.ts +1 -0
- package/dist/src/tools/git-safety.test.js +111 -0
- package/dist/src/tools/native-tools.d.ts +31 -0
- package/dist/src/tools/native-tools.js +273 -0
- package/dist/src/tools/registry-ee-query.test.js +18 -1
- package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
- package/dist/src/tools/registry-git-safety.test.js +92 -0
- package/dist/src/tools/registry.js +52 -6
- package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
- package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
- package/dist/src/ui/app.js +0 -0
- package/dist/src/ui/components/message-view.js +4 -1
- package/dist/src/ui/components/structured-response-view.js +7 -3
- package/dist/src/ui/components/tool-group.js +7 -1
- package/dist/src/ui/markdown-render.d.ts +41 -0
- package/dist/src/ui/markdown-render.js +223 -0
- package/dist/src/ui/markdown.d.ts +10 -0
- package/dist/src/ui/markdown.js +12 -35
- package/dist/src/ui/slash/council-inspect.js +4 -4
- package/dist/src/ui/slash/export.js +4 -4
- package/dist/src/ui/utils/text.d.ts +8 -0
- package/dist/src/ui/utils/text.js +16 -0
- package/dist/src/ui/utils/text.test.d.ts +1 -0
- package/dist/src/ui/utils/text.test.js +23 -0
- package/dist/src/usage/ledger.js +48 -15
- package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
- package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
- package/dist/src/utils/clipboard-image.js +23 -23
- package/dist/src/utils/open-url.d.ts +56 -0
- package/dist/src/utils/open-url.js +58 -0
- package/dist/src/utils/open-url.test.d.ts +1 -0
- package/dist/src/utils/open-url.test.js +86 -0
- package/dist/src/utils/settings.d.ts +12 -0
- package/dist/src/utils/settings.js +48 -0
- package/dist/src/utils/side-question.js +2 -2
- package/dist/src/utils/skills.js +3 -3
- package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
- package/dist/src/verify/environment.js +2 -1
- package/package.json +1 -1
- package/dist/src/pil/layer16-clarity.test.js +0 -31
- /package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
import { classifyViaBrain, pilContext } from "../ee/bridge.js";
|
|
13
13
|
import { classify } from "../router/classifier/index.js";
|
|
14
|
-
import { isUnifiedPilEnabled } from "./config.js";
|
|
14
|
+
import { isLlmFirstClassifyEnabled, isUnifiedPilEnabled } from "./config.js";
|
|
15
15
|
/** File/path reference regex — matches common source-file extensions. */
|
|
16
16
|
const FILE_REF_RE = /[\w./-]+\.(ts|tsx|js|jsx|json|md|py|rs|go|cs)\b/gi;
|
|
17
17
|
/** Keywords that force a "low" complexity signal (additive score -3). */
|
|
@@ -205,6 +205,58 @@ export function isPerformanceRefactor(raw) {
|
|
|
205
205
|
return false;
|
|
206
206
|
return true;
|
|
207
207
|
}
|
|
208
|
+
// Greenfield CREATE/BUILD intent → build.
|
|
209
|
+
//
|
|
210
|
+
// Live `/ideal` E2E verify (fix/council-oauth-reachable): greenfield BUILD
|
|
211
|
+
// prompts were misclassified at the pil-acceptance card —
|
|
212
|
+
// "build a muonroi-building-block microservice …" → refactor
|
|
213
|
+
// "build a Node TS ISO-4217 currency validator w/ vitest tests" → analyze
|
|
214
|
+
// Root cause: the verb "build" (and bare "create X" where X is not one of the
|
|
215
|
+
// literal nouns file/component/module/class/function) is recognized by NO
|
|
216
|
+
// deterministic pass. Pass 1's create-file regex only fires on those literal
|
|
217
|
+
// nouns; Pass 2's `generate` keyword only has generate/scaffold/bootstrap. So
|
|
218
|
+
// greenfield "build/create/implement X" prompts fall through to the brain/LLM
|
|
219
|
+
// — documented to bias toward `refactor` for any code touch (see Pass 3 legacy
|
|
220
|
+
// prompt, 4P-2) — and worse, a build prompt that merely mentions "test(s)" is
|
|
221
|
+
// hijacked by the Pass 2 `analyze` keyword. Pin greenfield creation to
|
|
222
|
+
// `build` deterministically here, before the classifier + brain.
|
|
223
|
+
//
|
|
224
|
+
// VERB must be the LEADING action (after an optional polite/intent prefix) so
|
|
225
|
+
// "explain how to build X", "the build is failing", "rename the build fn" never
|
|
226
|
+
// match. A concrete software-artifact noun must be the object of creation, and
|
|
227
|
+
// build-FAILURE / debug context vetoes the match (those are bug reports).
|
|
228
|
+
const GREENFIELD_BUILD_PREFIX = String.raw `(?:please\s+|pls\s+|plz\s+|can\s+you\s+|could\s+you\s+|would\s+you\s+(?:please\s+)?|help\s+me\s+(?:to\s+)?|let'?s\s+|i\s+(?:want|need)\s+(?:you\s+)?to\s+|i'?d\s+like\s+(?:you\s+)?to\s+|go\s+ahead\s+and\s+|now\s+|then\s+|just\s+)*`;
|
|
229
|
+
const GREENFIELD_BUILD_VERB = String.raw `build|create|make|implement|develop|scaffold|bootstrap|generate|code\s+up|spin\s+up|stand\s+up|set\s+up|put\s+together`;
|
|
230
|
+
const GREENFIELD_BUILD_LEAD_RE = new RegExp(`^\\s*${GREENFIELD_BUILD_PREFIX}(?:${GREENFIELD_BUILD_VERB})\\b`, "i");
|
|
231
|
+
// Concrete software artifacts (the thing being created). Deliberately excludes
|
|
232
|
+
// "test"/"branch"/"commit" — test-generation is handled by isTestGenerationTask
|
|
233
|
+
// and git verbs route elsewhere — so "make the tests pass" / "create a branch"
|
|
234
|
+
// do not trip this.
|
|
235
|
+
const GREENFIELD_BUILD_TARGET_RE = /\b(app|application|web\s*app|webapp|service|micro[-\s]?service|api|endpoint|server|backend|frontend|cli|tool|utility|library|lib|sdk|package|module|component|widget|page|screen|view|dashboard|website|site|portal|platform|system|engine|parser|validator|formatter|serializer|converter|calculator|generator|linter|compiler|interpreter|middleware|pipeline|workflow|daemon|worker|queue|cache|store|database|schema|model|migration|script|bot|game|simulator|prototype|mvp|poc|demo|feature|function|class|hook|wrapper|adapter|plugin|extension|proxy|gateway|router|handler|controller|resolver|crawler|scraper|client)\b/i;
|
|
236
|
+
// Failure / debug context — a "build" that is FAILING / BROKEN is a bug report,
|
|
237
|
+
// not greenfield creation. Cascade to the debug classifier instead.
|
|
238
|
+
const GREENFIELD_BUILD_FAILURE_GUARD_RE = /\b(fail(?:s|ed|ing|ure)?|broken|broke|crash(?:es|ed|ing)?|not\s+working|doesn'?t\s+work|won'?t\s+(?:build|compile|run)|hỏng)\b/i;
|
|
239
|
+
/**
|
|
240
|
+
* Detect a greenfield CREATE/BUILD request whose correct taskType is `build`.
|
|
241
|
+
* Tight by construction: requires a LEADING creation verb + a software-artifact
|
|
242
|
+
* object, and vetoes build-failure/debug context. When unsure it returns false
|
|
243
|
+
* so the prompt cascades to the classifier + brain (no wrong deterministic pin).
|
|
244
|
+
*
|
|
245
|
+
* `build` is a first-class TaskType (greenfield project/feature creation) — it is
|
|
246
|
+
* the sole producer of that label. It mirrors `generate` for routing (tier/role/
|
|
247
|
+
* tokens/ceiling) but carries greenfield-specific outcome options + output rules.
|
|
248
|
+
* This replaces the F17 band-aid that pinned greenfield prompts to `generate`.
|
|
249
|
+
*/
|
|
250
|
+
export function isGreenfieldBuildTask(raw) {
|
|
251
|
+
const t = raw.trim();
|
|
252
|
+
if (!t || t.length > 400)
|
|
253
|
+
return false;
|
|
254
|
+
if (!GREENFIELD_BUILD_LEAD_RE.test(t))
|
|
255
|
+
return false;
|
|
256
|
+
if (GREENFIELD_BUILD_FAILURE_GUARD_RE.test(t))
|
|
257
|
+
return false;
|
|
258
|
+
return GREENFIELD_BUILD_TARGET_RE.test(t);
|
|
259
|
+
}
|
|
208
260
|
/** Detect short continuation prompts ("tiếp tục", "ok", "continue", …). */
|
|
209
261
|
export function isContinuationPhrase(raw) {
|
|
210
262
|
const t = raw.trim();
|
|
@@ -536,7 +588,151 @@ export function isSocialPleasantry(raw) {
|
|
|
536
588
|
}
|
|
537
589
|
export async function layer1Intent(ctx, opts = {}) {
|
|
538
590
|
try {
|
|
591
|
+
// Pass −1 — MODEL-FIRST classification (MUONROI_LLM_FIRST_CLASSIFY, default ON).
|
|
592
|
+
//
|
|
593
|
+
// The configured model classifies taskType/intentKind/style at the very top
|
|
594
|
+
// of the turn; the keyword-regex cascade below becomes the OFFLINE fallback,
|
|
595
|
+
// used only when the model is not wired (opts.llmFallback absent). A failed or
|
|
596
|
+
// null model call does NOT fall back to regex; it returns an unknown classification. This is the structural fix for "classifying tasks via keyword regex
|
|
597
|
+
// misses billions of natural-language cases" — regex no longer DECIDES intent,
|
|
598
|
+
// it only catches the model-offline case. The EE brain still enriches
|
|
599
|
+
// downstream (layer3 retrieval) as before. Trivial turns ("ok", greetings)
|
|
600
|
+
// also go through the model so chitchat is a semantic decision, not a regex
|
|
601
|
+
// whitelist; the model returns intentKind="chitchat" for pure pleasantries.
|
|
602
|
+
if (isLlmFirstClassifyEnabled() && opts.llmFallback) {
|
|
603
|
+
let llmRes = null;
|
|
604
|
+
let classifyError = null;
|
|
605
|
+
try {
|
|
606
|
+
llmRes = await opts.llmFallback(ctx.raw);
|
|
607
|
+
}
|
|
608
|
+
catch (err) {
|
|
609
|
+
classifyError = err?.message ?? String(err);
|
|
610
|
+
}
|
|
611
|
+
if (llmRes) {
|
|
612
|
+
let intentKind = llmRes.intentKind;
|
|
613
|
+
// Safety net (never weakens the model): an explicit command/tool-exec
|
|
614
|
+
// request must never be chitchat — chitchat drops the whole toolset and
|
|
615
|
+
// breaks the turn. Only ever upgrades chitchat → task.
|
|
616
|
+
if (intentKind === "chitchat" && hasActionableToolIntent(ctx.raw))
|
|
617
|
+
intentKind = "task";
|
|
618
|
+
const outputStyle = llmRes.outputStyle ?? detectStyleFromText(ctx.raw);
|
|
619
|
+
const domain = extractDomain("", ctx.raw);
|
|
620
|
+
const { complexity, score: complexityScore } = scoreComplexity({
|
|
621
|
+
rawText: ctx.raw,
|
|
622
|
+
taskType: llmRes.taskType,
|
|
623
|
+
t0HitCount: 0,
|
|
624
|
+
hasMaxSprintsOne: false,
|
|
625
|
+
});
|
|
626
|
+
const intentTrace = {
|
|
627
|
+
pass1Reason: "llm-first",
|
|
628
|
+
pass1Confidence: llmRes.confidence,
|
|
629
|
+
pass1TaskType: llmRes.taskType,
|
|
630
|
+
pass1Hit: false,
|
|
631
|
+
pass2Hit: false,
|
|
632
|
+
pass2Pattern: undefined,
|
|
633
|
+
pass25ChitchatHit: false,
|
|
634
|
+
pass3UnifiedAttempted: false,
|
|
635
|
+
pass3UnifiedSucceeded: false,
|
|
636
|
+
pass3LegacyTaskAttempted: false,
|
|
637
|
+
pass3LegacyTaskSucceeded: false,
|
|
638
|
+
pass3LegacyStyleAttempted: false,
|
|
639
|
+
pass3LegacyStyleSucceeded: false,
|
|
640
|
+
pass4LlmAttempted: true,
|
|
641
|
+
pass4LlmSucceeded: true,
|
|
642
|
+
styleSource: llmRes.outputStyle ? "brain-unified" : outputStyle ? "explicit-regex" : "none",
|
|
643
|
+
finalTaskType: llmRes.taskType,
|
|
644
|
+
finalConfidence: llmRes.confidence,
|
|
645
|
+
complexity,
|
|
646
|
+
complexityScore,
|
|
647
|
+
};
|
|
648
|
+
return {
|
|
649
|
+
...ctx,
|
|
650
|
+
taskType: llmRes.taskType,
|
|
651
|
+
domain,
|
|
652
|
+
confidence: llmRes.confidence,
|
|
653
|
+
outputStyle,
|
|
654
|
+
intentKind,
|
|
655
|
+
// Phase 2b: model-decided deliverable drives layer4/layer6 output
|
|
656
|
+
// routing instead of keyword regex. null → those layers fall back to
|
|
657
|
+
// their legacy regex predicates for this turn.
|
|
658
|
+
deliverableKind: llmRes.deliverableKind,
|
|
659
|
+
// null lets L6 run its cheap style-rescue if outputStyle is still null;
|
|
660
|
+
// EE retrieval enrichment happens downstream in layer3 as usual.
|
|
661
|
+
_brainData: null,
|
|
662
|
+
_intentTrace: intentTrace,
|
|
663
|
+
layers: [
|
|
664
|
+
...ctx.layers,
|
|
665
|
+
{
|
|
666
|
+
name: "intent-detection",
|
|
667
|
+
applied: true,
|
|
668
|
+
delta: `taskType=${llmRes.taskType},kind=${intentKind},deliverable=${llmRes.deliverableKind ?? "none"},conf=${llmRes.confidence.toFixed(2)},domain=${domain ?? "none"},style=${outputStyle ?? "none"},source=llm-first`,
|
|
669
|
+
},
|
|
670
|
+
],
|
|
671
|
+
};
|
|
672
|
+
}
|
|
673
|
+
// NO fallback. The configured chat model is the SOLE classifier — it is
|
|
674
|
+
// the model the turn talks to, so it cannot be "offline". A null/failed
|
|
675
|
+
// result is a real problem: log it loudly and surface it, NEVER paper over
|
|
676
|
+
// it with a regex guess (which would be confidently wrong — the whole
|
|
677
|
+
// reason we moved off keyword regex). Return an UNKNOWN classification
|
|
678
|
+
// (taskType=null): no PIL scaffold is imposed and the chat model still
|
|
679
|
+
// answers the turn directly — but nothing pretends to know the intent.
|
|
680
|
+
console.error("[pil.layer1] model-first classify produced no usable result — NOT falling back to regex. " +
|
|
681
|
+
`reason=${classifyError ?? "null/unparseable model response"} ` +
|
|
682
|
+
`model-classifier=wired rawPreview=${JSON.stringify(ctx.raw.slice(0, 120))}`);
|
|
683
|
+
const { complexity: failComplexity, score: failComplexityScore } = scoreComplexity({
|
|
684
|
+
rawText: ctx.raw,
|
|
685
|
+
taskType: null,
|
|
686
|
+
t0HitCount: 0,
|
|
687
|
+
hasMaxSprintsOne: false,
|
|
688
|
+
});
|
|
689
|
+
return {
|
|
690
|
+
...ctx,
|
|
691
|
+
taskType: null,
|
|
692
|
+
domain: null,
|
|
693
|
+
confidence: 0,
|
|
694
|
+
outputStyle: null,
|
|
695
|
+
// keep-tools: a classify failure must never strip the toolset.
|
|
696
|
+
intentKind: "task",
|
|
697
|
+
_brainData: null,
|
|
698
|
+
_intentTrace: {
|
|
699
|
+
pass1Reason: "llm-first-failed",
|
|
700
|
+
pass1Confidence: 0,
|
|
701
|
+
pass1TaskType: null,
|
|
702
|
+
pass1Hit: false,
|
|
703
|
+
pass2Hit: false,
|
|
704
|
+
pass2Pattern: undefined,
|
|
705
|
+
pass25ChitchatHit: false,
|
|
706
|
+
pass3UnifiedAttempted: false,
|
|
707
|
+
pass3UnifiedSucceeded: false,
|
|
708
|
+
pass3LegacyTaskAttempted: false,
|
|
709
|
+
pass3LegacyTaskSucceeded: false,
|
|
710
|
+
pass3LegacyStyleAttempted: false,
|
|
711
|
+
pass3LegacyStyleSucceeded: false,
|
|
712
|
+
pass4LlmAttempted: true,
|
|
713
|
+
pass4LlmSucceeded: false,
|
|
714
|
+
styleSource: "none",
|
|
715
|
+
finalTaskType: null,
|
|
716
|
+
finalConfidence: 0,
|
|
717
|
+
complexity: failComplexity,
|
|
718
|
+
complexityScore: failComplexityScore,
|
|
719
|
+
},
|
|
720
|
+
layers: [
|
|
721
|
+
...ctx.layers,
|
|
722
|
+
{
|
|
723
|
+
name: "intent-detection",
|
|
724
|
+
applied: false,
|
|
725
|
+
delta: `llm-first=FAIL (${classifyError ?? "no-result"}) — surfaced, NO regex fallback`,
|
|
726
|
+
},
|
|
727
|
+
],
|
|
728
|
+
};
|
|
729
|
+
}
|
|
539
730
|
// Pass 0 — deterministic full-prompt overrides (Phase 5 BUG-B / BUG-D).
|
|
731
|
+
// LEGACY regex cascade — reached ONLY when no model classifier is wired
|
|
732
|
+
// (opts.llmFallback absent) or the model-first flag is off. On the main chat
|
|
733
|
+
// path the model classifier is always wired, so this never decides intent in
|
|
734
|
+
// production. It is NOT a runtime fallback for a failed model call (that path
|
|
735
|
+
// returns above with a logged failure).
|
|
540
736
|
// Two narrow patterns short-circuit the whole pipeline:
|
|
541
737
|
// - continuation phrase → general/chitchat
|
|
542
738
|
// - performance/optimization verbs → refactor/task
|
|
@@ -738,6 +934,55 @@ export async function layer1Intent(ctx, opts = {}) {
|
|
|
738
934
|
],
|
|
739
935
|
};
|
|
740
936
|
}
|
|
937
|
+
if (isGreenfieldBuildTask(ctx.raw)) {
|
|
938
|
+
const domainPass0 = extractDomain("", ctx.raw);
|
|
939
|
+
const styleFromText = detectStyleFromText(ctx.raw) ?? "balanced";
|
|
940
|
+
const { complexity, score: complexityScore } = scoreComplexity({
|
|
941
|
+
rawText: ctx.raw,
|
|
942
|
+
taskType: "build",
|
|
943
|
+
t0HitCount: 0,
|
|
944
|
+
hasMaxSprintsOne: false,
|
|
945
|
+
});
|
|
946
|
+
const intentTrace = {
|
|
947
|
+
pass1Reason: "pass0:greenfield-build",
|
|
948
|
+
pass1Confidence: 0.85,
|
|
949
|
+
pass1TaskType: "build",
|
|
950
|
+
pass1Hit: true,
|
|
951
|
+
pass2Hit: false,
|
|
952
|
+
pass25ChitchatHit: false,
|
|
953
|
+
pass3UnifiedAttempted: false,
|
|
954
|
+
pass3UnifiedSucceeded: false,
|
|
955
|
+
pass3LegacyTaskAttempted: false,
|
|
956
|
+
pass3LegacyTaskSucceeded: false,
|
|
957
|
+
pass3LegacyStyleAttempted: false,
|
|
958
|
+
pass3LegacyStyleSucceeded: false,
|
|
959
|
+
pass4LlmAttempted: false,
|
|
960
|
+
pass4LlmSucceeded: false,
|
|
961
|
+
styleSource: detectStyleFromText(ctx.raw) ? "explicit-regex" : "classifier-default",
|
|
962
|
+
finalTaskType: "build",
|
|
963
|
+
finalConfidence: 0.85,
|
|
964
|
+
complexity,
|
|
965
|
+
complexityScore,
|
|
966
|
+
};
|
|
967
|
+
return {
|
|
968
|
+
...ctx,
|
|
969
|
+
taskType: "build",
|
|
970
|
+
domain: domainPass0,
|
|
971
|
+
confidence: 0.85,
|
|
972
|
+
outputStyle: styleFromText,
|
|
973
|
+
intentKind: "task",
|
|
974
|
+
_brainData: null,
|
|
975
|
+
_intentTrace: intentTrace,
|
|
976
|
+
layers: [
|
|
977
|
+
...ctx.layers,
|
|
978
|
+
{
|
|
979
|
+
name: "intent-detection",
|
|
980
|
+
applied: true,
|
|
981
|
+
delta: `taskType=build,kind=task,conf=0.85,domain=${domainPass0 ?? "none"},style=${styleFromText},pass0=greenfield-build`,
|
|
982
|
+
},
|
|
983
|
+
],
|
|
984
|
+
};
|
|
985
|
+
}
|
|
741
986
|
// Pass 1: local classifier.
|
|
742
987
|
const result = classify(ctx.raw);
|
|
743
988
|
const pass1TaskType = REASON_TO_TASK_TYPE[result.reason] ?? null;
|
|
@@ -942,40 +1187,40 @@ export async function layer1Intent(ctx, opts = {}) {
|
|
|
942
1187
|
// touch existing files.
|
|
943
1188
|
// 0.7 confidence threshold for Pass 2 keyword override remains
|
|
944
1189
|
// unchanged (HIGH_CONF_THRESHOLD_PASS2 above).
|
|
945
|
-
const brainRaw = await classifyViaBrain(`You are a multilingual prompt classifier. The user's prompt may be in English, Vietnamese, or a mix of both.
|
|
946
|
-
Classify the prompt's INTENT (not its language). Reply with TWO lowercase words separated by a comma: <category>,<style>
|
|
947
|
-
|
|
948
|
-
Category — pick ONE (listed in neutral order, no precedence):
|
|
949
|
-
analyze — explain / inspect / review existing code (giải thích, phân tích, review)
|
|
950
|
-
debug — fix a bug or investigate failure (sửa lỗi, fix bug, lỗi, traceback)
|
|
951
|
-
generate — create new code/file or add new behavior (tạo, sinh code, viết function mới, thêm)
|
|
952
|
-
refactor — restructure existing code (tái cấu trúc, refactor)
|
|
953
|
-
plan — design / roadmap / architecture (kế hoạch, thiết kế, kiến trúc)
|
|
954
|
-
documentation — write docs/comments (viết docs, comment, jsdoc)
|
|
955
|
-
general — chitchat OR unclear / ambiguous coding intent
|
|
956
|
-
|
|
957
|
-
Rules (Phase 4 4P-2 disambiguation):
|
|
958
|
-
- Only return refactor when the user EXPLICITLY uses one of: rename, restructure, reorganize, extract, inline, move, migrate, reshape — applied to EXISTING code WITHOUT adding new behavior.
|
|
959
|
-
- Feature additions ('add flag', 'thêm', 'create endpoint', 'thêm option'), changing a DEFAULT value, adding tests, or improving coverage are 'generate' — NOT refactor.
|
|
960
|
-
- 'improve', 'change', 'update', 'modify', 'đổi', 'cải thiện' alone do NOT imply refactor — pick the specific category by what the change actually does.
|
|
961
|
-
- When the request is ambiguous, prefer 'general' over guessing refactor.
|
|
962
|
-
|
|
963
|
-
Negative examples (NOT refactor):
|
|
964
|
-
- "đổi default --max-tool-rounds 8 sang 12" → generate
|
|
965
|
-
- "improve test coverage" → generate
|
|
966
|
-
- "tại sao X trả empty" → analyze
|
|
967
|
-
- "fix CI failing" → debug
|
|
968
|
-
|
|
969
|
-
Style — pick ONE:
|
|
970
|
-
concise (ngắn gọn) | balanced (cân bằng) | detailed (chi tiết)
|
|
971
|
-
|
|
972
|
-
Examples:
|
|
973
|
-
"Refactor this function" → refactor,balanced
|
|
974
|
-
"tại sao test fail" → debug,balanced
|
|
975
|
-
"thiết kế hệ thống auth" → plan,detailed
|
|
976
|
-
"thêm flag --foo" → generate,concise
|
|
977
|
-
"hi" → general,concise
|
|
978
|
-
|
|
1190
|
+
const brainRaw = await classifyViaBrain(`You are a multilingual prompt classifier. The user's prompt may be in English, Vietnamese, or a mix of both.
|
|
1191
|
+
Classify the prompt's INTENT (not its language). Reply with TWO lowercase words separated by a comma: <category>,<style>
|
|
1192
|
+
|
|
1193
|
+
Category — pick ONE (listed in neutral order, no precedence):
|
|
1194
|
+
analyze — explain / inspect / review existing code (giải thích, phân tích, review)
|
|
1195
|
+
debug — fix a bug or investigate failure (sửa lỗi, fix bug, lỗi, traceback)
|
|
1196
|
+
generate — create new code/file or add new behavior (tạo, sinh code, viết function mới, thêm)
|
|
1197
|
+
refactor — restructure existing code (tái cấu trúc, refactor)
|
|
1198
|
+
plan — design / roadmap / architecture (kế hoạch, thiết kế, kiến trúc)
|
|
1199
|
+
documentation — write docs/comments (viết docs, comment, jsdoc)
|
|
1200
|
+
general — chitchat OR unclear / ambiguous coding intent
|
|
1201
|
+
|
|
1202
|
+
Rules (Phase 4 4P-2 disambiguation):
|
|
1203
|
+
- Only return refactor when the user EXPLICITLY uses one of: rename, restructure, reorganize, extract, inline, move, migrate, reshape — applied to EXISTING code WITHOUT adding new behavior.
|
|
1204
|
+
- Feature additions ('add flag', 'thêm', 'create endpoint', 'thêm option'), changing a DEFAULT value, adding tests, or improving coverage are 'generate' — NOT refactor.
|
|
1205
|
+
- 'improve', 'change', 'update', 'modify', 'đổi', 'cải thiện' alone do NOT imply refactor — pick the specific category by what the change actually does.
|
|
1206
|
+
- When the request is ambiguous, prefer 'general' over guessing refactor.
|
|
1207
|
+
|
|
1208
|
+
Negative examples (NOT refactor):
|
|
1209
|
+
- "đổi default --max-tool-rounds 8 sang 12" → generate
|
|
1210
|
+
- "improve test coverage" → generate
|
|
1211
|
+
- "tại sao X trả empty" → analyze
|
|
1212
|
+
- "fix CI failing" → debug
|
|
1213
|
+
|
|
1214
|
+
Style — pick ONE:
|
|
1215
|
+
concise (ngắn gọn) | balanced (cân bằng) | detailed (chi tiết)
|
|
1216
|
+
|
|
1217
|
+
Examples:
|
|
1218
|
+
"Refactor this function" → refactor,balanced
|
|
1219
|
+
"tại sao test fail" → debug,balanced
|
|
1220
|
+
"thiết kế hệ thống auth" → plan,detailed
|
|
1221
|
+
"thêm flag --foo" → generate,concise
|
|
1222
|
+
"hi" → general,concise
|
|
1223
|
+
|
|
979
1224
|
Prompt: "${ctx.raw.slice(0, 500)}"`, 1500);
|
|
980
1225
|
if (brainRaw) {
|
|
981
1226
|
pass3LegacyTaskSucceeded = true;
|
|
@@ -1026,9 +1271,9 @@ Prompt: "${ctx.raw.slice(0, 500)}"`, 1500);
|
|
|
1026
1271
|
if (pass3LegacyTaskAttempted) {
|
|
1027
1272
|
legacyBrainAttempted = true;
|
|
1028
1273
|
pass3LegacyStyleAttempted = true;
|
|
1029
|
-
const brainRawStyle = await classifyViaBrain(`Detect the user's preferred output style. The prompt may be EN or VN.
|
|
1030
|
-
Reply with ONE word: concise (ngắn gọn) | balanced (bình thường) | detailed (chi tiết).
|
|
1031
|
-
|
|
1274
|
+
const brainRawStyle = await classifyViaBrain(`Detect the user's preferred output style. The prompt may be EN or VN.
|
|
1275
|
+
Reply with ONE word: concise (ngắn gọn) | balanced (bình thường) | detailed (chi tiết).
|
|
1276
|
+
|
|
1032
1277
|
Prompt: "${ctx.raw.slice(0, 300)}"`, 800);
|
|
1033
1278
|
if (brainRawStyle) {
|
|
1034
1279
|
pass3LegacyStyleSucceeded = true;
|
|
@@ -9,12 +9,17 @@ vi.mock("../ee/bridge.js", () => ({
|
|
|
9
9
|
}));
|
|
10
10
|
vi.mock("./config.js", () => ({
|
|
11
11
|
isUnifiedPilEnabled: vi.fn(() => false),
|
|
12
|
+
// Default OFF so the existing cascade tests below exercise the regex passes.
|
|
13
|
+
// The model-first gate has its own describe block that flips this to true.
|
|
14
|
+
isLlmFirstClassifyEnabled: vi.fn(() => false),
|
|
12
15
|
}));
|
|
13
16
|
import { classifyViaBrain } from "../ee/bridge.js";
|
|
14
17
|
import { classify } from "../router/classifier/index.js";
|
|
15
|
-
import {
|
|
18
|
+
import { isLlmFirstClassifyEnabled } from "./config.js";
|
|
19
|
+
import { hasActionableToolIntent, isGreenfieldBuildTask, isSocialPleasantry, isStatusCheckQuestion, layer1Intent, } from "./layer1-intent.js";
|
|
16
20
|
const mockedClassify = vi.mocked(classify);
|
|
17
21
|
const mockedClassifyViaBrain = vi.mocked(classifyViaBrain);
|
|
22
|
+
const mockedLlmFirst = vi.mocked(isLlmFirstClassifyEnabled);
|
|
18
23
|
function makeCtx(raw) {
|
|
19
24
|
return {
|
|
20
25
|
raw,
|
|
@@ -91,7 +96,11 @@ describe("layer1Intent", () => {
|
|
|
91
96
|
it("invokes brain classification (Pass 3) when taskType is null after Pass 2", async () => {
|
|
92
97
|
mockedClassify.mockReturnValue({ tier: "abstain", reason: "regex:no-match", confidence: 0.1 });
|
|
93
98
|
mockedClassifyViaBrain.mockResolvedValue("generate, concise");
|
|
94
|
-
|
|
99
|
+
// No leading creation verb + no artifact noun → misses Pass 0 greenfield-build
|
|
100
|
+
// and the Pass 2 keyword rules, so the brain (Pass 3) decides. (A prompt with
|
|
101
|
+
// an explicit creation verb like "make me a new service" is now pinned to
|
|
102
|
+
// `build` by Pass 0 and never reaches the brain.)
|
|
103
|
+
const result = await layer1Intent(makeCtx("work on the onboarding flow"));
|
|
95
104
|
expect(mockedClassifyViaBrain).toHaveBeenCalled();
|
|
96
105
|
expect(result.taskType).toBe("generate");
|
|
97
106
|
expect(result.confidence).toBe(0.55);
|
|
@@ -285,6 +294,55 @@ describe("layer1Intent", () => {
|
|
|
285
294
|
expect(mockedClassify).toHaveBeenCalled();
|
|
286
295
|
expect(result.taskType).toBe("analyze");
|
|
287
296
|
});
|
|
297
|
+
// Greenfield CREATE/BUILD intent → build (live `/ideal` verify regression).
|
|
298
|
+
// "build a … microservice …" fell through to the brain → refactor, and
|
|
299
|
+
// "build a … validator with vitest tests" was hijacked by the Pass 2
|
|
300
|
+
// `analyze` keyword (the word "tests"). The verb "build" is recognized by no
|
|
301
|
+
// deterministic pass (Pass 1 create-file regex only fires on the literal
|
|
302
|
+
// nouns file/component/module/class/function; Pass 2 generate keyword only
|
|
303
|
+
// has generate/scaffold/bootstrap). `build` is now a first-class TaskType
|
|
304
|
+
// (greenfield project/feature creation); Pass 0 pins it deterministically
|
|
305
|
+
// before the classifier + brain.
|
|
306
|
+
const greenfieldCases = [
|
|
307
|
+
"build a muonroi-building-block microservice with a fraud-detection rule engine, multi-tenancy, and auth",
|
|
308
|
+
"build a Node TypeScript ISO-4217 currency code validator with vitest tests",
|
|
309
|
+
"build a small Node TS lib",
|
|
310
|
+
"create a REST API in Express",
|
|
311
|
+
"make a React dashboard component",
|
|
312
|
+
"implement a rate limiter middleware",
|
|
313
|
+
"develop a chat application with websockets",
|
|
314
|
+
"i want to build a todo app",
|
|
315
|
+
];
|
|
316
|
+
for (const phrase of greenfieldCases) {
|
|
317
|
+
it(`Pass 0 greenfield '${phrase.slice(0, 36)}…' → build/task, skips classifier`, async () => {
|
|
318
|
+
const result = await layer1Intent(makeCtx(phrase));
|
|
319
|
+
expect(result.taskType).toBe("build");
|
|
320
|
+
expect(result.intentKind).toBe("task");
|
|
321
|
+
expect(result.confidence).toBe(0.85);
|
|
322
|
+
expect(mockedClassify).not.toHaveBeenCalled();
|
|
323
|
+
expect(mockedClassifyViaBrain).not.toHaveBeenCalled();
|
|
324
|
+
expect(result._intentTrace?.pass1Reason).toBe("pass0:greenfield-build");
|
|
325
|
+
});
|
|
326
|
+
}
|
|
327
|
+
it("Pass 0 greenfield defers to cascade for build-FAILURE prompts (debug, not build)", async () => {
|
|
328
|
+
mockedClassify.mockReturnValue({ tier: "abstain", reason: "regex:no-match", confidence: 0.1 });
|
|
329
|
+
const result = await layer1Intent(makeCtx("the build is failing after the merge"));
|
|
330
|
+
expect(mockedClassify).toHaveBeenCalled();
|
|
331
|
+
expect(result.taskType).not.toBe("build");
|
|
332
|
+
});
|
|
333
|
+
it("Pass 0 greenfield defers to cascade for explanation prompts (analyze, not build)", async () => {
|
|
334
|
+
mockedClassify.mockReturnValue({ tier: "abstain", reason: "regex:no-match", confidence: 0.1 });
|
|
335
|
+
mockedClassifyViaBrain.mockResolvedValue("analyze,balanced");
|
|
336
|
+
const result = await layer1Intent(makeCtx("explain how to build a parser"));
|
|
337
|
+
expect(mockedClassify).toHaveBeenCalled();
|
|
338
|
+
expect(result.taskType).not.toBe("build");
|
|
339
|
+
});
|
|
340
|
+
it("Pass 0 greenfield does NOT fire on refactor of an existing artifact", async () => {
|
|
341
|
+
mockedClassify.mockReturnValue({ tier: "hot", reason: "regex:refactor", confidence: 0.75 });
|
|
342
|
+
const result = await layer1Intent(makeCtx("refactor the user service"));
|
|
343
|
+
expect(mockedClassify).toHaveBeenCalled();
|
|
344
|
+
expect(result.taskType).toBe("refactor");
|
|
345
|
+
});
|
|
288
346
|
});
|
|
289
347
|
it("fails open on error — returns ctx unchanged with applied=false", async () => {
|
|
290
348
|
mockedClassify.mockImplementation(() => {
|
|
@@ -314,7 +372,13 @@ describe("hasActionableToolIntent — explicit run/tool requests are never chitc
|
|
|
314
372
|
});
|
|
315
373
|
});
|
|
316
374
|
describe("intentKind guard — a tool/command request must never route as chitchat", () => {
|
|
317
|
-
const generalFallback = async () => ({
|
|
375
|
+
const generalFallback = async () => ({
|
|
376
|
+
taskType: "general",
|
|
377
|
+
outputStyle: null,
|
|
378
|
+
confidence: 0.75,
|
|
379
|
+
intentKind: "task",
|
|
380
|
+
deliverableKind: null,
|
|
381
|
+
});
|
|
318
382
|
it("flips chitchat → task when the LLM fallback returns 'general' but the prompt is a command request", async () => {
|
|
319
383
|
// Reproduces 817e508f57ee: classify abstains, LLM fallback returns
|
|
320
384
|
// general → intentKind would be chitchat → message-processor drops the
|
|
@@ -347,6 +411,60 @@ describe("intentKind guard — a tool/command request must never route as chitch
|
|
|
347
411
|
expect(result.intentKind).toBe("task");
|
|
348
412
|
});
|
|
349
413
|
});
|
|
414
|
+
describe("isGreenfieldBuildTask — greenfield create/build intent (Pass 0 pin)", () => {
|
|
415
|
+
const positives = [
|
|
416
|
+
"build a muonroi-building-block microservice with a fraud-detection rule engine, multi-tenancy, and auth",
|
|
417
|
+
"build a Node TypeScript ISO-4217 currency code validator with vitest tests",
|
|
418
|
+
"build a small Node TS lib",
|
|
419
|
+
"create a REST API in Express",
|
|
420
|
+
"create a CLI tool for managing tasks",
|
|
421
|
+
"make a React dashboard component",
|
|
422
|
+
"implement a rate limiter middleware",
|
|
423
|
+
"develop a chat application with websockets",
|
|
424
|
+
"scaffold a new CLI tool",
|
|
425
|
+
"build me a currency converter",
|
|
426
|
+
"Build a GraphQL server",
|
|
427
|
+
"please create an authentication service",
|
|
428
|
+
"can you build a parser for ISO-8601 dates",
|
|
429
|
+
"set up a CI pipeline for the repo",
|
|
430
|
+
"build a faster JSON parser",
|
|
431
|
+
"i want to build a todo app",
|
|
432
|
+
];
|
|
433
|
+
const negatives = [
|
|
434
|
+
"the build is failing",
|
|
435
|
+
"fix the build",
|
|
436
|
+
"build broke after the merge",
|
|
437
|
+
"why is the build red?",
|
|
438
|
+
"the CI pipeline is broken",
|
|
439
|
+
"explain how to build a parser",
|
|
440
|
+
"how would you build a microservice?",
|
|
441
|
+
"should I build this as a monolith or microservices?",
|
|
442
|
+
"review the auth service I built",
|
|
443
|
+
"refactor the user service",
|
|
444
|
+
"rename the build function",
|
|
445
|
+
"analyze the rule engine",
|
|
446
|
+
"make it faster",
|
|
447
|
+
"make the tests pass",
|
|
448
|
+
"create a branch and commit",
|
|
449
|
+
"update the readme",
|
|
450
|
+
"optimize the database queries",
|
|
451
|
+
"what does the validator do?",
|
|
452
|
+
"add a button to the form",
|
|
453
|
+
"the server crashed",
|
|
454
|
+
];
|
|
455
|
+
it("matches greenfield creation requests", () => {
|
|
456
|
+
for (const p of positives)
|
|
457
|
+
expect(isGreenfieldBuildTask(p), p).toBe(true);
|
|
458
|
+
});
|
|
459
|
+
it("does NOT match debug / analyze / refactor / question prompts", () => {
|
|
460
|
+
for (const n of negatives)
|
|
461
|
+
expect(isGreenfieldBuildTask(n), n).toBe(false);
|
|
462
|
+
});
|
|
463
|
+
it("returns false on empty / whitespace input", () => {
|
|
464
|
+
expect(isGreenfieldBuildTask("")).toBe(false);
|
|
465
|
+
expect(isGreenfieldBuildTask(" ")).toBe(false);
|
|
466
|
+
});
|
|
467
|
+
});
|
|
350
468
|
describe("isStatusCheckQuestion — meta follow-ups about prior work (session c6387d2c6e1b)", () => {
|
|
351
469
|
it("detects Vietnamese 'đã … chưa' status questions", () => {
|
|
352
470
|
expect(isStatusCheckQuestion("bạn đã có plan chưa nhỉ")).toBe(true);
|
|
@@ -422,9 +540,97 @@ describe("Pass 2.6 — social pleasantries route to chitchat (drop the tool-sche
|
|
|
422
540
|
it("does NOT route a thanks-then-task prompt to chitchat", async () => {
|
|
423
541
|
mockedClassify.mockReturnValue({ tier: "abstain", reason: "regex:no-match", confidence: 0.1 });
|
|
424
542
|
const result = await layer1Intent(makeCtx("thanks, now fix the bug in src/auth/login.ts"), {
|
|
425
|
-
llmFallback: async () => ({
|
|
543
|
+
llmFallback: async () => ({
|
|
544
|
+
taskType: "debug",
|
|
545
|
+
outputStyle: null,
|
|
546
|
+
confidence: 0.8,
|
|
547
|
+
intentKind: "task",
|
|
548
|
+
deliverableKind: "code",
|
|
549
|
+
}),
|
|
550
|
+
});
|
|
551
|
+
expect(result.intentKind).toBe("task");
|
|
552
|
+
});
|
|
553
|
+
});
|
|
554
|
+
describe("layer1Intent — model-first gate (MUONROI_LLM_FIRST_CLASSIFY)", () => {
|
|
555
|
+
beforeEach(() => {
|
|
556
|
+
mockedLlmFirst.mockReturnValue(true);
|
|
557
|
+
// Make the regex cascade obviously WRONG so passing tests prove the model won.
|
|
558
|
+
mockedClassify.mockReturnValue({ tier: "hot", reason: "regex:create-file", confidence: 0.9 });
|
|
559
|
+
});
|
|
560
|
+
it("uses the model's verdict and never runs the regex classifier", async () => {
|
|
561
|
+
const result = await layer1Intent(makeCtx("bạn thử call tool setup_guide xem được không"), {
|
|
562
|
+
llmFallback: async () => ({
|
|
563
|
+
taskType: "general",
|
|
564
|
+
outputStyle: "concise",
|
|
565
|
+
confidence: 0.9,
|
|
566
|
+
intentKind: "task",
|
|
567
|
+
deliverableKind: "answer",
|
|
568
|
+
}),
|
|
426
569
|
});
|
|
570
|
+
expect(result.taskType).toBe("general"); // NOT the regex 'create-file' → generate
|
|
427
571
|
expect(result.intentKind).toBe("task");
|
|
572
|
+
expect(result.deliverableKind).toBe("answer"); // Phase 2b: model deliverable threads onto ctx
|
|
573
|
+
expect(result._intentTrace?.pass1Reason).toBe("llm-first");
|
|
574
|
+
expect(mockedClassify).not.toHaveBeenCalled();
|
|
575
|
+
});
|
|
576
|
+
it("marks chitchat from the model for a pure greeting", async () => {
|
|
577
|
+
const result = await layer1Intent(makeCtx("cảm ơn bạn nhé"), {
|
|
578
|
+
llmFallback: async () => ({
|
|
579
|
+
taskType: "general",
|
|
580
|
+
outputStyle: "concise",
|
|
581
|
+
confidence: 0.9,
|
|
582
|
+
intentKind: "chitchat",
|
|
583
|
+
deliverableKind: "answer",
|
|
584
|
+
}),
|
|
585
|
+
});
|
|
586
|
+
expect(result.intentKind).toBe("chitchat");
|
|
587
|
+
});
|
|
588
|
+
it("safety net: an actionable command never routes to chitchat even if the model says chat", async () => {
|
|
589
|
+
const result = await layer1Intent(makeCtx("run the build: npm run build"), {
|
|
590
|
+
llmFallback: async () => ({
|
|
591
|
+
taskType: "general",
|
|
592
|
+
outputStyle: "concise",
|
|
593
|
+
confidence: 0.9,
|
|
594
|
+
intentKind: "chitchat",
|
|
595
|
+
deliverableKind: "answer",
|
|
596
|
+
}),
|
|
597
|
+
});
|
|
598
|
+
expect(result.intentKind).toBe("task");
|
|
599
|
+
});
|
|
600
|
+
it("does NOT fall back to regex when the model returns null — fails loud, no wrong guess", async () => {
|
|
601
|
+
mockedClassify.mockReturnValue({ tier: "hot", reason: "regex:debug", confidence: 0.85 });
|
|
602
|
+
const result = await layer1Intent(makeCtx("fix the failing build"), {
|
|
603
|
+
llmFallback: async () => null,
|
|
604
|
+
});
|
|
605
|
+
expect(mockedClassify).not.toHaveBeenCalled(); // regex cascade never runs
|
|
606
|
+
expect(result.taskType).toBeNull(); // unknown, not a confidently-wrong regex guess
|
|
607
|
+
expect(result.intentKind).toBe("task"); // keep-tools on failure
|
|
608
|
+
expect(result._intentTrace?.pass1Reason).toBe("llm-first-failed");
|
|
609
|
+
});
|
|
610
|
+
it("does NOT fall back to regex when the model call throws — same fail-loud path", async () => {
|
|
611
|
+
mockedClassify.mockReturnValue({ tier: "hot", reason: "regex:debug", confidence: 0.85 });
|
|
612
|
+
const result = await layer1Intent(makeCtx("fix the failing build"), {
|
|
613
|
+
llmFallback: async () => {
|
|
614
|
+
throw new Error("rate limited");
|
|
615
|
+
},
|
|
616
|
+
});
|
|
617
|
+
expect(mockedClassify).not.toHaveBeenCalled();
|
|
618
|
+
expect(result.taskType).toBeNull();
|
|
619
|
+
expect(result._intentTrace?.pass1Reason).toBe("llm-first-failed");
|
|
620
|
+
});
|
|
621
|
+
it("falls back to the cascade when the flag is OFF even with llmFallback wired", async () => {
|
|
622
|
+
mockedLlmFirst.mockReturnValue(false);
|
|
623
|
+
mockedClassify.mockReturnValue({ tier: "hot", reason: "regex:debug", confidence: 0.85 });
|
|
624
|
+
const llm = vi.fn(async () => ({
|
|
625
|
+
taskType: "general",
|
|
626
|
+
outputStyle: null,
|
|
627
|
+
confidence: 0.9,
|
|
628
|
+
intentKind: "task",
|
|
629
|
+
deliverableKind: null,
|
|
630
|
+
}));
|
|
631
|
+
const result = await layer1Intent(makeCtx("fix the failing build"), { llmFallback: llm });
|
|
632
|
+
expect(llm).not.toHaveBeenCalled();
|
|
633
|
+
expect(result.taskType).toBe("debug");
|
|
428
634
|
});
|
|
429
635
|
});
|
|
430
636
|
//# sourceMappingURL=layer1-intent.test.js.map
|