npm - @wazir-dev/cli - Versions diffs - 1.0.0 → 1.2.0 - Mend

@wazir-dev/cli 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (163)

package/CHANGELOG.md +100 -2
package/README.md +6 -6
package/docs/concepts/architecture.md +1 -1
package/docs/concepts/roles-and-workflows.md +2 -0
package/docs/concepts/why-wazir.md +59 -0
package/docs/decisions/2026-03-19-deferred-items.md +564 -0
package/docs/decisions/2026-03-19-enhancement-decisions.md +300 -0
package/docs/plans/2026-03-15-cli-pipeline-integration-plan.md +1 -1
package/docs/readmes/INDEX.md +21 -5
package/docs/readmes/features/expertise/README.md +2 -2
package/docs/readmes/features/exports/README.md +2 -2
package/docs/readmes/features/schemas/README.md +3 -0
package/docs/readmes/features/skills/README.md +17 -0
package/docs/readmes/features/skills/clarifier.md +5 -0
package/docs/readmes/features/skills/claude-cli.md +5 -0
package/docs/readmes/features/skills/codex-cli.md +5 -0
package/docs/readmes/features/skills/dispatching-parallel-agents.md +5 -0
package/docs/readmes/features/skills/executing-plans.md +5 -0
package/docs/readmes/features/skills/executor.md +5 -0
package/docs/readmes/features/skills/finishing-a-development-branch.md +5 -0
package/docs/readmes/features/skills/gemini-cli.md +5 -0
package/docs/readmes/features/skills/humanize.md +5 -0
package/docs/readmes/features/skills/init-pipeline.md +5 -0
package/docs/readmes/features/skills/receiving-code-review.md +5 -0
package/docs/readmes/features/skills/requesting-code-review.md +5 -0
package/docs/readmes/features/skills/reviewer.md +5 -0
package/docs/readmes/features/skills/subagent-driven-development.md +5 -0
package/docs/readmes/features/skills/using-git-worktrees.md +5 -0
package/docs/readmes/features/skills/wazir.md +5 -0
package/docs/readmes/features/skills/writing-skills.md +5 -0
package/docs/readmes/features/workflows/prepare-next.md +1 -1
package/docs/reference/configuration-reference.md +47 -6
package/docs/reference/launch-checklist.md +4 -4
package/docs/reference/review-loop-pattern.md +538 -0
package/docs/reference/roles-reference.md +1 -0
package/docs/reference/skill-tiers.md +147 -0
package/docs/reference/tooling-cli.md +5 -1
package/docs/truth-claims.yaml +18 -0
package/expertise/antipatterns/process/ai-coding-antipatterns.md +97 -1
package/exports/hosts/claude/.claude/agents/clarifier.md +3 -0
package/exports/hosts/claude/.claude/agents/designer.md +3 -0
package/exports/hosts/claude/.claude/agents/executor.md +2 -0
package/exports/hosts/claude/.claude/agents/planner.md +3 -0
package/exports/hosts/claude/.claude/agents/researcher.md +2 -0
package/exports/hosts/claude/.claude/agents/reviewer.md +5 -1
package/exports/hosts/claude/.claude/agents/specifier.md +3 -0
package/exports/hosts/claude/.claude/commands/clarify.md +4 -0
package/exports/hosts/claude/.claude/commands/design-review.md +4 -0
package/exports/hosts/claude/.claude/commands/design.md +4 -0
package/exports/hosts/claude/.claude/commands/discover.md +4 -0
package/exports/hosts/claude/.claude/commands/execute.md +4 -0
package/exports/hosts/claude/.claude/commands/plan-review.md +4 -0
package/exports/hosts/claude/.claude/commands/plan.md +4 -0
package/exports/hosts/claude/.claude/commands/spec-challenge.md +4 -0
package/exports/hosts/claude/.claude/commands/specify.md +4 -0
package/exports/hosts/claude/.claude/commands/verify.md +4 -0
package/exports/hosts/claude/.claude/settings.json +9 -0
package/exports/hosts/claude/CLAUDE.md +1 -1
package/exports/hosts/claude/export.manifest.json +22 -20
package/exports/hosts/claude/host-package.json +3 -1
package/exports/hosts/codex/AGENTS.md +1 -1
package/exports/hosts/codex/export.manifest.json +22 -20
package/exports/hosts/codex/host-package.json +3 -1
package/exports/hosts/cursor/.cursor/hooks.json +4 -0
package/exports/hosts/cursor/.cursor/rules/wazir-core.mdc +1 -1
package/exports/hosts/cursor/export.manifest.json +22 -20
package/exports/hosts/cursor/host-package.json +3 -1
package/exports/hosts/gemini/GEMINI.md +1 -1
package/exports/hosts/gemini/export.manifest.json +22 -20
package/exports/hosts/gemini/host-package.json +3 -1
package/hooks/context-mode-router +191 -0
package/hooks/definitions/context_mode_router.yaml +19 -0
package/hooks/definitions/loop_cap_guard.yaml +1 -1
package/hooks/hooks.json +43 -0
package/hooks/protected-path-write-guard +8 -0
package/hooks/routing-matrix.json +45 -0
package/hooks/session-start +62 -1
package/llms-full.txt +905 -132
package/package.json +3 -3
package/roles/clarifier.md +3 -0
package/roles/designer.md +3 -0
package/roles/executor.md +2 -0
package/roles/planner.md +3 -0
package/roles/researcher.md +2 -0
package/roles/reviewer.md +5 -1
package/roles/specifier.md +3 -0
package/schemas/hook.schema.json +2 -1
package/schemas/phase-report.schema.json +80 -0
package/schemas/usage.schema.json +25 -1
package/schemas/wazir-manifest.schema.json +19 -0
package/skills/brainstorming/SKILL.md +20 -56
package/skills/clarifier/SKILL.md +243 -0
package/skills/claude-cli/SKILL.md +320 -0
package/skills/codex-cli/SKILL.md +260 -0
package/skills/debugging/SKILL.md +24 -1
package/skills/design/SKILL.md +13 -0
package/skills/dispatching-parallel-agents/SKILL.md +13 -0
package/skills/executing-plans/SKILL.md +28 -2
package/skills/executor/SKILL.md +129 -0
package/skills/finishing-a-development-branch/SKILL.md +13 -0
package/skills/gemini-cli/SKILL.md +260 -0
package/skills/humanize/SKILL.md +13 -0
package/skills/init-pipeline/SKILL.md +76 -78
package/skills/prepare-next/SKILL.md +81 -10
package/skills/receiving-code-review/SKILL.md +21 -0
package/skills/requesting-code-review/SKILL.md +38 -5
package/skills/reviewer/SKILL.md +423 -0
package/skills/run-audit/SKILL.md +13 -0
package/skills/scan-project/SKILL.md +13 -0
package/skills/self-audit/SKILL.md +197 -16
package/skills/subagent-driven-development/SKILL.md +38 -2
package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +2 -0
package/skills/subagent-driven-development/implementer-prompt.md +8 -0
package/skills/subagent-driven-development/spec-reviewer-prompt.md +7 -0
package/skills/tdd/SKILL.md +21 -0
package/skills/using-git-worktrees/SKILL.md +13 -0
package/skills/using-skills/SKILL.md +13 -0
package/skills/verification/SKILL.md +13 -0
package/skills/wazir/SKILL.md +286 -262
package/skills/writing-plans/SKILL.md +44 -4
package/skills/writing-skills/SKILL.md +13 -0
package/templates/artifacts/implementation-plan.md +3 -0
package/templates/artifacts/tasks-template.md +133 -0
package/templates/examples/phase-report.example.json +48 -0
package/templates/examples/wazir-manifest.example.yaml +1 -1
package/tooling/src/adapters/composition-engine.js +256 -0
package/tooling/src/adapters/model-router.js +84 -0
package/tooling/src/capture/command.js +111 -2
package/tooling/src/capture/run-config.js +23 -0
package/tooling/src/capture/store.js +24 -0
package/tooling/src/capture/usage.js +106 -0
package/tooling/src/checks/ac-matrix.js +256 -0
package/tooling/src/checks/brand-truth.js +3 -6
package/tooling/src/checks/command-registry.js +13 -0
package/tooling/src/checks/docs-truth.js +1 -1
package/tooling/src/checks/runtime-surface.js +3 -7
package/tooling/src/checks/skills.js +111 -0
package/tooling/src/cli.js +17 -3
package/tooling/src/commands/stats.js +161 -0
package/tooling/src/commands/validate.js +5 -1
package/tooling/src/export/compiler.js +33 -37
package/tooling/src/gating/agent.js +145 -0
package/tooling/src/guards/phase-prerequisite-guard.js +127 -0
package/tooling/src/hooks/routing-logic.js +69 -0
package/tooling/src/init/auto-detect.js +260 -0
package/tooling/src/init/command.js +161 -0
package/tooling/src/input/scanner.js +46 -0
package/tooling/src/reports/command.js +103 -0
package/tooling/src/reports/phase-report.js +323 -0
package/tooling/src/state/command.js +160 -0
package/tooling/src/state/db.js +287 -0
package/tooling/src/status/command.js +53 -1
package/wazir.manifest.yaml +26 -17
package/workflows/clarify.md +4 -0
package/workflows/design-review.md +4 -0
package/workflows/design.md +4 -0
package/workflows/discover.md +4 -0
package/workflows/execute.md +4 -0
package/workflows/plan-review.md +4 -0
package/workflows/plan.md +4 -0
package/workflows/spec-challenge.md +4 -0
package/workflows/specify.md +4 -0
package/workflows/verify.md +4 -0

package/skills/writing-plans/SKILL.md CHANGED Viewed

@@ -5,15 +5,31 @@ description: Use after clarification, research, and design approval to create an
 # Writing Plans
+## Command Routing
+Follow the Canonical Command Matrix in `hooks/routing-matrix.json`.
+- Large commands (test runners, builds, diffs, dependency trees, linting) → context-mode tools
+- Small commands (git status, ls, pwd, wazir CLI) → native Bash
+- If context-mode unavailable, fall back to native Bash with warning
+## Codebase Exploration
+1. Query `wazir index search-symbols <query>` first
+2. Use `wazir recall file <path> --tier L1` for targeted reads
+3. Fall back to direct file reads ONLY for files identified by index queries
+4. Maximum 10 direct file reads without a justifying index query
+5. If no index exists: `wazir index build && wazir index summarize --tier all`
 Inputs:
 - approved design or approved clarified direction
 - current repo state
 - relevant research findings
-Output:
+Output path:
-- one implementation plan in `docs/plans/YYYY-MM-DD-<topic>-implementation.md`
+- **Inside a pipeline run** (`.wazir/runs/latest/` exists): write to `.wazir/runs/latest/clarified/execution-plan.md` and task specs to `.wazir/runs/latest/tasks/task-NNN/spec.md`
+- **Standalone** (no active run): write to `docs/plans/YYYY-MM-DD-<topic>-implementation.md`
+To detect: check if `.wazir/runs/latest/clarified/` exists. If yes, use run paths.
 The plan must include:
@@ -25,6 +41,30 @@ The plan must include:
 Rules:
-- do not write repo-local task files outside the plan directory
-- do not rely on retired `run-*` workflow wrappers
+- do not write implementation code during planning
 - make the plan detailed enough that another weak model can execute it without inventing missing steps
+- each task spec must have testable acceptance criteria, not vague descriptions
+## Plan Review Loop
+After writing the plan, invoke `wz:reviewer --mode plan-review` to run the plan-review loop using plan dimensions (see `workflows/plan-review.md` and `docs/reference/review-loop-pattern.md`). Do NOT call `codex exec` or `codex review` directly — the reviewer skill handles Codex integration internally.
+The planner resolves findings from each pass. The loop runs for `pass_counts[depth]` passes (quick=3, standard=5, deep=7). No extension.
+For non-code artifacts (the plan itself), Codex review uses stdin pipe:
+```bash
+CODEX_MODEL=$(jq -r '.multi_tool.codex.model // empty' .wazir/state/config.json 2>/dev/null)
+CODEX_MODEL=${CODEX_MODEL:-gpt-5.4}
+cat <plan-path> | codex exec -c model="$CODEX_MODEL" "Review this implementation plan focusing on [dimension]..."
+```
+`codex review -c model="$CODEX_MODEL"` is used only for code artifacts, not plans.
+Codex error handling: if `codex` exits non-zero, log the error, mark the pass as `codex-unavailable`, and use self-review findings only. Never treat a Codex failure as a clean pass.
+Loop depth follows the project's depth config (quick/standard/deep).
+Standalone mode: if no `.wazir/runs/latest/` exists, artifacts go to `docs/plans/` and review logs go alongside (`docs/plans/YYYY-MM-DD-<topic>-review-pass-N.md`). Loop cap guard is not invoked in standalone mode.
+After the loop completes, present findings summary and wait for user approval before completing.

package/skills/writing-skills/SKILL.md CHANGED Viewed

@@ -5,6 +5,19 @@ description: Use when creating new skills, editing existing skills, or verifying
 # Writing Skills
+## Command Routing
+Follow the Canonical Command Matrix in `hooks/routing-matrix.json`.
+- Large commands (test runners, builds, diffs, dependency trees, linting) → context-mode tools
+- Small commands (git status, ls, pwd, wazir CLI) → native Bash
+- If context-mode unavailable, fall back to native Bash with warning
+## Codebase Exploration
+1. Query `wazir index search-symbols <query>` first
+2. Use `wazir recall file <path> --tier L1` for targeted reads
+3. Fall back to direct file reads ONLY for files identified by index queries
+4. Maximum 10 direct file reads without a justifying index query
+5. If no index exists: `wazir index build && wazir index summarize --tier all`
 ## Overview
 **Writing skills IS Test-Driven Development applied to process documentation.**

package/templates/artifacts/implementation-plan.md CHANGED Viewed

@@ -16,6 +16,9 @@ approval_status: required
 ## Tasks And Subtasks
+Use the spec-kit task format defined in `templates/artifacts/tasks-template.md`.
+The execution plan produced by `wz:writing-plans` follows this template.
 ## Acceptance Criteria
 ## Verification

package/templates/artifacts/tasks-template.md ADDED Viewed

@@ -0,0 +1,133 @@
+---
+artifact_type: execution_plan
+phase: plan
+role: planner
+run_id: <run-id>
+loop: 1
+status: draft
+sources:
+  - <approved-spec>
+  - <approved-design>
+approval_status: required
+template_ref: tasks-template
+---
+# Execution Plan: <Project Title>
+## Constitution Check
+- Approved spec: `<path-to-spec>`
+- Approved design: `<path-to-design>`
+- Branch: `<branch-name>`
+- Depth: <quick|standard|deep>
+## MVP Strategy
+1. Complete Phase 1 (Setup) + Phase 2 (Foundational)
+2. Complete first User Story phase → test independently → deploy/demo (MVP!)
+3. Add stories incrementally — each adds value without breaking previous
+## Dependency Graph
+```
+<task-id> → <task-id> (describe dependency)
+```
+---
+## Phase 1: Setup
+**Goal:** Project scaffolding — directories, configs, stubs.
+- [ ] T001 Setup task description with `path/to/file`
+- [ ] T002 [P] Parallel setup task with `another/path`
+---
+## Phase 2: Foundational
+**Goal:** Core infrastructure that BLOCKS all user stories. Must complete before any story phase.
+- [ ] T003 Foundational task with `path/to/file`
+- [ ] T004 [P] Parallel foundational task with `another/file`
+**Independent test:** Describe how to verify this phase independently.
+---
+## Phase 3: User Story 1 — [US1] <Story Title>
+**Goal:** <What this story delivers to the user>
+**Independent test criteria:**
+- <How to verify this story works end-to-end without other stories>
+**Implementation tasks:**
+- [ ] T005 [US1] Task description with `path/to/file`
+- [ ] T006 [P] [US1] Parallel task with `another/file`
+- [ ] T007 [US1] Task description with `path/to/file`
+---
+## Phase 4: User Story 2 — [US2] <Story Title>
+**Goal:** <What this story delivers>
+**Independent test criteria:**
+- <Verification approach>
+**Implementation tasks:**
+- [ ] T008 [US2] Task with `path/to/file`
+- [ ] T009 [P] [US2] Parallel task with `path/to/file`
+---
+## Phase N: Polish & Cross-Cutting
+**Goal:** Final integration, documentation, cleanup.
+- [ ] T0XX Polish task with `path/to/file`
+---
+## Cross-cutting Constraints
+| ID | Constraint | When verified |
+|----|-----------|--------------|
+| CC1 | <constraint> | <timing> |
+## Task Summary
+| Phase | Tasks | Stories | Size | Execution |
+|-------|-------|---------|------|-----------|
+| 1: Setup | T001-T002 | — | S | Serial |
+| 2: Foundational | T003-T004 | — | M | T003∥T004 |
+| 3: [US1] | T005-T007 | US1 | M | Serial |
+| 4: [US2] | T008-T009 | US2 | M | T008→T009 |
+| N: Polish | T0XX | — | S | Serial |
+## Format Reference
+**Task line format:**
+```
+- [ ] [TaskID] [P?] [Story?] Description with `file/path`
+```
+- `- [ ]` — checkbox (always present)
+- `TaskID` — sequential: T001, T002, T003...
+- `[P]` — parallel marker: ONLY if task can run simultaneously with adjacent tasks (different files, no dependencies)
+- `[US1]` — user story label: maps to story phases from the spec
+- Description — clear action with exact file path in backticks
+**Phase rules:**
+- Phase 1 (Setup): project init, scaffolding
+- Phase 2 (Foundational): BLOCKS all user stories — must complete first
+- Phase 3+: one phase per user story, in priority order from spec
+- Final phase: polish, cross-cutting, documentation
+**Task ordering within phases:**
+- Models before services, services before endpoints
+- Each user story phase has: goal, independent test criteria, implementation tasks
+- Tasks organized by user story, NOT by layer
+**Parallel markers:**
+- `[P]` means this task can run simultaneously with the next `[P]` task
+- Only use when tasks edit different files with no shared dependencies
+- Adjacent `[P]` tasks form a parallel group

package/templates/examples/phase-report.example.json ADDED Viewed

@@ -0,0 +1,48 @@
+{
+  "phase_name": "verify",
+  "run_id": "run-2026-03-11-example",
+  "timestamp": "2026-03-11T13:00:00Z",
+  "attempted_actions": [
+    {
+      "description": "Run full test suite against implementation artifacts",
+      "outcome": "success",
+      "evidence": "42 tests passed, 0 failed"
+    },
+    {
+      "description": "Check lint compliance across all changed files",
+      "outcome": "success",
+      "evidence": "0 lint errors found"
+    }
+  ],
+  "drift_analysis": {
+    "delta": 0,
+    "description": "No drift detected between spec requirements and implementation."
+  },
+  "quality_metrics": {
+    "test_pass_count": 42,
+    "test_fail_count": 0,
+    "lint_errors": 0,
+    "type_errors": 0
+  },
+  "risk_flags": [
+    {
+      "severity": "low",
+      "description": "One acceptance criterion is verified by a single unit test only.",
+      "mitigation": "Add an integration test covering the same criterion in the next phase."
+    }
+  ],
+  "decisions": [
+    {
+      "description": "Proceed to review without additional verification loops.",
+      "rationale": "All tests pass and drift delta is zero.",
+      "alternatives_considered": [
+        "Run a second verification loop with stricter coverage thresholds"
+      ],
+      "source": "verifier"
+    }
+  ],
+  "verdict_recommendation": {
+    "verdict": "continue",
+    "reasoning": "All quality metrics are within acceptable thresholds and no high-severity risks were identified."
+  }
+}

package/templates/examples/wazir-manifest.example.yaml CHANGED Viewed

@@ -49,7 +49,7 @@ protected_paths:
   - input
   - exports/hosts
 prohibited_terms:
-  - agent-os
+  - legacy-name
 adapters:
   context_mode:
     enabled_by_default: false

package/tooling/src/adapters/composition-engine.js ADDED Viewed

@@ -0,0 +1,256 @@
+import crypto from 'node:crypto';
+import fs from 'node:fs';
+import path from 'node:path';
+import { readYamlFile } from '../loaders.js';
+import { estimateTokens } from '../capture/usage.js';
+const DEFAULT_TOKEN_CEILING = 50_000; // token budget composeExpertise applies when options.tokenCeiling is omitted
+const MODULE_CAP = 15; // maximum number of modules composeExpertise keeps in one composition
+/**
+ * Collect the module paths the composition map assigns to `role` within one
+ * layer. Recognised layers are 'always', 'auto', 'stacks' and 'concerns';
+ * any other value produces an empty result.
+ * Returns an array of { path, layer } objects in the order the map lists them.
+ */
+function resolveLayer(map, layer, role, stacks, concerns) {
+  const found = [];
+  // Tag each map entry with the layer currently being resolved.
+  const collect = (entries) => {
+    for (const entry of entries) {
+      found.push({ path: entry, layer });
+    }
+  };
+  switch (layer) {
+    case 'always':
+      collect(map.always?.[role] ?? []);
+      break;
+    case 'auto': {
+      const shared = map.auto?.['all-stacks'];
+      if (shared) {
+        // 'all-roles' entries come first, then the role-specific ones.
+        collect(shared['all-roles'] ?? []);
+        collect(shared[role] ?? []);
+      }
+      break;
+    }
+    case 'stacks': {
+      const wantsAntipatterns = role === 'verifier' || role === 'reviewer';
+      for (const stack of stacks) {
+        const definition = map.stacks?.[stack];
+        if (!definition) continue;
+        collect(definition[role] ?? []);
+        // Only verifier and reviewer also receive the stack's antipatterns.
+        if (wantsAntipatterns) collect(definition.antipatterns ?? []);
+      }
+      break;
+    }
+    case 'concerns': {
+      for (const concern of concerns) {
+        const definition = map.concerns?.[concern];
+        if (!definition) continue;
+        collect(definition[role] ?? []);
+      }
+      break;
+    }
+    default:
+      break;
+  }
+  return found;
+}
+/**
+ * Remove modules whose `path` has already been seen. The earliest occurrence
+ * wins, so callers should pass modules in priority order.
+ */
+function deduplicateModules(modules) {
+  // A Map iterates in insertion order, so first-seen order is preserved.
+  const firstByPath = new Map();
+  for (const mod of modules) {
+    if (!firstByPath.has(mod.path)) {
+      firstByPath.set(mod.path, mod);
+    }
+  }
+  return [...firstByPath.values()];
+}
+/**
+ * Build the expertise prompt for one role.
+ *
+ * Resolves module paths from `expertise/composition-map.yaml` layer by layer
+ * (always > auto > stacks > concerns), removes duplicate paths, reads each
+ * module from `<projectRoot>/expertise/`, then trims the set to the module
+ * cap and the token ceiling, dropping lowest-priority layers first. Modules
+ * that cannot be read are skipped with a warning on stderr. A proof artifact
+ * describing the result is written under `runRoot`.
+ *
+ * @param {object} options
+ * @param {string} options.role - The role (executor, verifier, reviewer, etc.)
+ * @param {string[]} options.stacks - Detected project stacks (e.g. ['node', 'react'])
+ * @param {string[]} options.concerns - Declared task concerns (e.g. ['rtl', 'security-auth'])
+ * @param {string} options.projectRoot - Absolute path to the project root
+ * @param {string} options.runRoot - Absolute path to the run root for artifact output
+ * @param {string} [options.task] - Optional task identifier for the proof artifact
+ * @param {number} [options.tokenCeiling] - Max token budget (default 50,000)
+ * @returns {{ prompt: string, manifest: object }}
+ */
+export function composeExpertise(options) {
+  const {
+    role,
+    stacks = [],
+    concerns = [],
+    projectRoot,
+    runRoot,
+    task = 'default',
+    tokenCeiling = DEFAULT_TOKEN_CEILING,
+  } = options;
+  const map = readYamlFile(path.join(projectRoot, 'expertise', 'composition-map.yaml'));
+  const expertiseRoot = path.join(projectRoot, 'expertise');
+  // Layers are listed highest priority first; deduplication keeps the first hit.
+  const candidates = deduplicateModules(
+    ['always', 'auto', 'stacks', 'concerns'].flatMap((layer) =>
+      resolveLayer(map, layer, role, stacks, concerns),
+    ),
+  );
+  // Load each module and estimate its token cost from its UTF-8 byte length.
+  const loaded = [];
+  const warnings = [];
+  for (const mod of candidates) {
+    const fullPath = path.join(expertiseRoot, mod.path);
+    try {
+      const content = fs.readFileSync(fullPath, 'utf8');
+      const tokens = estimateTokens(Buffer.byteLength(content, 'utf8'));
+      loaded.push({ ...mod, content, tokens, fullPath });
+    } catch (err) {
+      warnings.push(`warning: skipping missing module ${mod.path}: ${err.message}`);
+    }
+  }
+  // Budget enforcement drops lowest-priority layers first.
+  const dropOrder = ['concerns', 'stacks', 'auto', 'always'];
+  const sumTokens = (mods) => mods.reduce((sum, m) => sum + m.tokens, 0);
+  const dropped = [];
+  let included = [...loaded];
+  // Record why each victim was removed and take it out of the included set.
+  const evict = (victims, reason) => {
+    for (const victim of victims) victim.drop_reason = reason;
+    dropped.push(...victims);
+    included = included.filter((m) => !victims.includes(m));
+  };
+  if (included.length > MODULE_CAP) {
+    evict(enforceLimit(included, MODULE_CAP, dropOrder), 'module_cap_exceeded');
+  }
+  let totalTokens = sumTokens(included);
+  if (totalTokens > tokenCeiling) {
+    evict(enforceTokenBudget(included, tokenCeiling, dropOrder), 'token_ceiling_exceeded');
+    totalTokens = sumTokens(included);
+  }
+  // Each module is prefixed with an HTML comment naming its path and layer.
+  const prompt = included
+    .map((mod) => `<!-- module: ${mod.path} (${mod.layer}) -->\n${mod.content}`)
+    .join('\n\n---\n\n');
+  const promptHash = crypto.createHash('sha256').update(prompt).digest('hex');
+  const manifest = {
+    modules_included: included.map((m) => ({ path: m.path, layer: m.layer, tokens: m.tokens })),
+    modules_dropped: dropped.map((m) => ({ path: m.path, layer: m.layer, tokens: m.tokens, reason: m.drop_reason })),
+    total_tokens: totalTokens,
+    prompt_hash: promptHash,
+  };
+  // Surface skipped-module warnings before the artifact is written.
+  for (const warning of warnings) {
+    process.stderr.write(`${warning}\n`);
+  }
+  writeProofArtifact(runRoot, role, task, manifest);
+  return { prompt, manifest };
+}
+/**
+ * Pick the modules to drop so that at most `limit` remain.
+ * Layers are visited in `dropOrder`; inside a layer the most recently added
+ * module is dropped first. Returns the modules to drop and leaves `modules`
+ * itself unchanged.
+ */
+function enforceLimit(modules, limit, dropOrder) {
+  const victims = [];
+  let remaining = modules.length;
+  for (const layer of dropOrder) {
+    if (remaining <= limit) break;
+    // filter() yields a fresh array, so reversing it does not touch `modules`.
+    const newestFirst = modules.filter((m) => m.layer === layer).reverse();
+    for (const candidate of newestFirst) {
+      if (remaining <= limit) break;
+      victims.push(candidate);
+      remaining -= 1;
+    }
+  }
+  return victims;
+}
+/**
+ * Pick the modules to drop so the summed `tokens` fit under `ceiling`.
+ * Layers are visited in `dropOrder`; inside a layer the most recently added
+ * module is dropped first. Returns the modules to drop and leaves `modules`
+ * itself unchanged.
+ */
+function enforceTokenBudget(modules, ceiling, dropOrder) {
+  const victims = [];
+  let budgetUsed = modules.reduce((sum, m) => sum + m.tokens, 0);
+  for (const layer of dropOrder) {
+    if (budgetUsed <= ceiling) break;
+    // Skip anything already selected in case a layer name repeats in dropOrder.
+    const newestFirst = modules
+      .filter((m) => m.layer === layer && !victims.includes(m))
+      .reverse();
+    for (const candidate of newestFirst) {
+      if (budgetUsed <= ceiling) break;
+      victims.push(candidate);
+      budgetUsed -= candidate.tokens;
+    }
+  }
+  return victims;
+}
+/**
+ * Write the composition proof artifact to `<runRoot>/artifacts/`.
+ *
+ * The file is named `composition-<role>-<task>.json` and holds the manifest
+ * fields plus `generated_at`, `role` and `task`. Path separators inside
+ * `role` or `task` are replaced with `_` in the file name, so an identifier
+ * such as `a/../../b` cannot place the file outside the artifacts directory.
+ * The values stored inside the JSON are left untouched.
+ *
+ * Best-effort: any failure is reported on stderr and never thrown.
+ *
+ * @param {string} runRoot - Run root; the `artifacts` directory is created beneath it
+ * @param {string} role - Role the composition was built for
+ * @param {string} task - Task identifier
+ * @param {object} manifest - Manifest fields spread into the artifact
+ */
+function writeProofArtifact(runRoot, role, task, manifest) {
+  try {
+    const artifactsDir = path.join(runRoot, 'artifacts');
+    fs.mkdirSync(artifactsDir, { recursive: true });
+    // Keep the artifact a direct child of artifactsDir.
+    const toFileSegment = (value) => String(value).replace(/[\\/]/g, '_');
+    const fileName = `composition-${toFileSegment(role)}-${toFileSegment(task)}.json`;
+    const artifactPath = path.join(artifactsDir, fileName);
+    const artifact = {
+      generated_at: new Date().toISOString(),
+      role,
+      task,
+      ...manifest,
+    };
+    fs.writeFileSync(artifactPath, `${JSON.stringify(artifact, null, 2)}\n`);
+  } catch (err) {
+    process.stderr.write(`warning: failed to write composition proof artifact: ${err.message}\n`);
+  }
+}

package/tooling/src/adapters/model-router.js ADDED Viewed

@@ -0,0 +1,84 @@
+/**
+ * Model routing table — maps task types to recommended models.
+ * Entries are grouped by the model they route to (haiku, sonnet, opus), and
+ * each carries a short `reason` string explaining the choice.
+ * getModelForTask() reads this table for default lookups and
+ * getRoutingTable() returns a copy of it.
+ * @type {Object<string, {model: string, reason: string}>}
+ */
+const MODEL_ROUTING_TABLE = {
+  // Mechanical tasks — Haiku
+  'fetch-url':           { model: 'haiku', reason: 'Mechanical, no reasoning needed' },
+  'write-handoff':       { model: 'haiku', reason: 'Structured file operations' },
+  'compress-archive':    { model: 'haiku', reason: 'File manipulation' },
+  // Comprehension tasks — Sonnet
+  'read-summarize':      { model: 'sonnet', reason: 'Comprehension, not deep reasoning' },
+  'write-implementation':{ model: 'sonnet', reason: 'Good spec + plan = mechanical coding' },
+  'task-review':         { model: 'sonnet', reason: 'Diff review against clear spec' },
+  'extract-learnings':   { model: 'sonnet', reason: 'Structured extraction' },
+  'internal-review':     { model: 'sonnet', reason: 'Pattern matching against expertise' },
+  'run-tests':           { model: 'sonnet', reason: 'Test execution and analysis' },
+  // Judgment tasks — Opus
+  'orchestrate':         { model: 'opus', reason: 'Needs judgment and coordination' },
+  'spec-harden':         { model: 'opus', reason: 'Adversarial thinking required' },
+  'design':              { model: 'opus', reason: 'Creativity + architecture decisions' },
+  'final-review':        { model: 'opus', reason: 'Holistic judgment against original input' },
+  'brainstorm':          { model: 'opus', reason: 'Creative exploration' },
+  'plan':                { model: 'opus', reason: 'Strategic task decomposition' },
+};
+/**
+ * Get the recommended model for a task type.
+ *
+ * If multi-model mode is not enabled, returns `{ model: 'inherit' }`.
+ * If config contains `model_overrides`, those take precedence over the
+ * default routing table. Unknown task types fall back to 'opus' (safe
+ * default — never under-model a task).
+ *
+ * Both lookups consider own properties only, so a task type that happens to
+ * match an inherited object key ('constructor', 'toString', '__proto__', ...)
+ * is treated as unknown instead of being reported as a config override.
+ *
+ * @param {string} taskType - one of the keys in MODEL_ROUTING_TABLE
+ * @param {object} config - project config (may override routing)
+ * @returns {{model: string, reason: string, overridden: boolean}}
+ */
+export function getModelForTask(taskType, config = {}) {
+  if (!isMultiModelEnabled(config)) {
+    return { model: 'inherit', reason: 'Multi-model mode not enabled', overridden: false };
+  }
+  // Own-property test that also works for objects without a prototype.
+  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
+  // Check config-level overrides first
+  const overrides = config.model_overrides ?? {};
+  const override = hasOwn(overrides, taskType) ? overrides[taskType] : undefined;
+  if (override) {
+    return {
+      model: override.model ?? 'opus',
+      reason: override.reason ?? 'Config override',
+      overridden: true,
+    };
+  }
+  // Look up the default routing table
+  const entry = hasOwn(MODEL_ROUTING_TABLE, taskType) ? MODEL_ROUTING_TABLE[taskType] : undefined;
+  if (entry) {
+    return { model: entry.model, reason: entry.reason, overridden: false };
+  }
+  // Unknown task type — safe default is opus (never under-model)
+  return { model: 'opus', reason: 'Unknown task type — safe default', overridden: false };
+}
+/**
+ * Check if multi-model mode is enabled.
+ *
+ * Returns true only when `config.model_mode` is exactly 'multi-model'.
+ * A missing config (`undefined` or `null`) counts as not enabled; the default
+ * parameter alone would not cover an explicit `null`.
+ *
+ * @param {object} config
+ * @returns {boolean}
+ */
+export function isMultiModelEnabled(config = {}) {
+  return config?.model_mode === 'multi-model';
+}
+/**
+ * Return a copy of the default routing table for logging/stats.
+ * Each entry is shallow-copied, so changes to the result do not reach the
+ * module-level table.
+ * @returns {Object<string, {model: string, reason: string}>}
+ */
+export function getRoutingTable() {
+  return Object.fromEntries(
+    Object.entries(MODEL_ROUTING_TABLE).map(([taskType, entry]) => [taskType, { ...entry }]),
+  );
+}