@wazir-dev/cli 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/CHANGELOG.md +100 -2
  2. package/README.md +6 -6
  3. package/docs/concepts/architecture.md +1 -1
  4. package/docs/concepts/roles-and-workflows.md +2 -0
  5. package/docs/concepts/why-wazir.md +59 -0
  6. package/docs/decisions/2026-03-19-deferred-items.md +564 -0
  7. package/docs/decisions/2026-03-19-enhancement-decisions.md +300 -0
  8. package/docs/plans/2026-03-15-cli-pipeline-integration-plan.md +1 -1
  9. package/docs/readmes/INDEX.md +21 -5
  10. package/docs/readmes/features/expertise/README.md +2 -2
  11. package/docs/readmes/features/exports/README.md +2 -2
  12. package/docs/readmes/features/schemas/README.md +3 -0
  13. package/docs/readmes/features/skills/README.md +17 -0
  14. package/docs/readmes/features/skills/clarifier.md +5 -0
  15. package/docs/readmes/features/skills/claude-cli.md +5 -0
  16. package/docs/readmes/features/skills/codex-cli.md +5 -0
  17. package/docs/readmes/features/skills/dispatching-parallel-agents.md +5 -0
  18. package/docs/readmes/features/skills/executing-plans.md +5 -0
  19. package/docs/readmes/features/skills/executor.md +5 -0
  20. package/docs/readmes/features/skills/finishing-a-development-branch.md +5 -0
  21. package/docs/readmes/features/skills/gemini-cli.md +5 -0
  22. package/docs/readmes/features/skills/humanize.md +5 -0
  23. package/docs/readmes/features/skills/init-pipeline.md +5 -0
  24. package/docs/readmes/features/skills/receiving-code-review.md +5 -0
  25. package/docs/readmes/features/skills/requesting-code-review.md +5 -0
  26. package/docs/readmes/features/skills/reviewer.md +5 -0
  27. package/docs/readmes/features/skills/subagent-driven-development.md +5 -0
  28. package/docs/readmes/features/skills/using-git-worktrees.md +5 -0
  29. package/docs/readmes/features/skills/wazir.md +5 -0
  30. package/docs/readmes/features/skills/writing-skills.md +5 -0
  31. package/docs/readmes/features/workflows/prepare-next.md +1 -1
  32. package/docs/reference/configuration-reference.md +47 -6
  33. package/docs/reference/launch-checklist.md +4 -4
  34. package/docs/reference/review-loop-pattern.md +538 -0
  35. package/docs/reference/roles-reference.md +1 -0
  36. package/docs/reference/skill-tiers.md +147 -0
  37. package/docs/reference/tooling-cli.md +5 -1
  38. package/docs/truth-claims.yaml +18 -0
  39. package/expertise/antipatterns/process/ai-coding-antipatterns.md +97 -1
  40. package/exports/hosts/claude/.claude/agents/clarifier.md +3 -0
  41. package/exports/hosts/claude/.claude/agents/designer.md +3 -0
  42. package/exports/hosts/claude/.claude/agents/executor.md +2 -0
  43. package/exports/hosts/claude/.claude/agents/planner.md +3 -0
  44. package/exports/hosts/claude/.claude/agents/researcher.md +2 -0
  45. package/exports/hosts/claude/.claude/agents/reviewer.md +5 -1
  46. package/exports/hosts/claude/.claude/agents/specifier.md +3 -0
  47. package/exports/hosts/claude/.claude/commands/clarify.md +4 -0
  48. package/exports/hosts/claude/.claude/commands/design-review.md +4 -0
  49. package/exports/hosts/claude/.claude/commands/design.md +4 -0
  50. package/exports/hosts/claude/.claude/commands/discover.md +4 -0
  51. package/exports/hosts/claude/.claude/commands/execute.md +4 -0
  52. package/exports/hosts/claude/.claude/commands/plan-review.md +4 -0
  53. package/exports/hosts/claude/.claude/commands/plan.md +4 -0
  54. package/exports/hosts/claude/.claude/commands/spec-challenge.md +4 -0
  55. package/exports/hosts/claude/.claude/commands/specify.md +4 -0
  56. package/exports/hosts/claude/.claude/commands/verify.md +4 -0
  57. package/exports/hosts/claude/.claude/settings.json +9 -0
  58. package/exports/hosts/claude/CLAUDE.md +1 -1
  59. package/exports/hosts/claude/export.manifest.json +22 -20
  60. package/exports/hosts/claude/host-package.json +3 -1
  61. package/exports/hosts/codex/AGENTS.md +1 -1
  62. package/exports/hosts/codex/export.manifest.json +22 -20
  63. package/exports/hosts/codex/host-package.json +3 -1
  64. package/exports/hosts/cursor/.cursor/hooks.json +4 -0
  65. package/exports/hosts/cursor/.cursor/rules/wazir-core.mdc +1 -1
  66. package/exports/hosts/cursor/export.manifest.json +22 -20
  67. package/exports/hosts/cursor/host-package.json +3 -1
  68. package/exports/hosts/gemini/GEMINI.md +1 -1
  69. package/exports/hosts/gemini/export.manifest.json +22 -20
  70. package/exports/hosts/gemini/host-package.json +3 -1
  71. package/hooks/context-mode-router +191 -0
  72. package/hooks/definitions/context_mode_router.yaml +19 -0
  73. package/hooks/definitions/loop_cap_guard.yaml +1 -1
  74. package/hooks/hooks.json +43 -0
  75. package/hooks/protected-path-write-guard +8 -0
  76. package/hooks/routing-matrix.json +45 -0
  77. package/hooks/session-start +62 -1
  78. package/llms-full.txt +905 -132
  79. package/package.json +3 -3
  80. package/roles/clarifier.md +3 -0
  81. package/roles/designer.md +3 -0
  82. package/roles/executor.md +2 -0
  83. package/roles/planner.md +3 -0
  84. package/roles/researcher.md +2 -0
  85. package/roles/reviewer.md +5 -1
  86. package/roles/specifier.md +3 -0
  87. package/schemas/hook.schema.json +2 -1
  88. package/schemas/phase-report.schema.json +80 -0
  89. package/schemas/usage.schema.json +25 -1
  90. package/schemas/wazir-manifest.schema.json +19 -0
  91. package/skills/brainstorming/SKILL.md +20 -56
  92. package/skills/clarifier/SKILL.md +243 -0
  93. package/skills/claude-cli/SKILL.md +320 -0
  94. package/skills/codex-cli/SKILL.md +260 -0
  95. package/skills/debugging/SKILL.md +24 -1
  96. package/skills/design/SKILL.md +13 -0
  97. package/skills/dispatching-parallel-agents/SKILL.md +13 -0
  98. package/skills/executing-plans/SKILL.md +28 -2
  99. package/skills/executor/SKILL.md +129 -0
  100. package/skills/finishing-a-development-branch/SKILL.md +13 -0
  101. package/skills/gemini-cli/SKILL.md +260 -0
  102. package/skills/humanize/SKILL.md +13 -0
  103. package/skills/init-pipeline/SKILL.md +76 -78
  104. package/skills/prepare-next/SKILL.md +81 -10
  105. package/skills/receiving-code-review/SKILL.md +21 -0
  106. package/skills/requesting-code-review/SKILL.md +38 -5
  107. package/skills/reviewer/SKILL.md +423 -0
  108. package/skills/run-audit/SKILL.md +13 -0
  109. package/skills/scan-project/SKILL.md +13 -0
  110. package/skills/self-audit/SKILL.md +197 -16
  111. package/skills/subagent-driven-development/SKILL.md +38 -2
  112. package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +2 -0
  113. package/skills/subagent-driven-development/implementer-prompt.md +8 -0
  114. package/skills/subagent-driven-development/spec-reviewer-prompt.md +7 -0
  115. package/skills/tdd/SKILL.md +21 -0
  116. package/skills/using-git-worktrees/SKILL.md +13 -0
  117. package/skills/using-skills/SKILL.md +13 -0
  118. package/skills/verification/SKILL.md +13 -0
  119. package/skills/wazir/SKILL.md +286 -262
  120. package/skills/writing-plans/SKILL.md +44 -4
  121. package/skills/writing-skills/SKILL.md +13 -0
  122. package/templates/artifacts/implementation-plan.md +3 -0
  123. package/templates/artifacts/tasks-template.md +133 -0
  124. package/templates/examples/phase-report.example.json +48 -0
  125. package/templates/examples/wazir-manifest.example.yaml +1 -1
  126. package/tooling/src/adapters/composition-engine.js +256 -0
  127. package/tooling/src/adapters/model-router.js +84 -0
  128. package/tooling/src/capture/command.js +111 -2
  129. package/tooling/src/capture/run-config.js +23 -0
  130. package/tooling/src/capture/store.js +24 -0
  131. package/tooling/src/capture/usage.js +106 -0
  132. package/tooling/src/checks/ac-matrix.js +256 -0
  133. package/tooling/src/checks/brand-truth.js +3 -6
  134. package/tooling/src/checks/command-registry.js +13 -0
  135. package/tooling/src/checks/docs-truth.js +1 -1
  136. package/tooling/src/checks/runtime-surface.js +3 -7
  137. package/tooling/src/checks/skills.js +111 -0
  138. package/tooling/src/cli.js +17 -3
  139. package/tooling/src/commands/stats.js +161 -0
  140. package/tooling/src/commands/validate.js +5 -1
  141. package/tooling/src/export/compiler.js +33 -37
  142. package/tooling/src/gating/agent.js +145 -0
  143. package/tooling/src/guards/phase-prerequisite-guard.js +127 -0
  144. package/tooling/src/hooks/routing-logic.js +69 -0
  145. package/tooling/src/init/auto-detect.js +260 -0
  146. package/tooling/src/init/command.js +161 -0
  147. package/tooling/src/input/scanner.js +46 -0
  148. package/tooling/src/reports/command.js +103 -0
  149. package/tooling/src/reports/phase-report.js +323 -0
  150. package/tooling/src/state/command.js +160 -0
  151. package/tooling/src/state/db.js +287 -0
  152. package/tooling/src/status/command.js +53 -1
  153. package/wazir.manifest.yaml +26 -17
  154. package/workflows/clarify.md +4 -0
  155. package/workflows/design-review.md +4 -0
  156. package/workflows/design.md +4 -0
  157. package/workflows/discover.md +4 -0
  158. package/workflows/execute.md +4 -0
  159. package/workflows/plan-review.md +4 -0
  160. package/workflows/plan.md +4 -0
  161. package/workflows/spec-challenge.md +4 -0
  162. package/workflows/specify.md +4 -0
  163. package/workflows/verify.md +4 -0
@@ -5,15 +5,31 @@ description: Use after clarification, research, and design approval to create an
5
5
 
6
6
  # Writing Plans
7
7
 
8
+ ## Command Routing
9
+ Follow the Canonical Command Matrix in `hooks/routing-matrix.json`.
10
+ - Large commands (test runners, builds, diffs, dependency trees, linting) → context-mode tools
11
+ - Small commands (git status, ls, pwd, wazir CLI) → native Bash
12
+ - If context-mode unavailable, fall back to native Bash with warning
13
+
14
+ ## Codebase Exploration
15
+ 1. Query `wazir index search-symbols <query>` first
16
+ 2. Use `wazir recall file <path> --tier L1` for targeted reads
17
+ 3. Fall back to direct file reads ONLY for files identified by index queries
18
+ 4. Maximum 10 direct file reads without a justifying index query
19
+ 5. If no index exists: `wazir index build && wazir index summarize --tier all`
20
+
8
21
  Inputs:
9
22
 
10
23
  - approved design or approved clarified direction
11
24
  - current repo state
12
25
  - relevant research findings
13
26
 
14
- Output:
27
+ Output path:
15
28
 
16
- - one implementation plan in `docs/plans/YYYY-MM-DD-<topic>-implementation.md`
29
+ - **Inside a pipeline run** (`.wazir/runs/latest/` exists): write to `.wazir/runs/latest/clarified/execution-plan.md` and task specs to `.wazir/runs/latest/tasks/task-NNN/spec.md`
30
+ - **Standalone** (no active run): write to `docs/plans/YYYY-MM-DD-<topic>-implementation.md`
31
+
32
+ To detect an active run, check whether `.wazir/runs/latest/clarified/` exists. If it does, use the run paths; otherwise use the standalone path.
17
33
 
18
34
  The plan must include:
19
35
 
@@ -25,6 +41,30 @@ The plan must include:
25
41
 
26
42
  Rules:
27
43
 
28
- - do not write repo-local task files outside the plan directory
29
- - do not rely on retired `run-*` workflow wrappers
44
+ - do not write implementation code during planning
30
45
  - make the plan detailed enough that a weaker model can execute it without inventing missing steps
46
+ - each task spec must have testable acceptance criteria, not vague descriptions
47
+
48
+ ## Plan Review Loop
49
+
50
+ After writing the plan, invoke `wz:reviewer --mode plan-review` to run the plan-review loop using plan dimensions (see `workflows/plan-review.md` and `docs/reference/review-loop-pattern.md`). Do NOT call `codex exec` or `codex review` directly — the reviewer skill handles Codex integration internally.
51
+
52
+ The planner resolves findings from each pass. The loop runs for `pass_counts[depth]` passes (quick=3, standard=5, deep=7). The pass count is fixed and is never extended.
53
+
54
+ For non-code artifacts (the plan itself), Codex review uses stdin pipe:
55
+
56
+ ```bash
57
+ CODEX_MODEL=$(jq -r '.multi_tool.codex.model // empty' .wazir/state/config.json 2>/dev/null)
58
+ CODEX_MODEL=${CODEX_MODEL:-gpt-5.4}
59
+ cat <plan-path> | codex exec -c model="$CODEX_MODEL" "Review this implementation plan focusing on [dimension]..."
60
+ ```
61
+
62
+ `codex review -c model="$CODEX_MODEL"` is used only for code artifacts, not plans.
63
+
64
+ Codex error handling: if `codex` exits non-zero, log the error, mark the pass as `codex-unavailable`, and use self-review findings only. Never treat a Codex failure as a clean pass.
65
+
66
+ Loop depth follows the project's depth config (quick/standard/deep).
67
+
68
+ Standalone mode: if no `.wazir/runs/latest/` exists, artifacts go to `docs/plans/` and review logs go alongside (`docs/plans/YYYY-MM-DD-<topic>-review-pass-N.md`). Loop cap guard is not invoked in standalone mode.
69
+
70
+ After the loop completes, present findings summary and wait for user approval before completing.
@@ -5,6 +5,19 @@ description: Use when creating new skills, editing existing skills, or verifying
5
5
 
6
6
  # Writing Skills
7
7
 
8
+ ## Command Routing
9
+ Follow the Canonical Command Matrix in `hooks/routing-matrix.json`.
10
+ - Large commands (test runners, builds, diffs, dependency trees, linting) → context-mode tools
11
+ - Small commands (git status, ls, pwd, wazir CLI) → native Bash
12
+ - If context-mode unavailable, fall back to native Bash with warning
13
+
14
+ ## Codebase Exploration
15
+ 1. Query `wazir index search-symbols <query>` first
16
+ 2. Use `wazir recall file <path> --tier L1` for targeted reads
17
+ 3. Fall back to direct file reads ONLY for files identified by index queries
18
+ 4. Maximum 10 direct file reads without a justifying index query
19
+ 5. If no index exists: `wazir index build && wazir index summarize --tier all`
20
+
8
21
  ## Overview
9
22
 
10
23
  **Writing skills IS Test-Driven Development applied to process documentation.**
@@ -16,6 +16,9 @@ approval_status: required
16
16
 
17
17
  ## Tasks And Subtasks
18
18
 
19
+ Use the spec-kit task format defined in `templates/artifacts/tasks-template.md`.
20
+ The execution plan produced by `wz:writing-plans` follows this template.
21
+
19
22
  ## Acceptance Criteria
20
23
 
21
24
  ## Verification
@@ -0,0 +1,133 @@
1
+ ---
2
+ artifact_type: execution_plan
3
+ phase: plan
4
+ role: planner
5
+ run_id: <run-id>
6
+ loop: 1
7
+ status: draft
8
+ sources:
9
+ - <approved-spec>
10
+ - <approved-design>
11
+ approval_status: required
12
+ template_ref: tasks-template
13
+ ---
14
+
15
+ # Execution Plan: <Project Title>
16
+
17
+ ## Constitution Check
18
+ - Approved spec: `<path-to-spec>`
19
+ - Approved design: `<path-to-design>`
20
+ - Branch: `<branch-name>`
21
+ - Depth: <quick|standard|deep>
22
+
23
+ ## MVP Strategy
24
+ 1. Complete Phase 1 (Setup) + Phase 2 (Foundational)
25
+ 2. Complete first User Story phase → test independently → deploy/demo (MVP!)
26
+ 3. Add stories incrementally — each adds value without breaking previous
27
+
28
+ ## Dependency Graph
29
+ ```
30
+ <task-id> → <task-id> (describe dependency)
31
+ ```
32
+
33
+ ---
34
+
35
+ ## Phase 1: Setup
36
+
37
+ **Goal:** Project scaffolding — directories, configs, stubs.
38
+
39
+ - [ ] T001 Setup task description with `path/to/file`
40
+ - [ ] T002 [P] Parallel setup task with `another/path`
41
+
42
+ ---
43
+
44
+ ## Phase 2: Foundational
45
+
46
+ **Goal:** Core infrastructure that BLOCKS all user stories. Must complete before any story phase.
47
+
48
+ - [ ] T003 Foundational task with `path/to/file`
49
+ - [ ] T004 [P] Parallel foundational task with `another/file`
50
+
51
+ **Independent test:** Describe how to verify this phase independently.
52
+
53
+ ---
54
+
55
+ ## Phase 3: User Story 1 — [US1] <Story Title>
56
+
57
+ **Goal:** <What this story delivers to the user>
58
+
59
+ **Independent test criteria:**
60
+ - <How to verify this story works end-to-end without other stories>
61
+
62
+ **Implementation tasks:**
63
+ - [ ] T005 [US1] Task description with `path/to/file`
64
+ - [ ] T006 [P] [US1] Parallel task with `another/file`
65
+ - [ ] T007 [US1] Task description with `path/to/file`
66
+
67
+ ---
68
+
69
+ ## Phase 4: User Story 2 — [US2] <Story Title>
70
+
71
+ **Goal:** <What this story delivers>
72
+
73
+ **Independent test criteria:**
74
+ - <Verification approach>
75
+
76
+ **Implementation tasks:**
77
+ - [ ] T008 [US2] Task with `path/to/file`
78
+ - [ ] T009 [P] [US2] Parallel task with `path/to/file`
79
+
80
+ ---
81
+
82
+ ## Phase N: Polish & Cross-Cutting
83
+
84
+ **Goal:** Final integration, documentation, cleanup.
85
+
86
+ - [ ] T0XX Polish task with `path/to/file`
87
+
88
+ ---
89
+
90
+ ## Cross-cutting Constraints
91
+
92
+ | ID | Constraint | When verified |
93
+ |----|-----------|--------------|
94
+ | CC1 | <constraint> | <timing> |
95
+
96
+ ## Task Summary
97
+
98
+ | Phase | Tasks | Stories | Size | Execution |
99
+ |-------|-------|---------|------|-----------|
100
+ | 1: Setup | T001-T002 | — | S | Serial |
101
+ | 2: Foundational | T003-T004 | — | M | T003∥T004 |
102
+ | 3: [US1] | T005-T007 | US1 | M | Serial |
103
+ | 4: [US2] | T008-T009 | US2 | M | T008→T009 |
104
+ | N: Polish | T0XX | — | S | Serial |
105
+
106
+ ## Format Reference
107
+
108
+ **Task line format:**
109
+ ```
110
+ - [ ] [TaskID] [P?] [Story?] Description with `file/path`
111
+ ```
112
+
113
+ - `- [ ]` — checkbox (always present)
114
+ - `TaskID` — sequential: T001, T002, T003...
115
+ - `[P]` — parallel marker: ONLY if task can run simultaneously with adjacent tasks (different files, no dependencies)
116
+ - `[US1]` — user story label: maps to story phases from the spec
117
+ - Description — clear action with exact file path in backticks
118
+
119
+ **Phase rules:**
120
+ - Phase 1 (Setup): project init, scaffolding
121
+ - Phase 2 (Foundational): BLOCKS all user stories — must complete first
122
+ - Phase 3+: one phase per user story, in priority order from spec
123
+ - Final phase: polish, cross-cutting, documentation
124
+
125
+ **Task ordering within phases:**
126
+ - Models before services, services before endpoints
127
+ - Each user story phase has: goal, independent test criteria, implementation tasks
128
+ - Tasks organized by user story, NOT by layer
129
+
130
+ **Parallel markers:**
131
+ - `[P]` means this task can run simultaneously with the next `[P]` task
132
+ - Only use when tasks edit different files with no shared dependencies
133
+ - Adjacent `[P]` tasks form a parallel group
@@ -0,0 +1,48 @@
1
+ {
2
+ "phase_name": "verify",
3
+ "run_id": "run-2026-03-11-example",
4
+ "timestamp": "2026-03-11T13:00:00Z",
5
+ "attempted_actions": [
6
+ {
7
+ "description": "Run full test suite against implementation artifacts",
8
+ "outcome": "success",
9
+ "evidence": "42 tests passed, 0 failed"
10
+ },
11
+ {
12
+ "description": "Check lint compliance across all changed files",
13
+ "outcome": "success",
14
+ "evidence": "0 lint errors found"
15
+ }
16
+ ],
17
+ "drift_analysis": {
18
+ "delta": 0,
19
+ "description": "No drift detected between spec requirements and implementation."
20
+ },
21
+ "quality_metrics": {
22
+ "test_pass_count": 42,
23
+ "test_fail_count": 0,
24
+ "lint_errors": 0,
25
+ "type_errors": 0
26
+ },
27
+ "risk_flags": [
28
+ {
29
+ "severity": "low",
30
+ "description": "One acceptance criterion is verified by a single unit test only.",
31
+ "mitigation": "Add an integration test covering the same criterion in the next phase."
32
+ }
33
+ ],
34
+ "decisions": [
35
+ {
36
+ "description": "Proceed to review without additional verification loops.",
37
+ "rationale": "All tests pass and drift delta is zero.",
38
+ "alternatives_considered": [
39
+ "Run a second verification loop with stricter coverage thresholds"
40
+ ],
41
+ "source": "verifier"
42
+ }
43
+ ],
44
+ "verdict_recommendation": {
45
+ "verdict": "continue",
46
+ "reasoning": "All quality metrics are within acceptable thresholds and no high-severity risks were identified."
47
+ }
48
+ }
@@ -49,7 +49,7 @@ protected_paths:
49
49
  - input
50
50
  - exports/hosts
51
51
  prohibited_terms:
52
- - agent-os
52
+ - legacy-name
53
53
  adapters:
54
54
  context_mode:
55
55
  enabled_by_default: false
@@ -0,0 +1,256 @@
1
+ import crypto from 'node:crypto';
2
+ import fs from 'node:fs';
3
+ import path from 'node:path';
4
+
5
+ import { readYamlFile } from '../loaders.js';
6
+ import { estimateTokens } from '../capture/usage.js';
7
+
8
// Token budget applied by composeExpertise when the caller passes no tokenCeiling.
const DEFAULT_TOKEN_CEILING = 50000;

// Upper bound on how many expertise modules one composed prompt may include.
const MODULE_CAP = 15;
10
+
11
/**
 * Look up the module paths that one layer of the composition map contributes
 * for a role.
 *
 * @param {object} map - Parsed composition map (`always`, `auto`, `stacks`, `concerns` sections).
 * @param {string} layer - Which layer to resolve: 'always', 'auto', 'stacks' or 'concerns'.
 * @param {string} role - Role whose entries are wanted.
 * @param {string[]} stacks - Stack names consulted for the 'stacks' layer.
 * @param {string[]} concerns - Concern names consulted for the 'concerns' layer.
 * @returns {{path: string, layer: string}[]} Entries in map order; empty for an unrecognised layer.
 */
function resolveLayer(map, layer, role, stacks, concerns) {
  const collected = [];

  // Tag every path in a (possibly missing) list with the layer being resolved.
  const collect = (paths) => {
    for (const modulePath of paths ?? []) {
      collected.push({ path: modulePath, layer });
    }
  };

  switch (layer) {
    case 'always':
      collect(map.always?.[role]);
      break;

    case 'auto': {
      const shared = map.auto?.['all-stacks'];
      if (shared) {
        // Entries for every role come first, then the role-specific ones.
        collect(shared['all-roles']);
        collect(shared[role]);
      }
      break;
    }

    case 'stacks':
      for (const stackName of stacks) {
        const definition = map.stacks?.[stackName];
        if (!definition) continue;
        collect(definition[role]);
        // Only the verifier and reviewer roles also receive the stack's antipatterns.
        if (role === 'verifier' || role === 'reviewer') {
          collect(definition.antipatterns);
        }
      }
      break;

    case 'concerns':
      for (const concernName of concerns) {
        const definition = map.concerns?.[concernName];
        if (!definition) continue;
        collect(definition[role]);
      }
      break;

    default:
      break;
  }

  return collected;
}
73
+
74
/**
 * Collapse modules that share a `path`, keeping the earliest one so that
 * higher-priority layers (which are resolved first) win.
 *
 * @param {{path: string}[]} modules - Modules in priority order.
 * @returns {object[]} The first module seen for each distinct path, in original order.
 */
function deduplicateModules(modules) {
  // A Map keeps insertion order, so values() yields first occurrences in order.
  const firstByPath = new Map();
  for (const candidate of modules) {
    if (!firstByPath.has(candidate.path)) {
      firstByPath.set(candidate.path, candidate);
    }
  }
  return [...firstByPath.values()];
}
88
+
89
/**
 * Build the expertise prompt for a role from the project's composition map.
 *
 * Modules are resolved layer by layer (always, auto, stacks, concerns),
 * de-duplicated by path, read from `<projectRoot>/expertise`, and then trimmed
 * to fit the module cap and the token ceiling. Modules that cannot be read are
 * skipped and reported on stderr. A proof artifact describing the outcome is
 * written through writeProofArtifact before returning.
 *
 * @param {object} options
 * @param {string} options.role - The role (executor, verifier, reviewer, etc.)
 * @param {string[]} [options.stacks] - Detected project stacks (e.g. ['node', 'react'])
 * @param {string[]} [options.concerns] - Declared task concerns (e.g. ['rtl', 'security-auth'])
 * @param {string} options.projectRoot - Absolute path to the project root
 * @param {string} options.runRoot - Absolute path to the run root for artifact output
 * @param {string} [options.task] - Task identifier used in the proof artifact name (default 'default')
 * @param {number} [options.tokenCeiling] - Max token budget (default DEFAULT_TOKEN_CEILING)
 * @returns {{ prompt: string, manifest: object }}
 */
export function composeExpertise(options) {
  const {
    role,
    stacks = [],
    concerns = [],
    projectRoot,
    runRoot,
    task = 'default',
    tokenCeiling = DEFAULT_TOKEN_CEILING,
  } = options;

  const mapPath = path.join(projectRoot, 'expertise', 'composition-map.yaml');
  const map = readYamlFile(mapPath);
  const expertiseRoot = path.join(projectRoot, 'expertise');

  // Resolution order doubles as priority order: earlier layers win on duplicates.
  const candidates = deduplicateModules(
    ['always', 'auto', 'stacks', 'concerns'].flatMap((layer) =>
      resolveLayer(map, layer, role, stacks, concerns),
    ),
  );

  // Load each module's text and estimate its token cost from its byte size.
  const loaded = [];
  const warnings = [];
  for (const candidate of candidates) {
    const fullPath = path.join(expertiseRoot, candidate.path);
    try {
      const content = fs.readFileSync(fullPath, 'utf8');
      const tokens = estimateTokens(Buffer.byteLength(content, 'utf8'));
      loaded.push({ ...candidate, content, tokens, fullPath });
    } catch (err) {
      warnings.push(`warning: skipping missing module ${candidate.path}: ${err.message}`);
    }
  }

  // Budget enforcement sheds layers lowest priority first; 'always' goes last.
  const dropOrder = ['concerns', 'stacks', 'auto', 'always'];
  let included = [...loaded];
  let dropped = [];

  // Move the given modules from `included` to `dropped`, recording why.
  const discard = (victims, reason) => {
    for (const victim of victims) victim.drop_reason = reason;
    dropped = dropped.concat(victims);
    included = included.filter((mod) => !victims.includes(mod));
  };
  const sumTokens = (mods) => mods.reduce((sum, mod) => sum + mod.tokens, 0);

  if (included.length > MODULE_CAP) {
    discard(enforceLimit(included, MODULE_CAP, dropOrder), 'module_cap_exceeded');
  }

  let totalTokens = sumTokens(included);
  if (totalTokens > tokenCeiling) {
    discard(enforceTokenBudget(included, tokenCeiling, dropOrder), 'token_ceiling_exceeded');
    totalTokens = sumTokens(included);
  }

  // Each module is prefixed with a marker comment; modules are separated by rules.
  const prompt = included
    .map((mod) => `<!-- module: ${mod.path} (${mod.layer}) -->\n${mod.content}`)
    .join('\n\n---\n\n');

  const manifest = {
    modules_included: included.map((m) => ({ path: m.path, layer: m.layer, tokens: m.tokens })),
    modules_dropped: dropped.map((m) => ({ path: m.path, layer: m.layer, tokens: m.tokens, reason: m.drop_reason })),
    total_tokens: totalTokens,
    prompt_hash: crypto.createHash('sha256').update(prompt).digest('hex'),
  };

  for (const line of warnings) {
    process.stderr.write(`${line}\n`);
  }

  writeProofArtifact(runRoot, role, task, manifest);

  return { prompt, manifest };
}
194
+
195
/**
 * Pick the modules to discard so that at most `limit` remain.
 *
 * Layers are visited in `dropOrder`; within a layer the module that appears
 * last in `modules` is discarded first.
 *
 * @param {{layer: string}[]} modules - Current module list, in priority order.
 * @param {number} limit - Maximum number of modules allowed to remain.
 * @param {string[]} dropOrder - Layer names, most expendable first.
 * @returns {object[]} The modules to drop, in the order they were chosen.
 */
function enforceLimit(modules, limit, dropOrder) {
  const victims = [];
  let remaining = modules.length;

  for (const layer of dropOrder) {
    if (remaining <= limit) break;
    // Popping from the end of the layer's modules discards the last-added first.
    const stack = modules.filter((candidate) => candidate.layer === layer);
    while (stack.length > 0 && !(remaining <= limit)) {
      victims.push(stack.pop());
      remaining -= 1;
    }
  }

  return victims;
}
215
+
216
/**
 * Pick the modules to discard so that the summed `tokens` fits under `ceiling`.
 *
 * Layers are visited in `dropOrder`; within a layer the module that appears
 * last in `modules` is discarded first. Dropping stops as soon as the running
 * total is at or below the ceiling.
 *
 * @param {{layer: string, tokens: number}[]} modules - Current module list, in priority order.
 * @param {number} ceiling - Maximum total tokens allowed to remain.
 * @param {string[]} dropOrder - Layer names, most expendable first.
 * @returns {object[]} The modules to drop, in the order they were chosen.
 */
function enforceTokenBudget(modules, ceiling, dropOrder) {
  const victims = [];
  let budgetUsed = modules.reduce((sum, mod) => sum + mod.tokens, 0);

  for (const layer of dropOrder) {
    if (budgetUsed <= ceiling) break;
    const stack = modules.filter(
      (candidate) => candidate.layer === layer && !victims.includes(candidate),
    );
    while (stack.length > 0 && !(budgetUsed <= ceiling)) {
      const victim = stack.pop();
      victims.push(victim);
      budgetUsed -= victim.tokens;
    }
  }

  return victims;
}
235
+
236
/**
 * Persist a JSON record of a composition under `<runRoot>/artifacts`.
 *
 * The file is named `composition-<role>-<task>.json` and holds a timestamp,
 * the role, the task and every field of the manifest. This is best-effort:
 * any failure is reported on stderr and never thrown to the caller.
 *
 * @param {string} runRoot - Run root directory; `artifacts/` is created inside it if needed.
 * @param {string} role - Role the composition was built for.
 * @param {string} task - Task identifier.
 * @param {object} manifest - Composition manifest to spread into the record.
 * @returns {void}
 */
function writeProofArtifact(runRoot, role, task, manifest) {
  try {
    const targetDir = path.join(runRoot, 'artifacts');
    fs.mkdirSync(targetDir, { recursive: true });

    const serialized = JSON.stringify(
      {
        generated_at: new Date().toISOString(),
        role,
        task,
        ...manifest,
      },
      null,
      2,
    );

    fs.writeFileSync(
      path.join(targetDir, `composition-${role}-${task}.json`),
      `${serialized}\n`,
    );
  } catch (err) {
    process.stderr.write(`warning: failed to write composition proof artifact: ${err.message}\n`);
  }
}
@@ -0,0 +1,84 @@
1
/**
 * Model routing table — maps task types to recommended models.
 * @type {Object<string, {model: string, reason: string}>}
 */
const MODEL_ROUTING_TABLE = {
  // Mechanical tasks — Haiku
  'fetch-url': { model: 'haiku', reason: 'Mechanical, no reasoning needed' },
  'write-handoff': { model: 'haiku', reason: 'Structured file operations' },
  'compress-archive': { model: 'haiku', reason: 'File manipulation' },

  // Comprehension tasks — Sonnet
  'read-summarize': { model: 'sonnet', reason: 'Comprehension, not deep reasoning' },
  'write-implementation': { model: 'sonnet', reason: 'Good spec + plan = mechanical coding' },
  'task-review': { model: 'sonnet', reason: 'Diff review against clear spec' },
  'extract-learnings': { model: 'sonnet', reason: 'Structured extraction' },
  'internal-review': { model: 'sonnet', reason: 'Pattern matching against expertise' },
  'run-tests': { model: 'sonnet', reason: 'Test execution and analysis' },

  // Judgment tasks — Opus
  'orchestrate': { model: 'opus', reason: 'Needs judgment and coordination' },
  'spec-harden': { model: 'opus', reason: 'Adversarial thinking required' },
  'design': { model: 'opus', reason: 'Creativity + architecture decisions' },
  'final-review': { model: 'opus', reason: 'Holistic judgment against original input' },
  'brainstorm': { model: 'opus', reason: 'Creative exploration' },
  'plan': { model: 'opus', reason: 'Strategic task decomposition' },
};

/**
 * Own-property lookup that ignores anything inherited from Object.prototype.
 *
 * Task types are arbitrary strings, so a plain `table[key]` would treat names
 * such as 'constructor' or 'toString' as matches.
 *
 * @param {object} table - Object to read from.
 * @param {string} key - Property name to look up.
 * @returns {*} The own value, or undefined when `key` is not an own property.
 */
function ownEntry(table, key) {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/**
 * Get the recommended model for a task type.
 *
 * If multi-model mode is not enabled, returns `{ model: 'inherit' }`.
 * If config contains `model_overrides`, those take precedence over the
 * default routing table. Unknown task types fall back to 'opus' (safe
 * default — never under-model a task).
 *
 * @param {string} taskType - one of the keys in MODEL_ROUTING_TABLE
 * @param {object} config - project config (may override routing)
 * @returns {{model: string, reason: string, overridden: boolean}}
 */
export function getModelForTask(taskType, config = {}) {
  if (!isMultiModelEnabled(config)) {
    return { model: 'inherit', reason: 'Multi-model mode not enabled', overridden: false };
  }

  // Config-level overrides win over the built-in table.
  const override = ownEntry(config.model_overrides ?? {}, taskType);
  if (override) {
    return {
      model: override.model ?? 'opus',
      reason: override.reason ?? 'Config override',
      overridden: true,
    };
  }

  // Fall back to the default routing table.
  const entry = ownEntry(MODEL_ROUTING_TABLE, taskType);
  if (entry) {
    return { model: entry.model, reason: entry.reason, overridden: false };
  }

  // Unknown task type — safe default is opus (never under-model)
  return { model: 'opus', reason: 'Unknown task type — safe default', overridden: false };
}

/**
 * Check if multi-model mode is enabled.
 *
 * A missing or null config counts as "not enabled".
 *
 * @param {object} config
 * @returns {boolean}
 */
export function isMultiModelEnabled(config = {}) {
  return config?.model_mode === 'multi-model';
}

/**
 * Get all routing decisions for logging/stats.
 *
 * Each entry is copied, so callers cannot mutate the module's table.
 *
 * @returns {Object<string, {model: string, reason: string}>}
 */
export function getRoutingTable() {
  const copy = {};
  for (const [key, value] of Object.entries(MODEL_ROUTING_TABLE)) {
    copy[key] = { ...value };
  }
  return copy;
}