aiwcli 0.10.1 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/dist/commands/clean.js +1 -0
  2. package/dist/commands/clear.d.ts +19 -2
  3. package/dist/commands/clear.js +351 -160
  4. package/dist/commands/init/index.d.ts +1 -17
  5. package/dist/commands/init/index.js +19 -104
  6. package/dist/lib/gitignore-manager.d.ts +9 -0
  7. package/dist/lib/gitignore-manager.js +121 -0
  8. package/dist/lib/template-installer.d.ts +7 -12
  9. package/dist/lib/template-installer.js +69 -193
  10. package/dist/lib/template-settings-reconstructor.d.ts +35 -0
  11. package/dist/lib/template-settings-reconstructor.js +130 -0
  12. package/dist/templates/_shared/hooks/__pycache__/archive_plan.cpython-313.pyc +0 -0
  13. package/dist/templates/_shared/hooks/__pycache__/session_end.cpython-313.pyc +0 -0
  14. package/dist/templates/_shared/hooks/archive_plan.py +10 -2
  15. package/dist/templates/_shared/hooks/session_end.py +37 -29
  16. package/dist/templates/_shared/lib/base/__pycache__/hook_utils.cpython-313.pyc +0 -0
  17. package/dist/templates/_shared/lib/base/__pycache__/inference.cpython-313.pyc +0 -0
  18. package/dist/templates/_shared/lib/base/__pycache__/logger.cpython-313.pyc +0 -0
  19. package/dist/templates/_shared/lib/base/__pycache__/stop_words.cpython-313.pyc +0 -0
  20. package/dist/templates/_shared/lib/base/__pycache__/utils.cpython-313.pyc +0 -0
  21. package/dist/templates/_shared/lib/base/hook_utils.py +8 -10
  22. package/dist/templates/_shared/lib/base/inference.py +51 -62
  23. package/dist/templates/_shared/lib/base/logger.py +35 -21
  24. package/dist/templates/_shared/lib/base/stop_words.py +8 -0
  25. package/dist/templates/_shared/lib/base/utils.py +29 -8
  26. package/dist/templates/_shared/lib/context/__pycache__/plan_manager.cpython-313.pyc +0 -0
  27. package/dist/templates/_shared/lib/context/plan_manager.py +101 -2
  28. package/dist/templates/_shared/lib-ts/base/atomic-write.ts +138 -0
  29. package/dist/templates/_shared/lib-ts/base/constants.ts +299 -0
  30. package/dist/templates/_shared/lib-ts/base/git-state.ts +58 -0
  31. package/dist/templates/_shared/lib-ts/base/hook-utils.ts +360 -0
  32. package/dist/templates/_shared/lib-ts/base/inference.ts +245 -0
  33. package/dist/templates/_shared/lib-ts/base/logger.ts +234 -0
  34. package/dist/templates/_shared/lib-ts/base/state-io.ts +114 -0
  35. package/dist/templates/_shared/lib-ts/base/stop-words.ts +184 -0
  36. package/dist/templates/_shared/lib-ts/base/subprocess-utils.ts +23 -0
  37. package/dist/templates/_shared/lib-ts/base/utils.ts +184 -0
  38. package/dist/templates/_shared/lib-ts/context/context-formatter.ts +432 -0
  39. package/dist/templates/_shared/lib-ts/context/context-selector.ts +497 -0
  40. package/dist/templates/_shared/lib-ts/context/context-store.ts +679 -0
  41. package/dist/templates/_shared/lib-ts/context/plan-manager.ts +292 -0
  42. package/dist/templates/_shared/lib-ts/context/task-tracker.ts +181 -0
  43. package/dist/templates/_shared/lib-ts/handoff/document-generator.ts +215 -0
  44. package/dist/templates/_shared/lib-ts/package.json +21 -0
  45. package/dist/templates/_shared/lib-ts/templates/formatters.ts +102 -0
  46. package/dist/templates/_shared/lib-ts/templates/plan-context.ts +65 -0
  47. package/dist/templates/_shared/lib-ts/tsconfig.json +13 -0
  48. package/dist/templates/_shared/lib-ts/types.ts +151 -0
  49. package/dist/templates/_shared/scripts/__pycache__/status_line.cpython-313.pyc +0 -0
  50. package/dist/templates/_shared/scripts/save_handoff.ts +359 -0
  51. package/dist/templates/_shared/scripts/status_line.py +17 -2
  52. package/dist/templates/cc-native/_cc-native/agents/ARCH-EVOLUTION.md +63 -0
  53. package/dist/templates/cc-native/_cc-native/agents/ARCH-PATTERNS.md +62 -0
  54. package/dist/templates/cc-native/_cc-native/agents/ARCH-STRUCTURE.md +63 -0
  55. package/dist/templates/cc-native/_cc-native/agents/{ASSUMPTION-CHAIN-TRACER.md → ASSUMPTION-TRACER.md} +6 -10
  56. package/dist/templates/cc-native/_cc-native/agents/CLARITY-AUDITOR.md +6 -10
  57. package/dist/templates/cc-native/_cc-native/agents/CLAUDE.md +74 -1
  58. package/dist/templates/cc-native/_cc-native/agents/COMPLETENESS-FEASIBILITY.md +67 -0
  59. package/dist/templates/cc-native/_cc-native/agents/COMPLETENESS-GAPS.md +71 -0
  60. package/dist/templates/cc-native/_cc-native/agents/COMPLETENESS-ORDERING.md +63 -0
  61. package/dist/templates/cc-native/_cc-native/agents/CONSTRAINT-VALIDATOR.md +73 -0
  62. package/dist/templates/cc-native/_cc-native/agents/DESIGN-ADR-VALIDATOR.md +62 -0
  63. package/dist/templates/cc-native/_cc-native/agents/DESIGN-SCALE-MATCHER.md +65 -0
  64. package/dist/templates/cc-native/_cc-native/agents/DEVILS-ADVOCATE.md +6 -9
  65. package/dist/templates/cc-native/_cc-native/agents/DOCUMENTATION-PHILOSOPHY.md +87 -0
  66. package/dist/templates/cc-native/_cc-native/agents/HANDOFF-READINESS.md +5 -9
  67. package/dist/templates/cc-native/_cc-native/agents/{HIDDEN-COMPLEXITY-DETECTOR.md → HIDDEN-COMPLEXITY.md} +6 -10
  68. package/dist/templates/cc-native/_cc-native/agents/INCREMENTAL-DELIVERY.md +67 -0
  69. package/dist/templates/cc-native/_cc-native/agents/PLAN-ORCHESTRATOR.md +91 -18
  70. package/dist/templates/cc-native/_cc-native/agents/RISK-DEPENDENCY.md +63 -0
  71. package/dist/templates/cc-native/_cc-native/agents/RISK-FMEA.md +67 -0
  72. package/dist/templates/cc-native/_cc-native/agents/RISK-PREMORTEM.md +72 -0
  73. package/dist/templates/cc-native/_cc-native/agents/RISK-REVERSIBILITY.md +75 -0
  74. package/dist/templates/cc-native/_cc-native/agents/SCOPE-BOUNDARY.md +78 -0
  75. package/dist/templates/cc-native/_cc-native/agents/SIMPLICITY-GUARDIAN.md +5 -9
  76. package/dist/templates/cc-native/_cc-native/agents/SKEPTIC.md +16 -12
  77. package/dist/templates/cc-native/_cc-native/agents/TESTDRIVEN-BEHAVIOR-AUDITOR.md +62 -0
  78. package/dist/templates/cc-native/_cc-native/agents/TESTDRIVEN-CHARACTERIZATION.md +72 -0
  79. package/dist/templates/cc-native/_cc-native/agents/TESTDRIVEN-FIRST-VALIDATOR.md +62 -0
  80. package/dist/templates/cc-native/_cc-native/agents/TESTDRIVEN-PYRAMID-ANALYZER.md +62 -0
  81. package/dist/templates/cc-native/_cc-native/agents/TRADEOFF-COSTS.md +68 -0
  82. package/dist/templates/cc-native/_cc-native/agents/TRADEOFF-STAKEHOLDERS.md +66 -0
  83. package/dist/templates/cc-native/_cc-native/agents/VERIFY-COVERAGE.md +75 -0
  84. package/dist/templates/cc-native/_cc-native/agents/VERIFY-STRENGTH.md +70 -0
  85. package/dist/templates/cc-native/_cc-native/hooks/__pycache__/cc-native-plan-review.cpython-313.pyc +0 -0
  86. package/dist/templates/cc-native/_cc-native/hooks/cc-native-plan-review.py +125 -40
  87. package/dist/templates/cc-native/_cc-native/lib/__pycache__/utils.cpython-313.pyc +0 -0
  88. package/dist/templates/cc-native/_cc-native/lib/utils.py +57 -13
  89. package/dist/templates/cc-native/_cc-native/plan-review.config.json +11 -7
  90. package/oclif.manifest.json +17 -2
  91. package/package.json +1 -1
  92. package/dist/lib/template-merger.d.ts +0 -47
  93. package/dist/lib/template-merger.js +0 -162
  94. package/dist/templates/cc-native/_cc-native/agents/ACCESSIBILITY-TESTER.md +0 -79
  95. package/dist/templates/cc-native/_cc-native/agents/ARCHITECT-REVIEWER.md +0 -48
  96. package/dist/templates/cc-native/_cc-native/agents/CODE-REVIEWER.md +0 -70
  97. package/dist/templates/cc-native/_cc-native/agents/COMPLETENESS-CHECKER.md +0 -59
  98. package/dist/templates/cc-native/_cc-native/agents/CONTEXT-EXTRACTOR.md +0 -92
  99. package/dist/templates/cc-native/_cc-native/agents/DOCUMENTATION-REVIEWER.md +0 -51
  100. package/dist/templates/cc-native/_cc-native/agents/FEASIBILITY-ANALYST.md +0 -57
  101. package/dist/templates/cc-native/_cc-native/agents/FRESH-PERSPECTIVE.md +0 -54
  102. package/dist/templates/cc-native/_cc-native/agents/INCENTIVE-MAPPER.md +0 -61
  103. package/dist/templates/cc-native/_cc-native/agents/PENETRATION-TESTER.md +0 -79
  104. package/dist/templates/cc-native/_cc-native/agents/PERFORMANCE-ENGINEER.md +0 -75
  105. package/dist/templates/cc-native/_cc-native/agents/PRECEDENT-FINDER.md +0 -70
  106. package/dist/templates/cc-native/_cc-native/agents/REVERSIBILITY-ANALYST.md +0 -61
  107. package/dist/templates/cc-native/_cc-native/agents/RISK-ASSESSOR.md +0 -58
  108. package/dist/templates/cc-native/_cc-native/agents/SECOND-ORDER-ANALYST.md +0 -61
  109. package/dist/templates/cc-native/_cc-native/agents/STAKEHOLDER-ADVOCATE.md +0 -55
  110. package/dist/templates/cc-native/_cc-native/agents/TRADE-OFF-ILLUMINATOR.md +0 -204
@@ -0,0 +1,66 @@
1
+ ---
2
+ name: tradeoff-stakeholders
3
+ description: Stakeholder impact analyst who identifies asymmetries in who benefits and who bears costs from plan decisions. Catches decisions where one group gains at another's expense without acknowledgment.
4
+ model: sonnet
5
+ focus: stakeholder impact and cost-benefit asymmetry
6
+ enabled: false
7
+ categories:
8
+ - code
9
+ - infrastructure
10
+ - documentation
11
+ - design
12
+ - research
13
+ - life
14
+ - business
15
+ ---
16
+
17
+ # Trade-off Stakeholders - Plan Review Agent
18
+
19
+ You identify who wins and who loses. Your question: "Who benefits from this decision, and who bears the cost?"
20
+
21
+ ## Your Core Principle
22
+
23
+ Every decision distributes costs and benefits asymmetrically. The team that chooses "move fast" is deciding that future maintainers will bear the technical debt. The architect who picks a new framework is deciding that the team will invest learning time. Plans that ignore stakeholder asymmetry create surprise, resentment, and resistance during implementation. Making the distribution explicit enables consent rather than imposition.
24
+
25
+ ## Your Expertise
26
+
27
+ - **Beneficiary identification**: Who gains from this decision? (implementers, users, maintainers, operators, business stakeholders)
28
+ - **Cost-bearer identification**: Who pays the price? (different team, future self, end users, operators)
29
+ - **Asymmetry detection**: Decisions where those who benefit are different from those who pay
30
+ - **Consent vs. imposition**: Are cost-bearers aware of and agreeable to the costs they will bear?
31
+ - **Time-shifted costs**: Costs paid by future maintainers or operators rather than current implementers
32
+
33
+ ## Review Approach
34
+
35
+ For each major decision in the plan:
36
+
37
+ 1. **Identify all stakeholders**: Who is affected by this decision? (implementers, reviewers, users, operators, maintainers, dependent teams)
38
+ 2. **Map benefits**: Which stakeholders gain, and what do they gain?
39
+ 3. **Map costs**: Which stakeholders bear costs, and what costs?
40
+ 4. **Detect asymmetries**: Are the beneficiaries different from the cost-bearers?
41
+ 5. **Assess acknowledgment**: Does the plan acknowledge who bears the costs?
42
+
43
+ ## Key Distinction
44
+
45
+ | Agent | Asks |
46
+ |-------|------|
47
+ | tradeoff-costs | "What are you giving up to get this?" |
48
+ | **tradeoff-stakeholders** | **"Who wins and who loses from this decision?"** |
49
+
50
+ ## CRITICAL: Single-Turn Review
51
+
52
+ When reviewing a plan:
53
+ 1. Analyze the plan content provided directly (do not use Read, Glob, Grep, or any file tools)
54
+ 2. Call StructuredOutput immediately with your assessment
55
+ 3. Complete your entire review in one response
56
+
57
+ Avoid querying external systems, reading codebase files, requesting additional information, or asking follow-up questions.
58
+
59
+ ## Required Output
60
+
61
+ Call StructuredOutput with exactly these fields:
62
+ - **verdict**: "pass" (stakeholder impacts acknowledged), "warn" (some asymmetries unaddressed), or "fail" (significant stakeholder costs imposed without acknowledgment)
63
+ - **summary**: 2-3 sentences explaining stakeholder impact assessment (minimum 20 characters)
64
+ - **issues**: Array of stakeholder concerns, each with: severity (high/medium/low), category (e.g., "stakeholder-asymmetry", "unacknowledged-cost", "time-shifted-cost", "consent-gap", "beneficiary-mismatch"), issue description, suggested_fix (acknowledge impact, involve affected stakeholders, or redistribute costs)
65
+ - **missing_sections**: Stakeholder considerations the plan should address (affected parties, cost distribution, consent mechanisms)
66
+ - **questions**: Stakeholder impacts that need explicit acknowledgment
@@ -0,0 +1,75 @@
1
+ ---
2
+ name: verify-coverage
3
+ description: Test coverage mapper who ensures every implementation step has a corresponding verification step. Catches changes with no testing, verification gaps, and the common pattern of testing happy paths while ignoring error paths.
4
+ model: sonnet
5
+ focus: verification coverage mapping
6
+ enabled: false
7
+ categories:
8
+ - code
9
+ - infrastructure
10
+ - documentation
11
+ - design
12
+ - research
13
+ - life
14
+ - business
15
+ ---
16
+
17
+ # Verify Coverage - Plan Review Agent
18
+
19
+ You map implementation steps to verification steps. Your question: "Is every change covered by a verification step?"
20
+
21
+ ## Your Core Principle
22
+
23
+ A plan without adequate verification is a plan that assumes success. The most dangerous gap is not a missing feature — it is a missing test. Every implementation step that lacks a corresponding verification step is a step where failure will go undetected. Coverage mapping ensures that every item in "what we change" is matched by at least one item in "how we confirm it worked."
24
+
25
+ ## Your Expertise
26
+
27
+ - **Coverage gap detection**: Implementation steps with no corresponding verification
28
+ - **Happy path bias**: Verification that only tests the success case, ignoring error and edge cases
29
+ - **Verification specificity**: Are verification steps concrete enough to execute without interpretation?
30
+ - **Regression awareness**: Do verification steps confirm existing functionality still works after the change?
31
+ - **Coverage completeness**: Does the verification plan cover all dimensions of the change (functionality, performance, security)?
32
+
33
+ ## Review Approach
34
+
35
+ Build a coverage map between implementation and verification:
36
+
37
+ 1. **List all implementation steps**: Every change the plan makes
38
+ 2. **List all verification steps**: Every check the plan includes
39
+ 3. **Map 1:1**: For each implementation step, identify its verification step(s)
40
+ 4. **Find gaps**: Implementation steps with no verification
41
+ 5. **Assess coverage quality**: Do verification steps test the right things?
42
+
43
+ ## Verification Coverage Levels
44
+
45
+ | Level | Description | Example |
46
+ |-------|-------------|---------|
47
+ | **Full** | Every change verified with specific criteria | "Run `pytest test_auth.py -k test_token_expiry` — 3 tests pass" |
48
+ | **Partial** | Some changes verified, others assumed | "Run the auth tests" (misses schema change verification) |
49
+ | **Minimal** | Only overall functionality checked | "Verify it works" |
50
+ | **None** | Implementation step has no verification | Change with no corresponding check |
51
+
52
+ ## Key Distinction
53
+
54
+ | Agent | Asks |
55
+ |-------|------|
56
+ | verify-strength | "Would these tests catch a subtle bug?" |
57
+ | **verify-coverage** | **"Is every change covered by a verification step?"** |
58
+
59
+ ## CRITICAL: Single-Turn Review
60
+
61
+ When reviewing a plan:
62
+ 1. Analyze the plan content provided directly (do not use Read, Glob, Grep, or any file tools)
63
+ 2. Call StructuredOutput immediately with your assessment
64
+ 3. Complete your entire review in one response
65
+
66
+ Avoid querying external systems, reading codebase files, requesting additional information, or asking follow-up questions.
67
+
68
+ ## Required Output
69
+
70
+ Call StructuredOutput with exactly these fields:
71
+ - **verdict**: "pass" (verification covers all changes), "warn" (some gaps in verification coverage), or "fail" (critical changes without verification)
72
+ - **summary**: 2-3 sentences explaining verification coverage assessment (minimum 20 characters)
73
+ - **issues**: Array of coverage concerns, each with: severity (high/medium/low), category (e.g., "missing-verification", "happy-path-only", "weak-verification", "no-regression-check"), issue description, suggested_fix (specific verification step to add)
74
+ - **missing_sections**: Verification gaps the plan should address (untested changes, missing edge cases, absent regression checks)
75
+ - **questions**: Verification aspects that need clarification
@@ -0,0 +1,70 @@
1
+ ---
2
+ name: verify-strength
3
+ description: Test quality analyst who evaluates whether verification steps would catch subtle bugs, not just total failures. Uses mutation testing logic to assess whether tests distinguish correct from almost-correct implementations.
4
+ model: sonnet
5
+ focus: test quality and mutation analysis
6
+ enabled: false
7
+ categories:
8
+ - code
9
+ - infrastructure
10
+ ---
11
+
12
+ # Verify Strength - Plan Review Agent
13
+
14
+ You evaluate the quality of verification steps. Your question: "Would these tests catch a subtle bug, or only a total failure?"
15
+
16
+ ## Your Core Principle
17
+
18
+ Mutation testing (DeMillo et al. 1978) reveals test strength by asking: "If I introduced a small bug, would the tests catch it?" Weak tests pass on both correct and incorrect implementations. Strong tests fail when the implementation is wrong in any way. A plan with 100% coverage but weak assertions is less safe than a plan with 50% coverage but strong assertions.
19
+
20
+ ## Your Expertise
21
+
22
+ - **Assertion strength evaluation**: Do verification steps check specific expected values, or just "no error"?
23
+ - **Mutation sensitivity**: Would a small change to the implementation (off-by-one, wrong variable, swapped condition) be caught?
24
+ - **Boundary testing**: Do tests exercise boundary conditions where bugs cluster?
25
+ - **Negative testing**: Do tests verify that invalid inputs are rejected, not just that valid inputs succeed?
26
+ - **State verification**: Do tests check the full resulting state, or just the return value?
27
+
28
+ ## Review Approach
29
+
30
+ For each verification step in the plan, apply mutation logic:
31
+
32
+ 1. **Identify what is being verified**: What specific behavior does this test confirm?
33
+ 2. **Apply mental mutations**: If the implementation had an off-by-one error, wrong variable, or swapped condition, would this test catch it?
34
+ 3. **Evaluate assertion specificity**: Does the test check a specific expected value, or just "it runs without error"?
35
+ 4. **Check boundary coverage**: Are edge cases and boundary values tested?
36
+ 5. **Assess negative testing**: Are failure cases and invalid inputs covered?
37
+
38
+ ## Test Strength Levels
39
+
40
+ | Level | Test Behavior | Example |
41
+ |-------|---------------|---------|
42
+ | **Strong** | Fails on any mutation to the implementation | Checks specific values, boundaries, and error cases |
43
+ | **Moderate** | Catches major bugs but misses subtle ones | Checks return type and approximate value |
44
+ | **Weak** | Only catches total failure | "Assert no error" or "assert result is not null" |
45
+ | **Absent** | No verification at all | Implementation change with no test |
46
+
47
+ ## Key Distinction
48
+
49
+ | Agent | Asks |
50
+ |-------|------|
51
+ | verify-coverage | "Is every change covered by a verification step?" |
52
+ | **verify-strength** | **"Would these tests catch a subtle bug?"** |
53
+
54
+ ## CRITICAL: Single-Turn Review
55
+
56
+ When reviewing a plan:
57
+ 1. Analyze the plan content provided directly (do not use Read, Glob, Grep, or any file tools)
58
+ 2. Call StructuredOutput immediately with your assessment
59
+ 3. Complete your entire review in one response
60
+
61
+ Avoid querying external systems, reading codebase files, requesting additional information, or asking follow-up questions.
62
+
63
+ ## Required Output
64
+
65
+ Call StructuredOutput with exactly these fields:
66
+ - **verdict**: "pass" (tests would catch subtle bugs), "warn" (some weak assertions), or "fail" (tests would miss common bug patterns)
67
+ - **summary**: 2-3 sentences explaining test strength assessment (minimum 20 characters)
68
+ - **issues**: Array of strength concerns, each with: severity (high/medium/low), category (e.g., "weak-assertion", "no-boundary-test", "missing-negative-test", "mutation-survivor", "state-unchecked"), issue description, suggested_fix (strengthen specific assertion or add test case)
69
+ - **missing_sections**: Test strength improvements the plan should address (boundary tests, negative tests, specific assertions)
70
+ - **questions**: Test quality aspects that need clarification
@@ -69,6 +69,7 @@ try:
69
69
  write_combined_artifacts,
70
70
  build_inline_review_summary,
71
71
  extract_top_issues_text,
72
+ build_high_issues_document,
72
73
  load_config,
73
74
  get_display_settings,
74
75
  )
@@ -133,16 +134,44 @@ def skip_with_info(reason: str) -> int:
133
134
  # ---------------------------
134
135
 
135
136
  DEFAULT_AGENTS: List[Dict[str, Any]] = [
136
- {"name": "architect-reviewer", "model": "sonnet", "focus": "architectural concerns and scalability", "enabled": True, "categories": ["code", "infrastructure", "design"]},
137
- {"name": "penetration-tester", "model": "sonnet", "focus": "security vulnerabilities and attack vectors", "enabled": True, "categories": ["code", "infrastructure"]},
138
- {"name": "performance-engineer", "model": "sonnet", "focus": "performance bottlenecks and optimization", "enabled": True, "categories": ["code", "infrastructure"]},
139
- {"name": "accessibility-tester", "model": "sonnet", "focus": "accessibility compliance and UX concerns", "enabled": True, "categories": ["code", "design"]},
137
+ # Mandatory agents
138
+ {"name": "handoff-readiness", "model": "sonnet", "focus": "fresh context execution readiness", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
139
+ {"name": "clarity-auditor", "model": "sonnet", "focus": "communication clarity and execution readiness", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
140
+ {"name": "skeptic", "model": "sonnet", "focus": "problem-solution alignment and assumption validation", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
141
+ {"name": "documentation-philosophy", "model": "sonnet", "focus": "knowledge capture and documentation placement", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
142
+ # Risk family
143
+ {"name": "risk-premortem", "model": "sonnet", "focus": "pre-mortem failure analysis", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
144
+ {"name": "risk-fmea", "model": "sonnet", "focus": "systematic failure mode analysis", "enabled": True, "categories": ["code", "infrastructure", "design"]},
145
+ {"name": "risk-dependency", "model": "sonnet", "focus": "dependency chain and blast radius analysis", "enabled": True, "categories": ["code", "infrastructure"]},
146
+ {"name": "risk-reversibility", "model": "sonnet", "focus": "decision reversibility and optionality", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
147
+ # Completeness family
148
+ {"name": "completeness-gaps", "model": "sonnet", "focus": "structural gap analysis", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
149
+ {"name": "completeness-feasibility", "model": "sonnet", "focus": "feasibility and resource analysis", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
150
+ {"name": "completeness-ordering", "model": "sonnet", "focus": "step ordering and critical path analysis", "enabled": True, "categories": ["code", "infrastructure", "design"]},
151
+ # Architecture family
152
+ {"name": "arch-structure", "model": "sonnet", "focus": "coupling, cohesion, and boundary analysis", "enabled": True, "categories": ["code", "infrastructure", "design"]},
153
+ {"name": "arch-evolution", "model": "sonnet", "focus": "evolutionary architecture and change amplification", "enabled": True, "categories": ["code", "infrastructure", "design"]},
154
+ {"name": "arch-patterns", "model": "sonnet", "focus": "pattern selection and technology fit", "enabled": True, "categories": ["code", "infrastructure"]},
155
+ # Verification family
156
+ {"name": "verify-coverage", "model": "sonnet", "focus": "verification coverage mapping", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
157
+ {"name": "verify-strength", "model": "sonnet", "focus": "test quality and mutation analysis", "enabled": True, "categories": ["code", "infrastructure"]},
158
+ # Trade-off family
159
+ {"name": "tradeoff-costs", "model": "sonnet", "focus": "opportunity cost and capability sacrifice", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
160
+ {"name": "tradeoff-stakeholders", "model": "sonnet", "focus": "stakeholder impact and cost-benefit asymmetry", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
161
+ # Standalone agents
162
+ {"name": "scope-boundary", "model": "sonnet", "focus": "scope drift and boundary enforcement", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
163
+ {"name": "hidden-complexity", "model": "sonnet", "focus": "understated complexity and hidden difficulty", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
164
+ {"name": "simplicity-guardian", "model": "sonnet", "focus": "over-engineering and unnecessary complexity", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
165
+ {"name": "devils-advocate", "model": "sonnet", "focus": "contrarian analysis and reductio ad absurdum", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
166
+ {"name": "assumption-tracer", "model": "sonnet", "focus": "dependency chains and foundational assumptions", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
167
+ {"name": "incremental-delivery", "model": "sonnet", "focus": "incremental delivery and vertical slicing", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
168
+ {"name": "constraint-validator", "model": "sonnet", "focus": "constraint identification and satisfaction", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
140
169
  ]
141
170
 
142
171
  DEFAULT_ORCHESTRATOR: Dict[str, Any] = {
143
172
  "enabled": True,
144
- "model": "haiku",
145
- "timeout": 30,
173
+ "model": "opus",
174
+ "timeout": 60,
146
175
  }
147
176
 
148
177
  DEFAULT_AGENT_MODEL: str = "sonnet"
@@ -154,6 +183,30 @@ DEFAULT_REVIEW_ITERATIONS: Dict[str, int] = {
154
183
  }
155
184
 
156
185
 
186
+ def resolve_mandatory_agents(config_value, complexity: str) -> set:
187
+ """Resolve mandatory agent names based on config format and complexity.
188
+
189
+ Supports two formats:
190
+ - Legacy (list): ["a", "b"] — all treated as 'always'
191
+ - Structured (dict): {"always": [...], "medium+": [...], "high": [...]}
192
+ """
193
+ if isinstance(config_value, list):
194
+ return set(config_value)
195
+
196
+ if not isinstance(config_value, dict):
197
+ return {"handoff-readiness", "clarity-auditor", "skeptic"}
198
+
199
+ names = set(config_value.get("always", []))
200
+
201
+ if complexity in ("medium", "high"):
202
+ names.update(config_value.get("medium+", []))
203
+
204
+ if complexity == "high":
205
+ names.update(config_value.get("high", []))
206
+
207
+ return names
208
+
209
+
157
210
  # ---------------------------
158
211
  # Context-based State Management
159
212
  # ---------------------------
@@ -358,6 +411,7 @@ def load_settings(proj_dir: Path) -> Dict[str, Any]:
358
411
  "orchestrator": DEFAULT_ORCHESTRATOR.copy(),
359
412
  "timeout": 180,
360
413
  "warnThreshold": 0.5,
414
+ "highIssueThreshold": 3,
361
415
  "legacyMode": False,
362
416
  "display": DEFAULT_DISPLAY.copy(),
363
417
  "agentSelection": DEFAULT_AGENT_SELECTION.copy(),
@@ -567,10 +621,14 @@ def main() -> int:
567
621
  timeout=orch_settings.get("timeout", 30),
568
622
  )
569
623
 
570
- # Compute mandatory agent names early so orchestrator can exclude them
571
- mandatory_names = set(agent_settings.get("mandatoryAgents", [
624
+ # Two-phase mandatory resolution:
625
+ # Phase 1 (pre-orchestrator): Only "always" mandatory agents excluded from orchestrator pool
626
+ # Phase 2 (post-orchestrator): Full mandatory set including conditional agents
627
+ mandatory_config = agent_settings.get("mandatoryAgents", [
572
628
  "handoff-readiness", "clarity-auditor", "skeptic"
573
- ]))
629
+ ])
630
+ always_mandatory = resolve_mandatory_agents(mandatory_config, "simple")
631
+ mandatory_names = always_mandatory
574
632
 
575
633
  log_debug("cc-native-plan-review", f"Codex enabled: {codex_enabled}, Gemini enabled: {gemini_enabled}")
576
634
  log_debug("cc-native-plan-review", f"Agent library: {[a.name for a in agent_library]}")
@@ -585,7 +643,7 @@ def main() -> int:
585
643
  if gemini_enabled:
586
644
  phase1_tasks.append(("gemini", lambda: run_gemini_review(plan, REVIEW_SCHEMA, plan_settings)))
587
645
  if orchestrator_config.enabled and enabled_agents and not legacy_mode:
588
- phase1_tasks.append(("orchestrator", lambda: run_orchestrator(plan, enabled_agents, orchestrator_config, agent_settings, mandatory_names=mandatory_names)))
646
+ phase1_tasks.append(("orchestrator", lambda: run_orchestrator(plan, enabled_agents, orchestrator_config, agent_settings, mandatory_names=always_mandatory)))
589
647
 
590
648
  log_info("cc-native-plan-review", f"=== PHASE 1: Running {len(phase1_tasks)} tasks in parallel ===")
591
649
 
@@ -605,12 +663,8 @@ def main() -> int:
605
663
  # Collect CLI results
606
664
  if "codex" in phase1_results and phase1_results["codex"]:
607
665
  cli_results["codex"] = phase1_results["codex"]
608
- if phase1_results["codex"].verdict and phase1_results["codex"].verdict not in ("skip", "error"):
609
- all_verdicts.append(phase1_results["codex"].verdict)
610
666
  if "gemini" in phase1_results and phase1_results["gemini"]:
611
667
  cli_results["gemini"] = phase1_results["gemini"]
612
- if phase1_results["gemini"].verdict and phase1_results["gemini"].verdict not in ("skip", "error"):
613
- all_verdicts.append(phase1_results["gemini"].verdict)
614
668
 
615
669
  # Get orchestrator result
616
670
  if "orchestrator" in phase1_results and phase1_results["orchestrator"]:
@@ -640,6 +694,11 @@ def main() -> int:
640
694
  if orch_result and not legacy_mode:
641
695
  detected_complexity = orch_result.complexity
642
696
 
697
+ # Phase 2: Recompute mandatory set with actual complexity
698
+ mandatory_names = resolve_mandatory_agents(mandatory_config, detected_complexity)
699
+ mandatory_agents = [a for a in enabled_agents if a.name in mandatory_names]
700
+ non_mandatory = [a for a in enabled_agents if a.name not in mandatory_names]
701
+
643
702
  # Get orchestrator's additional selections (excluding mandatory since they always run)
644
703
  orch_selected_names = set(orch_result.selected_agents) - mandatory_names
645
704
  orch_selected = [a for a in non_mandatory if a.name in orch_selected_names]
@@ -666,8 +725,9 @@ def main() -> int:
666
725
  log_info("cc-native-plan-review", f"Final selection: {len(selected_agents)} agents ({len(mandatory_agents)} mandatory + {len(orch_selected)} additional)")
667
726
  else:
668
727
  log_info("cc-native-plan-review", "Running in legacy mode (all enabled agents)")
669
- selected_agents = enabled_agents
670
728
  detected_complexity = "medium" # Default for legacy mode
729
+ mandatory_names = resolve_mandatory_agents(mandatory_config, detected_complexity)
730
+ selected_agents = enabled_agents
671
731
 
672
732
  log_diagnostic("cc-native-plan-review", "decide",
673
733
  f"Selected {len(selected_agents)} agents, complexity={detected_complexity}",
@@ -706,8 +766,6 @@ def main() -> int:
706
766
  try:
707
767
  result = future.result()
708
768
  agent_results[agent.name] = result
709
- if result.verdict and result.verdict not in ("skip", "error"):
710
- all_verdicts.append(result.verdict)
711
769
  log_info("cc-native-plan-review", f"{agent.name} completed with verdict: {result.verdict}")
712
770
  except Exception as ex:
713
771
  log_error("cc-native-plan-review", f"{agent.name} failed with exception: {ex}")
@@ -720,6 +778,25 @@ def main() -> int:
720
778
  err=str(ex),
721
779
  )
722
780
 
781
+ # ============================================
782
+ # Per-agent high-severity threshold: override verdict to "fail" if threshold met
783
+ # ============================================
784
+ high_issue_threshold = agent_settings.get("highIssueThreshold", 3)
785
+ all_verdicts = [] # Recompute with overrides applied
786
+
787
+ for r in list(cli_results.values()) + list(agent_results.values()):
788
+ if not r.verdict or r.verdict in ("skip", "error"):
789
+ continue
790
+ agent_high = sum(
791
+ 1 for issue in (r.data.get("issues", []) if r.data else [])
792
+ if issue.get("severity") == "high"
793
+ )
794
+ if agent_high >= high_issue_threshold:
795
+ log_info("cc-native-plan-review",
796
+ f"{r.name}: verdict overridden to 'fail' ({agent_high} high issues >= {high_issue_threshold})")
797
+ r.verdict = "fail"
798
+ all_verdicts.append(r.verdict)
799
+
723
800
  # ============================================
724
801
  # PHASE 4: Generate Combined Output
725
802
  # ============================================
@@ -765,33 +842,27 @@ def main() -> int:
765
842
 
766
843
  context_parts = [inline_summary, f"\nFull review: `{review_file}`\n"]
767
844
 
768
- # Review decision — only fail triggers a block
845
+ # Review decision — fail veto triggers a block (per-agent override already applied)
769
846
  warn_threshold = agent_settings.get("warnThreshold", 0.5)
770
- should_deny, deny_reason, review_score = compute_review_decision(all_verdicts, warn_threshold)
771
-
772
- # Count high-severity issues for logging
773
- high_count = sum(
774
- 1 for r in list(combined_result.cli_reviewers.values()) + list(combined_result.agents.values())
775
- if r.data
776
- for issue in r.data.get("issues", [])
777
- if issue.get("severity") == "high"
847
+ should_deny, deny_reason, review_score = compute_review_decision(
848
+ all_verdicts, warn_threshold,
778
849
  )
779
850
 
780
851
  # Structured log entries for review influence tracking
781
- log_info("cc-native-plan-review", f"REVIEW_DECISION: verdict={combined_result.overall_verdict}, deny={should_deny}, score={review_score:.2f}, high_issues={high_count}")
852
+ log_info("cc-native-plan-review", f"REVIEW_DECISION: verdict={combined_result.overall_verdict}, deny={should_deny}, reason={deny_reason}, score={review_score:.2f}")
782
853
  log_diagnostic("cc-native-plan-review", "result",
783
- f"verdict={combined_result.overall_verdict}, deny={should_deny}, high={high_count}",
854
+ f"verdict={combined_result.overall_verdict}, deny={should_deny}, reason={deny_reason}",
784
855
  decision="deny" if should_deny else "allow",
785
- reasoning=f"score={review_score:.2f}, threshold={warn_threshold}",
856
+ reasoning=f"reason={deny_reason}, score={review_score:.2f}, warn_threshold={warn_threshold}",
786
857
  inputs={"overall_verdict": combined_result.overall_verdict,
787
- "high_issue_count": high_count, "review_score": round(review_score, 2),
858
+ "review_score": round(review_score, 2),
788
859
  "cli_count": len(cli_results), "agent_count": len(agent_results)})
789
860
 
790
861
  # Terminal progress indicator
791
862
  verdict_emoji = "✅" if not should_deny else "❌"
792
863
  eprint(f"[plan-review] {verdict_emoji} {combined_result.overall_verdict.upper()} (score={review_score:.2f})")
793
864
  if should_deny:
794
- eprint(f"[plan-review] Blocking ExitPlanMode — {high_count} high-severity issue(s) found")
865
+ eprint(f"[plan-review] Blocking ExitPlanMode — {deny_reason}")
795
866
 
796
867
  # Handle iteration logic
797
868
  needs_more_iterations = False
@@ -809,11 +880,13 @@ def main() -> int:
809
880
  else:
810
881
  # Final iteration - increment current and save state
811
882
  iteration_state["current"] = iteration_state.get("current", 1) + 1
812
- # Also increment max by 1 to allow another review cycle if the user rejects
813
- # the plan and requests changes. Without this, once iterations are exhausted,
814
- # the hook would skip review entirely even if the user sent the
815
- # planner back to revise. This ensures rejected plans can always be re-reviewed.
816
- iteration_state["max"] = iteration_state.get("max", 1) + 1
883
+ # Extend max ONLY when the plan passes review (for user rejection recovery).
884
+ # When the hook denies (should_deny=True), don't extend — the hook will
885
+ # keep blocking on each resubmission via should_deny regardless of max.
886
+ # This prevents max from inflating on repeated hook rejections while still
887
+ # allowing re-review after a user rejects a plan that passed review.
888
+ if not should_deny:
889
+ iteration_state["max"] = iteration_state.get("max", 1) + 1
817
890
  save_iteration_state(reviews_dir, iteration_state)
818
891
 
819
892
  # Emit output with correct Claude Code hook format
@@ -832,29 +905,41 @@ def main() -> int:
832
905
  )
833
906
 
834
907
  if needs_more_iterations:
835
- mark_plan_reviewed(session_id, plan_hash, "cc-native-plan-review", iteration_state, decision="deny")
908
+ mark_plan_reviewed(session_id, plan_hash, "cc-native-plan-review", iteration_state, decision="hook_deny_iteration")
836
909
  current = iteration_state["current"] - 1 # Display the just-completed iteration
837
910
  max_iter = iteration_state["max"]
838
911
  remaining = max_iter - current
839
912
  top_issues_text = extract_top_issues_text(combined_result, max_count=3, severity="high")
913
+ # Two-fold deny signal: inline issues (fallback) + high-issues.md (primary)
914
+ high_issues_doc = build_high_issues_document(combined_result)
915
+ high_issues_path = review_folder / "high-issues.md"
916
+ high_issues_path.write_text(high_issues_doc, encoding="utf-8")
840
917
  emit_context_and_block(
841
918
  context_text,
842
919
  f"Plan review iteration {current}/{max_iter} FAILED ({deny_reason}, score={review_score:.2f}). "
843
920
  f"Critical issues: {top_issues_text}. "
921
+ f"IMPORTANT: Read `{high_issues_path}` for ALL high-severity issues — "
922
+ f"this file contains only the most critical findings, no noise. "
844
923
  f"{_REVIEWER_CAVEAT} "
845
- f"Revise the plan, then call ExitPlanMode again. "
924
+ f"Revise the plan to address these issues, then call ExitPlanMode again. "
846
925
  f"({remaining} revision{'s' if remaining != 1 else ''} remaining) "
847
926
  f"{_RESUBMIT_INSTRUCTION}",
848
927
  )
849
928
  elif should_deny:
850
- mark_plan_reviewed(session_id, plan_hash, "cc-native-plan-review", iteration_state, decision="deny")
929
+ mark_plan_reviewed(session_id, plan_hash, "cc-native-plan-review", iteration_state, decision="hook_deny_final")
851
930
  top_issues_text = extract_top_issues_text(combined_result, max_count=3, severity="high")
931
+ # Two-fold deny signal: inline issues (fallback) + high-issues.md (primary)
932
+ high_issues_doc = build_high_issues_document(combined_result)
933
+ high_issues_path = review_folder / "high-issues.md"
934
+ high_issues_path.write_text(high_issues_doc, encoding="utf-8")
852
935
  emit_context_and_block(
853
936
  context_text,
854
937
  f"Plan review FAILED ({deny_reason}, score={review_score:.2f}). "
855
938
  f"Critical issues: {top_issues_text}. "
939
+ f"IMPORTANT: Read `{high_issues_path}` for ALL high-severity issues — "
940
+ f"this file contains only the most critical findings, no noise. "
856
941
  f"{_REVIEWER_CAVEAT} "
857
- f"Revise the plan, then call ExitPlanMode again. "
942
+ f"Revise the plan to address these issues, then call ExitPlanMode again. "
858
943
  f"{_RESUBMIT_INSTRUCTION}",
859
944
  )
860
945
  else: