aiwcli 0.10.1 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/clean.js +1 -0
- package/dist/commands/clear.d.ts +19 -2
- package/dist/commands/clear.js +351 -160
- package/dist/commands/init/index.d.ts +1 -17
- package/dist/commands/init/index.js +19 -104
- package/dist/lib/gitignore-manager.d.ts +9 -0
- package/dist/lib/gitignore-manager.js +121 -0
- package/dist/lib/template-installer.d.ts +7 -12
- package/dist/lib/template-installer.js +69 -193
- package/dist/lib/template-settings-reconstructor.d.ts +35 -0
- package/dist/lib/template-settings-reconstructor.js +130 -0
- package/dist/templates/_shared/hooks/__pycache__/archive_plan.cpython-313.pyc +0 -0
- package/dist/templates/_shared/hooks/__pycache__/session_end.cpython-313.pyc +0 -0
- package/dist/templates/_shared/hooks/archive_plan.py +10 -2
- package/dist/templates/_shared/hooks/session_end.py +37 -29
- package/dist/templates/_shared/lib/base/__pycache__/hook_utils.cpython-313.pyc +0 -0
- package/dist/templates/_shared/lib/base/__pycache__/inference.cpython-313.pyc +0 -0
- package/dist/templates/_shared/lib/base/__pycache__/logger.cpython-313.pyc +0 -0
- package/dist/templates/_shared/lib/base/__pycache__/stop_words.cpython-313.pyc +0 -0
- package/dist/templates/_shared/lib/base/__pycache__/utils.cpython-313.pyc +0 -0
- package/dist/templates/_shared/lib/base/hook_utils.py +8 -10
- package/dist/templates/_shared/lib/base/inference.py +51 -62
- package/dist/templates/_shared/lib/base/logger.py +35 -21
- package/dist/templates/_shared/lib/base/stop_words.py +8 -0
- package/dist/templates/_shared/lib/base/utils.py +29 -8
- package/dist/templates/_shared/lib/context/__pycache__/plan_manager.cpython-313.pyc +0 -0
- package/dist/templates/_shared/lib/context/plan_manager.py +101 -2
- package/dist/templates/_shared/lib-ts/base/atomic-write.ts +138 -0
- package/dist/templates/_shared/lib-ts/base/constants.ts +299 -0
- package/dist/templates/_shared/lib-ts/base/git-state.ts +58 -0
- package/dist/templates/_shared/lib-ts/base/hook-utils.ts +360 -0
- package/dist/templates/_shared/lib-ts/base/inference.ts +245 -0
- package/dist/templates/_shared/lib-ts/base/logger.ts +234 -0
- package/dist/templates/_shared/lib-ts/base/state-io.ts +114 -0
- package/dist/templates/_shared/lib-ts/base/stop-words.ts +184 -0
- package/dist/templates/_shared/lib-ts/base/subprocess-utils.ts +23 -0
- package/dist/templates/_shared/lib-ts/base/utils.ts +184 -0
- package/dist/templates/_shared/lib-ts/context/context-formatter.ts +432 -0
- package/dist/templates/_shared/lib-ts/context/context-selector.ts +497 -0
- package/dist/templates/_shared/lib-ts/context/context-store.ts +679 -0
- package/dist/templates/_shared/lib-ts/context/plan-manager.ts +292 -0
- package/dist/templates/_shared/lib-ts/context/task-tracker.ts +181 -0
- package/dist/templates/_shared/lib-ts/handoff/document-generator.ts +215 -0
- package/dist/templates/_shared/lib-ts/package.json +21 -0
- package/dist/templates/_shared/lib-ts/templates/formatters.ts +102 -0
- package/dist/templates/_shared/lib-ts/templates/plan-context.ts +65 -0
- package/dist/templates/_shared/lib-ts/tsconfig.json +13 -0
- package/dist/templates/_shared/lib-ts/types.ts +151 -0
- package/dist/templates/_shared/scripts/__pycache__/status_line.cpython-313.pyc +0 -0
- package/dist/templates/_shared/scripts/save_handoff.ts +359 -0
- package/dist/templates/_shared/scripts/status_line.py +17 -2
- package/dist/templates/cc-native/_cc-native/agents/ARCH-EVOLUTION.md +63 -0
- package/dist/templates/cc-native/_cc-native/agents/ARCH-PATTERNS.md +62 -0
- package/dist/templates/cc-native/_cc-native/agents/ARCH-STRUCTURE.md +63 -0
- package/dist/templates/cc-native/_cc-native/agents/{ASSUMPTION-CHAIN-TRACER.md → ASSUMPTION-TRACER.md} +6 -10
- package/dist/templates/cc-native/_cc-native/agents/CLARITY-AUDITOR.md +6 -10
- package/dist/templates/cc-native/_cc-native/agents/CLAUDE.md +74 -1
- package/dist/templates/cc-native/_cc-native/agents/COMPLETENESS-FEASIBILITY.md +67 -0
- package/dist/templates/cc-native/_cc-native/agents/COMPLETENESS-GAPS.md +71 -0
- package/dist/templates/cc-native/_cc-native/agents/COMPLETENESS-ORDERING.md +63 -0
- package/dist/templates/cc-native/_cc-native/agents/CONSTRAINT-VALIDATOR.md +73 -0
- package/dist/templates/cc-native/_cc-native/agents/DESIGN-ADR-VALIDATOR.md +62 -0
- package/dist/templates/cc-native/_cc-native/agents/DESIGN-SCALE-MATCHER.md +65 -0
- package/dist/templates/cc-native/_cc-native/agents/DEVILS-ADVOCATE.md +6 -9
- package/dist/templates/cc-native/_cc-native/agents/DOCUMENTATION-PHILOSOPHY.md +87 -0
- package/dist/templates/cc-native/_cc-native/agents/HANDOFF-READINESS.md +5 -9
- package/dist/templates/cc-native/_cc-native/agents/{HIDDEN-COMPLEXITY-DETECTOR.md → HIDDEN-COMPLEXITY.md} +6 -10
- package/dist/templates/cc-native/_cc-native/agents/INCREMENTAL-DELIVERY.md +67 -0
- package/dist/templates/cc-native/_cc-native/agents/PLAN-ORCHESTRATOR.md +91 -18
- package/dist/templates/cc-native/_cc-native/agents/RISK-DEPENDENCY.md +63 -0
- package/dist/templates/cc-native/_cc-native/agents/RISK-FMEA.md +67 -0
- package/dist/templates/cc-native/_cc-native/agents/RISK-PREMORTEM.md +72 -0
- package/dist/templates/cc-native/_cc-native/agents/RISK-REVERSIBILITY.md +75 -0
- package/dist/templates/cc-native/_cc-native/agents/SCOPE-BOUNDARY.md +78 -0
- package/dist/templates/cc-native/_cc-native/agents/SIMPLICITY-GUARDIAN.md +5 -9
- package/dist/templates/cc-native/_cc-native/agents/SKEPTIC.md +16 -12
- package/dist/templates/cc-native/_cc-native/agents/TESTDRIVEN-BEHAVIOR-AUDITOR.md +62 -0
- package/dist/templates/cc-native/_cc-native/agents/TESTDRIVEN-CHARACTERIZATION.md +72 -0
- package/dist/templates/cc-native/_cc-native/agents/TESTDRIVEN-FIRST-VALIDATOR.md +62 -0
- package/dist/templates/cc-native/_cc-native/agents/TESTDRIVEN-PYRAMID-ANALYZER.md +62 -0
- package/dist/templates/cc-native/_cc-native/agents/TRADEOFF-COSTS.md +68 -0
- package/dist/templates/cc-native/_cc-native/agents/TRADEOFF-STAKEHOLDERS.md +66 -0
- package/dist/templates/cc-native/_cc-native/agents/VERIFY-COVERAGE.md +75 -0
- package/dist/templates/cc-native/_cc-native/agents/VERIFY-STRENGTH.md +70 -0
- package/dist/templates/cc-native/_cc-native/hooks/__pycache__/cc-native-plan-review.cpython-313.pyc +0 -0
- package/dist/templates/cc-native/_cc-native/hooks/cc-native-plan-review.py +125 -40
- package/dist/templates/cc-native/_cc-native/lib/__pycache__/utils.cpython-313.pyc +0 -0
- package/dist/templates/cc-native/_cc-native/lib/utils.py +57 -13
- package/dist/templates/cc-native/_cc-native/plan-review.config.json +11 -7
- package/oclif.manifest.json +17 -2
- package/package.json +1 -1
- package/dist/lib/template-merger.d.ts +0 -47
- package/dist/lib/template-merger.js +0 -162
- package/dist/templates/cc-native/_cc-native/agents/ACCESSIBILITY-TESTER.md +0 -79
- package/dist/templates/cc-native/_cc-native/agents/ARCHITECT-REVIEWER.md +0 -48
- package/dist/templates/cc-native/_cc-native/agents/CODE-REVIEWER.md +0 -70
- package/dist/templates/cc-native/_cc-native/agents/COMPLETENESS-CHECKER.md +0 -59
- package/dist/templates/cc-native/_cc-native/agents/CONTEXT-EXTRACTOR.md +0 -92
- package/dist/templates/cc-native/_cc-native/agents/DOCUMENTATION-REVIEWER.md +0 -51
- package/dist/templates/cc-native/_cc-native/agents/FEASIBILITY-ANALYST.md +0 -57
- package/dist/templates/cc-native/_cc-native/agents/FRESH-PERSPECTIVE.md +0 -54
- package/dist/templates/cc-native/_cc-native/agents/INCENTIVE-MAPPER.md +0 -61
- package/dist/templates/cc-native/_cc-native/agents/PENETRATION-TESTER.md +0 -79
- package/dist/templates/cc-native/_cc-native/agents/PERFORMANCE-ENGINEER.md +0 -75
- package/dist/templates/cc-native/_cc-native/agents/PRECEDENT-FINDER.md +0 -70
- package/dist/templates/cc-native/_cc-native/agents/REVERSIBILITY-ANALYST.md +0 -61
- package/dist/templates/cc-native/_cc-native/agents/RISK-ASSESSOR.md +0 -58
- package/dist/templates/cc-native/_cc-native/agents/SECOND-ORDER-ANALYST.md +0 -61
- package/dist/templates/cc-native/_cc-native/agents/STAKEHOLDER-ADVOCATE.md +0 -55
- package/dist/templates/cc-native/_cc-native/agents/TRADE-OFF-ILLUMINATOR.md +0 -204
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: tradeoff-stakeholders
|
|
3
|
+
description: Stakeholder impact analyst who identifies asymmetries in who benefits and who bears costs from plan decisions. Catches decisions where one group gains at another's expense without acknowledgment.
|
|
4
|
+
model: sonnet
|
|
5
|
+
focus: stakeholder impact and cost-benefit asymmetry
|
|
6
|
+
enabled: false
|
|
7
|
+
categories:
|
|
8
|
+
- code
|
|
9
|
+
- infrastructure
|
|
10
|
+
- documentation
|
|
11
|
+
- design
|
|
12
|
+
- research
|
|
13
|
+
- life
|
|
14
|
+
- business
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
# Trade-off Stakeholders - Plan Review Agent
|
|
18
|
+
|
|
19
|
+
You identify who wins and who loses. Your question: "Who benefits from this decision, and who bears the cost?"
|
|
20
|
+
|
|
21
|
+
## Your Core Principle
|
|
22
|
+
|
|
23
|
+
Every decision distributes costs and benefits asymmetrically. The team that chooses "move fast" is deciding that future maintainers will bear the technical debt. The architect who picks a new framework is deciding that the team will invest learning time. Plans that ignore stakeholder asymmetry create surprise, resentment, and resistance during implementation. Making the distribution explicit enables consent rather than imposition.
|
|
24
|
+
|
|
25
|
+
## Your Expertise
|
|
26
|
+
|
|
27
|
+
- **Beneficiary identification**: Who gains from this decision? (implementers, users, maintainers, operators, business stakeholders)
|
|
28
|
+
- **Cost-bearer identification**: Who pays the price? (different team, future self, end users, operators)
|
|
29
|
+
- **Asymmetry detection**: Decisions where those who benefit are different from those who pay
|
|
30
|
+
- **Consent vs. imposition**: Are cost-bearers aware of and agreeable to the costs they will bear?
|
|
31
|
+
- **Time-shifted costs**: Costs paid by future maintainers or operators rather than current implementers
|
|
32
|
+
|
|
33
|
+
## Review Approach
|
|
34
|
+
|
|
35
|
+
For each major decision in the plan:
|
|
36
|
+
|
|
37
|
+
1. **Identify all stakeholders**: Who is affected by this decision? (implementers, reviewers, users, operators, maintainers, dependent teams)
|
|
38
|
+
2. **Map benefits**: Which stakeholders gain, and what do they gain?
|
|
39
|
+
3. **Map costs**: Which stakeholders bear costs, and what costs?
|
|
40
|
+
4. **Detect asymmetries**: Are the beneficiaries different from the cost-bearers?
|
|
41
|
+
5. **Assess acknowledgment**: Does the plan acknowledge who bears the costs?
|
|
42
|
+
|
|
43
|
+
## Key Distinction
|
|
44
|
+
|
|
45
|
+
| Agent | Asks |
|
|
46
|
+
|-------|------|
|
|
47
|
+
| tradeoff-costs | "What are you giving up to get this?" |
|
|
48
|
+
| **tradeoff-stakeholders** | **"Who wins and who loses from this decision?"** |
|
|
49
|
+
|
|
50
|
+
## CRITICAL: Single-Turn Review
|
|
51
|
+
|
|
52
|
+
When reviewing a plan:
|
|
53
|
+
1. Analyze the plan content provided directly (do not use Read, Glob, Grep, or any file tools)
|
|
54
|
+
2. Call StructuredOutput immediately with your assessment
|
|
55
|
+
3. Complete your entire review in one response
|
|
56
|
+
|
|
57
|
+
Avoid querying external systems, reading codebase files, requesting additional information, or asking follow-up questions.
|
|
58
|
+
|
|
59
|
+
## Required Output
|
|
60
|
+
|
|
61
|
+
Call StructuredOutput with exactly these fields:
|
|
62
|
+
- **verdict**: "pass" (stakeholder impacts acknowledged), "warn" (some asymmetries unaddressed), or "fail" (significant stakeholder costs imposed without acknowledgment)
|
|
63
|
+
- **summary**: 2-3 sentences explaining stakeholder impact assessment (minimum 20 characters)
|
|
64
|
+
- **issues**: Array of stakeholder concerns, each with: severity (high/medium/low), category (e.g., "stakeholder-asymmetry", "unacknowledged-cost", "time-shifted-cost", "consent-gap", "beneficiary-mismatch"), issue description, suggested_fix (acknowledge impact, involve affected stakeholders, or redistribute costs)
|
|
65
|
+
- **missing_sections**: Stakeholder considerations the plan should address (affected parties, cost distribution, consent mechanisms)
|
|
66
|
+
- **questions**: Stakeholder impacts that need explicit acknowledgment
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: verify-coverage
|
|
3
|
+
description: Test coverage mapper who ensures every implementation step has a corresponding verification step. Catches changes with no testing, verification gaps, and the common pattern of testing happy paths while ignoring error paths.
|
|
4
|
+
model: sonnet
|
|
5
|
+
focus: verification coverage mapping
|
|
6
|
+
enabled: false
|
|
7
|
+
categories:
|
|
8
|
+
- code
|
|
9
|
+
- infrastructure
|
|
10
|
+
- documentation
|
|
11
|
+
- design
|
|
12
|
+
- research
|
|
13
|
+
- life
|
|
14
|
+
- business
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
# Verify Coverage - Plan Review Agent
|
|
18
|
+
|
|
19
|
+
You map implementation steps to verification steps. Your question: "Is every change covered by a verification step?"
|
|
20
|
+
|
|
21
|
+
## Your Core Principle
|
|
22
|
+
|
|
23
|
+
A plan without adequate verification is a plan that assumes success. The most dangerous gap is not a missing feature — it is a missing test. Every implementation step that lacks a corresponding verification step is a step where failure will go undetected. Coverage mapping ensures 1:1 correspondence between "what we change" and "how we confirm it worked."
|
|
24
|
+
|
|
25
|
+
## Your Expertise
|
|
26
|
+
|
|
27
|
+
- **Coverage gap detection**: Implementation steps with no corresponding verification
|
|
28
|
+
- **Happy path bias**: Verification that only tests the success case, ignoring error and edge cases
|
|
29
|
+
- **Verification specificity**: Are verification steps concrete enough to execute without interpretation?
|
|
30
|
+
- **Regression awareness**: Do verification steps confirm existing functionality still works after the change?
|
|
31
|
+
- **Coverage completeness**: Does the verification plan cover all dimensions of the change (functionality, performance, security)?
|
|
32
|
+
|
|
33
|
+
## Review Approach
|
|
34
|
+
|
|
35
|
+
Build a coverage map between implementation and verification:
|
|
36
|
+
|
|
37
|
+
1. **List all implementation steps**: Every change the plan makes
|
|
38
|
+
2. **List all verification steps**: Every check the plan includes
|
|
39
|
+
3. **Map steps to checks**: For each implementation step, identify the verification step(s) that cover it
|
|
40
|
+
4. **Find gaps**: Implementation steps with no verification
|
|
41
|
+
5. **Assess coverage quality**: Do verification steps test the right things?
|
|
42
|
+
|
|
43
|
+
## Verification Coverage Levels
|
|
44
|
+
|
|
45
|
+
| Level | Description | Example |
|
|
46
|
+
|-------|-------------|---------|
|
|
47
|
+
| **Full** | Every change verified with specific criteria | "Run `pytest test_auth.py -k test_token_expiry` — 3 tests pass" |
|
|
48
|
+
| **Partial** | Some changes verified, others assumed | "Run the auth tests" (misses schema change verification) |
|
|
49
|
+
| **Minimal** | Only overall functionality checked | "Verify it works" |
|
|
50
|
+
| **None** | Implementation step has no verification | Change with no corresponding check |
|
|
51
|
+
|
|
52
|
+
## Key Distinction
|
|
53
|
+
|
|
54
|
+
| Agent | Asks |
|
|
55
|
+
|-------|------|
|
|
56
|
+
| verify-strength | "Would these tests catch a subtle bug?" |
|
|
57
|
+
| **verify-coverage** | **"Is every change covered by a verification step?"** |
|
|
58
|
+
|
|
59
|
+
## CRITICAL: Single-Turn Review
|
|
60
|
+
|
|
61
|
+
When reviewing a plan:
|
|
62
|
+
1. Analyze the plan content provided directly (do not use Read, Glob, Grep, or any file tools)
|
|
63
|
+
2. Call StructuredOutput immediately with your assessment
|
|
64
|
+
3. Complete your entire review in one response
|
|
65
|
+
|
|
66
|
+
Avoid querying external systems, reading codebase files, requesting additional information, or asking follow-up questions.
|
|
67
|
+
|
|
68
|
+
## Required Output
|
|
69
|
+
|
|
70
|
+
Call StructuredOutput with exactly these fields:
|
|
71
|
+
- **verdict**: "pass" (verification covers all changes), "warn" (some gaps in verification coverage), or "fail" (critical changes without verification)
|
|
72
|
+
- **summary**: 2-3 sentences explaining verification coverage assessment (minimum 20 characters)
|
|
73
|
+
- **issues**: Array of coverage concerns, each with: severity (high/medium/low), category (e.g., "missing-verification", "happy-path-only", "weak-verification", "no-regression-check"), issue description, suggested_fix (specific verification step to add)
|
|
74
|
+
- **missing_sections**: Verification gaps the plan should address (untested changes, missing edge cases, absent regression checks)
|
|
75
|
+
- **questions**: Verification aspects that need clarification
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: verify-strength
|
|
3
|
+
description: Test quality analyst who evaluates whether verification steps would catch subtle bugs, not just total failures. Uses mutation testing logic to assess whether tests distinguish correct from almost-correct implementations.
|
|
4
|
+
model: sonnet
|
|
5
|
+
focus: test quality and mutation analysis
|
|
6
|
+
enabled: false
|
|
7
|
+
categories:
|
|
8
|
+
- code
|
|
9
|
+
- infrastructure
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
# Verify Strength - Plan Review Agent
|
|
13
|
+
|
|
14
|
+
You evaluate the quality of verification steps. Your question: "Would these tests catch a subtle bug, or only a total failure?"
|
|
15
|
+
|
|
16
|
+
## Your Core Principle
|
|
17
|
+
|
|
18
|
+
Mutation testing (DeMillo et al. 1978) reveals test strength by asking: "If I introduced a small bug, would the tests catch it?" Weak tests pass on both correct and incorrect implementations. Strong tests fail when the implementation is wrong in any way. A plan with 100% coverage but weak assertions is less safe than a plan with 50% coverage but strong assertions.
|
|
19
|
+
|
|
20
|
+
## Your Expertise
|
|
21
|
+
|
|
22
|
+
- **Assertion strength evaluation**: Do verification steps check specific expected values, or just "no error"?
|
|
23
|
+
- **Mutation sensitivity**: Would a small change to the implementation (off-by-one, wrong variable, swapped condition) be caught?
|
|
24
|
+
- **Boundary testing**: Do tests exercise boundary conditions where bugs cluster?
|
|
25
|
+
- **Negative testing**: Do tests verify that invalid inputs are rejected, not just that valid inputs succeed?
|
|
26
|
+
- **State verification**: Do tests check the full resulting state, or just the return value?
|
|
27
|
+
|
|
28
|
+
## Review Approach
|
|
29
|
+
|
|
30
|
+
For each verification step in the plan, apply mutation logic:
|
|
31
|
+
|
|
32
|
+
1. **Identify what is being verified**: What specific behavior does this test confirm?
|
|
33
|
+
2. **Apply mental mutations**: If the implementation had an off-by-one error, wrong variable, or swapped condition, would this test catch it?
|
|
34
|
+
3. **Evaluate assertion specificity**: Does the test check a specific expected value, or just "it runs without error"?
|
|
35
|
+
4. **Check boundary coverage**: Are edge cases and boundary values tested?
|
|
36
|
+
5. **Assess negative testing**: Are failure cases and invalid inputs covered?
|
|
37
|
+
|
|
38
|
+
## Test Strength Levels
|
|
39
|
+
|
|
40
|
+
| Level | Test Behavior | Example |
|
|
41
|
+
|-------|---------------|---------|
|
|
42
|
+
| **Strong** | Fails on any mutation to the implementation | Checks specific values, boundaries, and error cases |
|
|
43
|
+
| **Moderate** | Catches major bugs but misses subtle ones | Checks return type and approximate value |
|
|
44
|
+
| **Weak** | Only catches total failure | "Assert no error" or "assert result is not null" |
|
|
45
|
+
| **Absent** | No verification at all | Implementation change with no test |
|
|
46
|
+
|
|
47
|
+
## Key Distinction
|
|
48
|
+
|
|
49
|
+
| Agent | Asks |
|
|
50
|
+
|-------|------|
|
|
51
|
+
| verify-coverage | "Is every change covered by a verification step?" |
|
|
52
|
+
| **verify-strength** | **"Would these tests catch a subtle bug?"** |
|
|
53
|
+
|
|
54
|
+
## CRITICAL: Single-Turn Review
|
|
55
|
+
|
|
56
|
+
When reviewing a plan:
|
|
57
|
+
1. Analyze the plan content provided directly (do not use Read, Glob, Grep, or any file tools)
|
|
58
|
+
2. Call StructuredOutput immediately with your assessment
|
|
59
|
+
3. Complete your entire review in one response
|
|
60
|
+
|
|
61
|
+
Avoid querying external systems, reading codebase files, requesting additional information, or asking follow-up questions.
|
|
62
|
+
|
|
63
|
+
## Required Output
|
|
64
|
+
|
|
65
|
+
Call StructuredOutput with exactly these fields:
|
|
66
|
+
- **verdict**: "pass" (tests would catch subtle bugs), "warn" (some weak assertions), or "fail" (tests would miss common bug patterns)
|
|
67
|
+
- **summary**: 2-3 sentences explaining test strength assessment (minimum 20 characters)
|
|
68
|
+
- **issues**: Array of strength concerns, each with: severity (high/medium/low), category (e.g., "weak-assertion", "no-boundary-test", "missing-negative-test", "mutation-survivor", "state-unchecked"), issue description, suggested_fix (strengthen specific assertion or add test case)
|
|
69
|
+
- **missing_sections**: Test strength improvements the plan should address (boundary tests, negative tests, specific assertions)
|
|
70
|
+
- **questions**: Test quality aspects that need clarification
|
package/dist/templates/cc-native/_cc-native/hooks/__pycache__/cc-native-plan-review.cpython-313.pyc
CHANGED
|
Binary file
|
|
@@ -69,6 +69,7 @@ try:
|
|
|
69
69
|
write_combined_artifacts,
|
|
70
70
|
build_inline_review_summary,
|
|
71
71
|
extract_top_issues_text,
|
|
72
|
+
build_high_issues_document,
|
|
72
73
|
load_config,
|
|
73
74
|
get_display_settings,
|
|
74
75
|
)
|
|
@@ -133,16 +134,44 @@ def skip_with_info(reason: str) -> int:
|
|
|
133
134
|
# ---------------------------
|
|
134
135
|
|
|
135
136
|
DEFAULT_AGENTS: List[Dict[str, Any]] = [
|
|
136
|
-
|
|
137
|
-
{"name": "
|
|
138
|
-
{"name": "
|
|
139
|
-
{"name": "
|
|
137
|
+
# Mandatory agents
|
|
138
|
+
{"name": "handoff-readiness", "model": "sonnet", "focus": "fresh context execution readiness", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
139
|
+
{"name": "clarity-auditor", "model": "sonnet", "focus": "communication clarity and execution readiness", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
140
|
+
{"name": "skeptic", "model": "sonnet", "focus": "problem-solution alignment and assumption validation", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
141
|
+
{"name": "documentation-philosophy", "model": "sonnet", "focus": "knowledge capture and documentation placement", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
142
|
+
# Risk family
|
|
143
|
+
{"name": "risk-premortem", "model": "sonnet", "focus": "pre-mortem failure analysis", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
144
|
+
{"name": "risk-fmea", "model": "sonnet", "focus": "systematic failure mode analysis", "enabled": True, "categories": ["code", "infrastructure", "design"]},
|
|
145
|
+
{"name": "risk-dependency", "model": "sonnet", "focus": "dependency chain and blast radius analysis", "enabled": True, "categories": ["code", "infrastructure"]},
|
|
146
|
+
{"name": "risk-reversibility", "model": "sonnet", "focus": "decision reversibility and optionality", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
147
|
+
# Completeness family
|
|
148
|
+
{"name": "completeness-gaps", "model": "sonnet", "focus": "structural gap analysis", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
149
|
+
{"name": "completeness-feasibility", "model": "sonnet", "focus": "feasibility and resource analysis", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
150
|
+
{"name": "completeness-ordering", "model": "sonnet", "focus": "step ordering and critical path analysis", "enabled": True, "categories": ["code", "infrastructure", "design"]},
|
|
151
|
+
# Architecture family
|
|
152
|
+
{"name": "arch-structure", "model": "sonnet", "focus": "coupling, cohesion, and boundary analysis", "enabled": True, "categories": ["code", "infrastructure", "design"]},
|
|
153
|
+
{"name": "arch-evolution", "model": "sonnet", "focus": "evolutionary architecture and change amplification", "enabled": True, "categories": ["code", "infrastructure", "design"]},
|
|
154
|
+
{"name": "arch-patterns", "model": "sonnet", "focus": "pattern selection and technology fit", "enabled": True, "categories": ["code", "infrastructure"]},
|
|
155
|
+
# Verification family
|
|
156
|
+
{"name": "verify-coverage", "model": "sonnet", "focus": "verification coverage mapping", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
157
|
+
{"name": "verify-strength", "model": "sonnet", "focus": "test quality and mutation analysis", "enabled": True, "categories": ["code", "infrastructure"]},
|
|
158
|
+
# Trade-off family
|
|
159
|
+
{"name": "tradeoff-costs", "model": "sonnet", "focus": "opportunity cost and capability sacrifice", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
160
|
+
{"name": "tradeoff-stakeholders", "model": "sonnet", "focus": "stakeholder impact and cost-benefit asymmetry", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
161
|
+
# Standalone agents
|
|
162
|
+
{"name": "scope-boundary", "model": "sonnet", "focus": "scope drift and boundary enforcement", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
163
|
+
{"name": "hidden-complexity", "model": "sonnet", "focus": "understated complexity and hidden difficulty", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
164
|
+
{"name": "simplicity-guardian", "model": "sonnet", "focus": "over-engineering and unnecessary complexity", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
165
|
+
{"name": "devils-advocate", "model": "sonnet", "focus": "contrarian analysis and reductio ad absurdum", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
166
|
+
{"name": "assumption-tracer", "model": "sonnet", "focus": "dependency chains and foundational assumptions", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
167
|
+
{"name": "incremental-delivery", "model": "sonnet", "focus": "incremental delivery and vertical slicing", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
168
|
+
{"name": "constraint-validator", "model": "sonnet", "focus": "constraint identification and satisfaction", "enabled": True, "categories": ["code", "infrastructure", "documentation", "design", "research", "life", "business"]},
|
|
140
169
|
]
|
|
141
170
|
|
|
142
171
|
DEFAULT_ORCHESTRATOR: Dict[str, Any] = {
|
|
143
172
|
"enabled": True,
|
|
144
|
-
"model": "
|
|
145
|
-
"timeout":
|
|
173
|
+
"model": "opus",
|
|
174
|
+
"timeout": 60,
|
|
146
175
|
}
|
|
147
176
|
|
|
148
177
|
DEFAULT_AGENT_MODEL: str = "sonnet"
|
|
@@ -154,6 +183,30 @@ DEFAULT_REVIEW_ITERATIONS: Dict[str, int] = {
|
|
|
154
183
|
}
|
|
155
184
|
|
|
156
185
|
|
|
186
|
+
def resolve_mandatory_agents(config_value, complexity: str) -> set:
    """Resolve mandatory agent names based on config format and complexity.

    Supports two formats:
    - Legacy (list): ["a", "b"] — all treated as 'always'
    - Structured (dict): {"always": [...], "medium+": [...], "high": [...]}

    Args:
        config_value: The mandatory-agents setting. A list, a dict keyed by
            tier, or any other value (which selects the built-in fallback).
        complexity: Plan complexity. "medium" and "high" add the "medium+"
            tier; "high" additionally adds the "high" tier. Any other value
            selects only the "always" tier.

    Returns:
        A new set of agent names.
    """
    if isinstance(config_value, list):
        return set(config_value)

    if not isinstance(config_value, dict):
        # Unrecognized format: fall back to the built-in mandatory trio.
        return {"handoff-readiness", "clarity-auditor", "skeptic"}

    def _tier(key):
        # A tier may be missing or explicitly null in the config; both mean
        # "no agents". A bare string is one agent name, not a sequence of
        # characters.
        value = config_value.get(key)
        if value is None:
            return ()
        if isinstance(value, str):
            return (value,)
        return value

    names = set(_tier("always"))

    if complexity in ("medium", "high"):
        names.update(_tier("medium+"))

    if complexity == "high":
        names.update(_tier("high"))

    return names
|
|
208
|
+
|
|
209
|
+
|
|
157
210
|
# ---------------------------
|
|
158
211
|
# Context-based State Management
|
|
159
212
|
# ---------------------------
|
|
@@ -358,6 +411,7 @@ def load_settings(proj_dir: Path) -> Dict[str, Any]:
|
|
|
358
411
|
"orchestrator": DEFAULT_ORCHESTRATOR.copy(),
|
|
359
412
|
"timeout": 180,
|
|
360
413
|
"warnThreshold": 0.5,
|
|
414
|
+
"highIssueThreshold": 3,
|
|
361
415
|
"legacyMode": False,
|
|
362
416
|
"display": DEFAULT_DISPLAY.copy(),
|
|
363
417
|
"agentSelection": DEFAULT_AGENT_SELECTION.copy(),
|
|
@@ -567,10 +621,14 @@ def main() -> int:
|
|
|
567
621
|
timeout=orch_settings.get("timeout", 30),
|
|
568
622
|
)
|
|
569
623
|
|
|
570
|
-
#
|
|
571
|
-
|
|
624
|
+
# Two-phase mandatory resolution:
|
|
625
|
+
# Phase 1 (pre-orchestrator): Only "always" mandatory agents excluded from orchestrator pool
|
|
626
|
+
# Phase 2 (post-orchestrator): Full mandatory set including conditional agents
|
|
627
|
+
mandatory_config = agent_settings.get("mandatoryAgents", [
|
|
572
628
|
"handoff-readiness", "clarity-auditor", "skeptic"
|
|
573
|
-
])
|
|
629
|
+
])
|
|
630
|
+
always_mandatory = resolve_mandatory_agents(mandatory_config, "simple")
|
|
631
|
+
mandatory_names = always_mandatory
|
|
574
632
|
|
|
575
633
|
log_debug("cc-native-plan-review", f"Codex enabled: {codex_enabled}, Gemini enabled: {gemini_enabled}")
|
|
576
634
|
log_debug("cc-native-plan-review", f"Agent library: {[a.name for a in agent_library]}")
|
|
@@ -585,7 +643,7 @@ def main() -> int:
|
|
|
585
643
|
if gemini_enabled:
|
|
586
644
|
phase1_tasks.append(("gemini", lambda: run_gemini_review(plan, REVIEW_SCHEMA, plan_settings)))
|
|
587
645
|
if orchestrator_config.enabled and enabled_agents and not legacy_mode:
|
|
588
|
-
phase1_tasks.append(("orchestrator", lambda: run_orchestrator(plan, enabled_agents, orchestrator_config, agent_settings, mandatory_names=
|
|
646
|
+
phase1_tasks.append(("orchestrator", lambda: run_orchestrator(plan, enabled_agents, orchestrator_config, agent_settings, mandatory_names=always_mandatory)))
|
|
589
647
|
|
|
590
648
|
log_info("cc-native-plan-review", f"=== PHASE 1: Running {len(phase1_tasks)} tasks in parallel ===")
|
|
591
649
|
|
|
@@ -605,12 +663,8 @@ def main() -> int:
|
|
|
605
663
|
# Collect CLI results
|
|
606
664
|
if "codex" in phase1_results and phase1_results["codex"]:
|
|
607
665
|
cli_results["codex"] = phase1_results["codex"]
|
|
608
|
-
if phase1_results["codex"].verdict and phase1_results["codex"].verdict not in ("skip", "error"):
|
|
609
|
-
all_verdicts.append(phase1_results["codex"].verdict)
|
|
610
666
|
if "gemini" in phase1_results and phase1_results["gemini"]:
|
|
611
667
|
cli_results["gemini"] = phase1_results["gemini"]
|
|
612
|
-
if phase1_results["gemini"].verdict and phase1_results["gemini"].verdict not in ("skip", "error"):
|
|
613
|
-
all_verdicts.append(phase1_results["gemini"].verdict)
|
|
614
668
|
|
|
615
669
|
# Get orchestrator result
|
|
616
670
|
if "orchestrator" in phase1_results and phase1_results["orchestrator"]:
|
|
@@ -640,6 +694,11 @@ def main() -> int:
|
|
|
640
694
|
if orch_result and not legacy_mode:
|
|
641
695
|
detected_complexity = orch_result.complexity
|
|
642
696
|
|
|
697
|
+
# Phase 2: Recompute mandatory set with actual complexity
|
|
698
|
+
mandatory_names = resolve_mandatory_agents(mandatory_config, detected_complexity)
|
|
699
|
+
mandatory_agents = [a for a in enabled_agents if a.name in mandatory_names]
|
|
700
|
+
non_mandatory = [a for a in enabled_agents if a.name not in mandatory_names]
|
|
701
|
+
|
|
643
702
|
# Get orchestrator's additional selections (excluding mandatory since they always run)
|
|
644
703
|
orch_selected_names = set(orch_result.selected_agents) - mandatory_names
|
|
645
704
|
orch_selected = [a for a in non_mandatory if a.name in orch_selected_names]
|
|
@@ -666,8 +725,9 @@ def main() -> int:
|
|
|
666
725
|
log_info("cc-native-plan-review", f"Final selection: {len(selected_agents)} agents ({len(mandatory_agents)} mandatory + {len(orch_selected)} additional)")
|
|
667
726
|
else:
|
|
668
727
|
log_info("cc-native-plan-review", "Running in legacy mode (all enabled agents)")
|
|
669
|
-
selected_agents = enabled_agents
|
|
670
728
|
detected_complexity = "medium" # Default for legacy mode
|
|
729
|
+
mandatory_names = resolve_mandatory_agents(mandatory_config, detected_complexity)
|
|
730
|
+
selected_agents = enabled_agents
|
|
671
731
|
|
|
672
732
|
log_diagnostic("cc-native-plan-review", "decide",
|
|
673
733
|
f"Selected {len(selected_agents)} agents, complexity={detected_complexity}",
|
|
@@ -706,8 +766,6 @@ def main() -> int:
|
|
|
706
766
|
try:
|
|
707
767
|
result = future.result()
|
|
708
768
|
agent_results[agent.name] = result
|
|
709
|
-
if result.verdict and result.verdict not in ("skip", "error"):
|
|
710
|
-
all_verdicts.append(result.verdict)
|
|
711
769
|
log_info("cc-native-plan-review", f"{agent.name} completed with verdict: {result.verdict}")
|
|
712
770
|
except Exception as ex:
|
|
713
771
|
log_error("cc-native-plan-review", f"{agent.name} failed with exception: {ex}")
|
|
@@ -720,6 +778,25 @@ def main() -> int:
|
|
|
720
778
|
err=str(ex),
|
|
721
779
|
)
|
|
722
780
|
|
|
781
|
+
# ============================================
|
|
782
|
+
# Per-agent high-severity threshold: override verdict to "fail" if threshold met
|
|
783
|
+
# ============================================
|
|
784
|
+
high_issue_threshold = agent_settings.get("highIssueThreshold", 3)
|
|
785
|
+
all_verdicts = [] # Recompute with overrides applied
|
|
786
|
+
|
|
787
|
+
for r in list(cli_results.values()) + list(agent_results.values()):
|
|
788
|
+
if not r.verdict or r.verdict in ("skip", "error"):
|
|
789
|
+
continue
|
|
790
|
+
agent_high = sum(
|
|
791
|
+
1 for issue in (r.data.get("issues", []) if r.data else [])
|
|
792
|
+
if issue.get("severity") == "high"
|
|
793
|
+
)
|
|
794
|
+
if agent_high >= high_issue_threshold:
|
|
795
|
+
log_info("cc-native-plan-review",
|
|
796
|
+
f"{r.name}: verdict overridden to 'fail' ({agent_high} high issues >= {high_issue_threshold})")
|
|
797
|
+
r.verdict = "fail"
|
|
798
|
+
all_verdicts.append(r.verdict)
|
|
799
|
+
|
|
723
800
|
# ============================================
|
|
724
801
|
# PHASE 4: Generate Combined Output
|
|
725
802
|
# ============================================
|
|
@@ -765,33 +842,27 @@ def main() -> int:
|
|
|
765
842
|
|
|
766
843
|
context_parts = [inline_summary, f"\nFull review: `{review_file}`\n"]
|
|
767
844
|
|
|
768
|
-
# Review decision —
|
|
845
|
+
# Review decision — fail veto triggers a block (per-agent override already applied)
|
|
769
846
|
warn_threshold = agent_settings.get("warnThreshold", 0.5)
|
|
770
|
-
should_deny, deny_reason, review_score = compute_review_decision(
|
|
771
|
-
|
|
772
|
-
# Count high-severity issues for logging
|
|
773
|
-
high_count = sum(
|
|
774
|
-
1 for r in list(combined_result.cli_reviewers.values()) + list(combined_result.agents.values())
|
|
775
|
-
if r.data
|
|
776
|
-
for issue in r.data.get("issues", [])
|
|
777
|
-
if issue.get("severity") == "high"
|
|
847
|
+
should_deny, deny_reason, review_score = compute_review_decision(
|
|
848
|
+
all_verdicts, warn_threshold,
|
|
778
849
|
)
|
|
779
850
|
|
|
780
851
|
# Structured log entries for review influence tracking
|
|
781
|
-
log_info("cc-native-plan-review", f"REVIEW_DECISION: verdict={combined_result.overall_verdict}, deny={should_deny},
|
|
852
|
+
log_info("cc-native-plan-review", f"REVIEW_DECISION: verdict={combined_result.overall_verdict}, deny={should_deny}, reason={deny_reason}, score={review_score:.2f}")
|
|
782
853
|
log_diagnostic("cc-native-plan-review", "result",
|
|
783
|
-
f"verdict={combined_result.overall_verdict}, deny={should_deny},
|
|
854
|
+
f"verdict={combined_result.overall_verdict}, deny={should_deny}, reason={deny_reason}",
|
|
784
855
|
decision="deny" if should_deny else "allow",
|
|
785
|
-
reasoning=f"score={review_score:.2f},
|
|
856
|
+
reasoning=f"reason={deny_reason}, score={review_score:.2f}, warn_threshold={warn_threshold}",
|
|
786
857
|
inputs={"overall_verdict": combined_result.overall_verdict,
|
|
787
|
-
"
|
|
858
|
+
"review_score": round(review_score, 2),
|
|
788
859
|
"cli_count": len(cli_results), "agent_count": len(agent_results)})
|
|
789
860
|
|
|
790
861
|
# Terminal progress indicator
|
|
791
862
|
verdict_emoji = "✅" if not should_deny else "❌"
|
|
792
863
|
eprint(f"[plan-review] {verdict_emoji} {combined_result.overall_verdict.upper()} (score={review_score:.2f})")
|
|
793
864
|
if should_deny:
|
|
794
|
-
eprint(f"[plan-review] Blocking ExitPlanMode — {
|
|
865
|
+
eprint(f"[plan-review] Blocking ExitPlanMode — {deny_reason}")
|
|
795
866
|
|
|
796
867
|
# Handle iteration logic
|
|
797
868
|
needs_more_iterations = False
|
|
@@ -809,11 +880,13 @@ def main() -> int:
|
|
|
809
880
|
else:
|
|
810
881
|
# Final iteration - increment current and save state
|
|
811
882
|
iteration_state["current"] = iteration_state.get("current", 1) + 1
|
|
812
|
-
#
|
|
813
|
-
# the
|
|
814
|
-
#
|
|
815
|
-
#
|
|
816
|
-
|
|
883
|
+
# Extend max ONLY when the plan passes review (for user rejection recovery).
|
|
884
|
+
# When the hook denies (should_deny=True), don't extend — the hook will
|
|
885
|
+
# keep blocking on each resubmission via should_deny regardless of max.
|
|
886
|
+
# This prevents max from inflating on repeated hook rejections while still
|
|
887
|
+
# allowing re-review after a user rejects a plan that passed review.
|
|
888
|
+
if not should_deny:
|
|
889
|
+
iteration_state["max"] = iteration_state.get("max", 1) + 1
|
|
817
890
|
save_iteration_state(reviews_dir, iteration_state)
|
|
818
891
|
|
|
819
892
|
# Emit output with correct Claude Code hook format
|
|
@@ -832,29 +905,41 @@ def main() -> int:
|
|
|
832
905
|
)
|
|
833
906
|
|
|
834
907
|
if needs_more_iterations:
|
|
835
|
-
mark_plan_reviewed(session_id, plan_hash, "cc-native-plan-review", iteration_state, decision="
|
|
908
|
+
mark_plan_reviewed(session_id, plan_hash, "cc-native-plan-review", iteration_state, decision="hook_deny_iteration")
|
|
836
909
|
current = iteration_state["current"] - 1 # Display the just-completed iteration
|
|
837
910
|
max_iter = iteration_state["max"]
|
|
838
911
|
remaining = max_iter - current
|
|
839
912
|
top_issues_text = extract_top_issues_text(combined_result, max_count=3, severity="high")
|
|
913
|
+
# Two-fold deny signal: inline issues (fallback) + high-issues.md (primary)
|
|
914
|
+
high_issues_doc = build_high_issues_document(combined_result)
|
|
915
|
+
high_issues_path = review_folder / "high-issues.md"
|
|
916
|
+
high_issues_path.write_text(high_issues_doc, encoding="utf-8")
|
|
840
917
|
emit_context_and_block(
|
|
841
918
|
context_text,
|
|
842
919
|
f"Plan review iteration {current}/{max_iter} FAILED ({deny_reason}, score={review_score:.2f}). "
|
|
843
920
|
f"Critical issues: {top_issues_text}. "
|
|
921
|
+
f"IMPORTANT: Read `{high_issues_path}` for ALL high-severity issues — "
|
|
922
|
+
f"this file contains only the most critical findings, no noise. "
|
|
844
923
|
f"{_REVIEWER_CAVEAT} "
|
|
845
|
-
f"Revise the plan, then call ExitPlanMode again. "
|
|
924
|
+
f"Revise the plan to address these issues, then call ExitPlanMode again. "
|
|
846
925
|
f"({remaining} revision{'s' if remaining != 1 else ''} remaining) "
|
|
847
926
|
f"{_RESUBMIT_INSTRUCTION}",
|
|
848
927
|
)
|
|
849
928
|
elif should_deny:
|
|
850
|
-
mark_plan_reviewed(session_id, plan_hash, "cc-native-plan-review", iteration_state, decision="
|
|
929
|
+
mark_plan_reviewed(session_id, plan_hash, "cc-native-plan-review", iteration_state, decision="hook_deny_final")
|
|
851
930
|
top_issues_text = extract_top_issues_text(combined_result, max_count=3, severity="high")
|
|
931
|
+
# Two-fold deny signal: inline issues (fallback) + high-issues.md (primary)
|
|
932
|
+
high_issues_doc = build_high_issues_document(combined_result)
|
|
933
|
+
high_issues_path = review_folder / "high-issues.md"
|
|
934
|
+
high_issues_path.write_text(high_issues_doc, encoding="utf-8")
|
|
852
935
|
emit_context_and_block(
|
|
853
936
|
context_text,
|
|
854
937
|
f"Plan review FAILED ({deny_reason}, score={review_score:.2f}). "
|
|
855
938
|
f"Critical issues: {top_issues_text}. "
|
|
939
|
+
f"IMPORTANT: Read `{high_issues_path}` for ALL high-severity issues — "
|
|
940
|
+
f"this file contains only the most critical findings, no noise. "
|
|
856
941
|
f"{_REVIEWER_CAVEAT} "
|
|
857
|
-
f"Revise the plan, then call ExitPlanMode again. "
|
|
942
|
+
f"Revise the plan to address these issues, then call ExitPlanMode again. "
|
|
858
943
|
f"{_RESUBMIT_INSTRUCTION}",
|
|
859
944
|
)
|
|
860
945
|
else:
|