mindforge-cc 2.3.0 → 3.0.0-rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/.agent/skills/mindforge-plan-phase/SKILL.md +1 -0
  2. package/.agent/skills/mindforge-system-architecture/SKILL.md +136 -0
  3. package/.agent/skills/mindforge-system-architecture/examples.md +120 -0
  4. package/.agent/skills/mindforge-system-architecture/scaling-checklist.md +76 -0
  5. package/.agent/skills/mindforge-tdd/SKILL.md +112 -0
  6. package/.agent/skills/mindforge-tdd/deep-modules.md +21 -0
  7. package/.agent/skills/mindforge-tdd/interface-design.md +22 -0
  8. package/.agent/skills/mindforge-tdd/mocking.md +24 -0
  9. package/.agent/skills/mindforge-tdd/refactoring.md +21 -0
  10. package/.agent/skills/mindforge-tdd/tests.md +28 -0
  11. package/.agent/workflows/mindforge-plan-phase.md +30 -1
  12. package/.agent/workflows/mindforge:architecture.md +40 -0
  13. package/.agent/workflows/mindforge:executor.md +18 -0
  14. package/.agent/workflows/mindforge:identity.md +18 -0
  15. package/.agent/workflows/mindforge:memory.md +18 -0
  16. package/.agent/workflows/mindforge:planner.md +18 -0
  17. package/.agent/workflows/mindforge:researcher.md +18 -0
  18. package/.agent/workflows/mindforge:reviewer.md +18 -0
  19. package/.agent/workflows/mindforge:tdd.md +41 -0
  20. package/.agent/workflows/mindforge:tool.md +18 -0
  21. package/.mindforge/engine/ads-protocol.md +54 -0
  22. package/.mindforge/engine/compaction-protocol.md +21 -36
  23. package/.mindforge/engine/context-injector.md +26 -0
  24. package/.mindforge/engine/knowledge-graph-protocol.md +125 -0
  25. package/.mindforge/engine/shard-controller.md +53 -0
  26. package/.mindforge/engine/temporal-protocol.md +40 -0
  27. package/.mindforge/memory/knowledge-base.jsonl +11 -0
  28. package/.mindforge/memory/pattern-library.jsonl +2 -0
  29. package/.mindforge/memory/team-preferences.jsonl +5 -0
  30. package/.mindforge/personas/mf-executor.md +40 -0
  31. package/.mindforge/personas/mf-memory.md +33 -0
  32. package/.mindforge/personas/mf-planner.md +45 -0
  33. package/.mindforge/personas/mf-researcher.md +39 -0
  34. package/.mindforge/personas/mf-reviewer.md +35 -0
  35. package/.mindforge/personas/mf-tool.md +33 -0
  36. package/.planning/AUDIT.jsonl +2 -1
  37. package/.planning/HANDOFF.json +6 -26
  38. package/.planning/ROADMAP.md +3 -1
  39. package/.planning/TEMPORAL-TEST.md +1 -0
  40. package/.planning/history/36525e1d9da1b674/ARCHITECTURE.md +0 -0
  41. package/.planning/history/36525e1d9da1b674/HANDOFF.json +8 -0
  42. package/.planning/history/36525e1d9da1b674/PROJECT.md +33 -0
  43. package/.planning/history/36525e1d9da1b674/RELEASE-CHECKLIST.md +68 -0
  44. package/.planning/history/36525e1d9da1b674/REQUIREMENTS.md +0 -0
  45. package/.planning/history/36525e1d9da1b674/ROADMAP.md +12 -0
  46. package/.planning/history/36525e1d9da1b674/SNAPSHOT-META.json +18 -0
  47. package/.planning/history/36525e1d9da1b674/STATE.md +31 -0
  48. package/.planning/history/36525e1d9da1b674/TEMPORAL-TEST.md +1 -0
  49. package/.planning/history/36525e1d9da1b674/jira-sync.json +5 -0
  50. package/.planning/history/36525e1d9da1b674/slack-threads.json +3 -0
  51. package/.planning/history/test-audit-001/ARCHITECTURE.md +0 -0
  52. package/.planning/history/test-audit-001/HANDOFF.json +8 -0
  53. package/.planning/history/test-audit-001/PROJECT.md +33 -0
  54. package/.planning/history/test-audit-001/RELEASE-CHECKLIST.md +68 -0
  55. package/.planning/history/test-audit-001/REQUIREMENTS.md +0 -0
  56. package/.planning/history/test-audit-001/ROADMAP.md +12 -0
  57. package/.planning/history/test-audit-001/SNAPSHOT-META.json +17 -0
  58. package/.planning/history/test-audit-001/STATE.md +31 -0
  59. package/.planning/history/test-audit-001/TEMPORAL-TEST.md +1 -0
  60. package/.planning/history/test-audit-001/jira-sync.json +5 -0
  61. package/.planning/history/test-audit-001/slack-threads.json +3 -0
  62. package/.planning/jira-sync.json +3 -7
  63. package/.planning/phases/.gitkeep +0 -0
  64. package/.planning/research/.gitkeep +0 -0
  65. package/.planning/screenshots/.gitkeep +0 -0
  66. package/.planning/slack-threads.json +1 -4
  67. package/CHANGELOG.md +170 -9
  68. package/README.md +40 -13
  69. package/RELEASENOTES.md +1 -1
  70. package/bin/autonomous/auto-runner.js +23 -0
  71. package/bin/dashboard/server.js +2 -0
  72. package/bin/dashboard/temporal-api.js +82 -0
  73. package/bin/engine/temporal-cli.js +52 -0
  74. package/bin/engine/temporal-hub.js +138 -0
  75. package/bin/hindsight-injector.js +59 -0
  76. package/bin/installer-core.js +143 -46
  77. package/bin/memory/auto-shadow.js +274 -0
  78. package/bin/memory/embedding-engine.js +326 -0
  79. package/bin/memory/knowledge-capture.js +122 -5
  80. package/bin/memory/knowledge-graph.js +572 -0
  81. package/bin/memory/knowledge-store.js +15 -3
  82. package/bin/mindforge-cli.js +19 -0
  83. package/bin/models/model-router.js +1 -0
  84. package/bin/review/ads-engine.js +126 -0
  85. package/bin/review/ads-synthesizer.js +117 -0
  86. package/bin/shard-helper.js +134 -0
  87. package/bin/spawn-agent.js +61 -0
  88. package/bin/wizard/theme.js +3 -1
  89. package/docs/PERSONAS.md +71 -5
  90. package/docs/adr/ADR-042-ads-protocol.md +30 -0
  91. package/docs/architecture/README.md +55 -0
  92. package/docs/architecture/V3-CORE.md +52 -0
  93. package/docs/commands-reference.md +3 -2
  94. package/docs/usp-features.md +33 -15
  95. package/package.json +1 -1
  96. package/.planning/approvals/v2-architecture-approval.json +0 -15
  97. package/.planning/decisions/ADR-001-handoff-tracking.md +0 -41
  98. package/.planning/decisions/ADR-002-markdown-commands.md +0 -46
  99. package/.planning/decisions/ADR-003-skills-trigger-model.md +0 -37
  100. package/.planning/decisions/ADR-004-wave-parallelism-model.md +0 -45
  101. package/.planning/decisions/ADR-005-append-only-audit-log.md +0 -51
  102. package/.planning/decisions/ADR-006-tiered-skills-system.md +0 -22
  103. package/.planning/decisions/ADR-007-trigger-keyword-model.md +0 -22
  104. package/.planning/decisions/ADR-008-just-in-time-skill-loading.md +0 -29
  105. package/.planning/decisions/ADR-009-enterprise-integration-retry-policy.md +0 -8
  106. package/.planning/decisions/ADR-010-governance-tier-escalation.md +0 -8
  107. package/.planning/decisions/ADR-011-multi-developer-handoff-contract.md +0 -8
  108. package/.planning/decisions/ADR-012-intelligence-feedback-loops.md +0 -19
  109. package/.planning/decisions/ADR-013-mindforge-md-constitution.md +0 -16
  110. package/.planning/decisions/ADR-014-metrics-as-signals-not-evaluation.md +0 -15
  111. package/.planning/decisions/ADR-015-npm-based-skill-registry.md +0 -26
  112. package/.planning/decisions/ADR-016-ci-exit-code-0-on-timeout.md +0 -27
  113. package/.planning/decisions/ADR-017-sdk-localhost-only.md +0 -28
  114. package/.planning/decisions/ADR-018-installer-self-install-detection.md +0 -15
  115. package/.planning/decisions/ADR-019-self-update-scope-preservation.md +0 -14
  116. package/.planning/decisions/ADR-020-v1.0.0-stable-interface-contract.md +0 -23
  117. package/.planning/decisions/ADR-021-autonomy-boundary.md +0 -17
  118. package/.planning/decisions/ADR-022-node-repair-hierarchy.md +0 -19
  119. package/.planning/decisions/ADR-023-gate-3-timing.md +0 -15
  120. package/.planning/decisions/ADR-036-learn-command-docs-as-skill-source.md +0 -26
  121. package/.planning/decisions/ADR-037-auto-capture-frequency-threshold.md +0 -26
  122. package/.planning/decisions/ADR-038-skill-quality-minimum-60.md +0 -27
  123. package/.planning/phases/day1/REVIEW-DAY1.md +0 -50
  124. package/.planning/phases/day1/SECURITY-REVIEW-DAY1.md +0 -15
  125. package/.planning/phases/day2/REVIEW-DAY2.md +0 -521
  126. package/.planning/phases/day3/REVIEW-DAY3.md +0 -234
  127. /package/{.planning/phases/01-migrate-gsd-to-mindforge/.gitkeep → .mindforge/memory/decision-library.jsonl} +0 -0
  128. /package/docs/{Context → context}/Master-Context.md +0 -0
  129. /package/docs/{References → references}/audit-events.md +0 -0
  130. /package/docs/{References → references}/checkpoints.md +0 -0
  131. /package/docs/{References → references}/commands.md +0 -0
  132. /package/docs/{References → references}/config-reference.md +0 -0
  133. /package/docs/{References → references}/continuation-format.md +0 -0
  134. /package/docs/{References → references}/decimal-phase-calculation.md +0 -0
  135. /package/docs/{References → references}/git-integration.md +0 -0
  136. /package/docs/{References → references}/git-planning-commit.md +0 -0
  137. /package/docs/{References → references}/model-profile-resolution.md +0 -0
  138. /package/docs/{References → references}/model-profiles.md +0 -0
  139. /package/docs/{References → references}/phase-argument-parsing.md +0 -0
  140. /package/docs/{References → references}/planning-config.md +0 -0
  141. /package/docs/{References → references}/questioning.md +0 -0
  142. /package/docs/{References → references}/sdk-api.md +0 -0
  143. /package/docs/{References → references}/skills-api.md +0 -0
  144. /package/docs/{References → references}/tdd.md +0 -0
  145. /package/docs/{References → references}/ui-brand.md +0 -0
  146. /package/docs/{References → references}/user-profiling.md +0 -0
  147. /package/docs/{References → references}/verification-patterns.md +0 -0
  148. /package/docs/{References → references}/workstream-flag.md +0 -0
  149. /package/docs/{Templates → templates}/Agents/CLAUDE-MD.md +0 -0
  150. /package/docs/{Templates → templates}/Agents/COPILOT-INSTRUCTIONS.md +0 -0
  151. /package/docs/{Templates → templates}/Agents/DEBUGGER-PROMPT.md +0 -0
  152. /package/docs/{Templates → templates}/Agents/PLANNER-PROMPT.md +0 -0
  153. /package/docs/{Templates → templates}/Execution/CONTINUE-HERE.md +0 -0
  154. /package/docs/{Templates → templates}/Execution/DISCUSSION-LOG.md +0 -0
  155. /package/docs/{Templates → templates}/Execution/PHASE-PROMPT.md +0 -0
  156. /package/docs/{Templates → templates}/Execution/STATE.md +0 -0
  157. /package/docs/{Templates → templates}/Execution/SUMMARY-COMPLEX.md +0 -0
  158. /package/docs/{Templates → templates}/Execution/SUMMARY-MINIMAL.md +0 -0
  159. /package/docs/{Templates → templates}/Execution/SUMMARY-STANDARD.md +0 -0
  160. /package/docs/{Templates → templates}/Execution/SUMMARY.md +0 -0
  161. /package/docs/{Templates → templates}/Profile/DEV-PREFERENCES.md +0 -0
  162. /package/docs/{Templates → templates}/Profile/USER-PROFILE.md +0 -0
  163. /package/docs/{Templates → templates}/Profile/USER-SETUP.md +0 -0
  164. /package/docs/{Templates → templates}/Project/DISCOVERY.md +0 -0
  165. /package/docs/{Templates → templates}/Project/MILESTONE-ARCHIVE.md +0 -0
  166. /package/docs/{Templates → templates}/Project/MILESTONE.md +0 -0
  167. /package/docs/{Templates → templates}/Project/PROJECT.md +0 -0
  168. /package/docs/{Templates → templates}/Project/REQUIREMENTS.md +0 -0
  169. /package/docs/{Templates → templates}/Project/RETROSPECTIVE.md +0 -0
  170. /package/docs/{Templates → templates}/Project/ROADMAP.md +0 -0
  171. /package/docs/{Templates → templates}/Quality/DEBUG.md +0 -0
  172. /package/docs/{Templates → templates}/Quality/UAT.md +0 -0
  173. /package/docs/{Templates → templates}/Quality/UI-SPEC.md +0 -0
  174. /package/docs/{Templates → templates}/Quality/VALIDATION.md +0 -0
  175. /package/docs/{Templates → templates}/Quality/VERIFICATION-REPORT.md +0 -0
  176. /package/docs/{Templates → templates}/System/CONFIG.json +0 -0
  177. /package/docs/{Templates → templates}/System/CONTEXT.md +0 -0
  178. /package/docs/{Templates/Codebase → templates/codebase}/architecture.md +0 -0
  179. /package/docs/{Templates/Codebase → templates/codebase}/concerns.md +0 -0
  180. /package/docs/{Templates/Codebase → templates/codebase}/conventions.md +0 -0
  181. /package/docs/{Templates/Codebase → templates/codebase}/integrations.md +0 -0
  182. /package/docs/{Templates/Codebase → templates/codebase}/stack.md +0 -0
  183. /package/docs/{Templates/Codebase → templates/codebase}/structure.md +0 -0
  184. /package/docs/{Templates/Codebase → templates/codebase}/testing.md +0 -0
  185. /package/docs/{Templates/Research → templates/research}/ARCHITECTURE.md +0 -0
  186. /package/docs/{Templates/Research → templates/research}/FEATURES.md +0 -0
  187. /package/docs/{Templates/Research → templates/research}/PITFALLS.md +0 -0
  188. /package/docs/{Templates/Research → templates/research}/STACK.md +0 -0
  189. /package/docs/{Templates/Research → templates/research}/SUMMARY.md +0 -0
@@ -26,6 +26,7 @@ Phase number: $ARGUMENTS (optional — auto-detects next unplanned phase if omit
26
26
  - `--skip-verify` — Skip verification loop
27
27
  - `--prd <file>` — Use a PRD/acceptance criteria file instead of discuss-phase. Parses requirements into CONTEXT.md automatically. Skips discuss-phase entirely.
28
28
  - `--reviews` — Replan incorporating cross-AI review feedback from REVIEWS.md (produced by `/mindforge-review`)
29
+ - `--ads` — Use Adversarial Decision Synthesis (Architect vs Auditor) for high-fidelity planning
29
30
  - `--text` — Use plain-text numbered lists instead of TUI menus (required for `/rc` remote sessions)
30
31
 
31
32
  Normalize phase input in step 2 before any directory lookups.
@@ -0,0 +1,136 @@
1
+ ---
2
+ name: system-architecture
3
+ description: Design systems with appropriate complexity - no more, no less. Use when the user asks to architect applications, design system boundaries, plan service decomposition, evaluate monolith vs microservices, make scaling decisions, or review structural trade-offs. Applies to new system design, refactoring, and migration planning.
4
+ ---
5
+
6
+ # System Architecture
7
+
8
+ Design real structures with clear boundaries, explicit trade-offs, and appropriate complexity. Match architecture to actual requirements, not imagined future needs.
9
+
10
+ ## Workflow
11
+
12
+ When the user requests an architecture, follow these steps:
13
+
14
+ ```
15
+ Task Progress:
16
+ - [ ] Step 1: Clarify constraints
17
+ - [ ] Step 2: Identify domains
18
+ - [ ] Step 3: Map data flow
19
+ - [ ] Step 4: Draw boundaries with rationale
20
+ - [ ] Step 5: Run complexity checklist
21
+ - [ ] Step 6: Present architecture with trade-offs
22
+ ```
23
+
24
+ **Step 1 - Clarify constraints.** Ask about:
25
+
26
+ | Constraint | Question | Why it matters |
27
+ |------------|----------|----------------|
28
+ | Scale | What's the real load? (users, requests/sec, data size) | Design for 10x current, not 1000x |
29
+ | Team | How many developers? How many teams? | Deployable units ≤ number of teams |
30
+ | Lifespan | Prototype? MVP? Long-term product? | Temporary systems need temporary solutions |
31
+ | Change vectors | What actually varies? | Abstract only where you have evidence of variation |
32
+
33
+ **Step 2 - Identify domains.** Group by business capability, not technical layer. Look for things that change for different reasons and at different rates.
34
+
35
+ **Step 3 - Map data flow.** Trace: where does data enter → how does it transform → where does it exit? Make the flow obvious.
36
+
37
+ **Step 4 - Draw boundaries.** Every boundary needs a reason: different team, different change rate, different compliance requirement, or different scaling need.
38
+
39
+ **Step 5 - Run complexity checklist.** Before adding any non-trivial pattern:
40
+
41
+ ```
42
+ [ ] Have I tried the simple solution?
43
+ [ ] Do I have evidence it's insufficient?
44
+ [ ] Can my team operate this?
45
+ [ ] Will this still make sense in 6 months?
46
+ [ ] Can I explain why this complexity is necessary?
47
+ ```
48
+
49
+ If any answer is "no", keep it simple.
50
+
51
+ **Step 6 - Present the architecture** using the output template below.
52
+
53
+ ## Output Template
54
+
55
+ ```markdown
56
+ ### System: [Name]
57
+
58
+ **Constraints**:
59
+ - Scale: [current and expected load]
60
+ - Team: [size and structure]
61
+ - Lifespan: [prototype / MVP / long-term]
62
+
63
+ **Architecture**:
64
+ [Component diagram or description of components and their relationships]
65
+
66
+ **Data Flow**:
67
+ [How data enters → transforms → exits]
68
+
69
+ **Key Boundaries**:
70
+ | Boundary | Reason | Change Rate |
71
+ |----------|--------|-------------|
72
+ | ... | ... | ... |
73
+
74
+ **Trade-offs**:
75
+ - Chose X over Y because [reason]
76
+ - Accepted [limitation] to gain [benefit]
77
+
78
+ **Complexity Justification**:
79
+ - [Each non-trivial pattern] → [why it's needed, with evidence]
80
+ ```
81
+
82
+ ## Core Principles
83
+
84
+ 1. **Boundaries at real differences.** Separate concerns that change for different reasons and at different rates.
85
+ 2. **Dependencies flow inward.** Core logic depends on nothing. Infrastructure depends on core.
86
+ 3. **Follow the data.** Architecture should make data flow obvious.
87
+ 4. **Design for failure.** Networks fail. Databases time out. Build compensation into the structure.
88
+ 5. **Design for operations.** You will debug this at 3am. Every request needs a trace. Every error needs context for replay.
89
+
90
+ For concrete good/bad examples of each principle, see [examples.md](examples.md).
91
+
92
+ ## Anti-Patterns
93
+
94
+ | Don't | Do Instead |
95
+ |-------|------------|
96
+ | Microservices for a 3-person team | Well-structured monolith |
97
+ | Event sourcing for CRUD | Simple state storage |
98
+ | Message queues within the same process | Just call the function |
99
+ | Distributed transactions | Redesign to avoid, or accept eventual consistency |
100
+ | Repository wrapping an ORM | Use the ORM directly |
101
+ | Interfaces with one implementation | Mock at boundaries only |
102
+ | AbstractFactoryFactoryBean | Just instantiate the thing |
103
+ | DI containers for simple graphs | Constructor injection is enough |
104
+ | Clean Architecture for a TODO app | Match layers to actual complexity |
105
+ | DDD tactics without strategic design | Aggregates need bounded contexts |
106
+ | Hexagonal ports with one adapter | Just call the database |
107
+ | CQRS when reads = writes | Add when they diverge |
108
+ | "We might swap databases" | You won't; rewrite if you do |
109
+ | "Multi-tenant someday" | Build it when you have tenant #2 |
110
+ | "Microservices for team scale" | Helps at 50+ engineers, not 4 |
111
+
112
+ ## Success Criteria
113
+
114
+ Your architecture is right-sized when:
115
+
116
+ 1. **You can draw it** - dependency graph fits on a whiteboard
117
+ 2. **You can explain it** - new team member understands data flow in 30 minutes
118
+ 3. **You can change it** - adding a feature touches 1-3 modules, not 10
119
+ 4. **You can delete it** - removing a component needs no archaeology
120
+ 5. **You can debug it** - tracing a request takes minutes, not hours
121
+ 6. **It matches your team** - deployable units ≤ number of teams
122
+
123
+ ## When the Simple Solution Isn't Enough
124
+
125
+ If the complexity checklist says "yes, scale is real", see [scaling-checklist.md](scaling-checklist.md) for concrete techniques covering caching, async processing, partitioning, horizontal scaling, and multi-region.
126
+
127
+ ## Iterative Architecture
128
+
129
+ Architecture is discovered, not designed upfront:
130
+
131
+ 1. **Start obvious** - group by domain, not by technical layer
132
+ 2. **Let hotspots emerge** - monitor which modules change together
133
+ 3. **Extract when painful** - split only when the current form causes measurable problems
134
+ 4. **Document decisions** - record why boundaries exist so future you knows what's load-bearing
135
+
136
+ Every senior engineer has a graveyard of over-engineered systems they regret. Learn from their pain. Build boring systems that work.
@@ -0,0 +1,120 @@
1
+ # Architecture Examples
2
+
3
+ Concrete good/bad examples for each core principle in SKILL.md.
4
+
5
+ ---
6
+
7
+ ## Boundaries at Real Differences
8
+
9
+ **Good** - Meaningful boundary:
10
+ ```
11
+ # Users and Billing are separate bounded contexts
12
+ # - Different teams own them
13
+ # - Different change cadences (users: weekly, billing: quarterly)
14
+ # - Different compliance requirements
15
+
16
+ src/
17
+ users/ # User management domain
18
+ models.py
19
+ services.py
20
+ api.py
21
+ billing/ # Billing domain
22
+ models.py
23
+ services.py
24
+ api.py
25
+ shared/ # Truly shared utilities
26
+ auth.py
27
+ ```
28
+
29
+ **Bad** - Ceremony without purpose:
30
+ ```
31
+ # UserService → UserRepository → UserRepositoryImpl
32
+ # ...when you'll never swap the database
33
+
34
+ src/
35
+ interfaces/
36
+ IUserRepository.py # One implementation exists
37
+ repositories/
38
+ UserRepositoryImpl.py # Wraps SQLAlchemy, which is already a repository
39
+ services/
40
+ UserService.py # Just calls the repository
41
+ ```
42
+
43
+ ---
44
+
45
+ ## Dependencies Flow Inward
46
+
47
+ **Good** - Clear dependency direction:
48
+ ```
49
+ # Dependency flows inward: infrastructure → application → domain
50
+
51
+ domain/ # Pure business logic, no imports from outer layers
52
+ order.py # Order entity with business rules
53
+
54
+ application/ # Use cases, orchestrates domain
55
+ place_order.py # Imports from domain/, not infrastructure/
56
+
57
+ infrastructure/ # External concerns
58
+ postgres.py # Implements persistence, imports from application/
59
+ stripe.py # Implements payments
60
+ ```
61
+
62
+ ---
63
+
64
+ ## Follow the Data
65
+
66
+ **Good** - Obvious data flow:
67
+ ```
68
+ Request → Validate → Transform → Store → Respond
69
+
70
+ # Each step is a clear function/module:
71
+ api/routes.py # Request enters
72
+ validators.py # Validation
73
+ transformers.py # Business logic transformation
74
+ repositories.py # Storage
75
+ serializers.py # Response shaping
76
+ ```
77
+
78
+ ---
79
+
80
+ ## Design for Failure
81
+
82
+ **Good** - Failure-aware design with compensation:
83
+ ```python
84
+ class OrderService:
85
+ def place_order(self, order: Order) -> Result:
86
+ inventory = self.inventory.reserve(order.items)
87
+ if inventory.failed:
88
+ return Result.failure("Items unavailable", retry=False)
89
+
90
+ payment = self.payments.charge(order.total)
91
+ if payment.failed:
92
+ self.inventory.release(inventory.reservation_id) # Compensate
93
+ return Result.failure("Payment failed", retry=True)
94
+
95
+ return Result.success(order)
96
+ ```
97
+
98
+ ---
99
+
100
+ ## Design for Operations
101
+
102
+ **Good** - Observable architecture:
103
+ ```python
104
+ @trace
105
+ def handle_request(request):
106
+ log.info("Processing", request_id=request.id, user=request.user_id)
107
+ try:
108
+ result = process(request)
109
+ log.info("Completed", request_id=request.id, result=result.status)
110
+ return result
111
+ except Exception as e:
112
+ log.error("Failed", request_id=request.id, error=str(e),
113
+ context=request.to_dict()) # Full context for replay
114
+ raise
115
+ ```
116
+
117
+ Key elements:
118
+ - Every request gets a correlation ID
119
+ - Every service logs with that ID
120
+ - Every error includes full context for reproduction
@@ -0,0 +1,76 @@
1
+ # Scaling Checklist
2
+
3
+ Concrete techniques for when the complexity checklist in SKILL.md confirms scale is a real problem. Apply in order - each level solves the previous level's bottleneck.
4
+
5
+ ---
6
+
7
+ ## Level 0: Optimize First
8
+
9
+ Before adding infrastructure, exhaust these:
10
+
11
+ - [ ] Database queries have proper indexes
12
+ - [ ] N+1 queries eliminated
13
+ - [ ] Connection pooling configured
14
+ - [ ] Slow endpoints profiled and optimized
15
+ - [ ] Static assets served via CDN
16
+
17
+ ## Level 1: Read-Heavy
18
+
19
+ **Symptom**: Database reads are the bottleneck.
20
+
21
+ | Technique | When | Trade-off |
22
+ |-----------|------|-----------|
23
+ | Application cache (in-memory) | Small, frequently accessed data | Stale data, memory pressure |
24
+ | Redis/Memcached | Shared cache across instances | Network hop, cache invalidation complexity |
25
+ | Read replicas | High read volume, slight staleness OK | Replication lag, eventual consistency |
26
+ | CDN | Static or semi-static content | Cache invalidation delay |
27
+
28
+ ## Level 2: Write-Heavy
29
+
30
+ **Symptom**: Database writes or processing are the bottleneck.
31
+
32
+ | Technique | When | Trade-off |
33
+ |-----------|------|-----------|
34
+ | Async task queue (Celery, SQS) | Work can be deferred | Eventual consistency, failure handling |
35
+ | Write-behind cache | Batch frequent writes | Data loss risk on crash |
36
+ | Event streaming (Kafka) | Multiple consumers of same data | Operational complexity, ordering guarantees |
37
+ | CQRS | Reads and writes have diverged significantly | Two models to maintain |
38
+
39
+ ## Level 3: Traffic Spikes
40
+
41
+ **Symptom**: Individual instances can't handle peak load.
42
+
43
+ | Technique | When | Trade-off |
44
+ |-----------|------|-----------|
45
+ | Horizontal scaling + load balancer | Stateless services | Session management, deploy complexity |
46
+ | Auto-scaling | Unpredictable traffic patterns | Cold start latency, cost spikes |
47
+ | Rate limiting | Protect against abuse/spikes | Legitimate users may be throttled |
48
+ | Circuit breakers | Downstream services degrade | Partial functionality during failures |
49
+
50
+ ## Level 4: Data Growth
51
+
52
+ **Symptom**: Single database can't hold or query all the data efficiently.
53
+
54
+ | Technique | When | Trade-off |
55
+ |-----------|------|-----------|
56
+ | Table partitioning | Time-series or naturally partitioned data | Query complexity, partition management |
57
+ | Archival / cold storage | Old data rarely accessed | Access latency for archived data |
58
+ | Database sharding | Partitioning insufficient, clear shard key exists | Cross-shard queries, operational burden |
59
+ | Search index (Elasticsearch) | Full-text or complex queries on large datasets | Index lag, another system to operate |
60
+
61
+ ## Level 5: Multi-Region
62
+
63
+ **Symptom**: Users are geographically distributed and latency matters.
64
+
65
+ | Technique | When | Trade-off |
66
+ |-----------|------|-----------|
67
+ | CDN + edge caching | Static/semi-static content | Cache invalidation |
68
+ | Read replicas per region | Read-heavy, slight staleness OK | Replication lag |
69
+ | Active-passive failover | Disaster recovery | Failover time, cost of standby |
70
+ | Active-active multi-region | True global low-latency required | Conflict resolution, extreme complexity |
71
+
72
+ ---
73
+
74
+ ## Decision Rule
75
+
76
+ Always start at Level 0. Move to the next level only when you have **measured evidence** that the current level is insufficient. Skipping levels is how you end up with Kafka for a TODO app.
@@ -0,0 +1,112 @@
1
+ ---
2
+ name: tdd
3
+ description: Strict TDD workflow (Red-Green-Refactor).
4
+ ---
5
+
6
+ ---
7
+ name: tdd
8
+ description: Test-driven development with red-green-refactor loop. Use when user wants to build features or fix bugs using TDD, mentions "red-green-refactor", wants integration tests, or asks for test-first development.
9
+ ---
10
+
11
+ # Test-Driven Development
12
+
13
+ ## Philosophy
14
+
15
+ **Core principle**: Tests should verify behavior through public interfaces, not implementation details. Code can change entirely; tests shouldn't.
16
+
17
+ **Good tests** are integration-style: they exercise real code paths through public APIs. They describe _what_ the system does, not _how_ it does it. A good test reads like a specification - "user can checkout with valid cart" tells you exactly what capability exists. These tests survive refactors because they don't care about internal structure.
18
+
19
+ **Bad tests** are coupled to implementation. They mock internal collaborators, test private methods, or verify through external means (like querying a database directly instead of using the interface). The warning sign: your test breaks when you refactor, but behavior hasn't changed. If you rename an internal function and tests fail, those tests were testing implementation, not behavior.
20
+
21
+ See [tests.md](tests.md) for examples and [mocking.md](mocking.md) for mocking guidelines.
22
+
23
+ ## Anti-Pattern: Horizontal Slices
24
+
25
+ **DO NOT write all tests first, then all implementation.** This is "horizontal slicing" - treating RED as "write all tests" and GREEN as "write all code."
26
+
27
+ This produces **crap tests**:
28
+
29
+ - Tests written in bulk test _imagined_ behavior, not _actual_ behavior
30
+ - You end up testing the _shape_ of things (data structures, function signatures) rather than user-facing behavior
31
+ - Tests become insensitive to real changes - they pass when behavior breaks, fail when behavior is fine
32
+ - You outrun your headlights, committing to test structure before understanding the implementation
33
+
34
+ **Correct approach**: Vertical slices via tracer bullets. One test → one implementation → repeat. Each test responds to what you learned from the previous cycle. Because you just wrote the code, you know exactly what behavior matters and how to verify it.
35
+
36
+ ```
37
+ WRONG (horizontal):
38
+ RED: test1, test2, test3, test4, test5
39
+ GREEN: impl1, impl2, impl3, impl4, impl5
40
+
41
+ RIGHT (vertical):
42
+ RED→GREEN: test1→impl1
43
+ RED→GREEN: test2→impl2
44
+ RED→GREEN: test3→impl3
45
+ ...
46
+ ```
47
+
48
+ ## Workflow
49
+
50
+ ### 1. Planning
51
+
52
+ Before writing any code:
53
+
54
+ - [ ] Confirm with user what interface changes are needed
55
+ - [ ] Confirm with user which behaviors to test (prioritize)
56
+ - [ ] Identify opportunities for [deep modules](deep-modules.md) (small interface, deep implementation)
57
+ - [ ] Design interfaces for [testability](interface-design.md)
58
+ - [ ] List the behaviors to test (not implementation steps)
59
+ - [ ] Get user approval on the plan
60
+
61
+ Ask: "What should the public interface look like? Which behaviors are most important to test?"
62
+
63
+ **You can't test everything.** Confirm with the user exactly which behaviors matter most. Focus testing effort on critical paths and complex logic, not every possible edge case.
64
+
65
+ ### 2. Tracer Bullet
66
+
67
+ Write ONE test that confirms ONE thing about the system:
68
+
69
+ ```
70
+ RED: Write test for first behavior → test fails
71
+ GREEN: Write minimal code to pass → test passes
72
+ ```
73
+
74
+ This is your tracer bullet - proves the path works end-to-end.
75
+
76
+ ### 3. Incremental Loop
77
+
78
+ For each remaining behavior:
79
+
80
+ ```
81
+ RED: Write next test → fails
82
+ GREEN: Minimal code to pass → passes
83
+ ```
84
+
85
+ Rules:
86
+
87
+ - One test at a time
88
+ - Only enough code to pass current test
89
+ - Don't anticipate future tests
90
+ - Keep tests focused on observable behavior
91
+
92
+ ### 4. Refactor
93
+
94
+ After all tests pass, look for [refactor candidates](refactoring.md):
95
+
96
+ - [ ] Extract duplication
97
+ - [ ] Deepen modules (move complexity behind simple interfaces)
98
+ - [ ] Apply SOLID principles where natural
99
+ - [ ] Consider what new code reveals about existing code
100
+ - [ ] Run tests after each refactor step
101
+
102
+ **Never refactor while RED.** Get to GREEN first.
103
+
104
+ ## Checklist Per Cycle
105
+
106
+ ```
107
+ [ ] Test describes behavior, not implementation
108
+ [ ] Test uses public interface only
109
+ [ ] Test would survive internal refactor
110
+ [ ] Code is minimal for this test
111
+ [ ] No speculative features added
112
+ ```
@@ -0,0 +1,21 @@
1
+ # Deep Modules in TDD
2
+
3
+ Deep modules are those with complex internal logic but a simple, stable interface. In TDD, our goal is to test the **depth** of the logic through the **surface** of the interface.
4
+
5
+ ## Principles
6
+
7
+ ### 1. Test behavior, not implementation
8
+ - Write tests against the public API of the module.
9
+ - Avoid testing private methods or internal state directly.
10
+ - If the internal logic is too complex to test through the public API, the module might be **too deep** or the interface **too narrow**.
11
+
12
+ ### 2. Isolate complexity
13
+ - Use "Sociable Tests" for internal helper classes that are tightly coupled.
14
+ - Use "Solitary Tests" (with mocks) for external dependencies (DB, API, etc.).
15
+
16
+ ### 3. Handle state transitions
17
+ - Deep modules often manage complex state machines.
18
+ - Use TDD to map out every valid (and invalid) state transition.
19
+
20
+ ## Strategy: The "Opaque Box" Approach
21
+ Treat the module as an opaque box. Feed it inputs and verify outputs/side-effects. If the logic inside changes but the behavior remains the same, your tests should **not** break.
@@ -0,0 +1,22 @@
1
+ # Interface Design through TDD
2
+
3
+ TDD is your best tool for designing clean, usable interfaces. By writing the test first, you are the **first consumer** of your own API.
4
+
5
+ ## Design Signals from Tests
6
+
7
+ ### "This test is too hard to set up"
8
+ - **Signal**: Your class has too many dependencies or is doing too much (violating SRP).
9
+ - **Fix**: Break the class into smaller, more focused components.
10
+
11
+ ### "I have to mock 5 things just to test one method"
12
+ - **Signal**: Excessive coupling.
13
+ - **Fix**: Use Dependency Injection and define clearer boundaries.
14
+
15
+ ### "I don't know what to name this test"
16
+ - **Signal**: The behavior is ill-defined or the module has an "identity crisis".
17
+ - **Fix**: Re-evaluate the purpose of the component.
18
+
19
+ ## Best Practices
20
+ - **Prefer Composition over Inheritance**: Tests are much easier to write for composed objects.
21
+ - **Keep Interfaces Narrow**: Only expose what is absolutely necessary for the consumer (and the test).
22
+ - **Return Meaningful Values**: Avoid `void` where possible; returning results makes assertion-based testing natural.
@@ -0,0 +1,24 @@
1
+ # Mocking Strategies
2
+
3
+ Mocking is a double-edged sword. Used correctly, it isolates your code. Used poorly, it creates brittle tests that break with every refactor.
4
+
5
+ ## When to Mock
6
+ - **External Systems**: Databases, File Systems, Network APIs, 3rd-party libraries.
7
+ - **Non-Deterministic Logic**: Time (`Date.now()`), Random numbers, GUID generation.
8
+ - **Expensive Operations**: Heavy computations or long-running tasks that would slow down the TDD loop.
9
+
10
+ ## What NOT to Mock
11
+ - **Value Objects**: Objects that only hold data.
12
+ - **Internal Helpers**: If a helper is part of the module's logic, let the test exercise it ("Sociable Testing").
13
+ - **Language Features**: Don't mock standard library functions unless necessary for environment isolation.
14
+
15
+ ## The Mocking Traps
16
+
17
+ ### 1. The "Mirror" Trap
18
+ - **Problem**: Mocking internal calls so closely that the test just mirrors the implementation code.
19
+ - **Result**: You can't refactor without breaking the test.
20
+ - **Fix**: Assert on final outcomes or side-effects, not inner method calls.
21
+
22
+ ### 2. The "Over-Mocking" Trap
23
+ - **Problem**: Mocking so many things that you aren't actually testing your code.
24
+ - **Fix**: Use **Fakes** (real implementations of interfaces, e.g., `InMemoryDatabase`) over Mocks where possible.
@@ -0,0 +1,21 @@
1
+ # Refactoring with Confidence
2
+
3
+ Step 3 of the TDD loop is **Refactor**. This is where we move from "working code" to "clean code".
4
+
5
+ ## The Refactoring Workflow
6
+ 1. **Ensure tests are Green**: Never refactor while tests are failing.
7
+ 2. **Make small changes**: One variable rename, one function extraction, or one pattern application.
8
+ 3. **Run tests after EVERY change**: If they turn red, undo immediately.
9
+ 4. **Repeat** until the code is clean.
10
+
11
+ ## Refactoring-Safe Tests
12
+ Tests that facilitate refactoring have the following traits:
13
+ - **Implementation Agnostic**: They don't care *how* the result is calculated.
14
+ - **Explicit**: They clearly state the business rule being protected.
15
+ - **Fast**: They run in milliseconds, allowing for frequent execution.
16
+
17
+ ## Signs You Need to Refactor
18
+ - Duplicated code logic.
19
+ - Long methods (> 20 lines).
20
+ - Large classes with multiple responsibilities.
21
+ - Hard-coded values that should be configuration.
@@ -0,0 +1,28 @@
1
+ # Writing Effective TDD Tests
2
+
3
+ Your tests are the specification for your system. Treat them as first-class citizens.
4
+
5
+ ## The Test Anatomy: AAA
6
+
7
+ - **Arrange**: Set up the environment, dependencies, and inputs.
8
+ - **Act**: Call the method/function being tested.
9
+ - **Assert**: Verify the result or state change.
10
+
11
+ ## High-Quality TDD Tests
12
+
13
+ ### 1. Single Responsibility
14
+ Each test should verify exactly **one** requirement. If a test has "and" in its name, it's likely two tests.
15
+
16
+ ### 2. Descriptive Naming
17
+ Use the `should ... when ...` pattern.
18
+ - ✅ `should_calculate_discount_when_user_is_premium`
19
+ - ❌ `test_discount_logic_1`
20
+
21
+ ### 3. Avoid Logic in Tests
22
+ If your test has `if` statements or `for` loops, it's too complex. Tests should be straightforward declarations of expected behavior.
23
+
24
+ ### 4. Zero Watermarks
25
+ Ensure your tests clean up after themselves (e.g., deleting temporary files or resetting mocks) to avoid "leaky state" affecting other tests.
26
+
27
+ ## Running Tests
28
+ Always keep your test runner in "watch mode" during TDD. The feedback loop must be near-instant.
@@ -34,7 +34,7 @@ Parse JSON for: `researcher_model`, `planner_model`, `checker_model`, `research_
34
34
 
35
35
  ## 2. Parse and Normalize Arguments
36
36
 
37
- Extract from $ARGUMENTS: phase number (integer or decimal like `2.1`), flags (`--research`, `--skip-research`, `--gaps`, `--skip-verify`, `--prd <filepath>`, `--reviews`, `--text`).
37
+ Extract from $ARGUMENTS: phase number (integer or decimal like `2.1`), flags (`--research`, `--skip-research`, `--gaps`, `--skip-verify`, `--prd <filepath>`, `--reviews`, `--ads`, `--text`).
38
38
 
39
39
  Set `TEXT_MODE=true` if `--text` is present in $ARGUMENTS OR `text_mode` from init JSON is `true`. When `TEXT_MODE` is active, replace every `AskUserQuestion` call with a plain-text numbered list and ask the user to type their choice number. This is required for Claude Code remote sessions (`/rc` mode) where TUI menus don't work through the agent App.
40
40
 
@@ -457,6 +457,8 @@ Proceed to Step 8 only if user selects 2 or 3.
457
457
 
458
458
  ## 8. Spawn mindforge-planner Agent
459
459
 
460
+ **Skip if:** `--ads` flag is present. Proceed to Step 8.5.
461
+
460
462
  Display banner:
461
463
  ```
462
464
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
@@ -550,6 +552,33 @@ Task(
550
552
  )
551
553
  ```
552
554
 
555
+ ## 8.5. Adversarial Decision Synthesis (ADS)
556
+
557
+ **Skip if:** No `--ads` flag.
558
+
559
+ Display banner:
560
+ ```
561
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
562
+ MindForge ► ADS SYNTHESIS LOOP
563
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
564
+
565
+ ◆ Spawning Blue-Team Architect...
566
+ ◆ Spawning Red-Team Auditor...
567
+ ◆ Spawning Gold-Team Synthesizer (SOUL Scorer)...
568
+ ```
569
+
570
+ ### Call ADS Engine
571
+
572
+ Invoke the ADS orchestrator to perform the cross-model synthesis loop:
573
+
574
+ ```bash
575
+ node "bin/review/ads-engine.js" --phase "${PHASE}" --output-dir "${PHASE_DIR}"
576
+ ```
577
+
578
+ The ADS engine handles the 3-model logic, SOUL scoring, and ADR generation. Once complete, it will have written the finalized `PLAN.md` to the phase directory.
579
+
580
+ Continue to Step 9.
581
+
553
582
  ## 9. Handle Planner Return
554
583
 
555
584
  - **`## PLANNING COMPLETE`:** Display plan count. If `--skip-verify` or `plan_checker_enabled` is false (from init): skip to step 13. Otherwise: step 10.
@@ -0,0 +1,40 @@
1
+ ---
2
+ description: Design systems with appropriate complexity - no more, no less.
3
+ ---
4
+ # 🏗️ /mindforge:architecture
5
+
6
+ <instruction>
7
+ Architect robust systems with clear boundaries, explicit trade-offs, and justified complexity using the MindForge System Architecture framework.
8
+ </instruction>
9
+
10
+ <context>
11
+ Follow the architectural governance defined in [.agent/skills/mindforge-system-architecture/SKILL.md](.agent/skills/mindforge-system-architecture/SKILL.md).
12
+ </context>
13
+
14
+ <rules>
15
+ - **Justify Complexity**: Every architectural addition must solve a documented problem.
16
+ - **Define Boundaries**: Ensure clear separation of concerns between services/modules.
17
+ - **Traceability**: All decisions must be mapped to project requirements.
18
+ - **Scale-Awareness**: Design for the next order of magnitude, but implement for the current one.
19
+ </rules>
20
+
21
+ <process>
22
+ 1. **Constraint Analysis**: Clarify scale, team size, and system lifespan.
23
+ 2. **Domain Mapping**: Identify core entities and their relationships.
24
+ 3. **Data Flow Design**: Map end-to-end flows and state transitions.
25
+ 4. **Complexity Audit**: Run the [Scaling Checklist](.agent/skills/mindforge-system-architecture/scaling-checklist.md) to eliminate over-engineering.
26
+ 5. **Trade-off Matrix**: Document selected approach vs. alternatives with pros/cons.
27
+ </process>
28
+
29
+ <supporting_documents>
30
+ - [Architecture Examples](.agent/skills/mindforge-system-architecture/examples.md)
31
+ - [Scaling Checklist](.agent/skills/mindforge-system-architecture/scaling-checklist.md)
32
+ </supporting_documents>
33
+
34
+ <output_format>
35
+ Produce an `ARCHITECTURE.md` draft or update containing:
36
+ - Executive Summary
37
+ - Component/Service Diagram (Mermaid)
38
+ - Data Flow Overview
39
+ - Trade-off Analysis
40
+ </output_format>