workspace-maxxing 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/.agents/skills/workspace-maxxing/.workspace-templates/CONTEXT.md +44 -0
  2. package/.agents/skills/workspace-maxxing/.workspace-templates/SYSTEM.md +44 -0
  3. package/.agents/skills/workspace-maxxing/.workspace-templates/references/anti-patterns.md +16 -0
  4. package/.agents/skills/workspace-maxxing/.workspace-templates/references/iron-laws.md +26 -0
  5. package/.agents/skills/workspace-maxxing/.workspace-templates/references/reporting-format.md +52 -0
  6. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/benchmark.ts +171 -0
  7. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/dispatch.ts +473 -0
  8. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/generate-tests.ts +158 -0
  9. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/install-tool.ts +82 -0
  10. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/iterate.ts +265 -0
  11. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/orchestrator.ts +539 -0
  12. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/scaffold.ts +282 -0
  13. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/validate.ts +452 -0
  14. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/architecture/SKILL.md +95 -0
  15. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/fixer/SKILL.md +109 -0
  16. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/iteration/SKILL.md +89 -0
  17. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
  18. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/research/SKILL.md +94 -0
  19. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/testing/SKILL.md +89 -0
  20. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/tooling/SKILL.md +87 -0
  21. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/validation/SKILL.md +103 -0
  22. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/worker/SKILL.md +79 -0
  23. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
  24. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
  25. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
  26. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
  27. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
  28. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/README.md +14 -0
  29. package/.agents/skills/workspace-maxxing/SKILL.md +312 -0
  30. package/.agents/skills/workspace-maxxing/scripts/benchmark.ts +171 -0
  31. package/.agents/skills/workspace-maxxing/scripts/dispatch.ts +473 -0
  32. package/.agents/skills/workspace-maxxing/scripts/generate-tests.ts +158 -0
  33. package/.agents/skills/workspace-maxxing/scripts/install-tool.ts +82 -0
  34. package/.agents/skills/workspace-maxxing/scripts/iterate.ts +265 -0
  35. package/.agents/skills/workspace-maxxing/scripts/orchestrator.ts +539 -0
  36. package/.agents/skills/workspace-maxxing/scripts/scaffold.ts +282 -0
  37. package/.agents/skills/workspace-maxxing/scripts/validate.ts +452 -0
  38. package/README.md +144 -0
  39. package/dist/agent-creator.d.ts +9 -0
  40. package/dist/agent-creator.d.ts.map +1 -0
  41. package/dist/agent-creator.js +199 -0
  42. package/dist/agent-creator.js.map +1 -0
  43. package/dist/agent-iterator.d.ts +38 -0
  44. package/dist/agent-iterator.d.ts.map +1 -0
  45. package/dist/agent-iterator.js +327 -0
  46. package/dist/agent-iterator.js.map +1 -0
  47. package/dist/index.d.ts +3 -0
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +197 -0
  50. package/dist/index.js.map +1 -0
  51. package/dist/install.d.ts +18 -0
  52. package/dist/install.d.ts.map +1 -0
  53. package/dist/install.js +117 -0
  54. package/dist/install.js.map +1 -0
  55. package/dist/platforms/claude.d.ts +7 -0
  56. package/dist/platforms/claude.d.ts.map +1 -0
  57. package/dist/platforms/claude.js +70 -0
  58. package/dist/platforms/claude.js.map +1 -0
  59. package/dist/platforms/copilot.d.ts +7 -0
  60. package/dist/platforms/copilot.d.ts.map +1 -0
  61. package/dist/platforms/copilot.js +75 -0
  62. package/dist/platforms/copilot.js.map +1 -0
  63. package/dist/platforms/gemini.d.ts +7 -0
  64. package/dist/platforms/gemini.d.ts.map +1 -0
  65. package/dist/platforms/gemini.js +81 -0
  66. package/dist/platforms/gemini.js.map +1 -0
  67. package/dist/platforms/index.d.ts +8 -0
  68. package/dist/platforms/index.d.ts.map +1 -0
  69. package/dist/platforms/index.js +41 -0
  70. package/dist/platforms/index.js.map +1 -0
  71. package/dist/platforms/opencode.d.ts +7 -0
  72. package/dist/platforms/opencode.d.ts.map +1 -0
  73. package/dist/platforms/opencode.js +70 -0
  74. package/dist/platforms/opencode.js.map +1 -0
  75. package/dist/scripts/benchmark.d.ts +20 -0
  76. package/dist/scripts/benchmark.d.ts.map +1 -0
  77. package/dist/scripts/benchmark.js +170 -0
  78. package/dist/scripts/benchmark.js.map +1 -0
  79. package/dist/scripts/dispatch.d.ts +32 -0
  80. package/dist/scripts/dispatch.d.ts.map +1 -0
  81. package/dist/scripts/dispatch.js +386 -0
  82. package/dist/scripts/dispatch.js.map +1 -0
  83. package/dist/scripts/generate-tests.d.ts +11 -0
  84. package/dist/scripts/generate-tests.d.ts.map +1 -0
  85. package/dist/scripts/generate-tests.js +118 -0
  86. package/dist/scripts/generate-tests.js.map +1 -0
  87. package/dist/scripts/install-tool.d.ts +8 -0
  88. package/dist/scripts/install-tool.d.ts.map +1 -0
  89. package/dist/scripts/install-tool.js +98 -0
  90. package/dist/scripts/install-tool.js.map +1 -0
  91. package/dist/scripts/iterate.d.ts +44 -0
  92. package/dist/scripts/iterate.d.ts.map +1 -0
  93. package/dist/scripts/iterate.js +260 -0
  94. package/dist/scripts/iterate.js.map +1 -0
  95. package/dist/scripts/orchestrator.d.ts +40 -0
  96. package/dist/scripts/orchestrator.d.ts.map +1 -0
  97. package/dist/scripts/orchestrator.js +378 -0
  98. package/dist/scripts/orchestrator.js.map +1 -0
  99. package/dist/scripts/scaffold.d.ts +8 -0
  100. package/dist/scripts/scaffold.d.ts.map +1 -0
  101. package/dist/scripts/scaffold.js +279 -0
  102. package/dist/scripts/scaffold.js.map +1 -0
  103. package/dist/scripts/validate.d.ts +11 -0
  104. package/dist/scripts/validate.d.ts.map +1 -0
  105. package/dist/scripts/validate.js +472 -0
  106. package/dist/scripts/validate.js.map +1 -0
  107. package/docs/superpowers/plans/2026-04-07-autonomous-iteration-plan.md +1123 -0
  108. package/docs/superpowers/plans/2026-04-07-autonomous-iteration-sub-agent-batches.md +1923 -0
  109. package/docs/superpowers/plans/2026-04-07-autonomous-workflow-sub-skill-plan.md +1505 -0
  110. package/docs/superpowers/plans/2026-04-07-benchmarking-multi-agent-plan.md +854 -0
  111. package/docs/superpowers/plans/2026-04-07-workspace-builder-logic-plan.md +1426 -0
  112. package/docs/superpowers/plans/2026-04-07-workspace-maxxing-plan.md +1299 -0
  113. package/docs/superpowers/plans/2026-04-08-session-294c-subagent-invocation-plan.md +320 -0
  114. package/docs/superpowers/plans/2026-04-08-workflow-prompt-hardening-plan.md +1025 -0
  115. package/docs/superpowers/plans/2026-04-12-workspace-agent-creation-plan.md +992 -0
  116. package/docs/superpowers/specs/2026-04-07-autonomous-iteration-design.md +214 -0
  117. package/docs/superpowers/specs/2026-04-07-autonomous-iteration-sub-agent-batches-design.md +188 -0
  118. package/docs/superpowers/specs/2026-04-07-autonomous-workflow-sub-skill-design.md +137 -0
  119. package/docs/superpowers/specs/2026-04-07-benchmarking-multi-agent-design.md +105 -0
  120. package/docs/superpowers/specs/2026-04-07-workspace-builder-logic-design.md +179 -0
  121. package/docs/superpowers/specs/2026-04-07-workspace-maxxing-design.md +227 -0
  122. package/docs/superpowers/specs/2026-04-08-session-294c-subagent-invocation-design.md +265 -0
  123. package/docs/superpowers/specs/2026-04-08-workflow-prompt-hardening-design.md +146 -0
  124. package/docs/superpowers/specs/2026-04-12-workspace-agent-creation-design.md +239 -0
  125. package/jest.config.js +8 -0
  126. package/package.json +32 -0
  127. package/src/agent-creator.ts +180 -0
  128. package/src/agent-iterator.ts +397 -0
  129. package/src/index.ts +189 -0
  130. package/src/install.ts +105 -0
  131. package/src/platforms/claude.ts +40 -0
  132. package/src/platforms/copilot.ts +50 -0
  133. package/src/platforms/gemini.ts +55 -0
  134. package/src/platforms/index.ts +45 -0
  135. package/src/platforms/opencode.ts +41 -0
  136. package/src/scripts/benchmark.ts +171 -0
  137. package/src/scripts/dispatch.ts +473 -0
  138. package/src/scripts/generate-tests.ts +112 -0
  139. package/src/scripts/install-tool.ts +82 -0
  140. package/src/scripts/iterate.ts +271 -0
  141. package/src/scripts/orchestrator.ts +539 -0
  142. package/src/scripts/scaffold.ts +282 -0
  143. package/src/scripts/validate.ts +516 -0
  144. package/templates/.workspace-templates/CONTEXT.md +44 -0
  145. package/templates/.workspace-templates/SYSTEM.md +44 -0
  146. package/templates/.workspace-templates/references/anti-patterns.md +16 -0
  147. package/templates/.workspace-templates/references/iron-laws.md +26 -0
  148. package/templates/.workspace-templates/references/reporting-format.md +52 -0
  149. package/templates/.workspace-templates/scripts/benchmark.ts +171 -0
  150. package/templates/.workspace-templates/scripts/dispatch.ts +473 -0
  151. package/templates/.workspace-templates/scripts/generate-tests.ts +158 -0
  152. package/templates/.workspace-templates/scripts/install-tool.ts +82 -0
  153. package/templates/.workspace-templates/scripts/iterate.ts +265 -0
  154. package/templates/.workspace-templates/scripts/orchestrator.ts +539 -0
  155. package/templates/.workspace-templates/scripts/scaffold.ts +282 -0
  156. package/templates/.workspace-templates/scripts/validate.ts +452 -0
  157. package/templates/.workspace-templates/skills/architecture/SKILL.md +95 -0
  158. package/templates/.workspace-templates/skills/fixer/SKILL.md +109 -0
  159. package/templates/.workspace-templates/skills/iteration/SKILL.md +89 -0
  160. package/templates/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
  161. package/templates/.workspace-templates/skills/research/SKILL.md +94 -0
  162. package/templates/.workspace-templates/skills/testing/SKILL.md +89 -0
  163. package/templates/.workspace-templates/skills/tooling/SKILL.md +87 -0
  164. package/templates/.workspace-templates/skills/validation/SKILL.md +103 -0
  165. package/templates/.workspace-templates/skills/worker/SKILL.md +79 -0
  166. package/templates/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
  167. package/templates/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
  168. package/templates/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
  169. package/templates/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
  170. package/templates/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
  171. package/templates/.workspace-templates/workspace/README.md +14 -0
  172. package/templates/SKILL.md +347 -0
  173. package/tests/benchmark.test.ts +158 -0
  174. package/tests/cli.test.ts +109 -0
  175. package/tests/dispatch-parallel.test.ts +124 -0
  176. package/tests/dispatch.test.ts +218 -0
  177. package/tests/fixer-skill.test.ts +203 -0
  178. package/tests/generate-tests.test.ts +101 -0
  179. package/tests/install-tool.test.ts +141 -0
  180. package/tests/install.test.ts +144 -0
  181. package/tests/integration.test.ts +324 -0
  182. package/tests/iterate.test.ts +219 -0
  183. package/tests/orchestrator.test.ts +710 -0
  184. package/tests/scaffold.test.ts +238 -0
  185. package/tests/templates-enhanced.test.ts +208 -0
  186. package/tests/templates.test.ts +219 -0
  187. package/tests/validate.test.ts +421 -0
  188. package/tests/validation-enhanced.test.ts +303 -0
  189. package/tests/worker-skill.test.ts +88 -0
  190. package/tsconfig.json +19 -0
  191. package/workspace/00-meta/CONTEXT.md +3 -0
  192. package/workspace/00-meta/execution-log.md +17 -0
  193. package/workspace/00-meta/tools.md +11 -0
  194. package/workspace/01-input/CONTEXT.md +27 -0
  195. package/workspace/CONTEXT.md +35 -0
  196. package/workspace/README.md +14 -0
  197. package/workspace/SYSTEM.md +36 -0
  198. package/workspace-maxxing-0.1.0.tgz +0 -0
@@ -0,0 +1,214 @@
1
+ # Workspace-Maxxing Design Spec — Sub-Project 3: Autonomous Iteration & Validation
2
+
3
+ > **Phase 3 of 4:** Autonomous iteration engine + sub-agent orchestration. Phase 4 adds benchmarking and multi-agent support.
4
+
5
+ ## Context
6
+
7
+ Sub-Project 1 delivered the npx CLI with skill installation. Sub-Project 2 added helper scripts (scaffold, validate, install-tool) for programmatic workspace creation. Sub-Project 3 adds autonomous iteration — the agent self-tests, self-evaluates, and improves the workspace without human involvement, escalating only when stuck.
8
+
9
+ ## Architecture
10
+
11
+ ### Data Flow
12
+
13
+ ```
14
+ Agent scaffolds workspace → runs iterate.ts
15
+
16
+ ├─ Pass 1: Validate-Fix Loop
17
+ │ ├─ Run validate.ts
18
+ │ ├─ If failures → fix specific issues → re-validate
19
+ │ └─ Repeat until pass OR max retries (3) → escalate to human
20
+
21
+ ├─ Pass 2: Score-Driven Content Quality
22
+ │ ├─ Score workspace (structure + content quality, 0-100)
23
+ │ ├─ Identify lowest-scoring areas
24
+ │ └─ Agent improves content, re-scores until plateau
25
+
26
+ ├─ Pass 3: Completeness Checklist
27
+ │ ├─ Check: every stage has inputs/outputs/dependencies
28
+ │ ├─ Check: routing table references all folders
29
+ │ └─ Agent fills gaps
30
+
31
+ └─ Sub-Agent Testing (agent-orchestrated via SKILL.md)
32
+ ├─ Agent runs generate-tests.ts to create test cases
33
+ ├─ Agent spawns sub-agents: half generate, half evaluate
34
+ ├─ Results aggregated → agent reviews
35
+ └─ If confidence low → escalate to human
36
+ ```
37
+
38
+ ### Components
39
+
40
+ #### 1. Iterate Script (`scripts/iterate.ts`)
41
+
42
+ Orchestrates the 3-pass improvement loop.
43
+
44
+ **CLI Interface:**
45
+ ```bash
46
+ node scripts/iterate.ts --workspace ./workspace --max-retries 3
47
+ ```
48
+
49
+ **Pass 1: Validate-Fix Loop**
50
+ - Runs `validate.ts` programmatically (imports the module rather than shelling out)
51
+ - If failures exist, returns structured error details to the agent
52
+ - Retries up to `--max-retries` times (default 3)
53
+ - If still failing after max retries, returns `{ escalate: true }` with failure details
54
+ - The agent reads the output and attempts fixes between retries
55
+
56
+ **Pass 2: Score-Driven Content Quality**
57
+ - Scores workspace on a 0-100 scale using these criteria:
58
+ - SYSTEM.md quality (has role, folder map, rules) — 20 points
59
+ - CONTEXT.md quality (has routing table, references all stages) — 20 points
60
+ - Each stage CONTEXT.md has purpose, inputs, outputs, dependencies — 15 points per stage (capped at 45 total for 3 stages)
61
+ - tools.md exists and has content — 15 points
62
+ - Identifies lowest-scoring areas and reports them
63
+ - Agent improves content between score runs
64
+
65
+ **Pass 3: Completeness Checklist**
66
+ - Fixed checklist of structural requirements:
67
+ - Every stage has inputs defined
68
+ - Every stage has outputs defined
69
+ - Every stage has dependencies defined
70
+ - Routing table references all numbered folders
71
+ - README.md exists and has usage instructions
72
+ - Reports pass/fail per item
73
+
74
+ **Output:** JSON to stdout with structured results:
75
+ ```json
76
+ {
77
+ "passes": {
78
+ "validate": { "status": "passed", "retries": 1 },
79
+ "score": { "score": 78, "improvements": ["01-input missing dependencies"] },
80
+ "checklist": { "items": 5, "passed": 5, "failed": 0 }
81
+ },
82
+ "escalate": false
83
+ }
84
+ ```
85
+
86
+ **Dependencies:** Node.js builtins only (`fs`, `path`, `process`). Imports `validateWorkspace` from `validate.ts` directly.
87
+
88
+ #### 2. Generate Tests Script (`scripts/generate-tests.ts`)
89
+
90
+ Generates test cases for workspace evaluation.
91
+
92
+ **CLI Interface:**
93
+ ```bash
94
+ node scripts/generate-tests.ts --workspace ./workspace --output ./tests.json
95
+ ```
96
+
97
+ **What it generates:**
98
+ - For each numbered stage folder: 2-3 test cases
99
+ - Test case types: `sample` (normal input), `edge-case` (boundary conditions), `empty` (missing input)
100
+ - Each test case includes: stage name, type, sample input, expected output description
101
+
102
+ **Output:** JSON file at `--output` path:
103
+ ```json
104
+ {
105
+ "workspace": "research",
106
+ "testCases": [
107
+ {
108
+ "stage": "01-input",
109
+ "type": "sample",
110
+ "input": "A research question about climate change",
111
+ "expected": "Stage should collect and validate the research question"
112
+ },
113
+ {
114
+ "stage": "01-input",
115
+ "type": "empty",
116
+ "input": "",
117
+ "expected": "Stage should handle empty input gracefully"
118
+ }
119
+ ]
120
+ }
121
+ ```
122
+
123
+ **Dependencies:** Node.js builtins only
124
+
125
+ #### 3. Enhanced SKILL.md
126
+
127
+ Updated to include "## Autonomous Iteration" section with:
128
+ - Instructions for running `iterate.ts` and interpreting results
129
+ - How to fix validation failures between retries
130
+ - How to improve scores between scoring runs
131
+ - How to fill checklist gaps
132
+ - Sub-agent spawning instructions:
133
+ - Run `generate-tests.ts` to create test cases
134
+ - Split test cases: half for generation sub-agents, half for evaluation sub-agents
135
+ - Generation sub-agents: create sample content for assigned test cases
136
+ - Evaluation sub-agents: review workspace against assigned test cases
137
+ - Aggregate results, assess confidence
138
+ - Escalation criteria: present failures to human with proposed fix
139
+
140
+ ### File Structure
141
+
142
+ ```
143
+ workspace-maxxing/
144
+ ├── src/
145
+ │ ├── scripts/
146
+ │ │ ├── iterate.ts — Orchestration script source
147
+ │ │ └── generate-tests.ts — Test case generator source
148
+ │ ├── index.ts — Unchanged
149
+ │ └── install.ts — Modified: also copies new scripts
150
+ ├── templates/
151
+ │ ├── SKILL.md — Enhanced with autonomous iteration
152
+ │ └── .workspace-templates/
153
+ │ └── scripts/
154
+ │ ├── iterate.ts — Copy for distribution
155
+ │ └── generate-tests.ts — Copy for distribution
156
+ ├── tests/
157
+ │ ├── iterate.test.ts
158
+ │ └── generate-tests.test.ts
159
+ ```
160
+
161
+ ### Changes to Existing Files
162
+
163
+ **`src/install.ts`:** Add `iterate.ts` and `generate-tests.ts` to the scripts copy list.
164
+
165
+ **`templates/SKILL.md`:** Add "## Autonomous Iteration" section with full instructions.
166
+
167
+ ### Error Handling
168
+
169
+ - **iterate.ts:** Never crashes with an unhandled exception. If validation still fails after max retries, returns `{ escalate: true }` with structured failure details. Score and checklist passes are best-effort — they log warnings but don't block the process.
170
+ - **generate-tests.ts:** If workspace has no numbered stage folders, returns empty test cases array with a warning message. Never throws.
171
+ - **Escalation:** When `iterate.ts` returns `escalate: true`, SKILL.md instructs the agent to present the specific failures to the human with a proposed fix, rather than silently continuing.
172
+
173
+ ### Testing Strategy
174
+
175
+ - **iterate.test.ts:**
176
+ - Mock `validateWorkspace` to return failures → verify retry logic
177
+ - Mock `validateWorkspace` to pass immediately → verify single pass, no retries
178
+ - Mock `validateWorkspace` to always fail → verify escalation after max retries
179
+ - Verify scoring function returns correct scores for known workspaces
180
+ - Verify checklist function reports correct pass/fail
181
+
182
+ - **generate-tests.test.ts:**
183
+ - Create workspace with 3 stages → verify 6-9 test cases generated (2-3 per stage)
184
+ - Create workspace with no stages → verify empty test cases with warning
185
+ - Verify test case structure (stage, type, input, expected)
186
+ - Verify output file is valid JSON
187
+
188
+ - **Integration:**
189
+ - Scaffold workspace → run iterate → verify score improves
190
+ - Scaffold workspace → run generate-tests → verify valid JSON output
191
+
192
+ ### Scope
193
+
194
+ **In Scope (This Phase):**
195
+ - `iterate.ts` with 3-pass loop (validate-fix, score, checklist)
196
+ - `generate-tests.ts` for test case generation
197
+ - Enhanced SKILL.md with autonomous iteration instructions
198
+ - Tests for both scripts
199
+ - Installer updated to copy new scripts
200
+
201
+ **Out of Scope (Future Phases):**
202
+ - Benchmark scoring system (Phase 4) — the scoring in this phase is workspace quality scoring, not benchmarking
203
+ - Multi-agent CLI flags (--claude, --copilot, --gemini) (Phase 4)
204
+ - External sub-agent API integration (sub-agents are spawned via agent's native tool use)
205
+ - Hill-climbing algorithm automation (Phase 3 is scoped as agent-driven, not script-driven)
206
+
207
+ ### Success Criteria
208
+
209
+ 1. `node scripts/iterate.ts` runs 3-pass loop and returns structured results
210
+ 2. `node scripts/generate-tests.ts` generates test cases for all stages
211
+ 3. Enhanced SKILL.md documents autonomous iteration workflow
212
+ 4. Installer copies new scripts to skill directory
213
+ 5. All tests pass (Phase 1 + Phase 2 + Phase 3)
214
+ 6. Human escalation triggers correctly when validation fails after max retries
@@ -0,0 +1,188 @@
1
+ # Autonomous Iteration with Sub-Agent Batches — Design Spec
2
+
3
+ ## Problem
4
+
5
+ The current iteration workflow is script-driven (`iterate.ts`) with sequential validate-fix-score loops. Sub-skills lack obra/superpowers patterns (no YAML frontmatter, trigger phrases, anti-rationalization tables, iron laws). The goal is a truly autonomous workflow where fresh-context sub-agents execute test cases in parallel batches, validated by a dedicated validator agent, with fix loops for failures.
6
+
7
+ ## Solution
8
+
9
+ New `orchestrator.ts` script coordinates batched parallel worker sub-agents with validator checkpoints and fix loops. Two new sub-skills (`worker`, `fixer`) plus an enhanced `validation` sub-skill. All 7 existing sub-skills rewritten with obra/superpowers patterns. `dispatch.ts` extended for parallel invocation.
10
+
11
+ ## Architecture
12
+
13
+ ### Core Components
14
+
15
+ ```
16
+ orchestrator.ts (new)
17
+ ├── Generates test cases via generate-tests.ts
18
+ ├── Splits into batches (configurable via --batch-size, default 3)
19
+ ├── Dispatches worker sub-agents in parallel per batch
20
+ ├── Collects outputs (file + JSON)
21
+ ├── Dispatches validator sub-agent on batch results
22
+ └── If batch score < threshold → dispatches fixer sub-agents → re-validates → next batch
23
+ ```
24
+
25
+ ### New Sub-Skills
26
+
27
+ | Sub-Skill | Purpose | Trigger |
28
+ |-----------|---------|---------|
29
+ | `worker` | Executes a single test case against the workspace, produces output | "run test case", "execute workspace task" |
30
+ | `fixer` | Applies targeted fixes to failing test case outputs | "fix failing test", "improve output" |
31
+ | `validation` (enhanced) | Benchmarks batch outputs, returns structured score | "validate batch", "check results" |
32
+
33
+ ### Extended Components
34
+
35
+ | Component | Change |
36
+ |-----------|--------|
37
+ | `dispatch.ts` | Added `--parallel` flag + `--batch-id` for grouped invocation |
38
+ | `SKILL.md` | New "Autonomous Iteration Workflow" section replacing old iterate.ts docs |
39
+ | All 7 existing sub-skills | Rewritten with obra patterns (YAML frontmatter, trigger phrases, anti-rationalization tables, iron laws) |
40
+
41
+ ### Output Structure
42
+
43
+ ```
44
+ .agents/iteration/
45
+ ├── batch-01/
46
+ │ ├── tc-001/
47
+ │ │ ├── output.md (worker output, human-readable)
48
+ │ │ ├── report.json (structured JSON for validation)
49
+ │ │ └── fix-output.md (fixer output if needed)
50
+ │ ├── tc-002/
51
+ │ └── batch-report.json (validator benchmark results)
52
+ ├── batch-02/
53
+ └── summary.json (final aggregated results)
54
+ ```
55
+
56
+ ## Data Flow & Batch Lifecycle
57
+
58
+ ### Full Flow
59
+
60
+ ```
61
+ 1. GENERATE
62
+ orchestrator.ts → generate-tests.ts → tests.json
63
+
64
+ 2. BATCH SPLIT
65
+ tests.json → batches of N (default 3)
66
+
67
+ 3. WORKER DISPATCH (parallel per batch)
68
+ For each test case in batch:
69
+ dispatch.ts --skill worker --test-case <id> --batch-id <N> --workspace <path>
70
+ → reads workspace CONTEXT.md + test case
71
+ → executes task, writes output.md + report.json
72
+
73
+ 4. VALIDATOR DISPATCH
74
+ dispatch.ts --skill validation --batch-id <N> --workspace <path>
75
+ → reads all report.json files in batch directory
76
+ → runs benchmark scoring
77
+ → writes batch-report.json
78
+
79
+ 5. FIX LOOP (if batch score < threshold)
80
+ For each failing test case:
81
+ dispatch.ts --skill fixer --test-case <id> --batch-id <N> --workspace <path>
82
+ → reads validator findings + original output
83
+ → applies fixes, overwrites output.md + report.json
84
+ → re-run validator
85
+ → repeat until passing or max retries (default 3)
86
+
87
+ 6. NEXT BATCH or COMPLETE
88
+ If all batches done → write summary.json
89
+ If any batch escalated → report to human
90
+ ```
91
+
92
+ ### Key Interfaces
93
+
94
+ - **Worker input:** test case JSON + workspace path + batch ID
95
+ - **Worker output:** `output.md` (human-readable), `report.json` (structured: `{testCaseId, status, output, findings}`)
96
+ - **Validator input:** batch directory path + workspace path
97
+ - **Validator output:** `batch-report.json` (per-test scores, overall batch score, findings, fix suggestions)
98
+ - **Fixer input:** validator findings + original output path
99
+ - **Fixer output:** updated `output.md` + `report.json`
100
+
101
+ ### Concurrency Model
102
+
103
+ - Workers within a batch run in parallel (via `dispatch.ts --parallel`)
104
+ - Batches run sequentially (validator must complete before next batch starts)
105
+ - Fix-loop iterations run sequentially per batch (within each iteration, fixers for the failing test cases run in parallel)
106
+
107
+ ## Sub-Skill Design Pattern
108
+
109
+ All sub-skills follow this structure:
110
+
111
+ ```yaml
112
+ ---
113
+ name: <skill-name>
114
+ description: "<trigger-friendly description>"
115
+ triggers: ["<phrase1>", "<phrase2>"]
116
+ ---
117
+ ```
118
+
119
+ Then:
120
+ - **Overview** — one paragraph
121
+ - **When to Use / When Not to Use** — clear boundaries
122
+ - **The Iron Law** — 3-4 non-negotiable rules
123
+ - **The Process** — numbered steps
124
+ - **Anti-Rationalization Table** — common excuses vs reality
125
+ - **Sub-Skill Dispatch** (if applicable) — what to dispatch next
126
+ - **Report Format** — structured JSON schema
127
+
128
+ ### New Sub-Skills Specifics
129
+
130
+ **`worker` SKILL.md:**
131
+ - Focus: read test case, load relevant workspace sections, execute task, produce output
132
+ - Iron Law: NO SKIPPING TEST CASE STEPS, NO MODIFYING WORKSPACE STRUCTURE, NO CLAIMING DONE WITHOUT OUTPUT
133
+ - Dispatches to: `validation` after output complete
134
+
135
+ **`fixer` SKILL.md:**
136
+ - Focus: read validator findings, identify root cause, apply minimal fix, re-validate
137
+ - Iron Law: NO BLIND RETRIES, NO COSMETIC FIXES, NO FIXING WHAT ISN'T BROKEN
138
+ - Dispatches to: `validation` after fix applied
139
+
140
+ **`validation` (enhanced) SKILL.md:**
141
+ - Focus: batch-level benchmark scoring, per-test findings, fix suggestions
142
+ - Iron Law: NO SCORE INFLATION, NO SKIPPING FAILURES, NO VALIDATING WITHOUT BENCHMARK
143
+ - Dispatches to: `fixer` if score < threshold, `orchestrator` if passing
144
+
145
+ ## dispatch.ts Changes
146
+
147
+ - `--parallel` flag: spawns multiple sub-agent invocations concurrently
148
+ - `--batch-id` flag: tags outputs to batch directory
149
+ - Returns aggregated JSON when `--parallel` is used
150
+
151
+ ## File Changes
152
+
153
+ ### New Files
154
+ - `src/scripts/orchestrator.ts` — Batch orchestrator
155
+ - `templates/.workspace-templates/skills/worker/SKILL.md` — Worker sub-skill
156
+ - `templates/.workspace-templates/skills/fixer/SKILL.md` — Fixer sub-skill
157
+
158
+ ### Modified Files
159
+ - `src/scripts/dispatch.ts` — Parallel dispatch, batch ID support
160
+ - `templates/SKILL.md` — New "Autonomous Iteration Workflow" section
161
+ - `templates/.workspace-templates/skills/validation/SKILL.md` — Enhanced with batch validation
162
+ - `templates/.workspace-templates/skills/iteration/SKILL.md` — Rewritten with obra patterns
163
+ - `templates/.workspace-templates/skills/research/SKILL.md` — Rewritten with obra patterns
164
+ - `templates/.workspace-templates/skills/architecture/SKILL.md` — Rewritten with obra patterns
165
+ - `templates/.workspace-templates/skills/testing/SKILL.md` — Rewritten with obra patterns
166
+ - `templates/.workspace-templates/skills/prompt-engineering/SKILL.md` — Rewritten with obra patterns
167
+ - `templates/.workspace-templates/skills/tooling/SKILL.md` — Rewritten with obra patterns
168
+
169
+ ### New Test Files
170
+ - `tests/orchestrator.test.ts`
171
+ - `tests/dispatch-parallel.test.ts`
172
+ - `tests/worker-skill.test.ts`
173
+ - `tests/fixer-skill.test.ts`
174
+ - `tests/validation-enhanced.test.ts`
175
+
176
+ ## Error Handling
177
+
178
+ - **Worker timeout:** If a worker doesn't complete within the timeout (default 300s), mark the test case as failed and continue with the batch
179
+ - **Validator failure:** If validator can't parse outputs, escalate to human
180
+ - **Fix loop exhaustion:** After max retries (default 3), mark batch as partially failed, continue to next batch
181
+ - **Orchestrator crash:** `summary.json` is written at each batch boundary for recovery
182
+
183
+ ## Testing Strategy
184
+
185
+ - **Unit tests:** orchestrator.ts batch splitting, dispatch.ts parallel invocation, report aggregation
186
+ - **Integration tests:** full batch lifecycle (generate → dispatch → validate → fix → complete)
187
+ - **Sub-skill tests:** each sub-skill's report format, trigger phrases, iron law compliance
188
+ - **Edge cases:** empty test case list, single test case, all failures, all passes, mixed results
@@ -0,0 +1,137 @@
1
+ # Sub-Project 5: Autonomous Workflow & Sub-Skill Framework — Design Spec
2
+
3
+ ## Overview
4
+
5
+ Transform workspace-maxxing from a single-skill tool into a comprehensive autonomous workflow system using obra/superpowers patterns. Add YAML frontmatter, trigger phrases, anti-rationalization tables, sub-skill dispatch, and a hybrid phase-driven → condition-driven flow.
6
+
7
+ ## Architecture
8
+
9
+ ### File Structure
10
+
11
+ ```
12
+ templates/
13
+ ├── SKILL.md # Main entry point (rewritten)
14
+ └── .workspace-templates/
15
+ ├── skills/
16
+ │ ├── validation/SKILL.md # Workspace compliance checking
17
+ │ ├── research/SKILL.md # Pattern investigation & context gathering
18
+ │ ├── prompt-engineering/SKILL.md # Prompt improvement & optimization
19
+ │ ├── testing/SKILL.md # Test generation & evaluation
20
+ │ ├── iteration/SKILL.md # Autonomous improvement loop
21
+ │ ├── architecture/SKILL.md # Workspace structure design
22
+ │ └── tooling/SKILL.md # Tool assessment & installation
23
+ └── references/
24
+ ├── anti-patterns.md # Shared rationalization tables
25
+ ├── reporting-format.md # Standard sub-skill report structure
26
+ └── iron-laws.md # Shared discipline rules
27
+ ```
28
+
29
+ ### Main SKILL.md — Rewritten with obra patterns
30
+
31
+ **YAML Frontmatter:**
32
+ ```yaml
33
+ ---
34
+ name: workspace-maxxing
35
+ description: "Autonomously creates, validates, and improves ICM-compliant workspaces. Use when user asks to 'build a workspace', 'create a workflow', 'automate a process', 'improve this workspace', 'validate this workspace', or 'iterate on this workspace'."
36
+ ---
37
+ ```
38
+
39
+ **Core Sections:**
40
+ - `## Overview` — Single-line core principle
41
+ - `## When to Use` — Decision tree (when/when not)
42
+ - `## The Iron Law` — Absolute gates (no build without plan, no plan without research)
43
+ - `## Hybrid Flow` — Phase-driven → condition-driven workflow diagram (DOT)
44
+ - `## Sub-Skill Dispatch` — Table mapping conditions to sub-skills
45
+ - `## Available Scripts` — Existing script documentation
46
+ - `## Anti-Rationalization Table` — Pre-empts agent shortcuts
47
+ - `## Integration` — How sub-skills connect
48
+
49
+ ### Sub-Skills — Each follows obra SKILL.md pattern
50
+
51
+ Every sub-skill SKILL.md contains:
52
+ 1. YAML frontmatter with name + description + trigger phrases
53
+ 2. `## Overview` — What it does in one line
54
+ 3. `## When to Use` — Decision criteria
55
+ 4. `## The Process` — Step-by-step workflow
56
+ 5. `## Red Flags` — What to watch for
57
+ 6. `## Report Format` — Structured JSON output
58
+ 7. `## Integration` — Which sub-skill to dispatch next
59
+
60
+ ### Shared References
61
+
62
+ **anti-patterns.md:**
63
+ - Common rationalizations agents use to skip steps
64
+ - Reality checks for each rationalization
65
+ - Applies to all sub-skills
66
+
67
+ **reporting-format.md:**
68
+ - Standard JSON report structure all sub-skills return
69
+ - Fields: skill, status, findings, recommendations, nextSkill
70
+ - Ensures consistent handoff between sub-skills
71
+
72
+ **iron-laws.md:**
73
+ - NO BUILD WITHOUT PLAN
74
+ - NO PLAN WITHOUT RESEARCH
75
+ - NO IMPROVEMENT WITHOUT VALIDATION
76
+ - NO COMPLETION CLAIM WITHOUT VERIFICATION
77
+
78
+ ### Hybrid Flow
79
+
80
+ ```
81
+ Phase 1: RESEARCH (dispatch research sub-skill)
82
+
83
+ Phase 2: ARCHITECTURE (dispatch architecture sub-skill)
84
+
85
+ Phase 3: BUILD (use scaffold.ts script)
86
+
87
+ Phase 4: VALIDATE (dispatch validation sub-skill)
88
+
89
+ Condition Loop (repeat until score > 85 AND all validations pass):
90
+ ├─ If validation failed → dispatch validation sub-skill
91
+ ├─ If score ≤ 85 → dispatch prompt-engineering sub-skill
92
+ ├─ If no tests exist → dispatch testing sub-skill
93
+ ├─ If score plateaued → dispatch iteration sub-skill
94
+ └─ If tools missing → dispatch tooling sub-skill
95
+
96
+ Phase 5: DELIVER
97
+ ```
98
+
99
+ ### Sub-Skill Dispatch Script
100
+
101
+ New file: `src/scripts/dispatch.ts`
102
+
103
+ - Loads sub-skill SKILL.md from `skills/<name>/SKILL.md`
104
+ - Prints the sub-skill's full instructions to stdout for the agent to follow
105
+ - Accepts `--skill <name>` and `--workspace <path>` flags
106
+ - Returns the sub-skill's report as JSON when the agent completes its work
107
+ - Zero dependencies (Node.js builtins only)
108
+
109
+ **Usage:**
110
+ ```bash
111
+ node scripts/dispatch.ts --skill validation --workspace ./workspace
112
+ ```
113
+
114
+ The agent reads the dispatched instructions, executes the sub-skill's workflow, and writes the report JSON to stdout.
115
+
116
+ ### Integration Points
117
+
118
+ - `install.ts` enhanced to copy `skills/` and `references/` directories during install
119
+ - `dispatch.ts` invoked by agents via shell command from skill directory
120
+ - Existing scripts (scaffold, validate, iterate, benchmark) remain unchanged
121
+ - Sub-skills reference existing scripts where applicable
122
+
123
+ ## Testing Strategy
124
+
125
+ - `tests/dispatch.test.ts` — Sub-skill dispatch and report structure
126
+ - `tests/sub-skill-integration.test.ts` — End-to-end sub-skill workflow
127
+ - `tests/templates-enhanced.test.ts` — Verify all sub-skill SKILL.md files have required sections
128
+ - All existing tests must continue passing (95/95 baseline)
129
+
130
+ ## Constraints
131
+
132
+ - Zero external dependencies (Node.js builtins only)
133
+ - Scripts invoked via shell commands, not as CLI flags on main package
134
+ - Sub-skills follow obra/superpowers SKILL.md format
135
+ - Main SKILL.md uses YAML frontmatter for trigger detection
136
+ - Progressive disclosure: sub-skill content only loaded when dispatched
137
+ - All sub-skills return structured JSON reports
@@ -0,0 +1,105 @@
1
+ # Sub-Project 4: Benchmarking & Multi-Agent Support — Design Spec
2
+
3
+ ## Overview
4
+
5
+ Add weighted benchmark scoring, multi-agent CLI installation targeting, guided iteration reports, and console+JSON benchmark output to workspace-maxxing.
6
+
7
+ ## Architecture
8
+
9
+ ### New File: `src/scripts/benchmark.ts`
10
+
11
+ Zero-dependency Node.js script. Exports:
12
+ - `calculateBenchmark(workspacePath: string)` — scans workspace, calls validate logic, applies weights, returns benchmark data object
13
+ - `formatBenchmarkTable(data)` — returns formatted string for console output
14
+ - `saveBenchmarkReport(workspacePath, data)` — writes JSON to `.workspace-benchmarks/<name>-<timestamp>.json`
15
+
16
+ ### Weighted Scoring Engine
17
+
18
+ **Default weights:**
19
+ | Stage | Weight | Rationale |
20
+ |-------|--------|-----------|
21
+ | `01-ideation` | 1.5x | Core thinking quality — most critical |
22
+ | `02-research` | 1.3x | Evidence gathering — high importance |
23
+ | `03-architecture` | 1.2x | Structural decisions — important |
24
+ | All other stages | 1.0x | Baseline |
25
+
26
+ **Formula:**
27
+ ```
28
+ weightedStageScore = rawStageScore × weight
29
+ finalScore = Σ(weightedStageScores) / Σ(appliedWeights) × (100 / maxRawScore)
30
+ ```
31
+
32
+ - Stages that don't exist in a workspace are excluded from both numerator and denominator
33
+ - `maxRawScore` = 45 (the per-stage cap from validate.ts)
34
+ - Final score normalized to 0-100
35
+
36
+ ### Multi-Agent CLI Flags
37
+
38
+ **Flag behavior:**
39
+ | Flag | Installation Target |
40
+ |------|-------------------|
41
+ | (none) | `.agents/skills/workspace-maxxing/` (agent-agnostic default) |
42
+ | `--opencode` | `.agents/skills/workspace-maxxing/` (same as default) |
43
+ | `--claude` | `.claude/skills/` |
44
+ | `--copilot` | `.github/copilot-instructions/` |
45
+ | `--gemini` | `.gemini/skills/` |
46
+
47
+ **Implementation:**
48
+ - `src/index.ts` parses flags before install
49
+ - `install.ts` receives `targetAgent` parameter
50
+ - Agent-specific paths defined in a single config map
51
+ - All scripts and templates remain identical — only destination changes
52
+ - SKILL.md includes metadata note about which agent it was installed for (no behavioral changes)
53
+
54
+ ### Guided Iteration Reports
55
+
56
+ **Flow:**
57
+ 1. Agent runs `iterate.ts`
58
+ 2. Each pass: `validate.ts` → `benchmark.ts` → structured report returned
59
+ 3. Report includes: current score, weighted benchmark score, per-stage breakdown, fix suggestions, `improvementPotential` flag
60
+ 4. Agent decides whether to apply fixes and re-run
61
+ 5. No automatic looping — agent is in control
62
+
63
+ **Report structure:**
64
+ ```json
65
+ {
66
+ "workspace": "my-project",
67
+ "agent": "opencode",
68
+ "timestamp": "2026-04-07T...",
69
+ "rawScore": 72,
70
+ "weightedScore": 73,
71
+ "stages": [
72
+ { "name": "01-ideation", "raw": 85, "weight": 1.5, "weighted": 127.5 },
73
+ { "name": "02-research", "raw": 60, "weight": 1.3, "weighted": 78 }
74
+ ],
75
+ "fixSuggestions": ["Add research sources to 02-research", "Expand architecture diagrams"],
76
+ "improvementPotential": true
77
+ }
78
+ ```
79
+
80
+ ### Benchmark Output
81
+
82
+ **Console:** Formatted table with stage names, raw scores, weights, weighted scores, and total.
83
+
84
+ **JSON:** Saved to `.workspace-benchmarks/<workspace-name>-<timestamp>.json` with full metadata: workspace path, agent flag used, timestamp, all scores, weights applied.
85
+
86
+ ## Integration Points
87
+
88
+ - `validate.ts` already returns per-stage scores — `benchmark.ts` consumes those and applies weights
89
+ - `iterate.ts` calls `validate.ts` internally — extended return value includes weighted benchmark data
90
+ - `install.ts` enhanced with agent-targeting flag parsing
91
+ - `src/index.ts` enhanced with CLI flag parsing for `--claude`, `--copilot`, `--gemini`, `--opencode`
92
+
93
+ ## Testing Strategy
94
+
95
+ - `tests/benchmark.test.ts` — weighted scoring calculations, edge cases (missing stages, zero scores, normalization)
96
+ - `tests/cli-flags.test.ts` — flag parsing and installation targeting
97
+ - `tests/iterate-enhanced.test.ts` — guided iteration report structure
98
+ - All existing tests must continue passing (75/75 baseline)
99
+
100
+ ## Constraints
101
+
102
+ - Zero external dependencies (Node.js builtins only: `fs`, `path`, `process`, `child_process`)
103
+ - Scripts invoked via shell commands, not as CLI flags on main package
104
+ - Agent-agnostic by default (no flag = universal behavior)
105
+ - Guided iterations only (no autonomous hill-climbing loop)