workspace-maxxing 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/workspace-maxxing/.workspace-templates/CONTEXT.md +44 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/SYSTEM.md +44 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/references/anti-patterns.md +16 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/references/iron-laws.md +26 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/references/reporting-format.md +52 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/benchmark.ts +171 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/dispatch.ts +473 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/generate-tests.ts +158 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/install-tool.ts +82 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/iterate.ts +265 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/orchestrator.ts +539 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/scaffold.ts +282 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/validate.ts +452 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/architecture/SKILL.md +95 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/fixer/SKILL.md +109 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/iteration/SKILL.md +89 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/research/SKILL.md +94 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/testing/SKILL.md +89 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/tooling/SKILL.md +87 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/validation/SKILL.md +103 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/worker/SKILL.md +79 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/README.md +14 -0
- package/.agents/skills/workspace-maxxing/SKILL.md +312 -0
- package/.agents/skills/workspace-maxxing/scripts/benchmark.ts +171 -0
- package/.agents/skills/workspace-maxxing/scripts/dispatch.ts +473 -0
- package/.agents/skills/workspace-maxxing/scripts/generate-tests.ts +158 -0
- package/.agents/skills/workspace-maxxing/scripts/install-tool.ts +82 -0
- package/.agents/skills/workspace-maxxing/scripts/iterate.ts +265 -0
- package/.agents/skills/workspace-maxxing/scripts/orchestrator.ts +539 -0
- package/.agents/skills/workspace-maxxing/scripts/scaffold.ts +282 -0
- package/.agents/skills/workspace-maxxing/scripts/validate.ts +452 -0
- package/README.md +144 -0
- package/dist/agent-creator.d.ts +9 -0
- package/dist/agent-creator.d.ts.map +1 -0
- package/dist/agent-creator.js +199 -0
- package/dist/agent-creator.js.map +1 -0
- package/dist/agent-iterator.d.ts +38 -0
- package/dist/agent-iterator.d.ts.map +1 -0
- package/dist/agent-iterator.js +327 -0
- package/dist/agent-iterator.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +197 -0
- package/dist/index.js.map +1 -0
- package/dist/install.d.ts +18 -0
- package/dist/install.d.ts.map +1 -0
- package/dist/install.js +117 -0
- package/dist/install.js.map +1 -0
- package/dist/platforms/claude.d.ts +7 -0
- package/dist/platforms/claude.d.ts.map +1 -0
- package/dist/platforms/claude.js +70 -0
- package/dist/platforms/claude.js.map +1 -0
- package/dist/platforms/copilot.d.ts +7 -0
- package/dist/platforms/copilot.d.ts.map +1 -0
- package/dist/platforms/copilot.js +75 -0
- package/dist/platforms/copilot.js.map +1 -0
- package/dist/platforms/gemini.d.ts +7 -0
- package/dist/platforms/gemini.d.ts.map +1 -0
- package/dist/platforms/gemini.js +81 -0
- package/dist/platforms/gemini.js.map +1 -0
- package/dist/platforms/index.d.ts +8 -0
- package/dist/platforms/index.d.ts.map +1 -0
- package/dist/platforms/index.js +41 -0
- package/dist/platforms/index.js.map +1 -0
- package/dist/platforms/opencode.d.ts +7 -0
- package/dist/platforms/opencode.d.ts.map +1 -0
- package/dist/platforms/opencode.js +70 -0
- package/dist/platforms/opencode.js.map +1 -0
- package/dist/scripts/benchmark.d.ts +20 -0
- package/dist/scripts/benchmark.d.ts.map +1 -0
- package/dist/scripts/benchmark.js +170 -0
- package/dist/scripts/benchmark.js.map +1 -0
- package/dist/scripts/dispatch.d.ts +32 -0
- package/dist/scripts/dispatch.d.ts.map +1 -0
- package/dist/scripts/dispatch.js +386 -0
- package/dist/scripts/dispatch.js.map +1 -0
- package/dist/scripts/generate-tests.d.ts +11 -0
- package/dist/scripts/generate-tests.d.ts.map +1 -0
- package/dist/scripts/generate-tests.js +118 -0
- package/dist/scripts/generate-tests.js.map +1 -0
- package/dist/scripts/install-tool.d.ts +8 -0
- package/dist/scripts/install-tool.d.ts.map +1 -0
- package/dist/scripts/install-tool.js +98 -0
- package/dist/scripts/install-tool.js.map +1 -0
- package/dist/scripts/iterate.d.ts +44 -0
- package/dist/scripts/iterate.d.ts.map +1 -0
- package/dist/scripts/iterate.js +260 -0
- package/dist/scripts/iterate.js.map +1 -0
- package/dist/scripts/orchestrator.d.ts +40 -0
- package/dist/scripts/orchestrator.d.ts.map +1 -0
- package/dist/scripts/orchestrator.js +378 -0
- package/dist/scripts/orchestrator.js.map +1 -0
- package/dist/scripts/scaffold.d.ts +8 -0
- package/dist/scripts/scaffold.d.ts.map +1 -0
- package/dist/scripts/scaffold.js +279 -0
- package/dist/scripts/scaffold.js.map +1 -0
- package/dist/scripts/validate.d.ts +11 -0
- package/dist/scripts/validate.d.ts.map +1 -0
- package/dist/scripts/validate.js +472 -0
- package/dist/scripts/validate.js.map +1 -0
- package/docs/superpowers/plans/2026-04-07-autonomous-iteration-plan.md +1123 -0
- package/docs/superpowers/plans/2026-04-07-autonomous-iteration-sub-agent-batches.md +1923 -0
- package/docs/superpowers/plans/2026-04-07-autonomous-workflow-sub-skill-plan.md +1505 -0
- package/docs/superpowers/plans/2026-04-07-benchmarking-multi-agent-plan.md +854 -0
- package/docs/superpowers/plans/2026-04-07-workspace-builder-logic-plan.md +1426 -0
- package/docs/superpowers/plans/2026-04-07-workspace-maxxing-plan.md +1299 -0
- package/docs/superpowers/plans/2026-04-08-session-294c-subagent-invocation-plan.md +320 -0
- package/docs/superpowers/plans/2026-04-08-workflow-prompt-hardening-plan.md +1025 -0
- package/docs/superpowers/plans/2026-04-12-workspace-agent-creation-plan.md +992 -0
- package/docs/superpowers/specs/2026-04-07-autonomous-iteration-design.md +214 -0
- package/docs/superpowers/specs/2026-04-07-autonomous-iteration-sub-agent-batches-design.md +188 -0
- package/docs/superpowers/specs/2026-04-07-autonomous-workflow-sub-skill-design.md +137 -0
- package/docs/superpowers/specs/2026-04-07-benchmarking-multi-agent-design.md +105 -0
- package/docs/superpowers/specs/2026-04-07-workspace-builder-logic-design.md +179 -0
- package/docs/superpowers/specs/2026-04-07-workspace-maxxing-design.md +227 -0
- package/docs/superpowers/specs/2026-04-08-session-294c-subagent-invocation-design.md +265 -0
- package/docs/superpowers/specs/2026-04-08-workflow-prompt-hardening-design.md +146 -0
- package/docs/superpowers/specs/2026-04-12-workspace-agent-creation-design.md +239 -0
- package/jest.config.js +8 -0
- package/package.json +32 -0
- package/src/agent-creator.ts +180 -0
- package/src/agent-iterator.ts +397 -0
- package/src/index.ts +189 -0
- package/src/install.ts +105 -0
- package/src/platforms/claude.ts +40 -0
- package/src/platforms/copilot.ts +50 -0
- package/src/platforms/gemini.ts +55 -0
- package/src/platforms/index.ts +45 -0
- package/src/platforms/opencode.ts +41 -0
- package/src/scripts/benchmark.ts +171 -0
- package/src/scripts/dispatch.ts +473 -0
- package/src/scripts/generate-tests.ts +112 -0
- package/src/scripts/install-tool.ts +82 -0
- package/src/scripts/iterate.ts +271 -0
- package/src/scripts/orchestrator.ts +539 -0
- package/src/scripts/scaffold.ts +282 -0
- package/src/scripts/validate.ts +516 -0
- package/templates/.workspace-templates/CONTEXT.md +44 -0
- package/templates/.workspace-templates/SYSTEM.md +44 -0
- package/templates/.workspace-templates/references/anti-patterns.md +16 -0
- package/templates/.workspace-templates/references/iron-laws.md +26 -0
- package/templates/.workspace-templates/references/reporting-format.md +52 -0
- package/templates/.workspace-templates/scripts/benchmark.ts +171 -0
- package/templates/.workspace-templates/scripts/dispatch.ts +473 -0
- package/templates/.workspace-templates/scripts/generate-tests.ts +158 -0
- package/templates/.workspace-templates/scripts/install-tool.ts +82 -0
- package/templates/.workspace-templates/scripts/iterate.ts +265 -0
- package/templates/.workspace-templates/scripts/orchestrator.ts +539 -0
- package/templates/.workspace-templates/scripts/scaffold.ts +282 -0
- package/templates/.workspace-templates/scripts/validate.ts +452 -0
- package/templates/.workspace-templates/skills/architecture/SKILL.md +95 -0
- package/templates/.workspace-templates/skills/fixer/SKILL.md +109 -0
- package/templates/.workspace-templates/skills/iteration/SKILL.md +89 -0
- package/templates/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
- package/templates/.workspace-templates/skills/research/SKILL.md +94 -0
- package/templates/.workspace-templates/skills/testing/SKILL.md +89 -0
- package/templates/.workspace-templates/skills/tooling/SKILL.md +87 -0
- package/templates/.workspace-templates/skills/validation/SKILL.md +103 -0
- package/templates/.workspace-templates/skills/worker/SKILL.md +79 -0
- package/templates/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
- package/templates/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
- package/templates/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
- package/templates/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
- package/templates/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
- package/templates/.workspace-templates/workspace/README.md +14 -0
- package/templates/SKILL.md +347 -0
- package/tests/benchmark.test.ts +158 -0
- package/tests/cli.test.ts +109 -0
- package/tests/dispatch-parallel.test.ts +124 -0
- package/tests/dispatch.test.ts +218 -0
- package/tests/fixer-skill.test.ts +203 -0
- package/tests/generate-tests.test.ts +101 -0
- package/tests/install-tool.test.ts +141 -0
- package/tests/install.test.ts +144 -0
- package/tests/integration.test.ts +324 -0
- package/tests/iterate.test.ts +219 -0
- package/tests/orchestrator.test.ts +710 -0
- package/tests/scaffold.test.ts +238 -0
- package/tests/templates-enhanced.test.ts +208 -0
- package/tests/templates.test.ts +219 -0
- package/tests/validate.test.ts +421 -0
- package/tests/validation-enhanced.test.ts +303 -0
- package/tests/worker-skill.test.ts +88 -0
- package/tsconfig.json +19 -0
- package/workspace/00-meta/CONTEXT.md +3 -0
- package/workspace/00-meta/execution-log.md +17 -0
- package/workspace/00-meta/tools.md +11 -0
- package/workspace/01-input/CONTEXT.md +27 -0
- package/workspace/CONTEXT.md +35 -0
- package/workspace/README.md +14 -0
- package/workspace/SYSTEM.md +36 -0
- package/workspace-maxxing-0.1.0.tgz +0 -0
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# Workspace-Maxxing Design Spec — Sub-Project 3: Autonomous Iteration & Validation
|
|
2
|
+
|
|
3
|
+
> **Phase 3 of 4:** Autonomous iteration engine + sub-agent orchestration. Phase 4 adds benchmarking and multi-agent support.
|
|
4
|
+
|
|
5
|
+
## Context
|
|
6
|
+
|
|
7
|
+
Sub-Project 1 delivered the npx CLI with skill installation. Sub-Project 2 added helper scripts (scaffold, validate, install-tool) for programmatic workspace creation. Sub-Project 3 adds autonomous iteration — the agent self-tests, self-evaluates, and improves the workspace without human involvement, escalating only when stuck.
|
|
8
|
+
|
|
9
|
+
## Architecture
|
|
10
|
+
|
|
11
|
+
### Data Flow
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
Agent scaffolds workspace → runs iterate.ts
|
|
15
|
+
│
|
|
16
|
+
├─ Pass 1: Validate-Fix Loop
|
|
17
|
+
│ ├─ Run validate.ts
|
|
18
|
+
│ ├─ If failures → fix specific issues → re-validate
|
|
19
|
+
│ └─ Repeat until pass OR max retries (3) → escalate to human
|
|
20
|
+
│
|
|
21
|
+
├─ Pass 2: Score-Driven Content Quality
|
|
22
|
+
│ ├─ Score workspace (structure + content quality, 0-100)
|
|
23
|
+
│ ├─ Identify lowest-scoring areas
|
|
24
|
+
│ └─ Agent improves content, re-scores until plateau
|
|
25
|
+
│
|
|
26
|
+
├─ Pass 3: Completeness Checklist
|
|
27
|
+
│ ├─ Check: every stage has inputs/outputs/dependencies
|
|
28
|
+
│ ├─ Check: routing table references all folders
|
|
29
|
+
│ └─ Agent fills gaps
|
|
30
|
+
│
|
|
31
|
+
└─ Sub-Agent Testing (agent-orchestrated via SKILL.md)
|
|
32
|
+
├─ Agent runs generate-tests.ts to create test cases
|
|
33
|
+
├─ Agent spawns sub-agents: half generate, half evaluate
|
|
34
|
+
├─ Results aggregated → agent reviews
|
|
35
|
+
└─ If confidence low → escalate to human
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Components
|
|
39
|
+
|
|
40
|
+
#### 1. Iterate Script (`scripts/iterate.ts`)
|
|
41
|
+
|
|
42
|
+
Orchestrates the 3-pass improvement loop.
|
|
43
|
+
|
|
44
|
+
**CLI Interface:**
|
|
45
|
+
```bash
|
|
46
|
+
node scripts/iterate.ts --workspace ./workspace --max-retries 3
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**Pass 1: Validate-Fix Loop**
|
|
50
|
+
- Runs `validate.ts` programmatically (imports the module, not shell)
|
|
51
|
+
- If failures exist, returns structured error details to the agent
|
|
52
|
+
- Retries up to `--max-retries` times (default 3)
|
|
53
|
+
- If still failing after max retries, returns `{ escalate: true }` with failure details
|
|
54
|
+
- The agent reads the output and attempts fixes between retries
|
|
55
|
+
|
|
56
|
+
**Pass 2: Score-Driven Content Quality**
|
|
57
|
+
- Scores workspace on a 0-100 scale using these criteria:
|
|
58
|
+
- SYSTEM.md quality (has role, folder map, rules) — 20 points
|
|
59
|
+
- CONTEXT.md quality (has routing table, references all stages) — 20 points
|
|
60
|
+
- Each stage CONTEXT.md has purpose, inputs, outputs, dependencies — 15 points per stage (capped at 45 total for 3 stages)
|
|
61
|
+
- tools.md exists and has content — 15 points
|
|
62
|
+
- Identifies lowest-scoring areas and reports them
|
|
63
|
+
- Agent improves content between score runs
|
|
64
|
+
|
|
65
|
+
**Pass 3: Completeness Checklist**
|
|
66
|
+
- Fixed checklist of structural requirements:
|
|
67
|
+
- Every stage has inputs defined
|
|
68
|
+
- Every stage has outputs defined
|
|
69
|
+
- Every stage has dependencies defined
|
|
70
|
+
- Routing table references all numbered folders
|
|
71
|
+
- README.md exists and has usage instructions
|
|
72
|
+
- Reports pass/fail per item
|
|
73
|
+
|
|
74
|
+
**Output:** JSON to stdout with structured results:
|
|
75
|
+
```json
|
|
76
|
+
{
|
|
77
|
+
"passes": {
|
|
78
|
+
"validate": { "status": "passed", "retries": 1 },
|
|
79
|
+
"score": { "score": 78, "improvements": ["01-input missing dependencies"] },
|
|
80
|
+
"checklist": { "items": 5, "passed": 5, "failed": 0 }
|
|
81
|
+
},
|
|
82
|
+
"escalate": false
|
|
83
|
+
}
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
**Dependencies:** Node.js builtins only (`fs`, `path`, `process`). Imports `validateWorkspace` from `validate.ts` directly.
|
|
87
|
+
|
|
88
|
+
#### 2. Generate Tests Script (`scripts/generate-tests.ts`)
|
|
89
|
+
|
|
90
|
+
Generates test cases for workspace evaluation.
|
|
91
|
+
|
|
92
|
+
**CLI Interface:**
|
|
93
|
+
```bash
|
|
94
|
+
node scripts/generate-tests.ts --workspace ./workspace --output ./tests.json
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
**What it generates:**
|
|
98
|
+
- For each numbered stage folder: 2-3 test cases
|
|
99
|
+
- Test case types: `sample` (normal input), `edge-case` (boundary conditions), `empty` (missing input)
|
|
100
|
+
- Each test case includes: stage name, type, sample input, expected output description
|
|
101
|
+
|
|
102
|
+
**Output:** JSON file at `--output` path:
|
|
103
|
+
```json
|
|
104
|
+
{
|
|
105
|
+
"workspace": "research",
|
|
106
|
+
"testCases": [
|
|
107
|
+
{
|
|
108
|
+
"stage": "01-input",
|
|
109
|
+
"type": "sample",
|
|
110
|
+
"input": "A research question about climate change",
|
|
111
|
+
"expected": "Stage should collect and validate the research question"
|
|
112
|
+
},
|
|
113
|
+
{
|
|
114
|
+
"stage": "01-input",
|
|
115
|
+
"type": "empty",
|
|
116
|
+
"input": "",
|
|
117
|
+
"expected": "Stage should handle empty input gracefully"
|
|
118
|
+
}
|
|
119
|
+
]
|
|
120
|
+
}
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
**Dependencies:** Node.js builtins only
|
|
124
|
+
|
|
125
|
+
#### 3. Enhanced SKILL.md
|
|
126
|
+
|
|
127
|
+
Updated to include "## Autonomous Iteration" section with:
|
|
128
|
+
- Instructions for running `iterate.ts` and interpreting results
|
|
129
|
+
- How to fix validation failures between retries
|
|
130
|
+
- How to improve scores between scoring runs
|
|
131
|
+
- How to fill checklist gaps
|
|
132
|
+
- Sub-agent spawning instructions:
|
|
133
|
+
- Run `generate-tests.ts` to create test cases
|
|
134
|
+
- Split test cases: half for generation sub-agents, half for evaluation sub-agents
|
|
135
|
+
- Generation sub-agents: create sample content for assigned test cases
|
|
136
|
+
- Evaluation sub-agents: review workspace against assigned test cases
|
|
137
|
+
- Aggregate results, assess confidence
|
|
138
|
+
- Escalation criteria: present failures to human with proposed fix
|
|
139
|
+
|
|
140
|
+
### File Structure
|
|
141
|
+
|
|
142
|
+
```
|
|
143
|
+
workspace-maxxing/
|
|
144
|
+
├── src/
|
|
145
|
+
│ ├── scripts/
|
|
146
|
+
│ │ ├── iterate.ts — Orchestration script source
|
|
147
|
+
│ │ └── generate-tests.ts — Test case generator source
|
|
148
|
+
│ ├── index.ts — Unchanged
|
|
149
|
+
│ └── install.ts — Modified: also copies new scripts
|
|
150
|
+
├── templates/
|
|
151
|
+
│ ├── SKILL.md — Enhanced with autonomous iteration
|
|
152
|
+
│ └── .workspace-templates/
|
|
153
|
+
│ └── scripts/
|
|
154
|
+
│ ├── iterate.ts — Copy for distribution
|
|
155
|
+
│ └── generate-tests.ts — Copy for distribution
|
|
156
|
+
├── tests/
|
|
157
|
+
│ ├── iterate.test.ts
|
|
158
|
+
│ └── generate-tests.test.ts
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### Changes to Existing Files
|
|
162
|
+
|
|
163
|
+
**`src/install.ts`:** Add `iterate.ts` and `generate-tests.ts` to the scripts copy list.
|
|
164
|
+
|
|
165
|
+
**`templates/SKILL.md`:** Add "## Autonomous Iteration" section with full instructions.
|
|
166
|
+
|
|
167
|
+
### Error Handling
|
|
168
|
+
|
|
169
|
+
- **iterate.ts:** Never crashes with an unhandled exception. If validation still fails after max retries, returns `{ escalate: true }` with structured failure details. The score and checklist passes are best-effort — they log warnings but don't block the process.
|
|
170
|
+
- **generate-tests.ts:** If workspace has no numbered stage folders, returns empty test cases array with a warning message. Never throws.
|
|
171
|
+
- **Escalation:** When `iterate.ts` returns `escalate: true`, SKILL.md instructs the agent to present the specific failures to the human with a proposed fix, rather than silently continuing.
|
|
172
|
+
|
|
173
|
+
### Testing Strategy
|
|
174
|
+
|
|
175
|
+
- **iterate.test.ts:**
|
|
176
|
+
- Mock `validateWorkspace` to return failures → verify retry logic
|
|
177
|
+
- Mock `validateWorkspace` to pass immediately → verify single pass, no retries
|
|
178
|
+
- Mock `validateWorkspace` to always fail → verify escalation after max retries
|
|
179
|
+
- Verify scoring function returns correct scores for known workspaces
|
|
180
|
+
- Verify checklist function reports correct pass/fail
|
|
181
|
+
|
|
182
|
+
- **generate-tests.test.ts:**
|
|
183
|
+
- Create workspace with 3 stages → verify 6-9 test cases generated (2-3 per stage)
|
|
184
|
+
- Create workspace with no stages → verify empty test cases with warning
|
|
185
|
+
- Verify test case structure (stage, type, input, expected)
|
|
186
|
+
- Verify output file is valid JSON
|
|
187
|
+
|
|
188
|
+
- **Integration:**
|
|
189
|
+
- Scaffold workspace → run iterate → verify score improves
|
|
190
|
+
- Scaffold workspace → run generate-tests → verify valid JSON output
|
|
191
|
+
|
|
192
|
+
### Scope
|
|
193
|
+
|
|
194
|
+
**In Scope (This Phase):**
|
|
195
|
+
- `iterate.ts` with 3-pass loop (validate-fix, score, checklist)
|
|
196
|
+
- `generate-tests.ts` for test case generation
|
|
197
|
+
- Enhanced SKILL.md with autonomous iteration instructions
|
|
198
|
+
- Tests for both scripts
|
|
199
|
+
- Installer updated to copy new scripts
|
|
200
|
+
|
|
201
|
+
**Out of Scope (Future Phases):**
|
|
202
|
+
- Benchmark scoring system (Phase 4) — this is workspace quality scoring, not benchmark
|
|
203
|
+
- Multi-agent CLI flags (--claude, --copilot, --gemini) (Phase 4)
|
|
204
|
+
- External sub-agent API integration (sub-agents are spawned via agent's native tool use)
|
|
205
|
+
- Hill-climbing algorithm automation (iteration in this phase is agent-driven, not script-driven)
|
|
206
|
+
|
|
207
|
+
### Success Criteria
|
|
208
|
+
|
|
209
|
+
1. `node scripts/iterate.ts` runs 3-pass loop and returns structured results
|
|
210
|
+
2. `node scripts/generate-tests.ts` generates test cases for all stages
|
|
211
|
+
3. Enhanced SKILL.md documents autonomous iteration workflow
|
|
212
|
+
4. Installer copies new scripts to skill directory
|
|
213
|
+
5. All tests pass (Phase 1 + Phase 2 + Phase 3)
|
|
214
|
+
6. Human escalation triggers correctly when validation fails after max retries
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
# Autonomous Iteration with Sub-Agent Batches — Design Spec
|
|
2
|
+
|
|
3
|
+
## Problem
|
|
4
|
+
|
|
5
|
+
The current iteration workflow is script-driven (`iterate.ts`) with sequential validate-fix-score loops. The existing sub-skills lack obra/superpowers patterns: they have no YAML frontmatter, trigger phrases, anti-rationalization tables, or iron laws. The goal is a truly autonomous workflow where fresh-context sub-agents execute test cases in parallel batches, validated by a dedicated validator agent, with fix loops for failures.
|
|
6
|
+
|
|
7
|
+
## Solution
|
|
8
|
+
|
|
9
|
+
A new `orchestrator.ts` script coordinates batched parallel worker sub-agents with validator checkpoints and fix loops. Two new sub-skills (`worker`, `fixer`) are added and the `validation` sub-skill is enhanced. All 7 existing sub-skills are rewritten with obra/superpowers patterns. `dispatch.ts` is extended for parallel invocation.
|
|
10
|
+
|
|
11
|
+
## Architecture
|
|
12
|
+
|
|
13
|
+
### Core Components
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
orchestrator.ts (new)
|
|
17
|
+
├── Generates test cases via generate-tests.ts
|
|
18
|
+
├── Splits into batches (configurable via --batch-size, default 3)
|
|
19
|
+
├── Dispatches worker sub-agents in parallel per batch
|
|
20
|
+
├── Collects outputs (file + JSON)
|
|
21
|
+
├── Dispatches validator sub-agent on batch results
|
|
22
|
+
└── If batch score < threshold → dispatches fixer sub-agents → re-validates → next batch
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
### New Sub-Skills
|
|
26
|
+
|
|
27
|
+
| Sub-Skill | Purpose | Trigger |
|
|
28
|
+
|-----------|---------|---------|
|
|
29
|
+
| `worker` | Executes a single test case against the workspace, produces output | "run test case", "execute workspace task" |
|
|
30
|
+
| `fixer` | Applies targeted fixes to failing test case outputs | "fix failing test", "improve output" |
|
|
31
|
+
| `validation` (enhanced) | Benchmarks batch outputs, returns structured score | "validate batch", "check results" |
|
|
32
|
+
|
|
33
|
+
### Extended Components
|
|
34
|
+
|
|
35
|
+
| Component | Change |
|
|
36
|
+
|-----------|--------|
|
|
37
|
+
| `dispatch.ts` | Added `--parallel` flag + `--batch-id` for grouped invocation |
|
|
38
|
+
| `SKILL.md` | New "Autonomous Iteration Workflow" section replacing old iterate.ts docs |
|
|
39
|
+
| All 7 existing sub-skills | Rewritten with obra patterns (YAML frontmatter, trigger phrases, anti-rationalization tables, iron laws) |
|
|
40
|
+
|
|
41
|
+
### Output Structure
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
.agents/iteration/
|
|
45
|
+
├── batch-01/
|
|
46
|
+
│ ├── tc-001/
|
|
47
|
+
│ │ ├── output.md (worker output, human-readable)
|
|
48
|
+
│ │ ├── report.json (structured JSON for validation)
|
|
49
|
+
│ │ └── fix-output.md (fixer output if needed)
|
|
50
|
+
│ ├── tc-002/
|
|
51
|
+
│ └── batch-report.json (validator benchmark results)
|
|
52
|
+
├── batch-02/
|
|
53
|
+
└── summary.json (final aggregated results)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Data Flow & Batch Lifecycle
|
|
57
|
+
|
|
58
|
+
### Full Flow
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
1. GENERATE
|
|
62
|
+
orchestrator.ts → generate-tests.ts → tests.json
|
|
63
|
+
|
|
64
|
+
2. BATCH SPLIT
|
|
65
|
+
tests.json → batches of N (default 3)
|
|
66
|
+
|
|
67
|
+
3. WORKER DISPATCH (parallel per batch)
|
|
68
|
+
For each test case in batch:
|
|
69
|
+
dispatch.ts --skill worker --test-case <id> --batch-id <N> --workspace <path>
|
|
70
|
+
→ reads workspace CONTEXT.md + test case
|
|
71
|
+
→ executes task, writes output.md + report.json
|
|
72
|
+
|
|
73
|
+
4. VALIDATOR DISPATCH
|
|
74
|
+
dispatch.ts --skill validation --batch-id <N> --workspace <path>
|
|
75
|
+
→ reads all report.json files in batch directory
|
|
76
|
+
→ runs benchmark scoring
|
|
77
|
+
→ writes batch-report.json
|
|
78
|
+
|
|
79
|
+
5. FIX LOOP (if batch score < threshold)
|
|
80
|
+
For each failing test case:
|
|
81
|
+
dispatch.ts --skill fixer --test-case <id> --batch-id <N> --workspace <path>
|
|
82
|
+
→ reads validator findings + original output
|
|
83
|
+
→ applies fixes, overwrites output.md + report.json
|
|
84
|
+
→ re-run validator
|
|
85
|
+
→ repeat until passing or max retries (default 3)
|
|
86
|
+
|
|
87
|
+
6. NEXT BATCH or COMPLETE
|
|
88
|
+
If all batches done → write summary.json
|
|
89
|
+
If any batch escalated → report to human
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Key Interfaces
|
|
93
|
+
|
|
94
|
+
- **Worker input:** test case JSON + workspace path + batch ID
|
|
95
|
+
- **Worker output:** `output.md` (human-readable), `report.json` (structured: `{testCaseId, status, output, findings}`)
|
|
96
|
+
- **Validator input:** batch directory path + workspace path
|
|
97
|
+
- **Validator output:** `batch-report.json` (per-test scores, overall batch score, findings, fix suggestions)
|
|
98
|
+
- **Fixer input:** validator findings + original output path
|
|
99
|
+
- **Fixer output:** updated `output.md` + `report.json`
|
|
100
|
+
|
|
101
|
+
### Concurrency Model
|
|
102
|
+
|
|
103
|
+
- Workers within a batch run in parallel (via `dispatch.ts --parallel`)
|
|
104
|
+
- Batches run sequentially (validator must complete before next batch starts)
|
|
105
|
+
- Fix-loop iterations run sequentially per batch (each iteration is followed by re-validation); within an iteration, the fixers for that batch run in parallel
|
|
106
|
+
|
|
107
|
+
## Sub-Skill Design Pattern
|
|
108
|
+
|
|
109
|
+
All sub-skills follow this structure:
|
|
110
|
+
|
|
111
|
+
```yaml
|
|
112
|
+
---
|
|
113
|
+
name: <skill-name>
|
|
114
|
+
description: "<trigger-friendly description>"
|
|
115
|
+
triggers: ["<phrase1>", "<phrase2>"]
|
|
116
|
+
---
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Then:
|
|
120
|
+
- **Overview** — one paragraph
|
|
121
|
+
- **When to Use / When Not to Use** — clear boundaries
|
|
122
|
+
- **The Iron Law** — 3-4 non-negotiable rules
|
|
123
|
+
- **The Process** — numbered steps
|
|
124
|
+
- **Anti-Rationalization Table** — common excuses vs reality
|
|
125
|
+
- **Sub-Skill Dispatch** (if applicable) — what to dispatch next
|
|
126
|
+
- **Report Format** — structured JSON schema
|
|
127
|
+
|
|
128
|
+
### New Sub-Skills Specifics
|
|
129
|
+
|
|
130
|
+
**`worker` SKILL.md:**
|
|
131
|
+
- Focus: read test case, load relevant workspace sections, execute task, produce output
|
|
132
|
+
- Iron Law: NO SKIPPING TEST CASE STEPS, NO MODIFYING WORKSPACE STRUCTURE, NO CLAIMING DONE WITHOUT OUTPUT
|
|
133
|
+
- Dispatches to: `validation` after the output is complete
|
|
134
|
+
|
|
135
|
+
**`fixer` SKILL.md:**
|
|
136
|
+
- Focus: read validator findings, identify root cause, apply minimal fix, re-validate
|
|
137
|
+
- Iron Law: NO BLIND RETRIES, NO COSMETIC FIXES, NO FIXING WHAT ISN'T BROKEN
|
|
138
|
+
- Dispatches to: `validation` after fix applied
|
|
139
|
+
|
|
140
|
+
**`validation` (enhanced) SKILL.md:**
|
|
141
|
+
- Focus: batch-level benchmark scoring, per-test findings, fix suggestions
|
|
142
|
+
- Iron Law: NO SCORE INFLATION, NO SKIPPING FAILURES, NO VALIDATING WITHOUT BENCHMARK
|
|
143
|
+
- Dispatches to: `fixer` if score < threshold, `orchestrator` if passing
|
|
144
|
+
|
|
145
|
+
## dispatch.ts Changes
|
|
146
|
+
|
|
147
|
+
- `--parallel` flag: spawns multiple sub-agent invocations concurrently
|
|
148
|
+
- `--batch-id` flag: tags outputs to batch directory
|
|
149
|
+
- Returns aggregated JSON when `--parallel` is used
|
|
150
|
+
|
|
151
|
+
## File Changes
|
|
152
|
+
|
|
153
|
+
### New Files
|
|
154
|
+
- `src/scripts/orchestrator.ts` — Batch orchestrator
|
|
155
|
+
- `templates/.workspace-templates/skills/worker/SKILL.md` — Worker sub-skill
|
|
156
|
+
- `templates/.workspace-templates/skills/fixer/SKILL.md` — Fixer sub-skill
|
|
157
|
+
|
|
158
|
+
### Modified Files
|
|
159
|
+
- `src/scripts/dispatch.ts` — Parallel dispatch, batch ID support
|
|
160
|
+
- `templates/SKILL.md` — New "Autonomous Iteration Workflow" section
|
|
161
|
+
- `templates/.workspace-templates/skills/validation/SKILL.md` — Enhanced with batch validation
|
|
162
|
+
- `templates/.workspace-templates/skills/iteration/SKILL.md` — Rewritten with obra patterns
|
|
163
|
+
- `templates/.workspace-templates/skills/research/SKILL.md` — Rewritten with obra patterns
|
|
164
|
+
- `templates/.workspace-templates/skills/architecture/SKILL.md` — Rewritten with obra patterns
|
|
165
|
+
- `templates/.workspace-templates/skills/testing/SKILL.md` — Rewritten with obra patterns
|
|
166
|
+
- `templates/.workspace-templates/skills/prompt-engineering/SKILL.md` — Rewritten with obra patterns
|
|
167
|
+
- `templates/.workspace-templates/skills/tooling/SKILL.md` — Rewritten with obra patterns
|
|
168
|
+
|
|
169
|
+
### New Test Files
|
|
170
|
+
- `tests/orchestrator.test.ts`
|
|
171
|
+
- `tests/dispatch-parallel.test.ts`
|
|
172
|
+
- `tests/worker-skill.test.ts`
|
|
173
|
+
- `tests/fixer-skill.test.ts`
|
|
174
|
+
- `tests/validation-enhanced.test.ts`
|
|
175
|
+
|
|
176
|
+
## Error Handling
|
|
177
|
+
|
|
178
|
+
- **Worker timeout:** If a worker doesn't complete within the timeout (default 300s), mark the test case as failed and continue with the rest of the batch
|
|
179
|
+
- **Validator failure:** If validator can't parse outputs, escalate to human
|
|
180
|
+
- **Fix loop exhaustion:** After max retries (default 3), mark batch as partially failed, continue to next batch
|
|
181
|
+
- **Orchestrator crash:** `summary.json` is written at each batch boundary so that an interrupted run can be recovered
|
|
182
|
+
|
|
183
|
+
## Testing Strategy
|
|
184
|
+
|
|
185
|
+
- **Unit tests:** orchestrator.ts batch splitting, dispatch.ts parallel invocation, report aggregation
|
|
186
|
+
- **Integration tests:** full batch lifecycle (generate → dispatch → validate → fix → complete)
|
|
187
|
+
- **Sub-skill tests:** each sub-skill's report format, trigger phrases, iron law compliance
|
|
188
|
+
- **Edge cases:** empty test case list, single test case, all failures, all passes, mixed results
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# Sub-Project 5: Autonomous Workflow & Sub-Skill Framework — Design Spec
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
Transform workspace-maxxing from a single-skill tool into a comprehensive autonomous workflow system using obra/superpowers patterns. Add YAML frontmatter, trigger phrases, anti-rationalization tables, sub-skill dispatch, and a hybrid phase-driven → condition-driven flow.
|
|
6
|
+
|
|
7
|
+
## Architecture
|
|
8
|
+
|
|
9
|
+
### File Structure
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
templates/
|
|
13
|
+
├── SKILL.md # Main entry point (rewritten)
|
|
14
|
+
└── .workspace-templates/
|
|
15
|
+
├── skills/
|
|
16
|
+
│ ├── validation/SKILL.md # Workspace compliance checking
|
|
17
|
+
│ ├── research/SKILL.md # Pattern investigation & context gathering
|
|
18
|
+
│ ├── prompt-engineering/SKILL.md # Prompt improvement & optimization
|
|
19
|
+
│ ├── testing/SKILL.md # Test generation & evaluation
|
|
20
|
+
│ ├── iteration/SKILL.md # Autonomous improvement loop
|
|
21
|
+
│ ├── architecture/SKILL.md # Workspace structure design
|
|
22
|
+
│ └── tooling/SKILL.md # Tool assessment & installation
|
|
23
|
+
└── references/
|
|
24
|
+
├── anti-patterns.md # Shared rationalization tables
|
|
25
|
+
├── reporting-format.md # Standard sub-skill report structure
|
|
26
|
+
└── iron-laws.md # Shared discipline rules
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Main SKILL.md — Rewritten with obra patterns
|
|
30
|
+
|
|
31
|
+
**YAML Frontmatter:**
|
|
32
|
+
```yaml
|
|
33
|
+
---
|
|
34
|
+
name: workspace-maxxing
|
|
35
|
+
description: "Autonomously creates, validates, and improves ICM-compliant workspaces. Use when user asks to 'build a workspace', 'create a workflow', 'automate a process', 'improve this workspace', 'validate this workspace', or 'iterate on this workspace'."
|
|
36
|
+
---
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
**Core Sections:**
|
|
40
|
+
- `## Overview` — Single-line core principle
|
|
41
|
+
- `## When to Use` — Decision tree (when/when not)
|
|
42
|
+
- `## The Iron Law` — Absolute gates (no build without plan, no plan without research)
|
|
43
|
+
- `## Hybrid Flow` — Phase-driven → condition-driven workflow diagram (DOT)
|
|
44
|
+
- `## Sub-Skill Dispatch` — Table mapping conditions to sub-skills
|
|
45
|
+
- `## Available Scripts` — Existing script documentation
|
|
46
|
+
- `## Anti-Rationalization Table` — Pre-empts agent shortcuts
|
|
47
|
+
- `## Integration` — How sub-skills connect
|
|
48
|
+
|
|
49
|
+
### Sub-Skills — Each follows obra SKILL.md pattern
|
|
50
|
+
|
|
51
|
+
Every sub-skill SKILL.md contains:
|
|
52
|
+
1. YAML frontmatter with name + description + trigger phrases
|
|
53
|
+
2. `## Overview` — What it does in one line
|
|
54
|
+
3. `## When to Use` — Decision criteria
|
|
55
|
+
4. `## The Process` — Step-by-step workflow
|
|
56
|
+
5. `## Red Flags` — What to watch for
|
|
57
|
+
6. `## Report Format` — Structured JSON output
|
|
58
|
+
7. `## Integration` — Which sub-skill to dispatch next
|
|
59
|
+
|
|
60
|
+
### Shared References
|
|
61
|
+
|
|
62
|
+
**anti-patterns.md:**
|
|
63
|
+
- Common rationalizations agents use to skip steps
|
|
64
|
+
- Reality checks for each rationalization
|
|
65
|
+
- Applies to all sub-skills
|
|
66
|
+
|
|
67
|
+
**reporting-format.md:**
|
|
68
|
+
- Standard JSON report structure all sub-skills return
|
|
69
|
+
- Fields: skill, status, findings, recommendations, nextSkill
|
|
70
|
+
- Ensures consistent handoff between sub-skills
|
|
71
|
+
|
|
72
|
+
**iron-laws.md:**
|
|
73
|
+
- NO BUILD WITHOUT PLAN
|
|
74
|
+
- NO PLAN WITHOUT RESEARCH
|
|
75
|
+
- NO IMPROVEMENT WITHOUT VALIDATION
|
|
76
|
+
- NO COMPLETION CLAIM WITHOUT VERIFICATION
|
|
77
|
+
|
|
78
|
+
### Hybrid Flow
|
|
79
|
+
|
|
80
|
+
```
|
|
81
|
+
Phase 1: RESEARCH (dispatch research sub-skill)
|
|
82
|
+
↓
|
|
83
|
+
Phase 2: ARCHITECTURE (dispatch architecture sub-skill)
|
|
84
|
+
↓
|
|
85
|
+
Phase 3: BUILD (use scaffold.ts script)
|
|
86
|
+
↓
|
|
87
|
+
Phase 4: VALIDATE (dispatch validation sub-skill)
|
|
88
|
+
↓
|
|
89
|
+
Condition Loop (repeat until score > 85 AND all validations pass):
|
|
90
|
+
├─ If validation failed → dispatch validation sub-skill
|
|
91
|
+
├─ If score < 80 → dispatch prompt-engineering sub-skill
|
|
92
|
+
├─ If no tests exist → dispatch testing sub-skill
|
|
93
|
+
├─ If score plateaued → dispatch iteration sub-skill
|
|
94
|
+
└─ If tools missing → dispatch tooling sub-skill
|
|
95
|
+
↓
|
|
96
|
+
Phase 5: DELIVER
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Sub-Skill Dispatch Script
|
|
100
|
+
|
|
101
|
+
New file: `src/scripts/dispatch.ts`
|
|
102
|
+
|
|
103
|
+
- Loads sub-skill SKILL.md from `skills/<name>/SKILL.md`
|
|
104
|
+
- Prints the sub-skill's full instructions to stdout for the agent to follow
|
|
105
|
+
- Accepts `--skill <name>` and `--workspace <path>` flags
|
|
106
|
+
- Returns the sub-skill's report as JSON when the agent completes its work
|
|
107
|
+
- Zero dependencies (Node.js builtins only)
|
|
108
|
+
|
|
109
|
+
**Usage:**
|
|
110
|
+
```bash
|
|
111
|
+
node scripts/dispatch.ts --skill validation --workspace ./workspace
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
The agent reads the dispatched instructions, executes the sub-skill's workflow, and writes the report JSON to stdout.
|
|
115
|
+
|
|
116
|
+
### Integration Points
|
|
117
|
+
|
|
118
|
+
- `install.ts` enhanced to copy `skills/` and `references/` directories during install
|
|
119
|
+
- `dispatch.ts` invoked by agents via shell command from skill directory
|
|
120
|
+
- Existing scripts (scaffold, validate, iterate, benchmark) remain unchanged
|
|
121
|
+
- Sub-skills reference existing scripts where applicable
|
|
122
|
+
|
|
123
|
+
## Testing Strategy
|
|
124
|
+
|
|
125
|
+
- `tests/dispatch.test.ts` — Sub-skill dispatch and report structure
|
|
126
|
+
- `tests/sub-skill-integration.test.ts` — End-to-end sub-skill workflow
|
|
127
|
+
- `tests/templates-enhanced.test.ts` — Verify all sub-skill SKILL.md files have required sections
|
|
128
|
+
- All existing tests must continue passing (95/95 baseline)
|
|
129
|
+
|
|
130
|
+
## Constraints
|
|
131
|
+
|
|
132
|
+
- Zero external dependencies (Node.js builtins only)
|
|
133
|
+
- Scripts invoked via shell commands, not as CLI flags on main package
|
|
134
|
+
- Sub-skills follow obra/superpowers SKILL.md format
|
|
135
|
+
- Main SKILL.md uses YAML frontmatter for trigger detection
|
|
136
|
+
- Progressive disclosure: sub-skill content only loaded when dispatched
|
|
137
|
+
- All sub-skills return structured JSON reports
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# Sub-Project 4: Benchmarking & Multi-Agent Support — Design Spec
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
Add weighted benchmark scoring, multi-agent CLI installation targeting, guided iteration reports, and console+JSON benchmark output to workspace-maxxing.
|
|
6
|
+
|
|
7
|
+
## Architecture
|
|
8
|
+
|
|
9
|
+
### New File: `src/scripts/benchmark.ts`
|
|
10
|
+
|
|
11
|
+
Zero-dependency Node.js script. Exports:
|
|
12
|
+
- `calculateBenchmark(workspacePath: string)` — scans workspace, calls validate logic, applies weights, returns benchmark data object
|
|
13
|
+
- `formatBenchmarkTable(data)` — returns formatted string for console output
|
|
14
|
+
- `saveBenchmarkReport(workspacePath, data)` — writes JSON to `.workspace-benchmarks/<name>-<timestamp>.json`
|
|
15
|
+
|
|
16
|
+
### Weighted Scoring Engine
|
|
17
|
+
|
|
18
|
+
**Default weights:**
|
|
19
|
+
| Stage | Weight | Rationale |
|
|
20
|
+
|-------|--------|-----------|
|
|
21
|
+
| `01-ideation` | 1.5x | Core thinking quality — most critical |
|
|
22
|
+
| `02-research` | 1.3x | Evidence gathering — high importance |
|
|
23
|
+
| `03-architecture` | 1.2x | Structural decisions — important |
|
|
24
|
+
| All other stages | 1.0x | Baseline |
|
|
25
|
+
|
|
26
|
+
**Formula:**
|
|
27
|
+
```
|
|
28
|
+
weightedStageScore = rawStageScore × weight
|
|
29
|
+
finalScore = (Σ(weightedStageScores) / Σ(appliedWeights)) × (100 / maxRawScore)
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
- Stages that don't exist in a workspace are excluded from both numerator and denominator
|
|
33
|
+
- `maxRawScore` = 45 (the per-stage cap from validate.ts)
|
|
34
|
+
- Final score normalized to 0-100
|
|
35
|
+
|
|
36
|
+
### Multi-Agent CLI Flags
|
|
37
|
+
|
|
38
|
+
**Flag behavior:**
|
|
39
|
+
| Flag | Installation Target |
|
|
40
|
+
|------|-------------------|
|
|
41
|
+
| (none) | `.agents/skills/workspace-maxxing/` (agent-agnostic default) |
|
|
42
|
+
| `--opencode` | `.agents/skills/workspace-maxxing/` (same as default) |
|
|
43
|
+
| `--claude` | `.claude/skills/` |
|
|
44
|
+
| `--copilot` | `.github/copilot-instructions/` |
|
|
45
|
+
| `--gemini` | `.gemini/skills/` |
|
|
46
|
+
|
|
47
|
+
**Implementation:**
|
|
48
|
+
- `src/index.ts` parses flags before install
|
|
49
|
+
- `install.ts` receives `targetAgent` parameter
|
|
50
|
+
- Agent-specific paths defined in a single config map
|
|
51
|
+
- All scripts and templates remain identical — only destination changes
|
|
52
|
+
- SKILL.md includes metadata note about which agent it was installed for (no behavioral changes)
|
|
53
|
+
|
|
54
|
+
### Guided Iteration Reports
|
|
55
|
+
|
|
56
|
+
**Flow:**
|
|
57
|
+
1. Agent runs `iterate.ts`
|
|
58
|
+
2. Each pass: `validate.ts` → `benchmark.ts` → structured report returned
|
|
59
|
+
3. Report includes: current score, weighted benchmark score, per-stage breakdown, fix suggestions, `improvementPotential` flag
|
|
60
|
+
4. Agent decides whether to apply fixes and re-run
|
|
61
|
+
5. No automatic looping — agent is in control
|
|
62
|
+
|
|
63
|
+
**Report structure:**
|
|
64
|
+
```json
|
|
65
|
+
{
|
|
66
|
+
"workspace": "my-project",
|
|
67
|
+
"agent": "opencode",
|
|
68
|
+
"timestamp": "2026-04-07T...",
|
|
69
|
+
"rawScore": 72,
|
|
70
|
+
"weightedScore": 78,
|
|
71
|
+
"stages": [
|
|
72
|
+
{ "name": "01-ideation", "raw": 85, "weight": 1.5, "weighted": 95 },
|
|
73
|
+
{ "name": "02-research", "raw": 60, "weight": 1.3, "weighted": 58 }
|
|
74
|
+
],
|
|
75
|
+
"fixSuggestions": ["Add research sources to 02-research", "Expand architecture diagrams"],
|
|
76
|
+
"improvementPotential": true
|
|
77
|
+
}
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Benchmark Output
|
|
81
|
+
|
|
82
|
+
**Console:** Formatted table with stage names, raw scores, weights, weighted scores, and total.
|
|
83
|
+
|
|
84
|
+
**JSON:** Saved to `.workspace-benchmarks/<workspace-name>-<timestamp>.json` with full metadata: workspace path, agent flag used, timestamp, all scores, weights applied.
|
|
85
|
+
|
|
86
|
+
## Integration Points
|
|
87
|
+
|
|
88
|
+
- `validate.ts` already returns per-stage scores — `benchmark.ts` consumes those and applies weights
|
|
89
|
+
- `iterate.ts` calls `validate.ts` internally — its return value is extended to include weighted benchmark data
|
|
90
|
+
- `install.ts` enhanced to accept a `targetAgent` parameter that selects the agent-specific installation path
|
|
91
|
+
- `src/index.ts` enhanced with CLI flag parsing for `--claude`, `--copilot`, `--gemini`, `--opencode`
|
|
92
|
+
|
|
93
|
+
## Testing Strategy
|
|
94
|
+
|
|
95
|
+
- `tests/benchmark.test.ts` — weighted scoring calculations, edge cases (missing stages, zero scores, normalization)
|
|
96
|
+
- `tests/cli-flags.test.ts` — flag parsing and installation targeting
|
|
97
|
+
- `tests/iterate-enhanced.test.ts` — guided iteration report structure
|
|
98
|
+
- All existing tests must continue passing (75/75 baseline)
|
|
99
|
+
|
|
100
|
+
## Constraints
|
|
101
|
+
|
|
102
|
+
- Zero external dependencies (Node.js builtins only: `fs`, `path`, `process`, `child_process`)
|
|
103
|
+
- Scripts invoked via shell commands, not as CLI flags on main package
|
|
104
|
+
- Agent-agnostic by default (no flag = universal behavior)
|
|
105
|
+
- Guided iterations only (no autonomous hill-climbing loop)
|