workspace-maxxing 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/workspace-maxxing/.workspace-templates/CONTEXT.md +44 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/SYSTEM.md +44 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/references/anti-patterns.md +16 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/references/iron-laws.md +26 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/references/reporting-format.md +52 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/benchmark.ts +171 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/dispatch.ts +473 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/generate-tests.ts +158 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/install-tool.ts +82 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/iterate.ts +265 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/orchestrator.ts +539 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/scaffold.ts +282 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/validate.ts +452 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/architecture/SKILL.md +95 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/fixer/SKILL.md +109 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/iteration/SKILL.md +89 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/research/SKILL.md +94 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/testing/SKILL.md +89 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/tooling/SKILL.md +87 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/validation/SKILL.md +103 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/worker/SKILL.md +79 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/README.md +14 -0
- package/.agents/skills/workspace-maxxing/SKILL.md +312 -0
- package/.agents/skills/workspace-maxxing/scripts/benchmark.ts +171 -0
- package/.agents/skills/workspace-maxxing/scripts/dispatch.ts +473 -0
- package/.agents/skills/workspace-maxxing/scripts/generate-tests.ts +158 -0
- package/.agents/skills/workspace-maxxing/scripts/install-tool.ts +82 -0
- package/.agents/skills/workspace-maxxing/scripts/iterate.ts +265 -0
- package/.agents/skills/workspace-maxxing/scripts/orchestrator.ts +539 -0
- package/.agents/skills/workspace-maxxing/scripts/scaffold.ts +282 -0
- package/.agents/skills/workspace-maxxing/scripts/validate.ts +452 -0
- package/README.md +144 -0
- package/dist/agent-creator.d.ts +9 -0
- package/dist/agent-creator.d.ts.map +1 -0
- package/dist/agent-creator.js +199 -0
- package/dist/agent-creator.js.map +1 -0
- package/dist/agent-iterator.d.ts +38 -0
- package/dist/agent-iterator.d.ts.map +1 -0
- package/dist/agent-iterator.js +327 -0
- package/dist/agent-iterator.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +197 -0
- package/dist/index.js.map +1 -0
- package/dist/install.d.ts +18 -0
- package/dist/install.d.ts.map +1 -0
- package/dist/install.js +117 -0
- package/dist/install.js.map +1 -0
- package/dist/platforms/claude.d.ts +7 -0
- package/dist/platforms/claude.d.ts.map +1 -0
- package/dist/platforms/claude.js +70 -0
- package/dist/platforms/claude.js.map +1 -0
- package/dist/platforms/copilot.d.ts +7 -0
- package/dist/platforms/copilot.d.ts.map +1 -0
- package/dist/platforms/copilot.js +75 -0
- package/dist/platforms/copilot.js.map +1 -0
- package/dist/platforms/gemini.d.ts +7 -0
- package/dist/platforms/gemini.d.ts.map +1 -0
- package/dist/platforms/gemini.js +81 -0
- package/dist/platforms/gemini.js.map +1 -0
- package/dist/platforms/index.d.ts +8 -0
- package/dist/platforms/index.d.ts.map +1 -0
- package/dist/platforms/index.js +41 -0
- package/dist/platforms/index.js.map +1 -0
- package/dist/platforms/opencode.d.ts +7 -0
- package/dist/platforms/opencode.d.ts.map +1 -0
- package/dist/platforms/opencode.js +70 -0
- package/dist/platforms/opencode.js.map +1 -0
- package/dist/scripts/benchmark.d.ts +20 -0
- package/dist/scripts/benchmark.d.ts.map +1 -0
- package/dist/scripts/benchmark.js +170 -0
- package/dist/scripts/benchmark.js.map +1 -0
- package/dist/scripts/dispatch.d.ts +32 -0
- package/dist/scripts/dispatch.d.ts.map +1 -0
- package/dist/scripts/dispatch.js +386 -0
- package/dist/scripts/dispatch.js.map +1 -0
- package/dist/scripts/generate-tests.d.ts +11 -0
- package/dist/scripts/generate-tests.d.ts.map +1 -0
- package/dist/scripts/generate-tests.js +118 -0
- package/dist/scripts/generate-tests.js.map +1 -0
- package/dist/scripts/install-tool.d.ts +8 -0
- package/dist/scripts/install-tool.d.ts.map +1 -0
- package/dist/scripts/install-tool.js +98 -0
- package/dist/scripts/install-tool.js.map +1 -0
- package/dist/scripts/iterate.d.ts +44 -0
- package/dist/scripts/iterate.d.ts.map +1 -0
- package/dist/scripts/iterate.js +260 -0
- package/dist/scripts/iterate.js.map +1 -0
- package/dist/scripts/orchestrator.d.ts +40 -0
- package/dist/scripts/orchestrator.d.ts.map +1 -0
- package/dist/scripts/orchestrator.js +378 -0
- package/dist/scripts/orchestrator.js.map +1 -0
- package/dist/scripts/scaffold.d.ts +8 -0
- package/dist/scripts/scaffold.d.ts.map +1 -0
- package/dist/scripts/scaffold.js +279 -0
- package/dist/scripts/scaffold.js.map +1 -0
- package/dist/scripts/validate.d.ts +11 -0
- package/dist/scripts/validate.d.ts.map +1 -0
- package/dist/scripts/validate.js +472 -0
- package/dist/scripts/validate.js.map +1 -0
- package/docs/superpowers/plans/2026-04-07-autonomous-iteration-plan.md +1123 -0
- package/docs/superpowers/plans/2026-04-07-autonomous-iteration-sub-agent-batches.md +1923 -0
- package/docs/superpowers/plans/2026-04-07-autonomous-workflow-sub-skill-plan.md +1505 -0
- package/docs/superpowers/plans/2026-04-07-benchmarking-multi-agent-plan.md +854 -0
- package/docs/superpowers/plans/2026-04-07-workspace-builder-logic-plan.md +1426 -0
- package/docs/superpowers/plans/2026-04-07-workspace-maxxing-plan.md +1299 -0
- package/docs/superpowers/plans/2026-04-08-session-294c-subagent-invocation-plan.md +320 -0
- package/docs/superpowers/plans/2026-04-08-workflow-prompt-hardening-plan.md +1025 -0
- package/docs/superpowers/plans/2026-04-12-workspace-agent-creation-plan.md +992 -0
- package/docs/superpowers/specs/2026-04-07-autonomous-iteration-design.md +214 -0
- package/docs/superpowers/specs/2026-04-07-autonomous-iteration-sub-agent-batches-design.md +188 -0
- package/docs/superpowers/specs/2026-04-07-autonomous-workflow-sub-skill-design.md +137 -0
- package/docs/superpowers/specs/2026-04-07-benchmarking-multi-agent-design.md +105 -0
- package/docs/superpowers/specs/2026-04-07-workspace-builder-logic-design.md +179 -0
- package/docs/superpowers/specs/2026-04-07-workspace-maxxing-design.md +227 -0
- package/docs/superpowers/specs/2026-04-08-session-294c-subagent-invocation-design.md +265 -0
- package/docs/superpowers/specs/2026-04-08-workflow-prompt-hardening-design.md +146 -0
- package/docs/superpowers/specs/2026-04-12-workspace-agent-creation-design.md +239 -0
- package/jest.config.js +8 -0
- package/package.json +32 -0
- package/src/agent-creator.ts +180 -0
- package/src/agent-iterator.ts +397 -0
- package/src/index.ts +189 -0
- package/src/install.ts +105 -0
- package/src/platforms/claude.ts +40 -0
- package/src/platforms/copilot.ts +50 -0
- package/src/platforms/gemini.ts +55 -0
- package/src/platforms/index.ts +45 -0
- package/src/platforms/opencode.ts +41 -0
- package/src/scripts/benchmark.ts +171 -0
- package/src/scripts/dispatch.ts +473 -0
- package/src/scripts/generate-tests.ts +112 -0
- package/src/scripts/install-tool.ts +82 -0
- package/src/scripts/iterate.ts +271 -0
- package/src/scripts/orchestrator.ts +539 -0
- package/src/scripts/scaffold.ts +282 -0
- package/src/scripts/validate.ts +516 -0
- package/templates/.workspace-templates/CONTEXT.md +44 -0
- package/templates/.workspace-templates/SYSTEM.md +44 -0
- package/templates/.workspace-templates/references/anti-patterns.md +16 -0
- package/templates/.workspace-templates/references/iron-laws.md +26 -0
- package/templates/.workspace-templates/references/reporting-format.md +52 -0
- package/templates/.workspace-templates/scripts/benchmark.ts +171 -0
- package/templates/.workspace-templates/scripts/dispatch.ts +473 -0
- package/templates/.workspace-templates/scripts/generate-tests.ts +158 -0
- package/templates/.workspace-templates/scripts/install-tool.ts +82 -0
- package/templates/.workspace-templates/scripts/iterate.ts +265 -0
- package/templates/.workspace-templates/scripts/orchestrator.ts +539 -0
- package/templates/.workspace-templates/scripts/scaffold.ts +282 -0
- package/templates/.workspace-templates/scripts/validate.ts +452 -0
- package/templates/.workspace-templates/skills/architecture/SKILL.md +95 -0
- package/templates/.workspace-templates/skills/fixer/SKILL.md +109 -0
- package/templates/.workspace-templates/skills/iteration/SKILL.md +89 -0
- package/templates/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
- package/templates/.workspace-templates/skills/research/SKILL.md +94 -0
- package/templates/.workspace-templates/skills/testing/SKILL.md +89 -0
- package/templates/.workspace-templates/skills/tooling/SKILL.md +87 -0
- package/templates/.workspace-templates/skills/validation/SKILL.md +103 -0
- package/templates/.workspace-templates/skills/worker/SKILL.md +79 -0
- package/templates/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
- package/templates/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
- package/templates/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
- package/templates/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
- package/templates/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
- package/templates/.workspace-templates/workspace/README.md +14 -0
- package/templates/SKILL.md +347 -0
- package/tests/benchmark.test.ts +158 -0
- package/tests/cli.test.ts +109 -0
- package/tests/dispatch-parallel.test.ts +124 -0
- package/tests/dispatch.test.ts +218 -0
- package/tests/fixer-skill.test.ts +203 -0
- package/tests/generate-tests.test.ts +101 -0
- package/tests/install-tool.test.ts +141 -0
- package/tests/install.test.ts +144 -0
- package/tests/integration.test.ts +324 -0
- package/tests/iterate.test.ts +219 -0
- package/tests/orchestrator.test.ts +710 -0
- package/tests/scaffold.test.ts +238 -0
- package/tests/templates-enhanced.test.ts +208 -0
- package/tests/templates.test.ts +219 -0
- package/tests/validate.test.ts +421 -0
- package/tests/validation-enhanced.test.ts +303 -0
- package/tests/worker-skill.test.ts +88 -0
- package/tsconfig.json +19 -0
- package/workspace/00-meta/CONTEXT.md +3 -0
- package/workspace/00-meta/execution-log.md +17 -0
- package/workspace/00-meta/tools.md +11 -0
- package/workspace/01-input/CONTEXT.md +27 -0
- package/workspace/CONTEXT.md +35 -0
- package/workspace/README.md +14 -0
- package/workspace/SYSTEM.md +36 -0
- package/workspace-maxxing-0.1.0.tgz +0 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# Workspace-Maxxing Design Spec — Sub-Project 2: Workspace Builder Logic
|
|
2
|
+
|
|
3
|
+
> **Phase 2 of 4:** Helper scripts + enhanced skill instructions. Subsequent phases add autonomous iteration and benchmarking.
|
|
4
|
+
|
|
5
|
+
## Context
|
|
6
|
+
|
|
7
|
+
Sub-Project 1 delivered the npx CLI that installs a SKILL.md and ICM workspace templates. Sub-Project 2 adds executable helper scripts that the agent uses to scaffold, validate, and install tools for workspaces — replacing manual file creation with reliable, programmatic generation.
|
|
8
|
+
|
|
9
|
+
## Architecture
|
|
10
|
+
|
|
11
|
+
### Components
|
|
12
|
+
|
|
13
|
+
#### 1. Scaffold Script (`scripts/scaffold.ts`)
|
|
14
|
+
|
|
15
|
+
Generates ICM-compliant workspace structures from a JSON plan.
|
|
16
|
+
|
|
17
|
+
**CLI Interface:**
|
|
18
|
+
```bash
|
|
19
|
+
node scripts/scaffold.ts --name "research" --stages "01-research,02-analysis,03-report" --output ./workspace
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
**What it creates:**
|
|
23
|
+
- `SYSTEM.md` (Layer 0) with folder map matching the provided stages
|
|
24
|
+
- `CONTEXT.md` (Layer 1) with routing table for each stage
|
|
25
|
+
- Numbered stage folders, each with a `CONTEXT.md` (Layer 2)
|
|
26
|
+
- `README.md` with usage instructions
|
|
27
|
+
- `00-meta/tools.md` for tool inventory
|
|
28
|
+
|
|
29
|
+
**Dependencies:** Node.js builtins only (`fs`, `path`, `process`)
|
|
30
|
+
|
|
31
|
+
#### 2. Validate Script (`scripts/validate.ts`)
|
|
32
|
+
|
|
33
|
+
Checks a workspace for ICM compliance.
|
|
34
|
+
|
|
35
|
+
**CLI Interface:**
|
|
36
|
+
```bash
|
|
37
|
+
node scripts/validate.ts --workspace ./workspace
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
**What it checks:**
|
|
41
|
+
- SYSTEM.md exists and contains a folder map
|
|
42
|
+
- CONTEXT.md exists at root level
|
|
43
|
+
- Every numbered folder has a CONTEXT.md
|
|
44
|
+
- No empty CONTEXT.md files
|
|
45
|
+
- One-way dependency compliance (downstream folders don't reference upstream in reverse)
|
|
46
|
+
- No duplicate content across files (canonical source check — basic heuristic: flags any identical text blocks > 50 characters found in multiple files)
|
|
47
|
+
|
|
48
|
+
**Output:** Prints pass/fail per check. Exit code 0 if all pass, 1 if any fail.
|
|
49
|
+
|
|
50
|
+
**Dependencies:** Node.js builtins only
|
|
51
|
+
|
|
52
|
+
#### 3. Install Tool Script (`scripts/install-tool.ts`)
|
|
53
|
+
|
|
54
|
+
Installs tools and updates workspace inventory.
|
|
55
|
+
|
|
56
|
+
**CLI Interface:**
|
|
57
|
+
```bash
|
|
58
|
+
node scripts/install-tool.ts --tool "pdf-lib" --manager npm --workspace ./workspace
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**What it does:**
|
|
62
|
+
- Runs the appropriate install command (`npm install`, `pip install`, etc.)
|
|
63
|
+
- Updates `00-meta/tools.md` with the tool name, version, and timestamp
|
|
64
|
+
- Supports managers: `npm`, `pip`, `npx` (global), `brew`
|
|
65
|
+
|
|
66
|
+
**Dependencies:** Node.js builtins only (including `child_process` for running install commands)
|
|
67
|
+
|
|
68
|
+
#### 4. Enhanced SKILL.md
|
|
69
|
+
|
|
70
|
+
Updated to include:
|
|
71
|
+
- "## Available Scripts" section documenting all three scripts
|
|
72
|
+
- Examples of how to invoke each script
|
|
73
|
+
- Workflow: scaffold → validate → fix → install tools → validate → deliver
|
|
74
|
+
- When to use scripts vs. manual file creation (scripts for structure, manual for content)
|
|
75
|
+
|
|
76
|
+
### File Structure
|
|
77
|
+
|
|
78
|
+
```
|
|
79
|
+
workspace-maxxing/
|
|
80
|
+
├── src/
|
|
81
|
+
│ ├── scripts/
|
|
82
|
+
│ │ ├── scaffold.ts — Scaffold script source
|
|
83
|
+
│ │ ├── validate.ts — Validate script source
|
|
84
|
+
│ │ └── install-tool.ts — Install tool script source
|
|
85
|
+
│ ├── index.ts — Unchanged from Phase 1
|
|
86
|
+
│ └── install.ts — Modified: also copies scripts/
|
|
87
|
+
├── templates/
|
|
88
|
+
│ ├── SKILL.md — Enhanced with script instructions
|
|
89
|
+
│ └── .workspace-templates/
|
|
90
|
+
│ └── scripts/ — Scripts copied during install
|
|
91
|
+
│ ├── scaffold.ts
|
|
92
|
+
│ ├── validate.ts
|
|
93
|
+
│ └── install-tool.ts
|
|
94
|
+
├── tests/
|
|
95
|
+
│ ├── scaffold.test.ts
|
|
96
|
+
│ ├── validate.test.ts
|
|
97
|
+
│ └── install-tool.test.ts
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Data Flow
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
User: "Create a research workspace that outputs PDFs"
|
|
104
|
+
│
|
|
105
|
+
▼
|
|
106
|
+
Agent reads SKILL.md → sees Available Scripts section
|
|
107
|
+
│
|
|
108
|
+
▼
|
|
109
|
+
Agent proposes workspace structure to user
|
|
110
|
+
│
|
|
111
|
+
▼
|
|
112
|
+
User approves
|
|
113
|
+
│
|
|
114
|
+
▼
|
|
115
|
+
Agent runs: node scripts/scaffold.ts --name "research" --stages "01-research,02-analysis,03-pdf-export" --output ./workspace
|
|
116
|
+
│
|
|
117
|
+
▼
|
|
118
|
+
Agent runs: node scripts/validate.ts --workspace ./workspace
|
|
119
|
+
│
|
|
120
|
+
├─ If FAIL → Agent reads errors, fixes workspace, re-validates
|
|
121
|
+
│
|
|
122
|
+
▼
|
|
123
|
+
Agent assesses tools → proposes pdf-lib, puppeteer, etc.
|
|
124
|
+
│
|
|
125
|
+
▼
|
|
126
|
+
User approves
|
|
127
|
+
│
|
|
128
|
+
▼
|
|
129
|
+
Agent runs: node scripts/install-tool.ts --tool "pdf-lib" --manager npm --workspace ./workspace
|
|
130
|
+
│
|
|
131
|
+
▼
|
|
132
|
+
Agent runs: node scripts/validate.ts --workspace ./workspace (final check)
|
|
133
|
+
│
|
|
134
|
+
▼
|
|
135
|
+
Deliver: workspace + USAGE.md
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Changes to Existing Files
|
|
139
|
+
|
|
140
|
+
**`src/install.ts`:** Add scripts directory to the copy list. When installing the skill, also copy `templates/.workspace-templates/scripts/` to the skill directory.
|
|
141
|
+
|
|
142
|
+
**`templates/SKILL.md`:** Add "## Available Scripts" section with usage examples for all three scripts.
|
|
143
|
+
|
|
144
|
+
### Error Handling
|
|
145
|
+
|
|
146
|
+
- **scaffold.ts:** Fails if the output directory already exists, unless the `--force` flag is passed to overwrite it. Fails if the stages list is empty or malformed.
|
|
147
|
+
- **validate.ts:** Never throws an uncaught exception — always returns structured output. The exit code indicates pass/fail.
|
|
148
|
+
- **install-tool.ts:** Fails if install command returns non-zero exit code. Reports error message from the package manager.
|
|
149
|
+
|
|
150
|
+
### Testing Strategy
|
|
151
|
+
|
|
152
|
+
- **scaffold.test.ts:** Verify correct folder structure is created, SYSTEM.md has correct folder map, CONTEXT.md files exist for each stage
|
|
153
|
+
- **validate.test.ts:** Create valid workspace → expect pass. Create invalid workspace (missing CONTEXT.md, empty files) → expect specific failures
|
|
154
|
+
- **install-tool.test.ts:** Mock child_process.execSync, verify correct command is run and tools.md is updated
|
|
155
|
+
- **Integration:** Run scaffold → validate → expect pass
|
|
156
|
+
|
|
157
|
+
### Scope
|
|
158
|
+
|
|
159
|
+
**In Scope (This Phase):**
|
|
160
|
+
- Three helper scripts (scaffold, validate, install-tool)
|
|
161
|
+
- Enhanced SKILL.md with script usage instructions
|
|
162
|
+
- Installer updated to copy scripts
|
|
163
|
+
- Tests for all three scripts
|
|
164
|
+
|
|
165
|
+
**Out of Scope (Future Phases):**
|
|
166
|
+
- Autonomous iteration engine (Phase 3)
|
|
167
|
+
- Sub-agent orchestration framework (Phase 3)
|
|
168
|
+
- Benchmark scoring system (Phase 4)
|
|
169
|
+
- Multi-agent support (Phase 4)
|
|
170
|
+
- Hill-climbing on scores (Phase 3)
|
|
171
|
+
|
|
172
|
+
### Success Criteria
|
|
173
|
+
|
|
174
|
+
1. `node scripts/scaffold.ts` creates valid ICM workspace from plan
|
|
175
|
+
2. `node scripts/validate.ts` correctly identifies valid and invalid workspaces
|
|
176
|
+
3. `node scripts/install-tool.ts` installs packages and updates inventory
|
|
177
|
+
4. Enhanced SKILL.md documents all scripts with examples
|
|
178
|
+
5. Installer copies scripts to skill directory
|
|
179
|
+
6. All tests pass (Phase 1 + Phase 2)
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
# Workspace-Maxxing Design Spec
|
|
2
|
+
|
|
3
|
+
## Sub-Project 1: npx CLI + Base Skill
|
|
4
|
+
|
|
5
|
+
> **Phase 1 of 4:** Installer + skill foundation. Subsequent phases add workspace builder logic, autonomous iteration, and benchmarking.
|
|
6
|
+
|
|
7
|
+
## Context
|
|
8
|
+
|
|
9
|
+
Workspace-Maxxing is an npx-installable skill that enables AI coding agents (starting with OpenCode) to create, iterate on, and validate structured workspaces using the Interpretable Context Methodology (ICM) by Jake Van Clief. The workspace IS the skill — a folder structure with interconnected markdown files that doubles as an installable skill package.
|
|
10
|
+
|
|
11
|
+
**Inspired by:**
|
|
12
|
+
- [AutoAgent](https://github.com/kevinrgu/autoagent) — autonomous harness engineering via meta-agent iteration
|
|
13
|
+
- [ICM Paper](https://arxiv.org/abs/2603.16021) — folder structure as agentic architecture
|
|
14
|
+
- [Content-Agent-Routing-Promptbase](https://github.com/RinDig/Content-Agent-Routing-Promptbase) — layered routing architecture for AI context
|
|
15
|
+
|
|
16
|
+
## Architecture
|
|
17
|
+
|
|
18
|
+
### Layered Routing Model (ICM)
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
Layer 0 — SYSTEM.md (always loaded)
|
|
22
|
+
├── Folder map + ID systems
|
|
23
|
+
├── Core instructions for the agent
|
|
24
|
+
└── ~800 tokens
|
|
25
|
+
|
|
26
|
+
Layer 1 — CONTEXT.md (routing table)
|
|
27
|
+
├── Maps tasks to workspaces
|
|
28
|
+
└── ~300 tokens
|
|
29
|
+
|
|
30
|
+
Layer 2 — Workspace CONTEXT.md (per-folder)
|
|
31
|
+
├── What to load, in what order, for what task
|
|
32
|
+
└── ~200-500 tokens each
|
|
33
|
+
|
|
34
|
+
Layer 3 — Content Files (selectively loaded)
|
|
35
|
+
├── Actual reference material
|
|
36
|
+
└── ~500-3000 tokens each
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Core Principles
|
|
40
|
+
|
|
41
|
+
1. **Canonical Sources** — Every piece of information lives in exactly one place. Files point to data, never duplicate it.
|
|
42
|
+
2. **One-Way Dependencies** — A → B is fine. Adding B → A as well creates a cycle and O(n²) maintenance. If two files need each other, introduce a third file C that both reference.
|
|
43
|
+
3. **Selective Section Loading** — Route to sections of files, not entire files. Load only what the task needs.
|
|
44
|
+
4. **Numbered Folders** — Represent workflow stages. 01-research → 02-draft → 03-review → 04-output.
|
|
45
|
+
5. **Routing ≠ Content** — CONTEXT.md files tell agents what to load. They don't contain the knowledge itself.
|
|
46
|
+
|
|
47
|
+
## System Design
|
|
48
|
+
|
|
49
|
+
### Components
|
|
50
|
+
|
|
51
|
+
#### 1. npx CLI (`src/index.ts`)
|
|
52
|
+
|
|
53
|
+
Entry point. Parses arguments, delegates to installer.
|
|
54
|
+
|
|
55
|
+
```
|
|
56
|
+
npx workspace-maxxing --opencode
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
- `--opencode` — installs skill into `.agents/skills/workspace-maxxing/`
|
|
60
|
+
- Future flags: `--claude`, `--copilot`, `--gemini`
|
|
61
|
+
|
|
62
|
+
#### 2. Installer (`src/install.ts`)
|
|
63
|
+
|
|
64
|
+
- Detects project root (nearest `.git` or `package.json`)
|
|
65
|
+
- Creates `.agents/skills/workspace-maxxing/` directory
|
|
66
|
+
- Copies `SKILL.md` and `.workspace-templates/` from package
|
|
67
|
+
- Idempotent: safe to run multiple times
|
|
68
|
+
- No runtime dependencies beyond Node.js builtins (`fs`, `path`)
|
|
69
|
+
|
|
70
|
+
#### 3. Skill File (`templates/SKILL.md`)
|
|
71
|
+
|
|
72
|
+
The core artifact. Instructions for OpenCode agents on how to:
|
|
73
|
+
|
|
74
|
+
- **Intent Capture:** Understand what workspace the user wants
|
|
75
|
+
- **Propose Structure:** Design workspace using ICM methodology
|
|
76
|
+
- **Human Approval Gate:** Present plan, wait for approval
|
|
77
|
+
- **Build Workspace:** Create numbered folders, markdown files, routing tables
|
|
78
|
+
- **Tool Assessment:** Scan available tools, propose installations, get approval
|
|
79
|
+
- **Autonomous Iteration:** After build, spawn sub-agents to test with diverse use cases, self-evaluate, update prompts, only involve human if confidence is low
|
|
80
|
+
- **Final Output:** Deliver workspace + skill package + usage guide
|
|
81
|
+
|
|
82
|
+
The SKILL.md must contain these sections:
|
|
83
|
+
|
|
84
|
+
```markdown
|
|
85
|
+
# Workspace-Maxxing Skill
|
|
86
|
+
|
|
87
|
+
## Role
|
|
88
|
+
You are a workspace architect. You create structured, ICM-compliant workspaces.
|
|
89
|
+
|
|
90
|
+
## Process
|
|
91
|
+
1. CAPTURE INTENT — Ask: "What workflow do you want to automate?"
|
|
92
|
+
2. PROPOSE STRUCTURE — Design workspace with numbered folders, CONTEXT.md routing files, canonical sources
|
|
93
|
+
3. GET APPROVAL — Present plan. Wait. Do not build until approved.
|
|
94
|
+
4. BUILD WORKSPACE — Create folders, markdown files, routing tables
|
|
95
|
+
5. ASSESS TOOLS — Scan environment. List available tools. Propose missing tools needed. Get approval.
|
|
96
|
+
6. INSTALL TOOLS — After approval, install proposed tools
|
|
97
|
+
7. TEST AUTONOMOUSLY — Spawn sub-agents with diverse test cases. Self-evaluate outputs.
|
|
98
|
+
8. ITERATE — Update system prompts based on test results. Only involve human if confidence is low.
|
|
99
|
+
9. DELIVER — Output: workspace folder + skill package + usage guide
|
|
100
|
+
|
|
101
|
+
## ICM Rules
|
|
102
|
+
- Canonical sources: each fact lives in exactly one file
|
|
103
|
+
- One-way dependencies only: A → B, never B → A
|
|
104
|
+
- Selective loading: route to sections, not whole files
|
|
105
|
+
- Numbered folders for workflow stages
|
|
106
|
+
|
|
107
|
+
## Output Format
|
|
108
|
+
- workspace/ — the built workspace
|
|
109
|
+
- .agents/skills/<workspace-name>/ — installable skill
|
|
110
|
+
- USAGE.md — how to use this workspace in future sessions
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
#### 4. Workspace Templates (`templates/.workspace-templates/`)
|
|
114
|
+
|
|
115
|
+
Base structure copied during install:
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
.workspace-templates/
|
|
119
|
+
├── SYSTEM.md # Layer 0 template (always loaded)
|
|
120
|
+
├── CONTEXT.md # Layer 1 routing template
|
|
121
|
+
└── workspace/ # Example workspace skeleton
|
|
122
|
+
├── 00-meta/
|
|
123
|
+
│ └── CONTEXT.md # Workspace-level routing
|
|
124
|
+
├── 01-input/
|
|
125
|
+
│ └── CONTEXT.md
|
|
126
|
+
├── 02-process/
|
|
127
|
+
│ └── CONTEXT.md
|
|
128
|
+
├── 03-output/
|
|
129
|
+
│ └── CONTEXT.md
|
|
130
|
+
└── README.md # Usage guide template
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### File Map (Project Structure)
|
|
134
|
+
|
|
135
|
+
```
|
|
136
|
+
workspace-maxxing/
|
|
137
|
+
├── package.json
|
|
138
|
+
├── tsconfig.json
|
|
139
|
+
├── src/
|
|
140
|
+
│ ├── index.ts # CLI entry point
|
|
141
|
+
│ └── install.ts # File copying, path resolution
|
|
142
|
+
├── templates/
|
|
143
|
+
│ ├── SKILL.md # Agent instructions (installed as skill)
|
|
144
|
+
│ └── .workspace-templates/
|
|
145
|
+
│ ├── SYSTEM.md # Layer 0 template
|
|
146
|
+
│ ├── CONTEXT.md # Layer 1 routing template
|
|
147
|
+
│ └── workspace/ # Workspace skeleton
|
|
148
|
+
│ ├── 00-meta/CONTEXT.md
|
|
149
|
+
│ ├── 01-input/CONTEXT.md
|
|
150
|
+
│ ├── 02-process/CONTEXT.md
|
|
151
|
+
│ ├── 03-output/CONTEXT.md
|
|
152
|
+
│ └── README.md
|
|
153
|
+
└── docs/
|
|
154
|
+
└── superpowers/
|
|
155
|
+
└── specs/
|
|
156
|
+
└── 2026-04-07-workspace-maxxing-design.md
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Data Flow
|
|
160
|
+
|
|
161
|
+
```
|
|
162
|
+
User runs: npx workspace-maxxing --opencode
|
|
163
|
+
│
|
|
164
|
+
▼
|
|
165
|
+
CLI parses args → calls installer
|
|
166
|
+
│
|
|
167
|
+
▼
|
|
168
|
+
Installer copies files to .agents/skills/workspace-maxxing/
|
|
169
|
+
│
|
|
170
|
+
▼
|
|
171
|
+
User opens new OpenCode session, invokes skill
|
|
172
|
+
│
|
|
173
|
+
▼
|
|
174
|
+
Agent reads SKILL.md → follows instructions:
|
|
175
|
+
1. Captures user intent ("create a research workspace")
|
|
176
|
+
2. Proposes workspace structure (ICM-based)
|
|
177
|
+
3. User approves
|
|
178
|
+
4. Agent builds workspace from templates + customization
|
|
179
|
+
5. Agent assesses available tools, proposes installations
|
|
180
|
+
6. User approves tool installations
|
|
181
|
+
7. Agent installs tools
|
|
182
|
+
8. Agent spawns sub-agents to test workspace
|
|
183
|
+
9. Sub-agents self-evaluate, update prompts
|
|
184
|
+
10. Human involved only if confidence is low
|
|
185
|
+
11. Final output: workspace + skill package + usage guide
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Error Handling
|
|
189
|
+
|
|
190
|
+
- **Missing project root:** Warn user, install in current directory
|
|
191
|
+
- **Existing skill directory:** Overwrite with confirmation message
|
|
192
|
+
- **Failed file copy:** Abort, report which file failed, no partial state
|
|
193
|
+
- **Invalid flag:** Show usage help with supported flags
|
|
194
|
+
|
|
195
|
+
### Testing Strategy
|
|
196
|
+
|
|
197
|
+
- **CLI tests:** Verify `--opencode` flag creates correct directory structure
|
|
198
|
+
- **Installer tests:** Verify idempotency, correct file paths, no partial state on failure
|
|
199
|
+
- **Template validation:** Verify all template files exist and are non-empty
|
|
200
|
+
- **Integration test:** Run `npx workspace-maxxing --opencode` in temp directory, verify skill is loadable
|
|
201
|
+
|
|
202
|
+
## Scope
|
|
203
|
+
|
|
204
|
+
### In Scope (This Phase)
|
|
205
|
+
|
|
206
|
+
- npx CLI with `--opencode` flag
|
|
207
|
+
- Installer that copies skill files
|
|
208
|
+
- SKILL.md with complete agent instructions for workspace creation
|
|
209
|
+
- Workspace templates (ICM structure)
|
|
210
|
+
- Basic tool assessment instructions in SKILL.md
|
|
211
|
+
|
|
212
|
+
### Out of Scope (Future Phases)
|
|
213
|
+
|
|
214
|
+
- Autonomous iteration engine (Phase 3)
|
|
215
|
+
- Sub-agent orchestration framework (Phase 3)
|
|
216
|
+
- Benchmark scoring system (Phase 4)
|
|
217
|
+
- Multi-agent support --claude, --copilot, --gemini (Phase 4)
|
|
218
|
+
- Hill-climbing on scores (Phase 3)
|
|
219
|
+
- Human checkpoint UI (Phase 3)
|
|
220
|
+
|
|
221
|
+
## Success Criteria
|
|
222
|
+
|
|
223
|
+
1. `npx workspace-maxxing --opencode` installs a working skill
|
|
224
|
+
2. OpenCode agent can read the skill and understand how to build workspaces
|
|
225
|
+
3. Installed skill includes valid ICM-structured workspace templates
|
|
226
|
+
4. Running installer twice produces same result (idempotent)
|
|
227
|
+
5. Zero runtime dependencies beyond Node.js builtins
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
# Session 294c Workflow Quality and Sub-Agent Invocation Hardening - Design Spec
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
This spec assesses `session-ses_294c.md` end-to-end and defines a corrective design so `workspace-maxxing` runs true agent-driven loops instead of simulated script loops. The core gap is that worker/fixer "sub-agents" were not actually spawned as agents; they were routed through local script dispatch, leading to fake passes, path churn, benchmark gaming, and unstable completion claims.
|
|
6
|
+
|
|
7
|
+
## Goals
|
|
8
|
+
|
|
9
|
+
1. Enforce real sub-agent execution for worker/fixer loops.
|
|
10
|
+
2. Make agent-generated test-cases the canonical input to iteration.
|
|
11
|
+
3. Prevent false completion claims when validation or threshold gates are not met.
|
|
12
|
+
4. Remove brittle setup steps (`npm init`, ad-hoc TypeScript bootstrapping) from skill usage flow.
|
|
13
|
+
5. Add deterministic diagnostics for runner failures and score inconsistencies.
|
|
14
|
+
|
|
15
|
+
## Non-Goals
|
|
16
|
+
|
|
17
|
+
1. No redesign of the ICM folder model.
|
|
18
|
+
2. No replacement of benchmark methodology in this phase.
|
|
19
|
+
3. No UI changes; this is workflow/runtime behavior hardening.
|
|
20
|
+
|
|
21
|
+
## Transcript Assessment Method
|
|
22
|
+
|
|
23
|
+
- Source: `C:\Users\E1560361\Desktop\npx-test-workspace\session-ses_294c.md`
|
|
24
|
+
- Unit of review: each user query, then assistant reasoning/output sequence attached to that query.
|
|
25
|
+
- Output: per-step quality rating and tagged defects.
|
|
26
|
+
|
|
27
|
+
## Query-by-Query Process Assessment
|
|
28
|
+
|
|
29
|
+
### Query 1
|
|
30
|
+
User query: "workspace that will give me a daily digest of all things AI"
|
|
31
|
+
|
|
32
|
+
| Step | Assistant Reasoning/Output | Quality | Tags |
|
|
33
|
+
|---|---|---|---|
|
|
34
|
+
| 1.1 | Loaded `workspace-maxxing` skill before implementation | Good | - |
|
|
35
|
+
| 1.2 | Loaded `research` skill, but treated dispatch/load as execution | Poor | FLOW-SA-001 |
|
|
36
|
+
| 1.3 | Performed repeated script/path inspection instead of executing sub-agent workflow contract | Poor | FLOW-SA-003, FLOW-PATH-001 |
|
|
37
|
+
| 1.4 | Bootstrapped target workspace (`npm init`, `npm install`, custom `tsconfig`) to run copied scripts | Poor | FLOW-CLI-001 |
|
|
38
|
+
| 1.5 | Used bash idioms in PowerShell (`&&`, `head`) causing avoidable failures | Poor | FLOW-CLI-002 |
|
|
39
|
+
| 1.6 | Manually copied skill trees to satisfy incorrect runtime assumptions | Poor | FLOW-PATH-001 |
|
|
40
|
+
| 1.7 | Generated `research-findings.md` manually by controller, not a spawned sub-agent | Mixed | FLOW-SA-003 |
|
|
41
|
+
| 1.8 | Requested architecture approval before scaffold | Good | - |
|
|
42
|
+
| 1.9 | Ran scaffold with `--force`, wiping runtime `.agents` state and re-copying manually | Poor | FLOW-STATE-001 |
|
|
43
|
+
| 1.10 | Used `dispatch --skill validation` and interpreted "loaded successfully" as validation activity | Poor | FLOW-SA-001 |
|
|
44
|
+
| 1.11 | Ran `validate.ts` directly and got real output | Good | - |
|
|
45
|
+
| 1.12 | Claimed completion with score 33/100 and later 81/100, both below the 85 target | Poor | FLOW-QUALITY-001 |
|
|
46
|
+
|
|
47
|
+
### Query 2
|
|
48
|
+
User query: "continue next steps"
|
|
49
|
+
|
|
50
|
+
| Step | Assistant Reasoning/Output | Quality | Tags |
|
|
51
|
+
|---|---|---|---|
|
|
52
|
+
| 2.1 | Generated test-cases to `./workspace/test-cases.json` | Poor | FLOW-TEST-001 |
|
|
53
|
+
| 2.2 | Ran orchestrator without runner and got explicit requirement error | Good diagnosis | - |
|
|
54
|
+
| 2.3 | Framed manual fallback but still kept script-centric loop | Poor | FLOW-SA-003 |
|
|
55
|
+
|
|
56
|
+
### Query 3
|
|
57
|
+
User query: "2" (external runner)
|
|
58
|
+
|
|
59
|
+
| Step | Assistant Reasoning/Output | Quality | Tags |
|
|
60
|
+
|---|---|---|---|
|
|
61
|
+
| 3.1 | Passed `--subagent-runner` command that calls `dispatch.ts` itself | Critical defect | FLOW-SA-002 |
|
|
62
|
+
| 3.2 | Orchestrator escalated all batches; no true worker/fixer agent reasoning happened | Poor | FLOW-SA-002, FLOW-SA-003 |
|
|
63
|
+
| 3.3 | Continued by manually editing stage docs to increase scores | Poor | FLOW-QUALITY-003 |
|
|
64
|
+
|
|
65
|
+
### Query 4
|
|
66
|
+
User query: user pasted iteration skill contract
|
|
67
|
+
|
|
68
|
+
| Step | Assistant Reasoning/Output | Quality | Tags |
|
|
69
|
+
|---|---|---|---|
|
|
70
|
+
| 4.1 | Ran `iterate.ts` appropriately per user instruction | Good | - |
|
|
71
|
+
| 4.2 | Accepted inconsistent outputs (`iterate.ts` reported score 100 while the benchmark remained at 81) without reconciliation | Critical defect | FLOW-QUALITY-002 |
|
|
72
|
+
| 4.3 | Optimized for benchmark keywords (Approach/Risks/Timeline/Resources etc.) rather than evidence-backed workflow quality | Poor | FLOW-QUALITY-003 |
|
|
73
|
+
| 4.4 | Repeatedly ignored duplicate-content validation failures as "boilerplate" without rule-aware mitigation | Poor | FLOW-VAL-001 |
|
|
74
|
+
|
|
75
|
+
## Defect Tag Catalog
|
|
76
|
+
|
|
77
|
+
### FLOW-SA-001 - Dispatch/load conflated with execution
|
|
78
|
+
Severity: High
|
|
79
|
+
|
|
80
|
+
Symptom:
|
|
81
|
+
- `dispatch.ts` result "Sub-skill loaded successfully" treated as if the skill had been executed.
|
|
82
|
+
|
|
83
|
+
Impact:
|
|
84
|
+
- Phases appear complete without real work.
|
|
85
|
+
|
|
86
|
+
### FLOW-SA-002 - Runner recursion / fake external runner
|
|
87
|
+
Severity: Critical
|
|
88
|
+
|
|
89
|
+
Symptom:
|
|
90
|
+
- `--subagent-runner` points to `dispatch.ts`, which itself returns simulated reports for worker/fixer when no true external runner emits structured output.
|
|
91
|
+
|
|
92
|
+
Impact:
|
|
93
|
+
- No actual sub-agent reasoning loop occurs.
|
|
94
|
+
|
|
95
|
+
### FLOW-SA-003 - No true sub-agent spawn contract
|
|
96
|
+
Severity: Critical
|
|
97
|
+
|
|
98
|
+
Symptom:
|
|
99
|
+
- No invocation path for real agent workers (for example, an explicit `npx` agent command or platform subagent primitive).
|
|
100
|
+
|
|
101
|
+
Impact:
|
|
102
|
+
- "Agent-driven" flow is script-driven simulation.
|
|
103
|
+
|
|
104
|
+
### FLOW-CLI-001 - Environment bootstrapping in target workspace
|
|
105
|
+
Severity: Medium
|
|
106
|
+
|
|
107
|
+
Symptom:
|
|
108
|
+
- `npm init`, dependency install, and compile steps executed in user target workspace during normal skill use.
|
|
109
|
+
|
|
110
|
+
Impact:
|
|
111
|
+
- Pollutes user workspace and adds unnecessary failure modes.
|
|
112
|
+
|
|
113
|
+
### FLOW-CLI-002 - Shell dialect mismatch
|
|
114
|
+
Severity: Medium
|
|
115
|
+
|
|
116
|
+
Symptom:
|
|
117
|
+
- Unix command idioms used in PowerShell.
|
|
118
|
+
|
|
119
|
+
Impact:
|
|
120
|
+
- Repeated avoidable command failures.
|
|
121
|
+
|
|
122
|
+
### FLOW-PATH-001 - Fragile skill path assumptions
|
|
123
|
+
Severity: High
|
|
124
|
+
|
|
125
|
+
Symptom:
|
|
126
|
+
- Manual copying/restructuring of skill folders required for dispatch path expectations.
|
|
127
|
+
|
|
128
|
+
Impact:
|
|
129
|
+
- Non-deterministic setup and brittle runtime behavior.
|
|
130
|
+
|
|
131
|
+
### FLOW-STATE-001 - Runtime state destruction during scaffold
|
|
132
|
+
Severity: Medium
|
|
133
|
+
|
|
134
|
+
Symptom:
|
|
135
|
+
- `--force` scaffold overwrote workspace and removed runtime state (`.agents/...`) requiring manual restoration.
|
|
136
|
+
|
|
137
|
+
Impact:
|
|
138
|
+
- Hidden regressions and operator confusion.
|
|
139
|
+
|
|
140
|
+
### FLOW-TEST-001 - Non-canonical test-case path
|
|
141
|
+
Severity: High
|
|
142
|
+
|
|
143
|
+
Symptom:
|
|
144
|
+
- Tests generated outside canonical `.agents/iteration/test-cases.json`.
|
|
145
|
+
|
|
146
|
+
Impact:
|
|
147
|
+
- Orchestrator/validator cannot rely on stable source-of-truth inputs.
|
|
148
|
+
|
|
149
|
+
### FLOW-QUALITY-001 - Premature success claims
|
|
150
|
+
Severity: Critical
|
|
151
|
+
|
|
152
|
+
Symptom:
|
|
153
|
+
- Completion claims made while score threshold not met (`81 < 85`).
|
|
154
|
+
|
|
155
|
+
Impact:
|
|
156
|
+
- False delivery confidence.
|
|
157
|
+
|
|
158
|
+
### FLOW-QUALITY-002 - Score inconsistency not reconciled
|
|
159
|
+
Severity: Critical
|
|
160
|
+
|
|
161
|
+
Symptom:
|
|
162
|
+
- Contradictory scoring outputs accepted without resolving source-of-truth.
|
|
163
|
+
|
|
164
|
+
Impact:
|
|
165
|
+
- Decision-making based on invalid state.
|
|
166
|
+
|
|
167
|
+
### FLOW-QUALITY-003 - Benchmark gaming vs real improvement
|
|
168
|
+
Severity: High
|
|
169
|
+
|
|
170
|
+
Symptom:
|
|
171
|
+
- Added keyword sections primarily to satisfy benchmark heuristics.
|
|
172
|
+
|
|
173
|
+
Impact:
|
|
174
|
+
- Inflated score not guaranteed to reflect real workflow quality.
|
|
175
|
+
|
|
176
|
+
### FLOW-VAL-001 - Repeated validation failure rationalized away
|
|
177
|
+
Severity: High
|
|
178
|
+
|
|
179
|
+
Symptom:
|
|
180
|
+
- Duplicate-content check kept failing, but process continued as if non-blocking.
|
|
181
|
+
|
|
182
|
+
Impact:
|
|
183
|
+
- Violates strict verification standard.
|
|
184
|
+
|
|
185
|
+
## Gap vs Subagent-Driven-Development Skill
|
|
186
|
+
|
|
187
|
+
Expected by `subagent-driven-development`:
|
|
188
|
+
1. Fresh implementer subagent per task.
|
|
189
|
+
2. Explicit spec-compliance reviewer subagent.
|
|
190
|
+
3. Explicit code-quality reviewer subagent.
|
|
191
|
+
4. Review loops before task completion.
|
|
192
|
+
|
|
193
|
+
Observed in session:
|
|
194
|
+
1. Controller performed manual edits directly.
|
|
195
|
+
2. No implementer/spec-review/code-review subagent orchestration.
|
|
196
|
+
3. No two-stage review gates.
|
|
197
|
+
4. No task-level subagent lifecycle telemetry.
|
|
198
|
+
|
|
199
|
+
Conclusion:
|
|
200
|
+
- Current loop is not subagent-driven. It is script-dispatch-driven with simulated returns.
|
|
201
|
+
|
|
202
|
+
## Target Design
|
|
203
|
+
|
|
204
|
+
### 1) Real Runner Contract
|
|
205
|
+
|
|
206
|
+
Define a strict external runner contract for worker/fixer:
|
|
207
|
+
|
|
208
|
+
Input placeholders:
|
|
209
|
+
- `{skill}`
|
|
210
|
+
- `{workspace}`
|
|
211
|
+
- `{batchId}`
|
|
212
|
+
- `{testCaseId}`
|
|
213
|
+
|
|
214
|
+
Output contract (JSON only):
|
|
215
|
+
- `skill`, `status`, `timestamp`, `findings`, `recommendations`, `metrics`, `nextSkill`
|
|
216
|
+
|
|
217
|
+
Hard rule:
|
|
218
|
+
- Worker/fixer dispatch without valid runner output must fail; it must never fall back to a "simulated pass".
|
|
219
|
+
|
|
220
|
+
### 2) Canonical Agent-Generated Test-Cases
|
|
221
|
+
|
|
222
|
+
- Authoritative file: `.agents/iteration/test-cases.json`
|
|
223
|
+
- Required readiness marker: `.agents/iteration/.test-cases-ready`
|
|
224
|
+
- Required metadata fields: `id`, `input`, `expected`, `generatedBy`, `timestamp`
|
|
225
|
+
|
|
226
|
+
### 3) Quality Gate Hardening
|
|
227
|
+
|
|
228
|
+
Release gate must require all:
|
|
229
|
+
1. Benchmark score >= threshold.
|
|
230
|
+
2. Validation passed with no blocking findings.
|
|
231
|
+
3. No metric conflicts between `iterate.ts`, `benchmark.ts`, and orchestrator summary.
|
|
232
|
+
|
|
233
|
+
### 4) Path and Setup Simplification
|
|
234
|
+
|
|
235
|
+
- No local project bootstrap in target workspace.
|
|
236
|
+
- Use packaged CLI and runtime scripts only.
|
|
237
|
+
- Add preflight checks with explicit remediation text.
|
|
238
|
+
|
|
239
|
+
### 5) Subagent Lifecycle Telemetry
|
|
240
|
+
|
|
241
|
+
Store per invocation under `.agents/iteration/runs/`:
|
|
242
|
+
- command template
|
|
243
|
+
- rendered command
|
|
244
|
+
- stdout/stderr (bounded)
|
|
245
|
+
- parsed JSON
|
|
246
|
+
- duration/exit code
|
|
247
|
+
|
|
248
|
+
## Acceptance Criteria
|
|
249
|
+
|
|
250
|
+
1. Worker/fixer loops fail fast if no real runner output is produced.
|
|
251
|
+
2. Orchestrator uses `.agents/iteration/test-cases.json` as authoritative input.
|
|
252
|
+
3. At least one integration test verifies runner execution actually occurs (not simulated path).
|
|
253
|
+
4. Completion claim is blocked when score < threshold or validation has blocking findings.
|
|
254
|
+
5. A session equivalent to 294c can run end-to-end without manual path surgery or local bootstrap.
|
|
255
|
+
|
|
256
|
+
## Risks and Mitigations
|
|
257
|
+
|
|
258
|
+
1. Risk: Runner command portability across shells.
|
|
259
|
+
- Mitigation: document Windows-safe examples and cover runner-command parsing with explicit tests for each shell mode.
|
|
260
|
+
|
|
261
|
+
2. Risk: Existing users rely on simulated fallback.
|
|
262
|
+
- Mitigation: introduce explicit `--simulate` mode; default remains strict for worker/fixer.
|
|
263
|
+
|
|
264
|
+
3. Risk: Benchmark heuristic can still be gamed.
|
|
265
|
+
- Mitigation: add artifact-presence checks tied to stage-required evidence.
|