workspace-maxxing 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/.agents/skills/workspace-maxxing/.workspace-templates/CONTEXT.md +44 -0
  2. package/.agents/skills/workspace-maxxing/.workspace-templates/SYSTEM.md +44 -0
  3. package/.agents/skills/workspace-maxxing/.workspace-templates/references/anti-patterns.md +16 -0
  4. package/.agents/skills/workspace-maxxing/.workspace-templates/references/iron-laws.md +26 -0
  5. package/.agents/skills/workspace-maxxing/.workspace-templates/references/reporting-format.md +52 -0
  6. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/benchmark.ts +171 -0
  7. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/dispatch.ts +473 -0
  8. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/generate-tests.ts +158 -0
  9. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/install-tool.ts +82 -0
  10. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/iterate.ts +265 -0
  11. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/orchestrator.ts +539 -0
  12. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/scaffold.ts +282 -0
  13. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/validate.ts +452 -0
  14. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/architecture/SKILL.md +95 -0
  15. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/fixer/SKILL.md +109 -0
  16. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/iteration/SKILL.md +89 -0
  17. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
  18. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/research/SKILL.md +94 -0
  19. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/testing/SKILL.md +89 -0
  20. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/tooling/SKILL.md +87 -0
  21. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/validation/SKILL.md +103 -0
  22. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/worker/SKILL.md +79 -0
  23. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
  24. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
  25. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
  26. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
  27. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
  28. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/README.md +14 -0
  29. package/.agents/skills/workspace-maxxing/SKILL.md +312 -0
  30. package/.agents/skills/workspace-maxxing/scripts/benchmark.ts +171 -0
  31. package/.agents/skills/workspace-maxxing/scripts/dispatch.ts +473 -0
  32. package/.agents/skills/workspace-maxxing/scripts/generate-tests.ts +158 -0
  33. package/.agents/skills/workspace-maxxing/scripts/install-tool.ts +82 -0
  34. package/.agents/skills/workspace-maxxing/scripts/iterate.ts +265 -0
  35. package/.agents/skills/workspace-maxxing/scripts/orchestrator.ts +539 -0
  36. package/.agents/skills/workspace-maxxing/scripts/scaffold.ts +282 -0
  37. package/.agents/skills/workspace-maxxing/scripts/validate.ts +452 -0
  38. package/README.md +144 -0
  39. package/dist/agent-creator.d.ts +9 -0
  40. package/dist/agent-creator.d.ts.map +1 -0
  41. package/dist/agent-creator.js +199 -0
  42. package/dist/agent-creator.js.map +1 -0
  43. package/dist/agent-iterator.d.ts +38 -0
  44. package/dist/agent-iterator.d.ts.map +1 -0
  45. package/dist/agent-iterator.js +327 -0
  46. package/dist/agent-iterator.js.map +1 -0
  47. package/dist/index.d.ts +3 -0
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +197 -0
  50. package/dist/index.js.map +1 -0
  51. package/dist/install.d.ts +18 -0
  52. package/dist/install.d.ts.map +1 -0
  53. package/dist/install.js +117 -0
  54. package/dist/install.js.map +1 -0
  55. package/dist/platforms/claude.d.ts +7 -0
  56. package/dist/platforms/claude.d.ts.map +1 -0
  57. package/dist/platforms/claude.js +70 -0
  58. package/dist/platforms/claude.js.map +1 -0
  59. package/dist/platforms/copilot.d.ts +7 -0
  60. package/dist/platforms/copilot.d.ts.map +1 -0
  61. package/dist/platforms/copilot.js +75 -0
  62. package/dist/platforms/copilot.js.map +1 -0
  63. package/dist/platforms/gemini.d.ts +7 -0
  64. package/dist/platforms/gemini.d.ts.map +1 -0
  65. package/dist/platforms/gemini.js +81 -0
  66. package/dist/platforms/gemini.js.map +1 -0
  67. package/dist/platforms/index.d.ts +8 -0
  68. package/dist/platforms/index.d.ts.map +1 -0
  69. package/dist/platforms/index.js +41 -0
  70. package/dist/platforms/index.js.map +1 -0
  71. package/dist/platforms/opencode.d.ts +7 -0
  72. package/dist/platforms/opencode.d.ts.map +1 -0
  73. package/dist/platforms/opencode.js +70 -0
  74. package/dist/platforms/opencode.js.map +1 -0
  75. package/dist/scripts/benchmark.d.ts +20 -0
  76. package/dist/scripts/benchmark.d.ts.map +1 -0
  77. package/dist/scripts/benchmark.js +170 -0
  78. package/dist/scripts/benchmark.js.map +1 -0
  79. package/dist/scripts/dispatch.d.ts +32 -0
  80. package/dist/scripts/dispatch.d.ts.map +1 -0
  81. package/dist/scripts/dispatch.js +386 -0
  82. package/dist/scripts/dispatch.js.map +1 -0
  83. package/dist/scripts/generate-tests.d.ts +11 -0
  84. package/dist/scripts/generate-tests.d.ts.map +1 -0
  85. package/dist/scripts/generate-tests.js +118 -0
  86. package/dist/scripts/generate-tests.js.map +1 -0
  87. package/dist/scripts/install-tool.d.ts +8 -0
  88. package/dist/scripts/install-tool.d.ts.map +1 -0
  89. package/dist/scripts/install-tool.js +98 -0
  90. package/dist/scripts/install-tool.js.map +1 -0
  91. package/dist/scripts/iterate.d.ts +44 -0
  92. package/dist/scripts/iterate.d.ts.map +1 -0
  93. package/dist/scripts/iterate.js +260 -0
  94. package/dist/scripts/iterate.js.map +1 -0
  95. package/dist/scripts/orchestrator.d.ts +40 -0
  96. package/dist/scripts/orchestrator.d.ts.map +1 -0
  97. package/dist/scripts/orchestrator.js +378 -0
  98. package/dist/scripts/orchestrator.js.map +1 -0
  99. package/dist/scripts/scaffold.d.ts +8 -0
  100. package/dist/scripts/scaffold.d.ts.map +1 -0
  101. package/dist/scripts/scaffold.js +279 -0
  102. package/dist/scripts/scaffold.js.map +1 -0
  103. package/dist/scripts/validate.d.ts +11 -0
  104. package/dist/scripts/validate.d.ts.map +1 -0
  105. package/dist/scripts/validate.js +472 -0
  106. package/dist/scripts/validate.js.map +1 -0
  107. package/docs/superpowers/plans/2026-04-07-autonomous-iteration-plan.md +1123 -0
  108. package/docs/superpowers/plans/2026-04-07-autonomous-iteration-sub-agent-batches.md +1923 -0
  109. package/docs/superpowers/plans/2026-04-07-autonomous-workflow-sub-skill-plan.md +1505 -0
  110. package/docs/superpowers/plans/2026-04-07-benchmarking-multi-agent-plan.md +854 -0
  111. package/docs/superpowers/plans/2026-04-07-workspace-builder-logic-plan.md +1426 -0
  112. package/docs/superpowers/plans/2026-04-07-workspace-maxxing-plan.md +1299 -0
  113. package/docs/superpowers/plans/2026-04-08-session-294c-subagent-invocation-plan.md +320 -0
  114. package/docs/superpowers/plans/2026-04-08-workflow-prompt-hardening-plan.md +1025 -0
  115. package/docs/superpowers/plans/2026-04-12-workspace-agent-creation-plan.md +992 -0
  116. package/docs/superpowers/specs/2026-04-07-autonomous-iteration-design.md +214 -0
  117. package/docs/superpowers/specs/2026-04-07-autonomous-iteration-sub-agent-batches-design.md +188 -0
  118. package/docs/superpowers/specs/2026-04-07-autonomous-workflow-sub-skill-design.md +137 -0
  119. package/docs/superpowers/specs/2026-04-07-benchmarking-multi-agent-design.md +105 -0
  120. package/docs/superpowers/specs/2026-04-07-workspace-builder-logic-design.md +179 -0
  121. package/docs/superpowers/specs/2026-04-07-workspace-maxxing-design.md +227 -0
  122. package/docs/superpowers/specs/2026-04-08-session-294c-subagent-invocation-design.md +265 -0
  123. package/docs/superpowers/specs/2026-04-08-workflow-prompt-hardening-design.md +146 -0
  124. package/docs/superpowers/specs/2026-04-12-workspace-agent-creation-design.md +239 -0
  125. package/jest.config.js +8 -0
  126. package/package.json +32 -0
  127. package/src/agent-creator.ts +180 -0
  128. package/src/agent-iterator.ts +397 -0
  129. package/src/index.ts +189 -0
  130. package/src/install.ts +105 -0
  131. package/src/platforms/claude.ts +40 -0
  132. package/src/platforms/copilot.ts +50 -0
  133. package/src/platforms/gemini.ts +55 -0
  134. package/src/platforms/index.ts +45 -0
  135. package/src/platforms/opencode.ts +41 -0
  136. package/src/scripts/benchmark.ts +171 -0
  137. package/src/scripts/dispatch.ts +473 -0
  138. package/src/scripts/generate-tests.ts +112 -0
  139. package/src/scripts/install-tool.ts +82 -0
  140. package/src/scripts/iterate.ts +271 -0
  141. package/src/scripts/orchestrator.ts +539 -0
  142. package/src/scripts/scaffold.ts +282 -0
  143. package/src/scripts/validate.ts +516 -0
  144. package/templates/.workspace-templates/CONTEXT.md +44 -0
  145. package/templates/.workspace-templates/SYSTEM.md +44 -0
  146. package/templates/.workspace-templates/references/anti-patterns.md +16 -0
  147. package/templates/.workspace-templates/references/iron-laws.md +26 -0
  148. package/templates/.workspace-templates/references/reporting-format.md +52 -0
  149. package/templates/.workspace-templates/scripts/benchmark.ts +171 -0
  150. package/templates/.workspace-templates/scripts/dispatch.ts +473 -0
  151. package/templates/.workspace-templates/scripts/generate-tests.ts +158 -0
  152. package/templates/.workspace-templates/scripts/install-tool.ts +82 -0
  153. package/templates/.workspace-templates/scripts/iterate.ts +265 -0
  154. package/templates/.workspace-templates/scripts/orchestrator.ts +539 -0
  155. package/templates/.workspace-templates/scripts/scaffold.ts +282 -0
  156. package/templates/.workspace-templates/scripts/validate.ts +452 -0
  157. package/templates/.workspace-templates/skills/architecture/SKILL.md +95 -0
  158. package/templates/.workspace-templates/skills/fixer/SKILL.md +109 -0
  159. package/templates/.workspace-templates/skills/iteration/SKILL.md +89 -0
  160. package/templates/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
  161. package/templates/.workspace-templates/skills/research/SKILL.md +94 -0
  162. package/templates/.workspace-templates/skills/testing/SKILL.md +89 -0
  163. package/templates/.workspace-templates/skills/tooling/SKILL.md +87 -0
  164. package/templates/.workspace-templates/skills/validation/SKILL.md +103 -0
  165. package/templates/.workspace-templates/skills/worker/SKILL.md +79 -0
  166. package/templates/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
  167. package/templates/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
  168. package/templates/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
  169. package/templates/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
  170. package/templates/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
  171. package/templates/.workspace-templates/workspace/README.md +14 -0
  172. package/templates/SKILL.md +347 -0
  173. package/tests/benchmark.test.ts +158 -0
  174. package/tests/cli.test.ts +109 -0
  175. package/tests/dispatch-parallel.test.ts +124 -0
  176. package/tests/dispatch.test.ts +218 -0
  177. package/tests/fixer-skill.test.ts +203 -0
  178. package/tests/generate-tests.test.ts +101 -0
  179. package/tests/install-tool.test.ts +141 -0
  180. package/tests/install.test.ts +144 -0
  181. package/tests/integration.test.ts +324 -0
  182. package/tests/iterate.test.ts +219 -0
  183. package/tests/orchestrator.test.ts +710 -0
  184. package/tests/scaffold.test.ts +238 -0
  185. package/tests/templates-enhanced.test.ts +208 -0
  186. package/tests/templates.test.ts +219 -0
  187. package/tests/validate.test.ts +421 -0
  188. package/tests/validation-enhanced.test.ts +303 -0
  189. package/tests/worker-skill.test.ts +88 -0
  190. package/tsconfig.json +19 -0
  191. package/workspace/00-meta/CONTEXT.md +3 -0
  192. package/workspace/00-meta/execution-log.md +17 -0
  193. package/workspace/00-meta/tools.md +11 -0
  194. package/workspace/01-input/CONTEXT.md +27 -0
  195. package/workspace/CONTEXT.md +35 -0
  196. package/workspace/README.md +14 -0
  197. package/workspace/SYSTEM.md +36 -0
  198. package/workspace-maxxing-0.1.0.tgz +0 -0
@@ -0,0 +1,29 @@
1
+ # 01-input CONTEXT.md
2
+
3
+ ## Purpose
4
+ Collect, validate, and normalize workflow inputs.
5
+
6
+ ## Inputs
7
+ - Raw user input and source artifacts
8
+ - Intake constraints and acceptance boundaries
9
+
10
+ ## Outputs
11
+ - Validated input package ready for processing
12
+ - Input assumptions and constraints summary
13
+ - Markdown workflow artifacts only (no product source code)
14
+
15
+ ## Dependencies
16
+ - None (entry stage)
17
+
18
+ ## Required Evidence
19
+ - Update 00-meta/execution-log.md when 01-input is complete
20
+ - Link the markdown artifacts produced in this stage
21
+
22
+ ## Completion Criteria
23
+ - Inputs are validated and normalized
24
+ - Required fields are present
25
+ - Handoff package is complete
26
+ - Stage artifacts remain markdown-first and workflow-scoped
27
+
28
+ ## Handoff
29
+ - Hand off validated package to 02-process
@@ -0,0 +1,29 @@
1
+ # 02-process CONTEXT.md
2
+
3
+ ## Purpose
4
+ Transform validated inputs into structured working outputs.
5
+
6
+ ## Inputs
7
+ - Validated package from 01-input
8
+ - Processing requirements and quality constraints
9
+
10
+ ## Outputs
11
+ - Processed artifacts ready for final delivery
12
+ - Decision log for key transformations
13
+ - Markdown workflow artifacts only (no product source code)
14
+
15
+ ## Dependencies
16
+ - 01-input
17
+
18
+ ## Required Evidence
19
+ - Update 00-meta/execution-log.md when 02-process is complete
20
+ - Link the markdown artifacts produced in this stage
21
+
22
+ ## Completion Criteria
23
+ - Required transformations are complete
24
+ - Output structure is consistent and reviewable
25
+ - Handoff package is ready for output stage
26
+ - Stage artifacts remain markdown-first and workflow-scoped
27
+
28
+ ## Handoff
29
+ - Hand off processed artifacts to 03-output
@@ -0,0 +1,29 @@
1
+ # 03-output CONTEXT.md
2
+
3
+ ## Purpose
4
+ Assemble, finalize, and deliver workflow outputs.
5
+
6
+ ## Inputs
7
+ - Processed artifacts from 02-process
8
+ - Delivery requirements and formatting rules
9
+
10
+ ## Outputs
11
+ - Final deliverable package
12
+ - Delivery notes and validation summary
13
+ - Markdown workflow artifacts only (no product source code)
14
+
15
+ ## Dependencies
16
+ - 02-process
17
+
18
+ ## Required Evidence
19
+ - Update 00-meta/execution-log.md when 03-output is complete
20
+ - Link the markdown artifacts produced in this stage
21
+
22
+ ## Completion Criteria
23
+ - Final outputs satisfy delivery requirements
24
+ - Validation summary is complete
25
+ - Artifacts are ready for handoff to user
26
+ - Stage artifacts remain markdown-first and workflow-scoped
27
+
28
+ ## Handoff
29
+ - Final output stage: deliver package and close the workflow loop
@@ -0,0 +1,14 @@
1
+ # Workspace README
2
+
3
+ This is an example workspace built using the ICM methodology.
4
+
5
+ ## Structure
6
+ - 00-meta: Metadata and configuration
7
+ - 01-input: Input collection
8
+ - 02-process: Processing logic
9
+ - 03-output: Output generation
10
+
11
+ ## Usage
12
+ Load SYSTEM.md first, then use CONTEXT.md to route to the appropriate workspace section.
13
+ Keep stage outputs as markdown workflow artifacts rather than product source code.
14
+ Update 00-meta/execution-log.md as each stage is completed to preserve sequential execution.
@@ -0,0 +1,347 @@
1
+ ---
2
+ name: workspace-maxxing
3
+ description: "Autonomously creates, validates, and improves ICM-compliant workspaces using batched parallel sub-agents. Use when user asks to 'build a workspace', 'create a workflow', 'automate a process', 'improve this workspace', 'validate this workspace', 'iterate on this workspace', or 'run test cases'."
4
+ ---
5
+
6
+ # Workspace-Maxxing Skill
7
+
8
+ ## Overview
9
+
10
+ Autonomous workflow system that creates, validates, and improves ICM-compliant workspaces through phased execution, batched parallel sub-agent iteration, and condition-driven improvement loops.
11
+
12
+ ## When to Use
13
+
14
+ - User asks to build, create, or automate a workflow
15
+ - User asks to improve, validate, or iterate on an existing workspace
16
+ - User asks for workspace architecture or structure design
17
+ - User asks to assess or install tools for a workspace
18
+ - User asks to run test cases against a workspace
19
+ - **User asks to create an agent for a specific task** (e.g., "create a daily digest agent", "make a news aggregator agent")
20
+
21
+ ## Agent Creation Workflow
22
+
23
+ When you invoke `workspace-maxxing` with a request to create an agent (e.g., "create a daily digest agent"), follow this flow:
24
+
25
+ ```
26
+ 1. Parse the request to extract the agent purpose (e.g., "Daily Digest")
27
+ 2. Create ICM workspace structure (SYSTEM.md, CONTEXT.md, stage folders)
28
+ 3. Create invokable agent in .agents/skills/@<purpose>/
29
+ 4. Run self-improvement loop on the agent
30
+ - Generate test cases (edge, empty, varied inputs)
31
+ - Validate agent handling
32
+ - Score robustness (0-100)
33
+ - If score < 85: improve prompts, retry
34
+ - Repeat until score >= 85 or max iterations (3)
35
+ 5. Install agent for platform (OpenCode/Claude/Copilot/Gemini)
36
+ 6. Deliver workspace with robust agent
37
+ ```
38
+
39
+ ### Agent Creation Example
40
+
41
+ User: "Create a daily digest agent"
42
+
43
+ ```
44
+ -> Extract purpose: "Daily Digest"
45
+ -> Create workspace with stages: 01-input, 02-process, 03-output
46
+ -> Create agent: @daily-digest in .agents/skills/@daily-digest/
47
+ -> Run iteration:
48
+ - Test: empty input -> fix prompts
49
+ - Test: special chars -> fix prompts
50
+ - Test: normal input -> validate
51
+ - Score >= 85? Yes -> deliver
52
+ -> Agent is ready to invoke with @daily-digest
53
+ ```
54
+
55
+ ## When Not to Use
56
+
57
+ - Simple file creation or editing (use direct file operations)
58
+ - Questions about ICM methodology (answer directly)
59
+ - Non-workspace tasks (check for other applicable skills first)
60
+
61
+ ## The Iron Law
62
+
63
+ NO BUILD WITHOUT PLAN
64
+ NO PLAN WITHOUT RESEARCH
65
+ NO IMPROVEMENT WITHOUT VALIDATION
66
+ NO COMPLETION CLAIM WITHOUT VERIFICATION
67
+ NO PRODUCT IMPLEMENTATION INSIDE WORKSPACE BUILDING MODE
68
+ NO STAGE SKIPPING ACROSS NUMBERED WORKFLOW FOLDERS
69
+
70
+ ## Scope Guardrails
71
+
72
+ - This skill builds an ICM workflow workspace, not the end-product application.
73
+ - Keep outputs as file-structured markdown workflow artifacts in numbered stage folders.
74
+ - Do not generate backend/frontend/runtime code for the target domain while running this skill.
75
+ - If a user asks for product implementation details, capture them as workflow requirements and continue building the workspace structure.
76
+
77
+ ## Sequential Enforcement
78
+
79
+ - Follow numbered stage folders in strict order; do not jump ahead.
80
+ - Use 00-meta/execution-log.md as the source of truth for stage completion state.
81
+ - A later stage is blocked until the previous stage is checked complete with evidence notes.
82
+
83
+ ## Hybrid Flow
84
+
85
+ ```
86
+ Phase 1: RESEARCH (dispatch research sub-skill)
87
+ ->
88
+ Phase 2: ARCHITECTURE (dispatch architecture sub-skill)
89
+ ->
90
+ Phase 3: BUILD (use scaffold.ts script)
91
+ ->
92
+ Phase 4: VALIDATE (dispatch validation sub-skill)
93
+ ->
94
+ Phase 5: AUTONOMOUS ITERATION (use orchestrator.ts)
95
+ - Generate test cases
96
+ - Split into batches
97
+ - Dispatch workers in parallel per batch
98
+ - Validate batch results
99
+ - If score < threshold and failing test cases exist -> dispatch fixer sub-agents -> re-validate
100
+ - If score < threshold and no actionable failing test cases exist -> failed/escalated outcome
101
+ - Next batch or complete
102
+ ->
103
+ Phase 6: DELIVER
104
+ ```
105
+
106
+ ## Autonomous Iteration Workflow
107
+
108
+ The orchestrator manages batched parallel sub-agent execution:
109
+
110
+ ```bash
111
+ node scripts/orchestrator.ts --workspace ./workspace --batch-size 3 --score-threshold 85 --subagent-runner "<your-runner-command>"
112
+ ```
113
+
114
+ **Flow:**
115
+ 1. Generate test cases from workspace stages
116
+ 2. Split into batches (default 3 per batch)
117
+ 3. Dispatch worker sub-agents in parallel for each batch (external runner mode)
118
+ 4. Validate batch outputs with benchmark scoring
119
+ 5. If batch score < threshold and failing test cases exist -> dispatch fixer sub-agents -> re-validate (max 3 retries)
120
+ 6. If score remains < threshold and no actionable failing test cases exist -> mark batch failed/escalated
121
+ 7. Move to next batch or write summary
122
+
123
+ **Options:**
124
+ - `--batch-size <n>` - Test cases per batch (default: 3)
125
+ - `--score-threshold <n>` - Minimum batch score to pass (default: 85)
126
+ - `--max-fix-retries <n>` - Max fix attempts per batch (default: 3)
127
+ - `--worker-timeout <s>` - Worker timeout in seconds (default: 300)
128
+ - `--subagent-runner <command>` - External command template used to execute worker/fixer sub-agents; supports placeholders `{skill}`, `{workspace}`, `{batchId}`, `{testCaseId}`
129
+
130
+ ## Sub-Agent Iteration Contract
131
+
132
+ - True sub-agent mode requires `--subagent-runner` (or `WORKSPACE_MAXXING_SUBAGENT_RUNNER`) so worker/fixer test cases execute outside the orchestrator process.
133
+ - Worker/fixer execution MUST fail fast when no runner command is configured.
134
+ - Batch artifacts must include generated test cases, per-test-case reports, and summary evidence under `.agents/iteration/`.
135
+
136
+ ## Sub-Agent Runner Contract
137
+
138
+ - Worker/fixer loops are external-runner-only in strict mode.
139
+ - The runner command template must support placeholders: `{skill}`, `{workspace}`, `{batchId}`, `{testCaseId}`.
140
+ - Expected runner output is JSON with `{skill, status, timestamp, findings, recommendations, metrics, nextSkill}`.
141
+ - Non-JSON runner output is treated as a runner contract failure for worker/fixer execution.
142
+ - Use telemetry artifacts under `.agents/iteration/runs/` to diagnose command/rendering or payload issues.
143
+
144
+ ## Sub-Skill Dispatch
145
+
146
+ | Condition | Sub-Skill | Command |
147
+ |-----------|-----------|---------|
148
+ | Starting new workflow | `research` | `node scripts/dispatch.ts --skill research --workspace ./workspace` |
149
+ | After research complete | `architecture` | `node scripts/dispatch.ts --skill architecture --workspace ./workspace` |
150
+ | After architecture approved | (use scaffold.ts) | `node scripts/scaffold.ts --name "<name>" --stages "<stages>" --output ./workspace` |
151
+ | After building | `validation` | `node scripts/dispatch.ts --skill validation --workspace ./workspace` |
152
+ | Running autonomous iteration | (use orchestrator.ts) | `node scripts/orchestrator.ts --workspace ./workspace --subagent-runner "<runner>"` |
153
+ | Worker execution | `worker` | `node scripts/dispatch.ts --skill worker --workspace ./workspace --batch-id <N> --runner-command "<runner {skill} {workspace} {batchId} {testCaseId}>"` |
154
+ | Fix loop | `fixer` | `node scripts/dispatch.ts --skill fixer --workspace ./workspace --batch-id <N> --runner-command "<runner {skill} {workspace} {batchId} {testCaseId}>"` |
155
+ | Manual condition loop only (not orchestrator batch loop): score < 85 due to prompt quality | `prompt-engineering` | `node scripts/dispatch.ts --skill prompt-engineering --workspace ./workspace` |
156
+ | Manual condition loop only (not orchestrator batch loop): no tests exist | `testing` | `node scripts/dispatch.ts --skill testing --workspace ./workspace` |
157
+ | Manual condition loop only (not orchestrator batch loop): score plateaued across full runs | `iteration` | `node scripts/dispatch.ts --skill iteration --workspace ./workspace` |
158
+ | Manual condition loop only (not orchestrator batch loop): tools missing | `tooling` | `node scripts/dispatch.ts --skill tooling --workspace ./workspace` |
159
+
160
+ ## Available Scripts
161
+
162
+ ### orchestrator.ts - Autonomous Batch Iteration
163
+
164
+ Runs the full batched parallel sub-agent workflow.
165
+
166
+ ```bash
167
+ node scripts/orchestrator.ts --workspace ./workspace --batch-size 3 --score-threshold 85 --subagent-runner "<runner>"
168
+ ```
169
+
170
+ ### scaffold.ts - Generate ICM Workspace
171
+
172
+ Creates a complete ICM workspace structure from a plan.
173
+
174
+ ```bash
175
+ node scripts/scaffold.ts --name "research" --stages "01-research,02-analysis,03-report" --output ./workspace
176
+ ```
177
+
178
+ ### validate.ts - Check ICM Compliance
179
+
180
+ Validates a workspace against ICM rules.
181
+
182
+ ```bash
183
+ node scripts/validate.ts --workspace ./workspace
184
+ ```
185
+
186
+ ### install-tool.ts - Install Packages
187
+
188
+ Installs a tool and updates the workspace inventory.
189
+
190
+ ```bash
191
+ node scripts/install-tool.ts --tool "pdf-lib" --manager npm --workspace ./workspace
192
+ ```
193
+
194
+ ### iterate.ts - Single-Workspace Iteration (legacy)
195
+
196
+ Runs a 3-pass improvement loop. Use orchestrator.ts for batched parallel iteration.
197
+
198
+ ```bash
199
+ node scripts/iterate.ts --workspace ./workspace --max-retries 3
200
+ ```
201
+
202
+ ### generate-tests.ts - Generate Test Cases
203
+
204
+ Creates test cases for each stage (sample, edge-case, empty).
205
+
206
+ ```bash
207
+ node scripts/generate-tests.ts --workspace ./workspace --output ./tests.json
208
+ ```
209
+
210
+ ### benchmark.ts - Weighted Benchmark Scoring
211
+
212
+ Runs weighted benchmark scoring on a workspace.
213
+
214
+ ```bash
215
+ node scripts/benchmark.ts --workspace ./workspace
216
+ ```
217
+
218
+ ### dispatch.ts - Sub-Skill Dispatcher
219
+
220
+ Loads and executes sub-skill workflows. Supports parallel dispatch.
221
+
222
+ ```bash
223
+ node scripts/dispatch.ts --skill <name> --workspace ./workspace [--batch-id <N>] [--parallel --invocations <path>]
224
+ ```
225
+
226
+ ## Anti-Rationalization Table
227
+
228
+ | Thought | Reality |
229
+ |---------|---------|
230
+ | "This workspace looks good enough" | Good enough is the enemy of excellent. Run validation. |
231
+ | "I'll skip research and go straight to building" | Building without research produces generic, non-optimal workspaces. |
232
+ | "The user didn't ask for tests" | Autonomous workflows require self-verification. Tests are mandatory. |
233
+ | "I'll fix this later" | Later never comes. Fix it now or escalate. |
234
+ | "This sub-skill doesn't apply here" | If there's a 1% chance it applies, dispatch it. |
235
+ | "The score is fine" | Fine is not good. Target >= 85. |
236
+ | "I already validated this" | Validation is a snapshot. Re-validate after every change. |
237
+ | "I'll do all phases at once" | Phases exist for a reason. Complete each before moving to the next. |
238
+
239
+ ## Integration
240
+
241
+ - Sub-skills live in `skills/` directory, loaded via dispatch.ts
242
+ - Shared references in `references/` directory (anti-patterns, reporting-format, iron-laws)
243
+ - All sub-skills return structured JSON reports
244
+ - Orchestrator manages batch lifecycle with fix loops
245
+ - Condition loop continues until score >= 85 AND all validations pass
246
+ - Escalate to a human if stuck after 3 iteration attempts
247
+
248
+ ## ICM Rules
249
+ - Canonical sources: each fact lives in exactly one file
250
+ - One-way dependencies only: A -> B, never B -> A
251
+ - Selective loading: route to sections, not whole files
252
+ - Numbered folders for workflow stages
253
+
254
+ ## Output Format
255
+ - workspace/ - the built markdown-first workflow workspace
256
+ - .agents/skills/<workspace-name>/ - installable skill
257
+ - USAGE.md - how to use this workspace in future sessions
258
+ - .agents/iteration/summary.json - autonomous iteration results
259
+
260
+ ## Creating Workspaces with Invokable Agents
261
+
262
+ The workspace-maxxing skill can now create both the workspace folder structure AND an invokable agent that can be called with `@` in the workspace.
263
+
264
+ ### CLI Commands
265
+
266
+ ```bash
267
+ # Create workspace WITH agent (default)
268
+ npx workspace-maxxing --create-workspace --workspace-name "Daily Digest" --stages "01-input,02-process,03-output"
269
+
270
+ # Create workspace WITHOUT agent (backward compatible)
271
+ npx workspace-maxxing --create-workspace --workspace-name "My Workflow" --no-agent
272
+
273
+ # Custom agent name
274
+ npx workspace-maxxing --create-workspace --workspace-name "AI News" --agent-name "@news-agent"
275
+
276
+ # Custom iteration settings
277
+ npx workspace-maxxing --create-workspace --workspace-name "My Workflow" --threshold 90 --max-iterations 5
278
+ ```
279
+
280
+ ### Options
281
+
282
+ | Option | Default | Description |
283
+ |--------|---------|-------------|
284
+ | `--create-workspace` | - | Enable workspace creation mode |
285
+ | `--workspace-name` | "My Workspace" | Name of the workspace |
286
+ | `--stages` | "01-input,02-process,03-output" | Comma-separated stage names |
287
+ | `--agent-name` | auto-generated (@workspace-name) | Custom agent name |
288
+ | `--no-agent` | false | Create workspace without agent |
289
+ | `--threshold` | 85 | Robustness threshold for agent iteration |
290
+ | `--max-iterations` | 3 | Max improvement cycles |
291
+
292
+ ### What Gets Created
293
+
294
+ When you run with `--create-workspace`:
295
+
296
+ 1. **ICM Workspace** - Folder structure with SYSTEM.md, CONTEXT.md, stage folders
297
+ 2. **Invokable Agent** - Stored in `.agents/skills/@<name>/`
298
+ 3. **Self-Improvement** - Agent runs through the iteration loop until robustness >= threshold or max iterations is reached
299
+
300
+ ### Agent Structure
301
+
302
+ ```
303
+ workspace/
304
+ ├── .agents/
305
+ │   └── skills/
306
+ │       └── @<name>/ # The invokable agent
307
+ │           ├── SKILL.md
308
+ │           ├── config.json
309
+ │           ├── prompts/
310
+ │           │   ├── system.md
311
+ │           │   └── tasks/
312
+ │           ├── tools/
313
+ │           └── tests/
314
+ ├── 01-input/
315
+ ├── 02-process/
316
+ ├── 03-output/
317
+ ├── SYSTEM.md
318
+ └── CONTEXT.md
319
+ ```
320
+
321
+ ### Invoking the Agent
322
+
323
+ After the workspace is created, use `@` followed by the agent name:
324
+
325
+ - **OpenCode**: `@daily-digest`
326
+ - **Claude Code**: Via `.claude/skills/` directory
327
+ - **Copilot**: Via `.github/copilot-instructions/`
328
+ - **Gemini**: Via `.gemini/skills/` directory
329
+
330
+ ### Agent Self-Improvement
331
+
332
+ When the agent is created, it runs through an iteration loop:
333
+
334
+ 1. **Generate test cases** - Edge cases, empty states, varied inputs
335
+ 2. **Validate** - Check agent handles each case properly
336
+ 3. **Score** - Compute robustness score (0-100)
337
+ 4. **Improve** - If score < threshold, update prompts to fix issues
338
+ 5. **Repeat** - Until score >= threshold or max iterations reached
339
+
340
+ This ensures the delivered agent is robust for real-world use.
341
+
342
+ ### Backward Compatibility
343
+
344
+ Existing workspace-maxxing behavior is unchanged:
345
+ - `--opencode`, `--claude`, `--copilot`, `--gemini` still install the skill
346
+ - Using `--no-agent` creates a workspace only (no agent)
347
+ - Default behavior (without `--no-agent`) includes agent creation
@@ -0,0 +1,158 @@
1
+ import * as fs from 'fs';
2
+ import * as path from 'path';
3
+ import * as os from 'os';
4
+ import { calculateBenchmark, formatBenchmarkTable, saveBenchmarkReport, BenchmarkResult } from '../src/scripts/benchmark';
5
+
6
+ describe('calculateBenchmark', () => { // Suite for calculateBenchmark: each case builds stage folders in a fresh temp directory and inspects the returned result.
7
+ let tempDir: string; // Scratch workspace root; reassigned before every test.
8
+
9
+ beforeEach(() => {
10
+ tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'benchmark-test-')); // Unique directory under the OS temp dir, so cases do not share files.
11
+ });
12
+
13
+ afterEach(() => {
14
+ fs.rmSync(tempDir, { recursive: true, force: true }); // force: true keeps cleanup from throwing if the directory is already gone.
15
+ });
16
+
17
+ function createStage(ws: string, name: string, content: string) { // Helper: creates folder name inside ws and writes content to its CONTEXT.md.
18
+ const dir = path.join(ws, name);
19
+ fs.mkdirSync(dir, { recursive: true });
20
+ fs.writeFileSync(path.join(dir, 'CONTEXT.md'), content);
21
+ }
22
+
23
+ it('returns weighted scores for a workspace with all stages', () => {
24
+ createStage(tempDir, '01-ideation', '## Purpose\nTest\n\n## Inputs\nNone\n\n## Outputs\nTest\n\n## Dependencies\nNone'); // Each stage file carries Purpose, Inputs, Outputs and Dependencies headings.
25
+ createStage(tempDir, '02-research', '## Purpose\nTest\n\n## Inputs\nNone\n\n## Outputs\nTest\n\n## Dependencies\nNone');
26
+ createStage(tempDir, '03-architecture', '## Purpose\nTest\n\n## Inputs\nNone\n\n## Outputs\nTest\n\n## Dependencies\nNone');
27
+
28
+ const result = calculateBenchmark(tempDir);
29
+
30
+ expect(result.stages).toHaveLength(3); // Stages are asserted in folder-name order with weights 1.5, 1.3 and 1.2.
31
+ expect(result.stages[0].name).toBe('01-ideation');
32
+ expect(result.stages[0].weight).toBe(1.5);
33
+ expect(result.stages[1].name).toBe('02-research');
34
+ expect(result.stages[1].weight).toBe(1.3);
35
+ expect(result.stages[2].name).toBe('03-architecture');
36
+ expect(result.stages[2].weight).toBe(1.2);
37
+ });
38
+
39
+ it('excludes missing stages from calculation', () => {
40
+ createStage(tempDir, '01-ideation', '## Purpose\nTest\n\n## Inputs\nNone\n\n## Outputs\nTest\n\n## Dependencies\nNone'); // Only one stage exists on disk.
41
+
42
+ const result = calculateBenchmark(tempDir);
43
+
44
+ expect(result.stages).toHaveLength(1);
45
+ expect(result.stages[0].name).toBe('01-ideation');
46
+ });
47
+
48
+ it('normalizes final score to 0-100', () => {
49
+ createStage(tempDir, '01-ideation', '## Purpose\nTest\n\n## Inputs\nNone\n\n## Outputs\nTest\n\n## Dependencies\nNone');
50
+
51
+ const result = calculateBenchmark(tempDir);
52
+
53
+ expect(result.weightedScore).toBeGreaterThanOrEqual(0); // Range check only; the exact score is not pinned.
54
+ expect(result.weightedScore).toBeLessThanOrEqual(100);
55
+ });
56
+
57
+ it('returns empty stages for workspace with no numbered folders', () => {
58
+ fs.mkdirSync(path.join(tempDir, '00-meta'), { recursive: true }); // Only a 00-meta folder exists, with no CONTEXT.md. NOTE(review): 00-meta is itself number-prefixed, so the rule excluding it lives in calculateBenchmark; confirm.
59
+
60
+ const result = calculateBenchmark(tempDir);
61
+
62
+ expect(result.stages).toHaveLength(0);
63
+ expect(result.weightedScore).toBe(0);
64
+ });
65
+
66
+ it('returns empty stages for non-existent workspace', () => {
67
+ const result = calculateBenchmark('/non-existent-path-xyz'); // Path is assumed not to exist on the test machine; the call is expected to return an empty result.
68
+
69
+ expect(result.stages).toHaveLength(0);
70
+ expect(result.weightedScore).toBe(0);
71
+ });
72
+
73
+ it('generates fix suggestions for incomplete stages', () => {
74
+ createStage(tempDir, '01-ideation', 'minimal content'); // CONTEXT.md without any of the section headings used in the other cases.
75
+
76
+ const result = calculateBenchmark(tempDir);
77
+
78
+ expect(result.fixSuggestions.length).toBeGreaterThan(0);
79
+ expect(result.improvementPotential).toBe(true);
80
+ });
81
+ });
82
+
83
+ describe('formatBenchmarkTable', () => { // Suite for formatBenchmarkTable: passes hand-built BenchmarkResult fixtures and checks substrings of the returned value.
84
+ it('formats a benchmark result as a console table', () => {
85
+ const data: BenchmarkResult = { // Fixture values are arbitrary; nothing here is computed from stage files.
86
+ workspace: 'test-ws',
87
+ agent: 'opencode',
88
+ timestamp: '2026-04-07T00:00:00Z',
89
+ rawScore: 72,
90
+ weightedScore: 78,
91
+ stages: [
92
+ { name: '01-ideation', raw: 85, weight: 1.5, weighted: 95 },
93
+ { name: '02-research', raw: 60, weight: 1.3, weighted: 58 },
94
+ ],
95
+ fixSuggestions: ['Add research sources'],
96
+ improvementPotential: true,
97
+ };
98
+
99
+ const table = formatBenchmarkTable(data);
100
+
101
+ expect(table).toContain('01-ideation');
102
+ expect(table).toContain('02-research');
103
+ expect(table).toContain('78'); // Matches the weightedScore of the fixture.
104
+ expect(table).toContain('TOTAL');
105
+ });
106
+
107
+ it('handles empty stages gracefully', () => {
108
+ const data: BenchmarkResult = {
109
+ workspace: 'test-ws',
110
+ agent: 'opencode',
111
+ timestamp: '2026-04-07T00:00:00Z',
112
+ rawScore: 0,
113
+ weightedScore: 0,
114
+ stages: [],
115
+ fixSuggestions: [],
116
+ improvementPotential: false,
117
+ };
118
+
119
+ const table = formatBenchmarkTable(data);
120
+
121
+ expect(table).toContain('0'); // NOTE(review): weak check; any 0 character anywhere in the output satisfies it.
122
+ });
123
+ });
124
+
125
+ describe('saveBenchmarkReport', () => { // Suite for saveBenchmarkReport: writes a report into a fresh temp directory and reads it back.
126
+ let tempDir: string; // Scratch directory passed as the first argument; reassigned before every test.
127
+
128
+ beforeEach(() => {
129
+ tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'benchmark-report-')); // Unique directory under the OS temp dir.
130
+ });
131
+
132
+ afterEach(() => {
133
+ fs.rmSync(tempDir, { recursive: true, force: true }); // force: true keeps cleanup from throwing if the directory is already gone.
134
+ });
135
+
136
+ it('saves benchmark report to .workspace-benchmarks directory', () => {
137
+ const data: BenchmarkResult = {
138
+ workspace: 'test-ws',
139
+ agent: 'opencode',
140
+ timestamp: '2026-04-07T00:00:00Z',
141
+ rawScore: 72,
142
+ weightedScore: 78,
143
+ stages: [],
144
+ fixSuggestions: [],
145
+ improvementPotential: false,
146
+ };
147
+
148
+ const filePath = saveBenchmarkReport(tempDir, data); // The return value is treated as the path of the written report.
149
+
150
+ expect(filePath).toContain('.workspace-benchmarks');
151
+ expect(filePath).toContain('test-ws-'); // File name is expected to start with the workspace name followed by a dash.
152
+ expect(fs.existsSync(filePath)).toBe(true);
153
+
154
+ const saved = JSON.parse(fs.readFileSync(filePath, 'utf-8')); // Read the report back as JSON to verify the persisted fields.
155
+ expect(saved.weightedScore).toBe(78);
156
+ expect(saved.workspace).toBe('test-ws');
157
+ });
158
+ });