npm - workspace-maxxing - Versions diffs - 0.1.0 - Mend

workspace-maxxing 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (198) hide show

package/.agents/skills/workspace-maxxing/.workspace-templates/CONTEXT.md +44 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/SYSTEM.md +44 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/references/anti-patterns.md +16 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/references/iron-laws.md +26 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/references/reporting-format.md +52 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/benchmark.ts +171 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/dispatch.ts +473 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/generate-tests.ts +158 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/install-tool.ts +82 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/iterate.ts +265 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/orchestrator.ts +539 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/scaffold.ts +282 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/validate.ts +452 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/skills/architecture/SKILL.md +95 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/skills/fixer/SKILL.md +109 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/skills/iteration/SKILL.md +89 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/skills/research/SKILL.md +94 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/skills/testing/SKILL.md +89 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/skills/tooling/SKILL.md +87 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/skills/validation/SKILL.md +103 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/skills/worker/SKILL.md +79 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/README.md +14 -0
package/.agents/skills/workspace-maxxing/SKILL.md +312 -0
package/.agents/skills/workspace-maxxing/scripts/benchmark.ts +171 -0
package/.agents/skills/workspace-maxxing/scripts/dispatch.ts +473 -0
package/.agents/skills/workspace-maxxing/scripts/generate-tests.ts +158 -0
package/.agents/skills/workspace-maxxing/scripts/install-tool.ts +82 -0
package/.agents/skills/workspace-maxxing/scripts/iterate.ts +265 -0
package/.agents/skills/workspace-maxxing/scripts/orchestrator.ts +539 -0
package/.agents/skills/workspace-maxxing/scripts/scaffold.ts +282 -0
package/.agents/skills/workspace-maxxing/scripts/validate.ts +452 -0
package/README.md +144 -0
package/dist/agent-creator.d.ts +9 -0
package/dist/agent-creator.d.ts.map +1 -0
package/dist/agent-creator.js +199 -0
package/dist/agent-creator.js.map +1 -0
package/dist/agent-iterator.d.ts +38 -0
package/dist/agent-iterator.d.ts.map +1 -0
package/dist/agent-iterator.js +327 -0
package/dist/agent-iterator.js.map +1 -0
package/dist/index.d.ts +3 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +197 -0
package/dist/index.js.map +1 -0
package/dist/install.d.ts +18 -0
package/dist/install.d.ts.map +1 -0
package/dist/install.js +117 -0
package/dist/install.js.map +1 -0
package/dist/platforms/claude.d.ts +7 -0
package/dist/platforms/claude.d.ts.map +1 -0
package/dist/platforms/claude.js +70 -0
package/dist/platforms/claude.js.map +1 -0
package/dist/platforms/copilot.d.ts +7 -0
package/dist/platforms/copilot.d.ts.map +1 -0
package/dist/platforms/copilot.js +75 -0
package/dist/platforms/copilot.js.map +1 -0
package/dist/platforms/gemini.d.ts +7 -0
package/dist/platforms/gemini.d.ts.map +1 -0
package/dist/platforms/gemini.js +81 -0
package/dist/platforms/gemini.js.map +1 -0
package/dist/platforms/index.d.ts +8 -0
package/dist/platforms/index.d.ts.map +1 -0
package/dist/platforms/index.js +41 -0
package/dist/platforms/index.js.map +1 -0
package/dist/platforms/opencode.d.ts +7 -0
package/dist/platforms/opencode.d.ts.map +1 -0
package/dist/platforms/opencode.js +70 -0
package/dist/platforms/opencode.js.map +1 -0
package/dist/scripts/benchmark.d.ts +20 -0
package/dist/scripts/benchmark.d.ts.map +1 -0
package/dist/scripts/benchmark.js +170 -0
package/dist/scripts/benchmark.js.map +1 -0
package/dist/scripts/dispatch.d.ts +32 -0
package/dist/scripts/dispatch.d.ts.map +1 -0
package/dist/scripts/dispatch.js +386 -0
package/dist/scripts/dispatch.js.map +1 -0
package/dist/scripts/generate-tests.d.ts +11 -0
package/dist/scripts/generate-tests.d.ts.map +1 -0
package/dist/scripts/generate-tests.js +118 -0
package/dist/scripts/generate-tests.js.map +1 -0
package/dist/scripts/install-tool.d.ts +8 -0
package/dist/scripts/install-tool.d.ts.map +1 -0
package/dist/scripts/install-tool.js +98 -0
package/dist/scripts/install-tool.js.map +1 -0
package/dist/scripts/iterate.d.ts +44 -0
package/dist/scripts/iterate.d.ts.map +1 -0
package/dist/scripts/iterate.js +260 -0
package/dist/scripts/iterate.js.map +1 -0
package/dist/scripts/orchestrator.d.ts +40 -0
package/dist/scripts/orchestrator.d.ts.map +1 -0
package/dist/scripts/orchestrator.js +378 -0
package/dist/scripts/orchestrator.js.map +1 -0
package/dist/scripts/scaffold.d.ts +8 -0
package/dist/scripts/scaffold.d.ts.map +1 -0
package/dist/scripts/scaffold.js +279 -0
package/dist/scripts/scaffold.js.map +1 -0
package/dist/scripts/validate.d.ts +11 -0
package/dist/scripts/validate.d.ts.map +1 -0
package/dist/scripts/validate.js +472 -0
package/dist/scripts/validate.js.map +1 -0
package/docs/superpowers/plans/2026-04-07-autonomous-iteration-plan.md +1123 -0
package/docs/superpowers/plans/2026-04-07-autonomous-iteration-sub-agent-batches.md +1923 -0
package/docs/superpowers/plans/2026-04-07-autonomous-workflow-sub-skill-plan.md +1505 -0
package/docs/superpowers/plans/2026-04-07-benchmarking-multi-agent-plan.md +854 -0
package/docs/superpowers/plans/2026-04-07-workspace-builder-logic-plan.md +1426 -0
package/docs/superpowers/plans/2026-04-07-workspace-maxxing-plan.md +1299 -0
package/docs/superpowers/plans/2026-04-08-session-294c-subagent-invocation-plan.md +320 -0
package/docs/superpowers/plans/2026-04-08-workflow-prompt-hardening-plan.md +1025 -0
package/docs/superpowers/plans/2026-04-12-workspace-agent-creation-plan.md +992 -0
package/docs/superpowers/specs/2026-04-07-autonomous-iteration-design.md +214 -0
package/docs/superpowers/specs/2026-04-07-autonomous-iteration-sub-agent-batches-design.md +188 -0
package/docs/superpowers/specs/2026-04-07-autonomous-workflow-sub-skill-design.md +137 -0
package/docs/superpowers/specs/2026-04-07-benchmarking-multi-agent-design.md +105 -0
package/docs/superpowers/specs/2026-04-07-workspace-builder-logic-design.md +179 -0
package/docs/superpowers/specs/2026-04-07-workspace-maxxing-design.md +227 -0
package/docs/superpowers/specs/2026-04-08-session-294c-subagent-invocation-design.md +265 -0
package/docs/superpowers/specs/2026-04-08-workflow-prompt-hardening-design.md +146 -0
package/docs/superpowers/specs/2026-04-12-workspace-agent-creation-design.md +239 -0
package/jest.config.js +8 -0
package/package.json +32 -0
package/src/agent-creator.ts +180 -0
package/src/agent-iterator.ts +397 -0
package/src/index.ts +189 -0
package/src/install.ts +105 -0
package/src/platforms/claude.ts +40 -0
package/src/platforms/copilot.ts +50 -0
package/src/platforms/gemini.ts +55 -0
package/src/platforms/index.ts +45 -0
package/src/platforms/opencode.ts +41 -0
package/src/scripts/benchmark.ts +171 -0
package/src/scripts/dispatch.ts +473 -0
package/src/scripts/generate-tests.ts +112 -0
package/src/scripts/install-tool.ts +82 -0
package/src/scripts/iterate.ts +271 -0
package/src/scripts/orchestrator.ts +539 -0
package/src/scripts/scaffold.ts +282 -0
package/src/scripts/validate.ts +516 -0
package/templates/.workspace-templates/CONTEXT.md +44 -0
package/templates/.workspace-templates/SYSTEM.md +44 -0
package/templates/.workspace-templates/references/anti-patterns.md +16 -0
package/templates/.workspace-templates/references/iron-laws.md +26 -0
package/templates/.workspace-templates/references/reporting-format.md +52 -0
package/templates/.workspace-templates/scripts/benchmark.ts +171 -0
package/templates/.workspace-templates/scripts/dispatch.ts +473 -0
package/templates/.workspace-templates/scripts/generate-tests.ts +158 -0
package/templates/.workspace-templates/scripts/install-tool.ts +82 -0
package/templates/.workspace-templates/scripts/iterate.ts +265 -0
package/templates/.workspace-templates/scripts/orchestrator.ts +539 -0
package/templates/.workspace-templates/scripts/scaffold.ts +282 -0
package/templates/.workspace-templates/scripts/validate.ts +452 -0
package/templates/.workspace-templates/skills/architecture/SKILL.md +95 -0
package/templates/.workspace-templates/skills/fixer/SKILL.md +109 -0
package/templates/.workspace-templates/skills/iteration/SKILL.md +89 -0
package/templates/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
package/templates/.workspace-templates/skills/research/SKILL.md +94 -0
package/templates/.workspace-templates/skills/testing/SKILL.md +89 -0
package/templates/.workspace-templates/skills/tooling/SKILL.md +87 -0
package/templates/.workspace-templates/skills/validation/SKILL.md +103 -0
package/templates/.workspace-templates/skills/worker/SKILL.md +79 -0
package/templates/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
package/templates/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
package/templates/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
package/templates/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
package/templates/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
package/templates/.workspace-templates/workspace/README.md +14 -0
package/templates/SKILL.md +347 -0
package/tests/benchmark.test.ts +158 -0
package/tests/cli.test.ts +109 -0
package/tests/dispatch-parallel.test.ts +124 -0
package/tests/dispatch.test.ts +218 -0
package/tests/fixer-skill.test.ts +203 -0
package/tests/generate-tests.test.ts +101 -0
package/tests/install-tool.test.ts +141 -0
package/tests/install.test.ts +144 -0
package/tests/integration.test.ts +324 -0
package/tests/iterate.test.ts +219 -0
package/tests/orchestrator.test.ts +710 -0
package/tests/scaffold.test.ts +238 -0
package/tests/templates-enhanced.test.ts +208 -0
package/tests/templates.test.ts +219 -0
package/tests/validate.test.ts +421 -0
package/tests/validation-enhanced.test.ts +303 -0
package/tests/worker-skill.test.ts +88 -0
package/tsconfig.json +19 -0
package/workspace/00-meta/CONTEXT.md +3 -0
package/workspace/00-meta/execution-log.md +17 -0
package/workspace/00-meta/tools.md +11 -0
package/workspace/01-input/CONTEXT.md +27 -0
package/workspace/CONTEXT.md +35 -0
package/workspace/README.md +14 -0
package/workspace/SYSTEM.md +36 -0
package/workspace-maxxing-0.1.0.tgz +0 -0

package/templates/.workspace-templates/workspace/01-input/CONTEXT.md ADDED Viewed

@@ -0,0 +1,29 @@
+# 01-input CONTEXT.md
+## Purpose
+Collect, validate, and normalize workflow inputs.
+## Inputs
+- Raw user input and source artifacts
+- Intake constraints and acceptance boundaries
+## Outputs
+- Validated input package ready for processing
+- Input assumptions and constraints summary
+- Markdown workflow artifacts only (no product source code)
+## Dependencies
+- None (entry stage)
+## Required Evidence
+- Update 00-meta/execution-log.md when 01-input is complete
+- Link the markdown artifacts produced in this stage
+## Completion Criteria
+- Inputs are validated and normalized
+- Required fields are present
+- Handoff package is complete
+- Stage artifacts remain markdown-first and workflow-scoped
+## Handoff
+- Hand off validated package to 02-process

package/templates/.workspace-templates/workspace/02-process/CONTEXT.md ADDED Viewed

@@ -0,0 +1,29 @@
+# 02-process CONTEXT.md
+## Purpose
+Transform validated inputs into structured working outputs.
+## Inputs
+- Validated package from 01-input
+- Processing requirements and quality constraints
+## Outputs
+- Processed artifacts ready for final delivery
+- Decision log for key transformations
+- Markdown workflow artifacts only (no product source code)
+## Dependencies
+- 01-input
+## Required Evidence
+- Update 00-meta/execution-log.md when 02-process is complete
+- Link the markdown artifacts produced in this stage
+## Completion Criteria
+- Required transformations are complete
+- Output structure is consistent and reviewable
+- Handoff package is ready for output stage
+- Stage artifacts remain markdown-first and workflow-scoped
+## Handoff
+- Hand off processed artifacts to 03-output

package/templates/.workspace-templates/workspace/03-output/CONTEXT.md ADDED Viewed

@@ -0,0 +1,29 @@
+# 03-output CONTEXT.md
+## Purpose
+Assemble, finalize, and deliver workflow outputs.
+## Inputs
+- Processed artifacts from 02-process
+- Delivery requirements and formatting rules
+## Outputs
+- Final deliverable package
+- Delivery notes and validation summary
+- Markdown workflow artifacts only (no product source code)
+## Dependencies
+- 02-process
+## Required Evidence
+- Update 00-meta/execution-log.md when 03-output is complete
+- Link the markdown artifacts produced in this stage
+## Completion Criteria
+- Final outputs satisfy delivery requirements
+- Validation summary is complete
+- Artifacts are ready for handoff to user
+- Stage artifacts remain markdown-first and workflow-scoped
+## Handoff
+- Final output stage: deliver package and close the workflow loop

package/templates/.workspace-templates/workspace/README.md ADDED Viewed

@@ -0,0 +1,14 @@
+# Workspace README
+This is an example workspace built using the ICM methodology.
+## Structure
+- 00-meta: Metadata and configuration
+- 01-input: Input collection
+- 02-process: Processing logic
+- 03-output: Output generation
+## Usage
+Load SYSTEM.md first, then use CONTEXT.md to route to the appropriate workspace section.
+Keep stage outputs as markdown workflow artifacts rather than product source code.
+Update 00-meta/execution-log.md as each stage is completed to preserve sequential execution.

package/templates/SKILL.md ADDED Viewed

@@ -0,0 +1,347 @@
+---
+name: workspace-maxxing
+description: "Autonomously creates, validates, and improves ICM-compliant workspaces using batched parallel sub-agents. Use when user asks to 'build a workspace', 'create a workflow', 'automate a process', 'improve this workspace', 'validate this workspace', 'iterate on this workspace', or 'run test cases'."
+---
+# Workspace-Maxxing Skill
+## Overview
+Autonomous workflow system that creates, validates, and improves ICM-compliant workspaces through phased execution, batched parallel sub-agent iteration, and condition-driven improvement loops.
+## When to Use
+- User asks to build, create, or automate a workflow
+- User asks to improve, validate, or iterate on an existing workspace
+- User asks for workspace architecture or structure design
+- User asks to assess or install tools for a workspace
+- User asks to run test cases against a workspace
+- **User asks to create an agent for a specific task** (e.g., "create a daily digest agent", "make a news aggregator agent")
+## Agent Creation Workflow
+When you invoke `workspace-maxxing` with a request to create an agent (e.g., "create a daily digest agent"), follow this flow:
+```
+1. Parse the request to extract the agent purpose (e.g., "Daily Digest")
+2. Create ICM workspace structure (SYSTEM.md, CONTEXT.md, stage folders)
+3. Create invokable agent in .agents/skills/@<purpose>/
+4. Run self-improvement loop on the agent
+   - Generate test cases (edge, empty, varied inputs)
+   - Validate agent handling
+   - Score robustness (0-100)
+   - If score < 85: improve prompts, retry
+   - Repeat until score >= 85 or max iterations (3)
+5. Install agent for platform (OpenCode/Claude/Copilot/Gemini)
+6. Deliver workspace with robust agent
+```
+### Agent Creation Example
+User: "Create a daily digest agent"
+```
+-> Extract purpose: "Daily Digest"
+-> Create workspace with stages: 01-input, 02-process, 03-output
+-> Create agent: @daily-digest in .agents/skills/@daily-digest/
+-> Run iteration:
+   - Test: empty input -> fix prompts
+   - Test: special chars -> fix prompts
+   - Test: normal input -> validate
+   - Score >= 85? Yes -> deliver
+-> Agent is ready to invoke with @daily-digest
+```
+## When Not to Use
+- Simple file creation or editing (use direct file operations)
+- Questions about ICM methodology (answer directly)
+- Non-workspace tasks (check for other applicable skills first)
+## The Iron Law
+NO BUILD WITHOUT PLAN
+NO PLAN WITHOUT RESEARCH
+NO IMPROVEMENT WITHOUT VALIDATION
+NO COMPLETION CLAIM WITHOUT VERIFICATION
+NO PRODUCT IMPLEMENTATION INSIDE WORKSPACE BUILDING MODE
+NO STAGE SKIPPING ACROSS NUMBERED WORKFLOW FOLDERS
+## Scope Guardrails
+- This skill builds an ICM workflow workspace, not the end-product application.
+- Keep outputs as file-structured markdown workflow artifacts in numbered stage folders.
+- Do not generate backend/frontend/runtime code for the target domain while running this skill.
+- If a user asks for product implementation details, capture them as workflow requirements and continue building the workspace structure.
+## Sequential Enforcement
+- Follow numbered stage folders in strict order; do not jump ahead.
+- Use 00-meta/execution-log.md as the source of truth for stage completion state.
+- A later stage is blocked until the previous stage is checked complete with evidence notes.
+## Hybrid Flow
+```
+Phase 1: RESEARCH (dispatch research sub-skill)
+  ->
+Phase 2: ARCHITECTURE (dispatch architecture sub-skill)
+  ->
+Phase 3: BUILD (use scaffold.ts script)
+  ->
+Phase 4: VALIDATE (dispatch validation sub-skill)
+  ->
+Phase 5: AUTONOMOUS ITERATION (use orchestrator.ts)
+  - Generate test cases
+  - Split into batches
+  - Dispatch workers in parallel per batch
+  - Validate batch results
+  - If score < threshold and failing test cases exist -> dispatch fixer sub-agents -> re-validate
+  - If score < threshold and no actionable failing test cases exist -> failed/escalated outcome
+  - Next batch or complete
+  ->
+Phase 6: DELIVER
+```
+## Autonomous Iteration Workflow
+The orchestrator manages batched parallel sub-agent execution:
+```bash
+node scripts/orchestrator.ts --workspace ./workspace --batch-size 3 --score-threshold 85 --subagent-runner "<your-runner-command>"
+```
+**Flow:**
+1. Generate test cases from workspace stages
+2. Split into batches (default 3 per batch)
+3. Dispatch worker sub-agents in parallel for each batch (external runner mode)
+4. Validate batch outputs with benchmark scoring
+5. If batch score < threshold and failing test cases exist -> dispatch fixer sub-agents -> re-validate (max 3 retries)
+6. If score remains < threshold and no actionable failing test cases exist -> mark batch failed/escalated
+7. Move to next batch or write summary
+**Options:**
+- `--batch-size <n>` - Test cases per batch (default: 3)
+- `--score-threshold <n>` - Minimum batch score to pass (default: 85)
+- `--max-fix-retries <n>` - Max fix attempts per batch (default: 3)
+- `--worker-timeout <s>` - Worker timeout in seconds (default: 300)
+- `--subagent-runner <command>` - External command template used to execute worker/fixer sub-agents; supports placeholders `{skill}`, `{workspace}`, `{batchId}`, `{testCaseId}`
+## Sub-Agent Iteration Contract
+- True sub-agent mode requires `--subagent-runner` (or `WORKSPACE_MAXXING_SUBAGENT_RUNNER`) so worker/fixer test cases execute outside the orchestrator process.
+- Worker/fixer execution MUST fail fast when no runner command is configured.
+- Batch artifacts must include generated test cases, per-test-case reports, and summary evidence under `.agents/iteration/`.
+## Sub-Agent Runner Contract
+- Worker/fixer loops are external-runner-only in strict mode.
+- The runner command template must support placeholders: `{skill}`, `{workspace}`, `{batchId}`, `{testCaseId}`.
+- Expected runner output is JSON with `{skill, status, timestamp, findings, recommendations, metrics, nextSkill}`.
+- Non-JSON runner output is treated as a runner contract failure for worker/fixer execution.
+- Use telemetry artifacts under `.agents/iteration/runs/` to diagnose command/rendering or payload issues.
+## Sub-Skill Dispatch
+| Condition | Sub-Skill | Command |
+|-----------|-----------|---------|
+| Starting new workflow | `research` | `node scripts/dispatch.ts --skill research --workspace ./workspace` |
+| After research complete | `architecture` | `node scripts/dispatch.ts --skill architecture --workspace ./workspace` |
+| After architecture approved | (use scaffold.ts) | `node scripts/scaffold.ts --name "<name>" --stages "<stages>" --output ./workspace` |
+| After building | `validation` | `node scripts/dispatch.ts --skill validation --workspace ./workspace` |
+| Running autonomous iteration | (use orchestrator.ts) | `node scripts/orchestrator.ts --workspace ./workspace --subagent-runner "<runner>"` |
+| Worker execution | `worker` | `node scripts/dispatch.ts --skill worker --workspace ./workspace --batch-id <N> --runner-command "<runner {skill} {workspace} {batchId} {testCaseId}>"` |
+| Fix loop | `fixer` | `node scripts/dispatch.ts --skill fixer --workspace ./workspace --batch-id <N> --runner-command "<runner {skill} {workspace} {batchId} {testCaseId}>"` |
+| Manual condition loop only (not orchestrator batch loop): score < 85 due to prompt quality | `prompt-engineering` | `node scripts/dispatch.ts --skill prompt-engineering --workspace ./workspace` |
+| Manual condition loop only (not orchestrator batch loop): no tests exist | `testing` | `node scripts/dispatch.ts --skill testing --workspace ./workspace` |
+| Manual condition loop only (not orchestrator batch loop): score plateaued across full runs | `iteration` | `node scripts/dispatch.ts --skill iteration --workspace ./workspace` |
+| Manual condition loop only (not orchestrator batch loop): tools missing | `tooling` | `node scripts/dispatch.ts --skill tooling --workspace ./workspace` |
+## Available Scripts
+### orchestrator.ts - Autonomous Batch Iteration
+Runs the full batched parallel sub-agent workflow.
+```bash
+node scripts/orchestrator.ts --workspace ./workspace --batch-size 3 --score-threshold 85 --subagent-runner "<runner>"
+```
+### scaffold.ts - Generate ICM Workspace
+Creates a complete ICM workspace structure from a plan.
+```bash
+node scripts/scaffold.ts --name "research" --stages "01-research,02-analysis,03-report" --output ./workspace
+```
+### validate.ts - Check ICM Compliance
+Validates a workspace against ICM rules.
+```bash
+node scripts/validate.ts --workspace ./workspace
+```
+### install-tool.ts - Install Packages
+Installs a tool and updates the workspace inventory.
+```bash
+node scripts/install-tool.ts --tool "pdf-lib" --manager npm --workspace ./workspace
+```
+### iterate.ts - Single-Workspace Iteration (legacy)
+Runs a 3-pass improvement loop. Use orchestrator.ts for batched parallel iteration.
+```bash
+node scripts/iterate.ts --workspace ./workspace --max-retries 3
+```
+### generate-tests.ts - Generate Test Cases
+Creates test cases for each stage (sample, edge-case, empty).
+```bash
+node scripts/generate-tests.ts --workspace ./workspace --output ./tests.json
+```
+### benchmark.ts - Weighted Benchmark Scoring
+Runs weighted benchmark scoring on a workspace.
+```bash
+node scripts/benchmark.ts --workspace ./workspace
+```
+### dispatch.ts - Sub-Skill Dispatcher
+Loads and executes sub-skill workflows. Supports parallel dispatch.
+```bash
+node scripts/dispatch.ts --skill <name> --workspace ./workspace [--batch-id <N>] [--parallel --invocations <path>]
+```
+## Anti-Rationalization Table
+| Thought | Reality |
+|---------|---------|
+| "This workspace looks good enough" | Good enough is the enemy of excellent. Run validation. |
+| "I'll skip research and go straight to building" | Building without research produces generic, non-optimal workspaces. |
+| "The user didn't ask for tests" | Autonomous workflows require self-verification. Tests are mandatory. |
+| "I'll fix this later" | Later never comes. Fix it now or escalate. |
+| "This sub-skill doesn't apply here" | If there's a 1% chance it applies, dispatch it. |
+| "The score is fine" | Fine is not good. Target >= 85. |
+| "I already validated this" | Validation is a snapshot. Re-validate after every change. |
+| "I'll do all phases at once" | Phases exist for a reason. Complete each before moving to the next. |
+## Integration
+- Sub-skills live in `skills/` directory, loaded via dispatch.ts
+- Shared references in `references/` directory (anti-patterns, reporting-format, iron-laws)
+- All sub-skills return structured JSON reports
+- Orchestrator manages batch lifecycle with fix loops
+- Condition loop continues until score >= 85 AND all validations pass
+- Escalate to human if stuck after 3 iteration attempts
+## ICM Rules
+- Canonical sources: each fact lives in exactly one file
+- One-way dependencies only: A -> B, never B -> A
+- Selective loading: route to sections, not whole files
+- Numbered folders for workflow stages
+## Output Format
+- workspace/ - the built markdown-first workflow workspace
+- .agents/skills/<workspace-name>/ - installable skill
+- USAGE.md - how to use this workspace in future sessions
+- .agents/iteration/summary.json - autonomous iteration results
+## Creating Workspaces with Invokable Agents
+The workspace-maxxing skill can now create both the workspace folder structure AND an invokable agent that can be called with `@` in the workspace.
+### CLI Commands
+```bash
+# Create workspace WITH agent (default)
+npx workspace-maxxing --create-workspace --workspace-name "Daily Digest" --stages "01-input,02-process,03-output"
+# Create workspace WITHOUT agent (backward compatible)
+npx workspace-maxxing --create-workspace --workspace-name "My Workflow" --no-agent
+# Custom agent name
+npx workspace-maxxing --create-workspace --workspace-name "AI News" --agent-name "@news-agent"
+# Custom iteration settings
+npx workspace-maxxing --create-workspace --workspace-name "My Workflow" --threshold 90 --max-iterations 5
+```
+### Options
+| Option | Default | Description |
+|--------|---------|-------------|
+| `--create-workspace` | - | Enable workspace creation mode |
+| `--workspace-name` | "My Workspace" | Name of the workspace |
+| `--stages` | "01-input,02-process,03-output" | Comma-separated stage names |
+| `--agent-name` | auto-generated (@workspace-name) | Custom agent name |
+| `--no-agent` | false | Create workspace without agent |
+| `--threshold` | 85 | Robustness threshold for agent iteration |
+| `--max-iterations` | 3 | Max improvement cycles |
+### What Gets Created
+When you run with `--create-workspace`:
+1. **ICM Workspace** - Folder structure with SYSTEM.md, CONTEXT.md, stage folders
+2. **Invokable Agent** - Stored in `.agents/skills/@<name>/`
+3. **Self-Improvement** - Agent runs through iteration loop until robustness >= threshold
+### Agent Structure
+```
+workspace/
+├── .agents/
+│   └── skills/
+│       └── @<name>/           # The invokable agent
+│           ├── SKILL.md
+│           ├── config.json
+│           ├── prompts/
+│           │   ├── system.md
+│           │   └── tasks/
+│           ├── tools/
+│           └── tests/
+├── 01-input/
+├── 02-process/
+├── 03-output/
+├── SYSTEM.md
+└── CONTEXT.md
+```
+### Invoking the Agent
+After the workspace is created, use `@` followed by the agent name:
+- **OpenCode**: `@daily-digest`
+- **Claude Code**: Via `.claude/skills/` directory
+- **Copilot**: Via `.github/copilot-instructions/`
+- **Gemini**: Via `.gemini/skills/` directory
+### Agent Self-Improvement
+When the agent is created, it runs through an iteration loop:
+1. **Generate test cases** - Edge cases, empty states, varied inputs
+2. **Validate** - Check agent handles each case properly
+3. **Score** - Compute robustness score (0-100)
+4. **Improve** - If score < threshold, update prompts to fix issues
+5. **Repeat** - Until score >= threshold or max iterations reached
+This ensures the delivered agent is robust for real-world use.
+### Backward Compatibility
+Existing workspace-maxxing behavior is unchanged:
+- `--opencode`, `--claude`, `--copilot`, `--gemini` still install the skill
+- Using `--no-agent` creates a workspace only (no agent)
+- Default behavior (without `--no-agent`) includes agent creation

package/tests/benchmark.test.ts ADDED Viewed

@@ -0,0 +1,158 @@
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { calculateBenchmark, formatBenchmarkTable, saveBenchmarkReport, BenchmarkResult } from '../src/scripts/benchmark';
+describe('calculateBenchmark', () => { // Runs calculateBenchmark against throwaway workspace directories on disk.
+  let tempDir: string; // workspace root, recreated for every test
+  beforeEach(() => {
+    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'benchmark-test-')); // unique directory under the OS temp dir
+  });
+  afterEach(() => {
+    fs.rmSync(tempDir, { recursive: true, force: true }); // force: true keeps cleanup from throwing if the directory is already gone
+  });
+  function createStage(ws: string, name: string, content: string) { // Helper: writes <ws>/<name>/CONTEXT.md containing `content`.
+    const dir = path.join(ws, name);
+    fs.mkdirSync(dir, { recursive: true }); // recursive: true creates missing parents and tolerates an existing directory
+    fs.writeFileSync(path.join(dir, 'CONTEXT.md'), content);
+  }
+  it('returns weighted scores for a workspace with all stages', () => {
+    createStage(tempDir, '01-ideation', '## Purpose\nTest\n\n## Inputs\nNone\n\n## Outputs\nTest\n\n## Dependencies\nNone');
+    createStage(tempDir, '02-research', '## Purpose\nTest\n\n## Inputs\nNone\n\n## Outputs\nTest\n\n## Dependencies\nNone');
+    createStage(tempDir, '03-architecture', '## Purpose\nTest\n\n## Inputs\nNone\n\n## Outputs\nTest\n\n## Dependencies\nNone');
+    const result = calculateBenchmark(tempDir);
+    expect(result.stages).toHaveLength(3); // the assertions below pin both the order and the weight expected for each stage
+    expect(result.stages[0].name).toBe('01-ideation');
+    expect(result.stages[0].weight).toBe(1.5);
+    expect(result.stages[1].name).toBe('02-research');
+    expect(result.stages[1].weight).toBe(1.3);
+    expect(result.stages[2].name).toBe('03-architecture');
+    expect(result.stages[2].weight).toBe(1.2);
+  });
+  it('excludes missing stages from calculation', () => {
+    createStage(tempDir, '01-ideation', '## Purpose\nTest\n\n## Inputs\nNone\n\n## Outputs\nTest\n\n## Dependencies\nNone');
+    const result = calculateBenchmark(tempDir);
+    expect(result.stages).toHaveLength(1); // only the stage that exists on disk should be reported
+    expect(result.stages[0].name).toBe('01-ideation');
+  });
+  it('normalizes final score to 0-100', () => {
+    createStage(tempDir, '01-ideation', '## Purpose\nTest\n\n## Inputs\nNone\n\n## Outputs\nTest\n\n## Dependencies\nNone');
+    const result = calculateBenchmark(tempDir);
+    expect(result.weightedScore).toBeGreaterThanOrEqual(0);
+    expect(result.weightedScore).toBeLessThanOrEqual(100);
+  });
+  it('returns empty stages for workspace with no numbered folders', () => {
+    fs.mkdirSync(path.join(tempDir, '00-meta'), { recursive: true }); // a lone 00-meta folder is expected not to count as a stage
+    const result = calculateBenchmark(tempDir);
+    expect(result.stages).toHaveLength(0);
+    expect(result.weightedScore).toBe(0);
+  });
+  it('returns empty stages for non-existent workspace', () => {
+    const result = calculateBenchmark('/non-existent-path-xyz'); // a missing path is expected to yield an empty result, not an exception
+    expect(result.stages).toHaveLength(0);
+    expect(result.weightedScore).toBe(0);
+  });
+  it('generates fix suggestions for incomplete stages', () => {
+    createStage(tempDir, '01-ideation', 'minimal content'); // CONTEXT.md without the ## sections used in the other tests
+    const result = calculateBenchmark(tempDir);
+    expect(result.fixSuggestions.length).toBeGreaterThan(0);
+    expect(result.improvementPotential).toBe(true);
+  });
+});
+describe('formatBenchmarkTable', () => { // Checks the string returned by formatBenchmarkTable for hand-built results.
+  it('formats a benchmark result as a console table', () => {
+    const data: BenchmarkResult = { // hand-built fixture; no workspace on disk is involved
+      workspace: 'test-ws',
+      agent: 'opencode',
+      timestamp: '2026-04-07T00:00:00Z',
+      rawScore: 72,
+      weightedScore: 78,
+      stages: [
+        { name: '01-ideation', raw: 85, weight: 1.5, weighted: 95 },
+        { name: '02-research', raw: 60, weight: 1.3, weighted: 58 },
+      ],
+      fixSuggestions: ['Add research sources'],
+      improvementPotential: true,
+    };
+    const table = formatBenchmarkTable(data);
+    expect(table).toContain('01-ideation');
+    expect(table).toContain('02-research');
+    expect(table).toContain('78'); // 78 is the fixture's weightedScore
+    expect(table).toContain('TOTAL');
+  });
+  it('handles empty stages gracefully', () => {
+    const data: BenchmarkResult = {
+      workspace: 'test-ws',
+      agent: 'opencode',
+      timestamp: '2026-04-07T00:00:00Z',
+      rawScore: 0,
+      weightedScore: 0,
+      stages: [],
+      fixSuggestions: [],
+      improvementPotential: false,
+    };
+    const table = formatBenchmarkTable(data);
+    expect(table).toContain('0'); // loose check: only requires the character 0 somewhere in the output
+  });
+});
+describe('saveBenchmarkReport', () => { // Verifies saveBenchmarkReport writes a JSON file and returns its path.
+  let tempDir: string; // directory passed to saveBenchmarkReport, recreated for every test
+  beforeEach(() => {
+    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'benchmark-report-'));
+  });
+  afterEach(() => {
+    fs.rmSync(tempDir, { recursive: true, force: true }); // force: true keeps cleanup from throwing if the directory is already gone
+  });
+  it('saves benchmark report to .workspace-benchmarks directory', () => {
+    const data: BenchmarkResult = {
+      workspace: 'test-ws',
+      agent: 'opencode',
+      timestamp: '2026-04-07T00:00:00Z',
+      rawScore: 72,
+      weightedScore: 78,
+      stages: [],
+      fixSuggestions: [],
+      improvementPotential: false,
+    };
+    const filePath = saveBenchmarkReport(tempDir, data);
+    expect(filePath).toContain('.workspace-benchmarks'); // returned path should include the reports directory name
+    expect(filePath).toContain('test-ws-'); // and the workspace name followed by a dash
+    expect(fs.existsSync(filePath)).toBe(true);
+    const saved = JSON.parse(fs.readFileSync(filePath, 'utf-8')); // round-trip: the file must parse as JSON and keep the fields below
+    expect(saved.weightedScore).toBe(78);
+    expect(saved.workspace).toBe('test-ws');
+  });
+});