workspace-maxxing 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/workspace-maxxing/.workspace-templates/CONTEXT.md +44 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/SYSTEM.md +44 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/references/anti-patterns.md +16 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/references/iron-laws.md +26 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/references/reporting-format.md +52 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/benchmark.ts +171 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/dispatch.ts +473 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/generate-tests.ts +158 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/install-tool.ts +82 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/iterate.ts +265 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/orchestrator.ts +539 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/scaffold.ts +282 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/validate.ts +452 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/architecture/SKILL.md +95 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/fixer/SKILL.md +109 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/iteration/SKILL.md +89 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/research/SKILL.md +94 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/testing/SKILL.md +89 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/tooling/SKILL.md +87 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/validation/SKILL.md +103 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/worker/SKILL.md +79 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/README.md +14 -0
- package/.agents/skills/workspace-maxxing/SKILL.md +312 -0
- package/.agents/skills/workspace-maxxing/scripts/benchmark.ts +171 -0
- package/.agents/skills/workspace-maxxing/scripts/dispatch.ts +473 -0
- package/.agents/skills/workspace-maxxing/scripts/generate-tests.ts +158 -0
- package/.agents/skills/workspace-maxxing/scripts/install-tool.ts +82 -0
- package/.agents/skills/workspace-maxxing/scripts/iterate.ts +265 -0
- package/.agents/skills/workspace-maxxing/scripts/orchestrator.ts +539 -0
- package/.agents/skills/workspace-maxxing/scripts/scaffold.ts +282 -0
- package/.agents/skills/workspace-maxxing/scripts/validate.ts +452 -0
- package/README.md +144 -0
- package/dist/agent-creator.d.ts +9 -0
- package/dist/agent-creator.d.ts.map +1 -0
- package/dist/agent-creator.js +199 -0
- package/dist/agent-creator.js.map +1 -0
- package/dist/agent-iterator.d.ts +38 -0
- package/dist/agent-iterator.d.ts.map +1 -0
- package/dist/agent-iterator.js +327 -0
- package/dist/agent-iterator.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +197 -0
- package/dist/index.js.map +1 -0
- package/dist/install.d.ts +18 -0
- package/dist/install.d.ts.map +1 -0
- package/dist/install.js +117 -0
- package/dist/install.js.map +1 -0
- package/dist/platforms/claude.d.ts +7 -0
- package/dist/platforms/claude.d.ts.map +1 -0
- package/dist/platforms/claude.js +70 -0
- package/dist/platforms/claude.js.map +1 -0
- package/dist/platforms/copilot.d.ts +7 -0
- package/dist/platforms/copilot.d.ts.map +1 -0
- package/dist/platforms/copilot.js +75 -0
- package/dist/platforms/copilot.js.map +1 -0
- package/dist/platforms/gemini.d.ts +7 -0
- package/dist/platforms/gemini.d.ts.map +1 -0
- package/dist/platforms/gemini.js +81 -0
- package/dist/platforms/gemini.js.map +1 -0
- package/dist/platforms/index.d.ts +8 -0
- package/dist/platforms/index.d.ts.map +1 -0
- package/dist/platforms/index.js +41 -0
- package/dist/platforms/index.js.map +1 -0
- package/dist/platforms/opencode.d.ts +7 -0
- package/dist/platforms/opencode.d.ts.map +1 -0
- package/dist/platforms/opencode.js +70 -0
- package/dist/platforms/opencode.js.map +1 -0
- package/dist/scripts/benchmark.d.ts +20 -0
- package/dist/scripts/benchmark.d.ts.map +1 -0
- package/dist/scripts/benchmark.js +170 -0
- package/dist/scripts/benchmark.js.map +1 -0
- package/dist/scripts/dispatch.d.ts +32 -0
- package/dist/scripts/dispatch.d.ts.map +1 -0
- package/dist/scripts/dispatch.js +386 -0
- package/dist/scripts/dispatch.js.map +1 -0
- package/dist/scripts/generate-tests.d.ts +11 -0
- package/dist/scripts/generate-tests.d.ts.map +1 -0
- package/dist/scripts/generate-tests.js +118 -0
- package/dist/scripts/generate-tests.js.map +1 -0
- package/dist/scripts/install-tool.d.ts +8 -0
- package/dist/scripts/install-tool.d.ts.map +1 -0
- package/dist/scripts/install-tool.js +98 -0
- package/dist/scripts/install-tool.js.map +1 -0
- package/dist/scripts/iterate.d.ts +44 -0
- package/dist/scripts/iterate.d.ts.map +1 -0
- package/dist/scripts/iterate.js +260 -0
- package/dist/scripts/iterate.js.map +1 -0
- package/dist/scripts/orchestrator.d.ts +40 -0
- package/dist/scripts/orchestrator.d.ts.map +1 -0
- package/dist/scripts/orchestrator.js +378 -0
- package/dist/scripts/orchestrator.js.map +1 -0
- package/dist/scripts/scaffold.d.ts +8 -0
- package/dist/scripts/scaffold.d.ts.map +1 -0
- package/dist/scripts/scaffold.js +279 -0
- package/dist/scripts/scaffold.js.map +1 -0
- package/dist/scripts/validate.d.ts +11 -0
- package/dist/scripts/validate.d.ts.map +1 -0
- package/dist/scripts/validate.js +472 -0
- package/dist/scripts/validate.js.map +1 -0
- package/docs/superpowers/plans/2026-04-07-autonomous-iteration-plan.md +1123 -0
- package/docs/superpowers/plans/2026-04-07-autonomous-iteration-sub-agent-batches.md +1923 -0
- package/docs/superpowers/plans/2026-04-07-autonomous-workflow-sub-skill-plan.md +1505 -0
- package/docs/superpowers/plans/2026-04-07-benchmarking-multi-agent-plan.md +854 -0
- package/docs/superpowers/plans/2026-04-07-workspace-builder-logic-plan.md +1426 -0
- package/docs/superpowers/plans/2026-04-07-workspace-maxxing-plan.md +1299 -0
- package/docs/superpowers/plans/2026-04-08-session-294c-subagent-invocation-plan.md +320 -0
- package/docs/superpowers/plans/2026-04-08-workflow-prompt-hardening-plan.md +1025 -0
- package/docs/superpowers/plans/2026-04-12-workspace-agent-creation-plan.md +992 -0
- package/docs/superpowers/specs/2026-04-07-autonomous-iteration-design.md +214 -0
- package/docs/superpowers/specs/2026-04-07-autonomous-iteration-sub-agent-batches-design.md +188 -0
- package/docs/superpowers/specs/2026-04-07-autonomous-workflow-sub-skill-design.md +137 -0
- package/docs/superpowers/specs/2026-04-07-benchmarking-multi-agent-design.md +105 -0
- package/docs/superpowers/specs/2026-04-07-workspace-builder-logic-design.md +179 -0
- package/docs/superpowers/specs/2026-04-07-workspace-maxxing-design.md +227 -0
- package/docs/superpowers/specs/2026-04-08-session-294c-subagent-invocation-design.md +265 -0
- package/docs/superpowers/specs/2026-04-08-workflow-prompt-hardening-design.md +146 -0
- package/docs/superpowers/specs/2026-04-12-workspace-agent-creation-design.md +239 -0
- package/jest.config.js +8 -0
- package/package.json +32 -0
- package/src/agent-creator.ts +180 -0
- package/src/agent-iterator.ts +397 -0
- package/src/index.ts +189 -0
- package/src/install.ts +105 -0
- package/src/platforms/claude.ts +40 -0
- package/src/platforms/copilot.ts +50 -0
- package/src/platforms/gemini.ts +55 -0
- package/src/platforms/index.ts +45 -0
- package/src/platforms/opencode.ts +41 -0
- package/src/scripts/benchmark.ts +171 -0
- package/src/scripts/dispatch.ts +473 -0
- package/src/scripts/generate-tests.ts +112 -0
- package/src/scripts/install-tool.ts +82 -0
- package/src/scripts/iterate.ts +271 -0
- package/src/scripts/orchestrator.ts +539 -0
- package/src/scripts/scaffold.ts +282 -0
- package/src/scripts/validate.ts +516 -0
- package/templates/.workspace-templates/CONTEXT.md +44 -0
- package/templates/.workspace-templates/SYSTEM.md +44 -0
- package/templates/.workspace-templates/references/anti-patterns.md +16 -0
- package/templates/.workspace-templates/references/iron-laws.md +26 -0
- package/templates/.workspace-templates/references/reporting-format.md +52 -0
- package/templates/.workspace-templates/scripts/benchmark.ts +171 -0
- package/templates/.workspace-templates/scripts/dispatch.ts +473 -0
- package/templates/.workspace-templates/scripts/generate-tests.ts +158 -0
- package/templates/.workspace-templates/scripts/install-tool.ts +82 -0
- package/templates/.workspace-templates/scripts/iterate.ts +265 -0
- package/templates/.workspace-templates/scripts/orchestrator.ts +539 -0
- package/templates/.workspace-templates/scripts/scaffold.ts +282 -0
- package/templates/.workspace-templates/scripts/validate.ts +452 -0
- package/templates/.workspace-templates/skills/architecture/SKILL.md +95 -0
- package/templates/.workspace-templates/skills/fixer/SKILL.md +109 -0
- package/templates/.workspace-templates/skills/iteration/SKILL.md +89 -0
- package/templates/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
- package/templates/.workspace-templates/skills/research/SKILL.md +94 -0
- package/templates/.workspace-templates/skills/testing/SKILL.md +89 -0
- package/templates/.workspace-templates/skills/tooling/SKILL.md +87 -0
- package/templates/.workspace-templates/skills/validation/SKILL.md +103 -0
- package/templates/.workspace-templates/skills/worker/SKILL.md +79 -0
- package/templates/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
- package/templates/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
- package/templates/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
- package/templates/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
- package/templates/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
- package/templates/.workspace-templates/workspace/README.md +14 -0
- package/templates/SKILL.md +347 -0
- package/tests/benchmark.test.ts +158 -0
- package/tests/cli.test.ts +109 -0
- package/tests/dispatch-parallel.test.ts +124 -0
- package/tests/dispatch.test.ts +218 -0
- package/tests/fixer-skill.test.ts +203 -0
- package/tests/generate-tests.test.ts +101 -0
- package/tests/install-tool.test.ts +141 -0
- package/tests/install.test.ts +144 -0
- package/tests/integration.test.ts +324 -0
- package/tests/iterate.test.ts +219 -0
- package/tests/orchestrator.test.ts +710 -0
- package/tests/scaffold.test.ts +238 -0
- package/tests/templates-enhanced.test.ts +208 -0
- package/tests/templates.test.ts +219 -0
- package/tests/validate.test.ts +421 -0
- package/tests/validation-enhanced.test.ts +303 -0
- package/tests/worker-skill.test.ts +88 -0
- package/tsconfig.json +19 -0
- package/workspace/00-meta/CONTEXT.md +3 -0
- package/workspace/00-meta/execution-log.md +17 -0
- package/workspace/00-meta/tools.md +11 -0
- package/workspace/01-input/CONTEXT.md +27 -0
- package/workspace/CONTEXT.md +35 -0
- package/workspace/README.md +14 -0
- package/workspace/SYSTEM.md +36 -0
- package/workspace-maxxing-0.1.0.tgz +0 -0
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Routing Table
|
|
2
|
+
|
|
3
|
+
## How to Use This File
|
|
4
|
+
|
|
5
|
+
Map each task to the smallest required context and avoid loading unrelated files.
|
|
6
|
+
|
|
7
|
+
## Task Routing
|
|
8
|
+
|
|
9
|
+
| When you need to... | Go to | Load |
|
|
10
|
+
|---------------------|-------|------|
|
|
11
|
+
| Understand workspace constraints | SYSTEM.md | Always loaded first |
|
|
12
|
+
| Gather or validate inputs | 01-input/CONTEXT.md | Input stage contract |
|
|
13
|
+
| Analyze, process, or draft | 02-process/CONTEXT.md | Processing stage contract |
|
|
14
|
+
| Finalize and deliver outputs | 03-output/CONTEXT.md | Output stage contract |
|
|
15
|
+
| Check available tools | 00-meta/tools.md | Tool inventory |
|
|
16
|
+
|
|
17
|
+
## Loading Order
|
|
18
|
+
|
|
19
|
+
1. SYSTEM.md (always)
|
|
20
|
+
2. This root CONTEXT.md
|
|
21
|
+
3. One relevant stage CONTEXT.md
|
|
22
|
+
4. Only the task files needed for that stage
|
|
23
|
+
|
|
24
|
+
## Scope Guardrails
|
|
25
|
+
|
|
26
|
+
- Route domain goals into workflow design stages and markdown deliverables.
|
|
27
|
+
- Do not scaffold backend, frontend, or runtime product repositories from this routing file.
|
|
28
|
+
- Keep artifacts file-structured and markdown-first across numbered workflow folders.
|
|
29
|
+
|
|
30
|
+
## Sequential Routing Contract
|
|
31
|
+
|
|
32
|
+
- Route to the earliest incomplete stage listed in 00-meta/execution-log.md.
|
|
33
|
+
- Do not skip forward to later stages while earlier stages remain incomplete.
|
|
34
|
+
- Append handoff notes after each completed stage before advancing.
|
|
35
|
+
|
|
36
|
+
## Stage Handoff Routing
|
|
37
|
+
|
|
38
|
+
- 01-input -> 02-process when input completion criteria are met
|
|
39
|
+
- 02-process -> 03-output when processing completion criteria are met
|
|
40
|
+
- 03-output -> delivery and closure
|
|
41
|
+
|
|
42
|
+
## Escalation
|
|
43
|
+
|
|
44
|
+
Escalate when required sections are missing, routing is ambiguous, or dependencies conflict with stage order.
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# System - Workspace Root
|
|
2
|
+
|
|
3
|
+
## Role
|
|
4
|
+
You are an AI assistant operating inside this workspace. Follow stage contracts, route tasks through stage contexts, and keep information canonical.
|
|
5
|
+
|
|
6
|
+
## Folder Map
|
|
7
|
+
|
|
8
|
+
| Folder | Purpose |
|
|
9
|
+
|--------|---------|
|
|
10
|
+
| 00-meta/ | Workspace configuration, tool inventory, session notes |
|
|
11
|
+
| 01-input/ | Source materials, intake, and validation |
|
|
12
|
+
| 02-process/ | Analysis, transformation, and drafting |
|
|
13
|
+
| 03-output/ | Final deliverables and publication artifacts |
|
|
14
|
+
|
|
15
|
+
## Workflow Rules
|
|
16
|
+
|
|
17
|
+
1. Read this file first every session.
|
|
18
|
+
2. Read root `CONTEXT.md` before loading stage files.
|
|
19
|
+
3. Load only the stage context and task files required for the current step.
|
|
20
|
+
4. Keep one canonical source for each fact; do not duplicate content across stages.
|
|
21
|
+
|
|
22
|
+
## Scope Guardrails
|
|
23
|
+
|
|
24
|
+
- Build workflow documentation, not product implementation code.
|
|
25
|
+
- Keep stage outputs markdown-first (plans, checklists, prompts, routing notes).
|
|
26
|
+
- If the user asks for product code, convert that into workflow requirements and stay inside ICM workspace scope.
|
|
27
|
+
|
|
28
|
+
## Sequential Execution Protocol
|
|
29
|
+
|
|
30
|
+
1. Complete stages in ascending numeric order.
|
|
31
|
+
2. Record each completed stage in 00-meta/execution-log.md before routing onward.
|
|
32
|
+
3. Do not generate final output until all prior stages are marked complete.
|
|
33
|
+
|
|
34
|
+
## Stage Boundaries
|
|
35
|
+
|
|
36
|
+
- Execute stages in order unless an explicit handoff says otherwise.
|
|
37
|
+
- One-way dependencies only: upstream -> downstream.
|
|
38
|
+
- Downstream stages may reference upstream outputs, never the reverse.
|
|
39
|
+
|
|
40
|
+
## Tooling Policy
|
|
41
|
+
|
|
42
|
+
- Tool inventory is tracked in `00-meta/tools.md`.
|
|
43
|
+
- Check inventory before proposing installs.
|
|
44
|
+
- Record approved tool changes in `00-meta/tools.md`.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Anti-Patterns & Rationalization Prevention
|
|
2
|
+
|
|
3
|
+
Common rationalizations agents use to skip steps, with reality checks.
|
|
4
|
+
|
|
5
|
+
| Thought | Reality |
|
|
6
|
+
|---------|---------|
|
|
7
|
+
| "This workspace looks good enough" | Good enough is the enemy of excellent. Run validation. |
|
|
8
|
+
| "I'll skip research and go straight to building" | Building without research produces generic, non-optimal workspaces. |
|
|
9
|
+
| "The user didn't ask for tests" | Autonomous workflows require self-verification. Tests are mandatory. |
|
|
10
|
+
| "I'll fix this later" | Later never comes. Fix it now or escalate. |
|
|
11
|
+
| "This sub-skill doesn't apply here" | If there's a 1% chance it applies, dispatch it. |
|
|
12
|
+
| "The score is fine" | Fine is not good. Target > 85. |
|
|
13
|
+
| "I already validated this" | Validation is a snapshot. Re-validate after every change. |
|
|
14
|
+
| "This prompt update is cosmetic" | Prompt quality directly impacts agent behavior. No cosmetic-only changes. |
|
|
15
|
+
| "I'll do all phases at once" | Phases exist for a reason. Complete each before moving to the next. |
|
|
16
|
+
| "The user will review anyway" | Autonomous means autonomous. Deliver quality without requiring human review. |
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Iron Laws
|
|
2
|
+
|
|
3
|
+
These rules are absolute. No exceptions. No rationalizations.
|
|
4
|
+
|
|
5
|
+
## The Iron Laws of Workspace Building
|
|
6
|
+
|
|
7
|
+
1. **NO BUILD WITHOUT PLAN** — Every workspace must have an approved architecture plan before scaffold.ts runs.
|
|
8
|
+
|
|
9
|
+
2. **NO PLAN WITHOUT RESEARCH** — Architecture decisions must be informed by pattern research and context gathering.
|
|
10
|
+
|
|
11
|
+
3. **NO IMPROVEMENT WITHOUT VALIDATION** — Never claim a workspace improved without running validate.ts before and after.
|
|
12
|
+
|
|
13
|
+
4. **NO COMPLETION CLAIM WITHOUT VERIFICATION** — Run the full test suite and validation checks before declaring delivery.
|
|
14
|
+
|
|
15
|
+
5. **NO SKIPPING PHASES** — The hybrid flow exists for a reason. Complete each phase before moving to the next.
|
|
16
|
+
|
|
17
|
+
6. **NO SILENT FAILURES** — If a sub-skill fails, report it. Do not continue as if nothing happened.
|
|
18
|
+
|
|
19
|
+
## Enforcement
|
|
20
|
+
|
|
21
|
+
Before any action, ask: "Which iron law does this touch?" If the answer is "none," you're probably about to break one.
|
|
22
|
+
|
|
23
|
+
Violation of any iron law requires immediate escalation to the human with:
|
|
24
|
+
- Which law was violated
|
|
25
|
+
- Why it was violated
|
|
26
|
+
- Proposed fix
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# Sub-Skill Report Format
|
|
2
|
+
|
|
3
|
+
All sub-skills return a structured JSON report. Use this exact format.
|
|
4
|
+
|
|
5
|
+
## Report Structure
|
|
6
|
+
|
|
7
|
+
```json
|
|
8
|
+
{
|
|
9
|
+
"skill": "<skill-name>",
|
|
10
|
+
"status": "passed|failed|escalated",
|
|
11
|
+
"timestamp": "<ISO-8601 timestamp>",
|
|
12
|
+
"findings": [
|
|
13
|
+
"<specific finding 1>",
|
|
14
|
+
"<specific finding 2>"
|
|
15
|
+
],
|
|
16
|
+
"recommendations": [
|
|
17
|
+
"<actionable recommendation 1>",
|
|
18
|
+
"<actionable recommendation 2>"
|
|
19
|
+
],
|
|
20
|
+
"metrics": {
|
|
21
|
+
"<metric-name>": <value>,
|
|
22
|
+
"score": <0-100>,
|
|
23
|
+
"itemsChecked": <number>,
|
|
24
|
+
"itemsPassed": <number>
|
|
25
|
+
},
|
|
26
|
+
"nextSkill": "<next-skill-name>|none"
|
|
27
|
+
}
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Field Descriptions
|
|
31
|
+
|
|
32
|
+
- `skill`: Name of the sub-skill that generated this report
|
|
33
|
+
- `status`: One of "passed", "failed", or "escalated"
|
|
34
|
+
- `passed`: All checks passed, no critical issues
|
|
35
|
+
- `failed`: One or more checks failed, actionable items exist
|
|
36
|
+
- `escalated`: Cannot proceed, requires human intervention
|
|
37
|
+
- `timestamp`: ISO-8601 timestamp of report generation
|
|
38
|
+
- `findings`: Array of specific observations (both positive and negative)
|
|
39
|
+
- `recommendations`: Array of actionable next steps
|
|
40
|
+
- `metrics`: Quantitative measurements from the sub-skill
|
|
41
|
+
- `score`: 0-100 quality score
|
|
42
|
+
- `itemsChecked`: Total items evaluated
|
|
43
|
+
- `itemsPassed`: Items that passed evaluation
|
|
44
|
+
- `nextSkill`: Suggested next sub-skill to dispatch, or "none" if workflow is complete
|
|
45
|
+
|
|
46
|
+
## Usage
|
|
47
|
+
|
|
48
|
+
Print the report as JSON to stdout when the sub-skill completes:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
echo '{"skill":"validation","status":"passed",...}'
|
|
52
|
+
```
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import * as fs from 'fs';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
|
|
4
|
+
/** Score breakdown for a single numbered stage folder of a workspace. */
export interface StageBenchmark {
  /** Stage folder name, e.g. `01-ideation`. */
  name: string;
  /** Unweighted score for the stage, capped at `MAX_RAW_SCORE`. */
  raw: number;
  /** Multiplier applied to this stage (`STAGE_WEIGHTS` entry, else `DEFAULT_WEIGHT`). */
  weight: number;
  /** `raw` normalized to a 0-100 scale and then multiplied by `weight`. */
  weighted: number;
}
|
|
10
|
+
|
|
11
|
+
/** Complete benchmark report for one workspace, as built by `calculateBenchmark`. */
export interface BenchmarkResult {
  /** Base name of the resolved workspace directory. */
  workspace: string;
  /** Agent label; `calculateBenchmark` always sets this to `'unknown'`. */
  agent: string;
  /** ISO-8601 time at which the report was generated. */
  timestamp: string;
  /** Sum of the `raw` scores of all stages. */
  rawScore: number;
  /** Weight-averaged stage score, rounded to an integer and capped at 100. */
  weightedScore: number;
  /** Per-stage breakdown, one entry per numbered stage folder. */
  stages: StageBenchmark[];
  /** One human-readable suggestion per stage that scores below `MAX_RAW_SCORE`. */
  fixSuggestions: string[];
  /** True when at least one stage scores below `MAX_RAW_SCORE`. */
  improvementPotential: boolean;
}
|
|
21
|
+
|
|
22
|
+
/** Weight multipliers keyed by exact stage folder name; unlisted stages use `DEFAULT_WEIGHT`. */
const STAGE_WEIGHTS: Record<string, number> = {
  '01-ideation': 1.5,
  '02-research': 1.3,
  '03-architecture': 1.2,
};

/** Weight applied to any stage folder that has no entry in `STAGE_WEIGHTS`. */
const DEFAULT_WEIGHT = 1.0;
/** Highest raw score a stage can earn (the sum of all section points in `calculateStageRawScore`). */
const MAX_RAW_SCORE = 45;
|
|
30
|
+
|
|
31
|
+
/**
 * Scores every numbered stage folder of a workspace and aggregates the results.
 *
 * @param workspacePath - Path to the workspace root; it is resolved to an absolute path.
 * @returns A report holding the per-stage scores, the summed raw score, the
 *   weight-averaged score (rounded, capped at 100) and one suggestion for each
 *   stage that has not reached `MAX_RAW_SCORE`.
 */
export function calculateBenchmark(workspacePath: string): BenchmarkResult {
  const root = path.resolve(workspacePath);

  // Score each stage folder independently.
  const stages: StageBenchmark[] = getNumberedFolders(root).map((name) => {
    const weight = STAGE_WEIGHTS[name] ?? DEFAULT_WEIGHT;
    const raw = calculateStageRawScore(root, name);
    // Normalize to 0-100 first, then apply the stage weight.
    const weighted = (raw / MAX_RAW_SCORE) * 100 * weight;
    return { name, raw, weight, weighted };
  });

  let weightedSum = 0;
  let weightSum = 0;
  let rawScore = 0;
  for (const stage of stages) {
    weightedSum += stage.weighted;
    weightSum += stage.weight;
    rawScore += stage.raw;
  }

  // An empty workspace has no weights to divide by; report 0 instead of NaN.
  const average = weightSum > 0 ? weightedSum / weightSum : 0;

  const belowMax = stages.filter((stage) => stage.raw < MAX_RAW_SCORE);

  return {
    workspace: path.basename(root),
    agent: 'unknown',
    timestamp: new Date().toISOString(),
    rawScore,
    weightedScore: Math.min(Math.round(average), 100),
    stages,
    fixSuggestions: belowMax.map(
      (stage) => `Improve ${stage.name}: current score ${stage.raw}/${MAX_RAW_SCORE}`,
    ),
    improvementPotential: belowMax.length > 0,
  };
}
|
|
67
|
+
|
|
68
|
+
/**
 * Renders a benchmark report as a fixed-width plain-text table.
 *
 * The output starts with a blank line and a two-line header, followed by one
 * row per stage, a TOTAL row, and — when present — a list of suggestions.
 *
 * @param data - Report to render.
 * @returns The table as a single newline-joined string.
 */
export function formatBenchmarkTable(data: BenchmarkResult): string {
  const divider = '-'.repeat(50);

  // Column widths: stage 20, raw 8, weight 10, weighted 12 (50 in total).
  const row = (stage: string, raw: string, weight: string, weighted: string): string =>
    padRight(stage, 20) + padRight(raw, 8) + padRight(weight, 10) + padRight(weighted, 12);

  const out: string[] = [
    `\nBenchmark Report: ${data.workspace}`,
    `Agent: ${data.agent} | Timestamp: ${data.timestamp}`,
    '',
    row('Stage', 'Raw', 'Weight', 'Weighted'),
    divider,
  ];

  for (const entry of data.stages) {
    out.push(
      row(entry.name, String(entry.raw), entry.weight.toFixed(1) + 'x', entry.weighted.toFixed(1)),
    );
  }

  out.push(divider);
  out.push(row('TOTAL', String(data.rawScore), '', data.weightedScore.toFixed(1)));
  out.push('');

  if (data.fixSuggestions.length > 0) {
    out.push('Suggestions:');
    data.fixSuggestions.forEach((hint) => {
      out.push(` - ${hint}`);
    });
    out.push('');
  }

  return out.join('\n');
}
|
|
110
|
+
|
|
111
|
+
/**
 * Writes a benchmark report as pretty-printed JSON under
 * `<workspacePath>/.workspace-benchmarks/`, creating the directory if needed.
 *
 * The file is named `<workspace>-<timestamp>.json`, with every `:` and `.` in
 * the timestamp replaced by `-`.
 *
 * @param workspacePath - Workspace root that receives the report directory.
 * @param data - Report to serialize.
 * @returns Path of the file that was written.
 */
export function saveBenchmarkReport(workspacePath: string, data: BenchmarkResult): string {
  const benchmarksDir = path.join(workspacePath, '.workspace-benchmarks');
  fs.mkdirSync(benchmarksDir, { recursive: true });

  const stamp = data.timestamp.replace(/[:.]/g, '-');
  const target = path.join(benchmarksDir, `${data.workspace}-${stamp}.json`);

  const serialized = JSON.stringify(data, null, 2);
  fs.writeFileSync(target, serialized, 'utf-8');

  return target;
}
|
|
121
|
+
|
|
122
|
+
/**
 * Computes the raw score of one stage from the text of its `CONTEXT.md`.
 *
 * Points are awarded for each keyword that occurs anywhere in the file
 * (case-insensitive substring match). A missing `CONTEXT.md` scores 0.
 *
 * @param ws - Workspace root directory.
 * @param folder - Name of the stage folder inside `ws`.
 * @returns Score between 0 and `MAX_RAW_SCORE`.
 */
function calculateStageRawScore(ws: string, folder: string): number {
  const stageContextPath = path.join(ws, folder, 'CONTEXT.md');
  if (!fs.existsSync(stageContextPath)) return 0;

  // Lower-case once instead of once per check.
  const content = fs.readFileSync(stageContextPath, 'utf-8').toLowerCase();

  // Keyword -> points. The former `'## x' || 'x'` pairs collapse to the bare
  // keyword: any text containing '## x' necessarily contains 'x'.
  const sectionPoints: ReadonlyArray<readonly [string, number]> = [
    ['purpose', 4],
    ['input', 4],
    ['output', 4],
    ['dependenc', 3], // matches both "dependency" and "dependencies"
    ['success criteria', 5],
    ['approach', 5],
    ['risks', 5],
    ['timeline', 5],
    ['resources', 5],
    ['validation', 5],
  ];

  let score = 0;
  for (const [keyword, points] of sectionPoints) {
    if (content.includes(keyword)) score += points;
  }

  return Math.min(score, MAX_RAW_SCORE);
}
|
|
142
|
+
|
|
143
|
+
/**
 * Lists the stage folders of a workspace: direct sub-directories whose name
 * starts with two digits and a hyphen, excluding `00-meta`.
 *
 * The names are returned sorted, so stage order (and therefore report order)
 * does not depend on the platform-specific order of `readdirSync`.
 *
 * @param workspacePath - Workspace root directory.
 * @returns Sorted stage folder names; empty when the path does not exist.
 */
function getNumberedFolders(workspacePath: string): string[] {
  if (!fs.existsSync(workspacePath)) return [];

  const stagePattern = /^\d{2}-/;
  return fs
    .readdirSync(workspacePath, { withFileTypes: true })
    .filter(
      (entry) => entry.isDirectory() && stagePattern.test(entry.name) && entry.name !== '00-meta',
    )
    .map((entry) => entry.name)
    .sort(); // two-digit prefixes make plain string order equal to stage order
}
|
|
150
|
+
|
|
151
|
+
/**
 * Pads `str` on the right with spaces until it is `length` characters long.
 * Strings that are already at least that long are returned unchanged.
 *
 * @param str - Text to pad.
 * @param length - Target width in characters.
 * @returns The padded text.
 */
function padRight(str: string, length: number): string {
  const padded = str.padEnd(length, ' ');
  return padded;
}
|
|
154
|
+
|
|
155
|
+
// CLI entry point: runs only when this file is executed directly, not when it is imported.
if (require.main === module) {
  const cliArgs = process.argv.slice(2);

  // The workspace path is the token that follows `--workspace`, if the flag is present.
  const flagPos = cliArgs.indexOf('--workspace');
  const workspace: string | undefined = flagPos === -1 ? undefined : cliArgs[flagPos + 1];

  if (!workspace) {
    console.error('Usage: node benchmark.ts --workspace <path>');
    process.exit(1);
  }

  const report = formatBenchmarkTable(calculateBenchmark(workspace));
  console.log(report);
}
|