workspace-maxxing 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/workspace-maxxing/skills/architecture/SKILL.md +95 -0
- package/.agents/skills/workspace-maxxing/skills/fixer/SKILL.md +109 -0
- package/.agents/skills/workspace-maxxing/skills/iteration/SKILL.md +89 -0
- package/.agents/skills/workspace-maxxing/skills/prompt-engineering/SKILL.md +87 -0
- package/.agents/skills/workspace-maxxing/skills/research/SKILL.md +94 -0
- package/.agents/skills/workspace-maxxing/skills/testing/SKILL.md +89 -0
- package/.agents/skills/workspace-maxxing/skills/tooling/SKILL.md +87 -0
- package/.agents/skills/workspace-maxxing/skills/validation/SKILL.md +103 -0
- package/.agents/skills/workspace-maxxing/skills/worker/SKILL.md +79 -0
- package/dist/index.js +43 -9
- package/dist/index.js.map +1 -1
- package/package.json +8 -2
- package/src/index.ts +47 -9
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: architecture
|
|
3
|
+
description: "Designs workspace structure, plans folder layout, and creates the build plan. Use when planning workspace structure, designing folder hierarchy, or after research phase."
|
|
4
|
+
triggers: ["design workspace", "plan structure", "folder layout", "build plan"]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Overview
|
|
8
|
+
|
|
9
|
+
Design the workspace structure based on research findings. Architecture translates research into a concrete, buildable plan.
|
|
10
|
+
|
|
11
|
+
## When to Use
|
|
12
|
+
|
|
13
|
+
- Phase 2 of the hybrid flow (after research)
|
|
14
|
+
- When research is complete and building is next
|
|
15
|
+
- When restructuring an existing workspace
|
|
16
|
+
|
|
17
|
+
## When Not to Use
|
|
18
|
+
|
|
19
|
+
- Before research is complete (use research sub-skill)
|
|
20
|
+
- During building itself (use scaffold.ts directly)
|
|
21
|
+
- For minor structural tweaks (use direct file operations)
|
|
22
|
+
|
|
23
|
+
## The Iron Law
|
|
24
|
+
|
|
25
|
+
NO ARCHITECTURE WITHOUT RESEARCH
|
|
26
|
+
NO BUILDING WITHOUT APPROVED PLAN
|
|
27
|
+
NO SKIPPING USER APPROVAL
|
|
28
|
+
NO AMBIGUOUS STAGE DEFINITIONS
|
|
29
|
+
|
|
30
|
+
## Scope Guardrails
|
|
31
|
+
|
|
32
|
+
- Design only the ICM workspace architecture (folders, routing, context contracts).
|
|
33
|
+
- Plan markdown workflow artifacts per stage; do not design application runtime components.
|
|
34
|
+
- Keep architecture outputs directly consumable by scaffold.ts for file-structured markdown folders.
|
|
35
|
+
- Treat product implementation asks as workflow requirements captured in stage docs.
|
|
36
|
+
|
|
37
|
+
## The Process
|
|
38
|
+
|
|
39
|
+
1. **Review research findings** - Read the research sub-skill report.
|
|
40
|
+
2. **Define stage folders** - Determine numbered folder structure (01-xxx, 02-xxx, and so on) for workflow execution.
|
|
41
|
+
3. **Design routing table** - Plan CONTEXT.md routing for each stage.
|
|
42
|
+
4. **Define SYSTEM.md** - Plan the folder map, rules, and tool inventory.
|
|
43
|
+
5. **Plan CONTEXT.md content** - Define what each stage CONTEXT.md must contain and which markdown artifacts it must produce.
|
|
44
|
+
6. **Create build plan** - Document the scaffold.ts command with all parameters.
|
|
45
|
+
7. **Get approval** - Present the plan to the user before building.
|
|
46
|
+
|
|
47
|
+
## Red Flags
|
|
48
|
+
|
|
49
|
+
- Stage folders do not follow sequential numbering
|
|
50
|
+
- Routing table does not reference all stages
|
|
51
|
+
- SYSTEM.md plan is missing or incomplete
|
|
52
|
+
- Build plan is missing scaffold.ts parameters
|
|
53
|
+
- User approval is skipped before build
|
|
54
|
+
- Plan includes backend/frontend/database implementation details instead of workspace structure
|
|
55
|
+
|
|
56
|
+
## Anti-Rationalization Table
|
|
57
|
+
|
|
58
|
+
| Thought | Reality |
|
|
59
|
+
|---------|---------|
|
|
60
|
+
| "I will adjust the structure while building" | Structure changes mid-build are expensive. Plan first. |
|
|
61
|
+
| "This stage name is good enough" | Stage names affect routing. Be precise. |
|
|
62
|
+
| "The user will understand without approval" | Unapproved plans produce unwanted results. Always present the plan. |
|
|
63
|
+
|
|
64
|
+
## Sub-Skill Dispatch
|
|
65
|
+
|
|
66
|
+
- `status = passed` (plan approved) -> `nextSkill = none` and main workflow runs scaffold.ts.
|
|
67
|
+
- `status = failed` (plan incomplete or not approved) -> `nextSkill = none`.
|
|
68
|
+
- `status = escalated` (blocking uncertainty) -> `nextSkill = none`.
|
|
69
|
+
|
|
70
|
+
## Report Format
|
|
71
|
+
|
|
72
|
+
```json
|
|
73
|
+
{
|
|
74
|
+
"skill": "architecture",
|
|
75
|
+
"status": "passed",
|
|
76
|
+
"timestamp": "2026-04-08T00:00:00Z",
|
|
77
|
+
"findings": ["Defined four sequential stages with explicit routing"],
|
|
78
|
+
"recommendations": ["Run scaffold.ts using the approved stage list"],
|
|
79
|
+
"metrics": {
|
|
80
|
+
"stagesPlanned": 4,
|
|
81
|
+
"toolsIdentified": 2
|
|
82
|
+
},
|
|
83
|
+
"nextSkill": "none"
|
|
84
|
+
}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Allowed `status` values: `passed`, `failed`, `escalated`.
|
|
88
|
+
|
|
89
|
+
Allowed `nextSkill` values: `none`.
|
|
90
|
+
|
|
91
|
+
## Integration
|
|
92
|
+
|
|
93
|
+
- Uses research output as architecture input.
|
|
94
|
+
- Produces the approved plan consumed by scaffold.ts.
|
|
95
|
+
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: fixer
|
|
3
|
+
description: "Applies targeted fixes to failing test case outputs. Use when fixing failed worker outputs, improving low-scoring results, or addressing validator findings."
|
|
4
|
+
triggers: ["fix failing test", "improve output", "address validation failure", "apply targeted fix"]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Overview
|
|
8
|
+
|
|
9
|
+
Read validator findings and original worker output, identify the root cause of failure, apply the minimal fix needed, and prepare the case for re-validation. Each fixer runs with fresh context.
|
|
10
|
+
|
|
11
|
+
## When to Use
|
|
12
|
+
|
|
13
|
+
- Dispatched by orchestrator in a fix loop
|
|
14
|
+
- Validator identifies specific failures for a test case
|
|
15
|
+
- Worker output is incomplete, incorrect, or misaligned with expectations
|
|
16
|
+
|
|
17
|
+
## When Not to Use
|
|
18
|
+
|
|
19
|
+
- Generating new output from scratch (use worker sub-skill)
|
|
20
|
+
- Validating outputs (use validation sub-skill)
|
|
21
|
+
- Redesigning workspace structure (use architecture sub-skill)
|
|
22
|
+
|
|
23
|
+
## The Iron Law
|
|
24
|
+
|
|
25
|
+
NO BLIND RETRIES
|
|
26
|
+
NO COSMETIC FIXES
|
|
27
|
+
NO FIXING WHAT IS NOT BROKEN
|
|
28
|
+
NO CLAIMING FIX WITHOUT RE-VALIDATION
|
|
29
|
+
|
|
30
|
+
## The Process
|
|
31
|
+
|
|
32
|
+
1. **Read validator findings** - Load `batch-report.json` from the batch directory
|
|
33
|
+
2. **Read original output** - Load `output.md` and `report.json` from `.agents/iteration/batch-<N>/<testCaseId>/`
|
|
34
|
+
3. **Identify root cause** - Map each finding to a specific defect in the output
|
|
35
|
+
4. **Apply minimal fix** - Change only what is needed to resolve each finding
|
|
36
|
+
5. **Update output.md** - Write the fixed output for the same test case
|
|
37
|
+
6. **Write report.json** - Structured JSON with `{skill, status, timestamp, testCaseId, batchId, findings, fixesApplied, recommendations, metrics, nextSkill}`
|
|
38
|
+
7. **Dispatch validation** - Signal that the fix is ready for re-validation
|
|
39
|
+
|
|
40
|
+
## External Runner Contract
|
|
41
|
+
|
|
42
|
+
- Fixer execution in autonomous iteration is orchestrated via `--subagent-runner`.
|
|
43
|
+
- Direct fixer dispatch must provide `--runner-command` with placeholders `{skill}`, `{workspace}`, `{batchId}`, `{testCaseId}`.
|
|
44
|
+
- Runner output must be JSON and include `report.json`-compatible fields:
|
|
45
|
+
  - `skill`, `status`, `timestamp`, `findings`, `recommendations`, `metrics`, `nextSkill`
|
|
46
|
+
- Missing/invalid runner output is a failure, not a simulated success path.
|
|
47
|
+
- Use `.agents/iteration/runs/*.json` telemetry to debug command rendering and runner payload issues.
|
|
48
|
+
|
|
49
|
+
## Agent-Driven Test-Case Generation (Required)
|
|
50
|
+
|
|
51
|
+
- **Agent ownership:** Test cases MUST be discovered and authored by an agent using the workspace context. Do not rely on hardcoded, script-generated test cases.
|
|
52
|
+
- **Where to write:** The agent must write the test-case bundle to `.agents/iteration/test-cases.json` inside the workspace before the orchestrator or validator dispatches workers. Validators and orchestrator flows will treat this file as the authoritative source of inputs for the iteration.
|
|
53
|
+
- **Discovery guidance:** Agents should inspect repo files (SYSTEM.md, CONTEXT.md, stage-level CONTEXT.md files, user prompts, examples, and any domain files) to infer realistic inputs, edge cases, and acceptance criteria. Test cases should reflect actual workspace intent and cover positive, negative, and boundary cases.
|
|
54
|
+
- **Schema (minimal):** The file must be valid JSON and an array of objects with the following fields:
|
|
55
|
+
|
|
56
|
+
```json
|
|
57
|
+
[
|
|
58
|
+
{
|
|
59
|
+
"id": "tc-001",
|
|
60
|
+
"title": "Short descriptive title",
|
|
61
|
+
"input": {"type": "text", "payload": "..."},
|
|
62
|
+
"expected": {"criteria": ["..."], "matcher": "contains|equals|schema"},
|
|
63
|
+
"metadata": {"priority": "high|medium|low", "sourceHints": ["SYSTEM.md"]}
|
|
64
|
+
}
|
|
65
|
+
]
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
- **Idempotence:** Agents may re-generate or refine the file across iterations, but each write must be complete (no partial artifacts), and every update must record a fresh timestamp inside the JSON.
|
|
69
|
+
- **Signal readiness:** After creating `.agents/iteration/test-cases.json`, the agent should also write a single-line marker file `.agents/iteration/.test-cases-ready` to avoid race conditions with orchestrators reading stdout.
|
|
70
|
+
|
|
71
|
+
## Enforcement Notes
|
|
72
|
+
|
|
73
|
+
- **Validator contract:** The validation step is expected to check for `.agents/iteration/test-cases.json` when running in agent-driven mode and fail fast if missing or malformed. This ensures the orchestrator cannot fall back to hardcoded script cases.
|
|
74
|
+
- **Orchestrator behavior:** When agent-driven mode is enabled, the orchestrator must prefer `.agents/iteration/test-cases.json` and should not call or rely on any built-in `generate-tests` script to produce authoritative inputs.
|
|
75
|
+
- **Audit trail:** Agents should include `generatedBy` and `timestamp` fields in the test-cases file to aid debugging and reproducibility.
|
|
76
|
+
|
|
77
|
+
## Anti-Rationalization Table
|
|
78
|
+
|
|
79
|
+
| Thought | Reality |
|
|
80
|
+
|---------|---------|
|
|
81
|
+
| "I will just re-run the worker logic" | Blind retries do not fix root causes. Address the findings directly. |
|
|
82
|
+
| "This looks better now" | Better is subjective. The fix must satisfy the validator criteria. |
|
|
83
|
+
| "I will fix other things while I am here" | Scope creep adds risk. Fix only what was flagged. |
|
|
84
|
+
| "The fix is obvious" | Obvious assumptions cause regressions. Tie every change to a finding. |
|
|
85
|
+
| "I do not need to re-validate" | Unvalidated fixes are guesses. Always dispatch validation. |
|
|
86
|
+
|
|
87
|
+
## Sub-Skill Dispatch
|
|
88
|
+
|
|
89
|
+
- After fix applied -> validation sub-skill
|
|
90
|
+
|
|
91
|
+
## Report Format
|
|
92
|
+
|
|
93
|
+
```json
|
|
94
|
+
{
|
|
95
|
+
"skill": "fixer",
|
|
96
|
+
"status": "passed|failed|escalated",
|
|
97
|
+
"timestamp": "2026-04-08T00:00:00Z",
|
|
98
|
+
"testCaseId": "tc-001",
|
|
99
|
+
"batchId": 1,
|
|
100
|
+
"findings": ["Missing expected acceptance criteria section"],
|
|
101
|
+
"fixesApplied": ["Added acceptance criteria section from validator recommendation"],
|
|
102
|
+
"recommendations": ["Run validation to confirm all findings are cleared"],
|
|
103
|
+
"metrics": {
|
|
104
|
+
"findingsAddressed": 1,
|
|
105
|
+
"fixesApplied": 1
|
|
106
|
+
},
|
|
107
|
+
"nextSkill": "validation"
|
|
108
|
+
}
|
|
109
|
+
```
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: iteration
|
|
3
|
+
description: "Runs autonomous improvement loops with benchmark scoring. Use when the score has plateaued, when deeper fixes are needed, or after testing identifies failure patterns."
|
|
4
|
+
triggers: ["run improvement loop", "iterate on workspace", "deeper fixes", "score plateau"]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Overview
|
|
8
|
+
|
|
9
|
+
Execute improvement loops until quality thresholds are met. Iteration applies systematic fixes when first-pass prompt improvements are not enough.
|
|
10
|
+
|
|
11
|
+
## When to Use
|
|
12
|
+
|
|
13
|
+
- Score has plateaued across runs
|
|
14
|
+
- Testing finds repeated failure patterns
|
|
15
|
+
- Validation failures persist after prompt-engineering
|
|
16
|
+
- The condition-driven improvement loop requires deeper fixes
|
|
17
|
+
- Latest benchmark score is strictly between 80 and 85 (`80 < score < 85`)
|
|
18
|
+
- Score is 80 or lower (`score <= 80`) and prompt-engineering has stopped improving it
|
|
19
|
+
|
|
20
|
+
## When Not to Use
|
|
21
|
+
|
|
22
|
+
- For first-pass improvements (use prompt-engineering first)
|
|
23
|
+
- When workspace is new and untested (use testing first)
|
|
24
|
+
- When structural redesign is needed (use architecture)
|
|
25
|
+
|
|
26
|
+
## The Iron Law
|
|
27
|
+
|
|
28
|
+
NO CLAIMING IMPROVEMENT WITHOUT RE-RUNNING BENCHMARK
|
|
29
|
+
NO SKIPPING FIX SUGGESTIONS
|
|
30
|
+
NO INFINITE ITERATION LOOPS
|
|
31
|
+
NO SKIPPING ESCALATION WHEN STUCK
|
|
32
|
+
|
|
33
|
+
## The Process
|
|
34
|
+
|
|
35
|
+
1. **Run iterate.ts** - Execute `node scripts/iterate.ts --workspace <path> --max-retries 3`.
|
|
36
|
+
2. **Read benchmark results** - Parse score, fixSuggestions, and improvementPotential.
|
|
37
|
+
3. **Identify improvement areas** - Prioritize changes with highest impact.
|
|
38
|
+
4. **Apply fixes** - Address each suggestion systematically.
|
|
39
|
+
5. **Re-run iteration** - Verify score movement.
|
|
40
|
+
6. **Repeat until threshold** - Continue until score is 85 or higher (`score >= 85`) or no improvement is possible.
|
|
41
|
+
7. **Escalate if stuck** - If score remains below 85 after 3 attempts, escalate.
|
|
42
|
+
|
|
43
|
+
## Red Flags
|
|
44
|
+
|
|
45
|
+
- Improvement is claimed without fresh benchmark evidence
|
|
46
|
+
- Fix suggestions are ignored
|
|
47
|
+
- Loop runs beyond max retries
|
|
48
|
+
- Escalation is skipped despite stalled score
|
|
49
|
+
|
|
50
|
+
## Anti-Rationalization Table
|
|
51
|
+
|
|
52
|
+
| Thought | Reality |
|
|
53
|
+
|---------|---------|
|
|
54
|
+
| "I will just run it again" | Re-running without fixes wastes cycles. |
|
|
55
|
+
| "The score improved by one point" | Marginal gains are not enough. Target is 85 or higher. |
|
|
56
|
+
| "I will keep iterating until it works" | Maximum 3 attempts, then escalate. |
|
|
57
|
+
|
|
58
|
+
## Sub-Skill Dispatch
|
|
59
|
+
|
|
60
|
+
- `status = passed` (`score >= 85`) -> `nextSkill = none`.
|
|
61
|
+
- `status = failed` (`score < 85` after max retries) -> `nextSkill = none` and require human follow-up.
|
|
62
|
+
- `status = escalated` (critical blocker prevents safe continuation) -> `nextSkill = none`.
|
|
63
|
+
|
|
64
|
+
## Report Format
|
|
65
|
+
|
|
66
|
+
```json
|
|
67
|
+
{
|
|
68
|
+
"skill": "iteration",
|
|
69
|
+
"status": "passed",
|
|
70
|
+
"timestamp": "2026-04-08T00:00:00Z",
|
|
71
|
+
"findings": ["Resolved two repeated edge-case failures"],
|
|
72
|
+
"recommendations": ["Run final validation and testing before delivery"],
|
|
73
|
+
"metrics": {
|
|
74
|
+
"scoreBefore": 81,
|
|
75
|
+
"scoreAfter": 88,
|
|
76
|
+
"iterationsRun": 2
|
|
77
|
+
},
|
|
78
|
+
"nextSkill": "none"
|
|
79
|
+
}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Allowed `status` values: `passed`, `failed`, `escalated`.
|
|
83
|
+
|
|
84
|
+
Allowed `nextSkill` values: `none`.
|
|
85
|
+
|
|
86
|
+
## Integration
|
|
87
|
+
|
|
88
|
+
- Works after testing or prompt-engineering when quality is stuck.
|
|
89
|
+
- Hands final results back to validation and completion checks.
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: prompt-engineering
|
|
3
|
+
description: "Improves CONTEXT.md and SYSTEM.md prompts for better agent behavior. Use when workspace score is 80 or lower, prompts need improvement, or after validation identifies content gaps."
|
|
4
|
+
triggers: ["improve prompts", "fix content gaps", "optimize prompts", "clarify instructions"]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Overview
|
|
8
|
+
|
|
9
|
+
Optimize workspace prompts for clarity, completeness, and agent guidance. Prompt engineering resolves content-level quality issues without structural redesign.
|
|
10
|
+
|
|
11
|
+
## When to Use
|
|
12
|
+
|
|
13
|
+
- Score is 80 or lower in benchmark results (`score <= 80`)
|
|
14
|
+
- Validation identifies missing or weak content
|
|
15
|
+
- Prompts are vague or incomplete
|
|
16
|
+
- Agent behavior does not match expectations
|
|
17
|
+
|
|
18
|
+
## When Not to Use
|
|
19
|
+
|
|
20
|
+
- For structural issues (use architecture or fixer)
|
|
21
|
+
- When workspace has no content yet (use worker)
|
|
22
|
+
- For dependency installation (use tooling)
|
|
23
|
+
|
|
24
|
+
## The Iron Law
|
|
25
|
+
|
|
26
|
+
NO COSMETIC CHANGES WITHOUT FUNCTIONAL IMPROVEMENT
|
|
27
|
+
NO CHANGING PROMPTS WITHOUT RE-VALIDATING
|
|
28
|
+
NO REMOVING CONTENT WITHOUT REPLACEMENT
|
|
29
|
+
NO CLAIMING IMPROVEMENT WITHOUT SCORE CHECK
|
|
30
|
+
|
|
31
|
+
## The Process
|
|
32
|
+
|
|
33
|
+
1. **Identify weak prompts** - Read benchmark findings and validation failures.
|
|
34
|
+
2. **Analyze current prompts** - Identify what is missing, vague, or contradictory.
|
|
35
|
+
3. **Apply prompt patterns** - Use clear structure, examples, constraints, and output format guidance.
|
|
36
|
+
4. **Update CONTEXT.md files** - Improve stage-specific instructions.
|
|
37
|
+
5. **Update SYSTEM.md if needed** - Improve folder map, rules, and tool inventory guidance.
|
|
38
|
+
6. **Re-run validation** - Verify improvements did not break compliance.
|
|
39
|
+
7. **Re-run benchmark** - Confirm score movement.
|
|
40
|
+
|
|
41
|
+
## Red Flags
|
|
42
|
+
|
|
43
|
+
- Cosmetic wording changes with no measurable improvement
|
|
44
|
+
- Prompt edits made without re-validation
|
|
45
|
+
- Content removed without replacement
|
|
46
|
+
- No before/after score comparison
|
|
47
|
+
|
|
48
|
+
## Anti-Rationalization Table
|
|
49
|
+
|
|
50
|
+
| Thought | Reality |
|
|
51
|
+
|---------|---------|
|
|
52
|
+
| "This wording change is enough" | Wording changes must produce functional improvement. |
|
|
53
|
+
| "I will remove vague sections" | Removing sections creates gaps. Improve, do not delete. |
|
|
54
|
+
| "The score did not change, but it is better" | No score change means no proven improvement. Iterate again. |
|
|
55
|
+
|
|
56
|
+
## Sub-Skill Dispatch
|
|
57
|
+
|
|
58
|
+
- `status = passed` (`scoreAfter > 80`) -> `nextSkill = testing`.
|
|
59
|
+
- `status = failed` (`scoreAfter <= 80` or no measurable improvement) -> `nextSkill = iteration`.
|
|
60
|
+
- `status = escalated` (requirements conflict or critical blocker) -> `nextSkill = none`.
|
|
61
|
+
|
|
62
|
+
## Report Format
|
|
63
|
+
|
|
64
|
+
```json
|
|
65
|
+
{
|
|
66
|
+
"skill": "prompt-engineering",
|
|
67
|
+
"status": "passed",
|
|
68
|
+
"timestamp": "2026-04-08T00:00:00Z",
|
|
69
|
+
"findings": ["Clarified output constraints in two stage prompts"],
|
|
70
|
+
"recommendations": ["Run testing to verify edge-case behavior"],
|
|
71
|
+
"metrics": {
|
|
72
|
+
"scoreBefore": 74,
|
|
73
|
+
"scoreAfter": 83,
|
|
74
|
+
"promptsUpdated": 3
|
|
75
|
+
},
|
|
76
|
+
"nextSkill": "testing"
|
|
77
|
+
}
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Allowed `status` values: `passed`, `failed`, `escalated`.
|
|
81
|
+
|
|
82
|
+
Allowed `nextSkill` values: `testing`, `iteration`, `none`.
|
|
83
|
+
|
|
84
|
+
## Integration
|
|
85
|
+
|
|
86
|
+
- Consumes findings from validation and benchmark.
|
|
87
|
+
- Produces higher-quality prompt content for testing and iteration.
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: research
|
|
3
|
+
description: "Investigates patterns, gathers context, and identifies best practices for workspace design. Use when starting a new workspace, researching workflow patterns, or before architecture planning."
|
|
4
|
+
triggers: ["research workflow", "gather context", "identify patterns", "best practices"]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Overview
|
|
8
|
+
|
|
9
|
+
Gather context and identify patterns before building. Research ensures the workspace design is informed by real requirements, not assumptions.
|
|
10
|
+
|
|
11
|
+
## When to Use
|
|
12
|
+
|
|
13
|
+
- Phase 1 of the hybrid flow (always first)
|
|
14
|
+
- Before architecture planning
|
|
15
|
+
- When the user asks for a novel workflow type
|
|
16
|
+
- When existing patterns do not fit the use case
|
|
17
|
+
|
|
18
|
+
## When Not to Use
|
|
19
|
+
|
|
20
|
+
- After architecture is already planned (use architecture sub-skill)
|
|
21
|
+
- When workspace structure already exists (use validation sub-skill)
|
|
22
|
+
- For simple file creation (use direct file operations)
|
|
23
|
+
|
|
24
|
+
## The Iron Law
|
|
25
|
+
|
|
26
|
+
NO BUILD WITHOUT RESEARCH
|
|
27
|
+
NO GENERIC FINDINGS
|
|
28
|
+
NO SKIPPING INPUT/OUTPUT ANALYSIS
|
|
29
|
+
NO ASSUMPTIONS WITHOUT EVIDENCE
|
|
30
|
+
|
|
31
|
+
## Scope Guardrails
|
|
32
|
+
|
|
33
|
+
- Research the workflow needed to produce outcomes, not the product implementation stack.
|
|
34
|
+
- Convert domain asks (for example, "football predictor") into stageable workflow responsibilities.
|
|
35
|
+
- Keep research outputs markdown-first and suitable for numbered folder CONTEXT contracts.
|
|
36
|
+
- Do not produce backend/frontend/data-model implementation plans in this phase.
|
|
37
|
+
|
|
38
|
+
## The Process
|
|
39
|
+
|
|
40
|
+
1. **Identify workflow type** - Determine what process is being automated as a file-structured markdown workflow.
|
|
41
|
+
2. **Research similar patterns** - Review existing workspaces, docs, and best practices.
|
|
42
|
+
3. **Identify key stages** - Define the natural workflow phases.
|
|
43
|
+
4. **Determine inputs and outputs** - Capture what goes in and what markdown artifacts come out at each stage.
|
|
44
|
+
5. **Identify tooling needs** - List tools commonly needed for this workflow.
|
|
45
|
+
6. **Document findings** - Create a concise research summary for architecture.
|
|
46
|
+
|
|
47
|
+
## Red Flags
|
|
48
|
+
|
|
49
|
+
- Research is too generic and not tied to the requested workflow
|
|
50
|
+
- Input and output analysis is missing
|
|
51
|
+
- Tooling assessment is missing
|
|
52
|
+
- Architecture starts before research findings are complete
|
|
53
|
+
- Findings drift into app architecture, model design, or runtime repository scaffolding
|
|
54
|
+
|
|
55
|
+
## Anti-Rationalization Table
|
|
56
|
+
|
|
57
|
+
| Thought | Reality |
|
|
58
|
+
|---------|---------|
|
|
59
|
+
| "I already know this workflow type" | Knowledge is not research. Document findings for the next agent. |
|
|
60
|
+
| "Research is taking too long" | Research prevents wasted build time. Be thorough. |
|
|
61
|
+
| "I will figure it out while building" | Building without research produces generic, non-optimal workspaces. |
|
|
62
|
+
| "The user will clarify later" | Ask now. Ambiguous requirements produce ambiguous workspaces. |
|
|
63
|
+
|
|
64
|
+
## Sub-Skill Dispatch
|
|
65
|
+
|
|
66
|
+
- `status = passed` -> `nextSkill = architecture`.
|
|
67
|
+
- `status = failed` (research incomplete but recoverable) -> `nextSkill = none` and request missing inputs before rerun.
|
|
68
|
+
- `status = escalated` (blocking ambiguity or conflicting constraints) -> `nextSkill = none`.
|
|
69
|
+
|
|
70
|
+
## Report Format
|
|
71
|
+
|
|
72
|
+
```json
|
|
73
|
+
{
|
|
74
|
+
"skill": "research",
|
|
75
|
+
"status": "passed",
|
|
76
|
+
"timestamp": "2026-04-08T00:00:00Z",
|
|
77
|
+
"findings": ["Identified three reusable workflow stage patterns"],
|
|
78
|
+
"recommendations": ["Use a three-stage layout with explicit input/output boundaries"],
|
|
79
|
+
"metrics": {
|
|
80
|
+
"patternsIdentified": 3,
|
|
81
|
+
"stagesIdentified": 3
|
|
82
|
+
},
|
|
83
|
+
"nextSkill": "architecture"
|
|
84
|
+
}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Allowed `status` values: `passed`, `failed`, `escalated`.
|
|
88
|
+
|
|
89
|
+
Allowed `nextSkill` values: `architecture`, `none`.
|
|
90
|
+
|
|
91
|
+
## Integration
|
|
92
|
+
|
|
93
|
+
- Feeds architecture with concrete findings and stage proposals.
|
|
94
|
+
- Reduces rework by grounding structure decisions in evidence.
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: testing
|
|
3
|
+
description: "Generates and runs test cases, evaluates results, and identifies gaps. Use when testing workspace quality, generating test cases, or after prompt improvements."
|
|
4
|
+
triggers: ["generate test cases", "run tests", "test workspace", "evaluate quality"]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Overview
|
|
8
|
+
|
|
9
|
+
Verify workspace quality through systematic testing. Testing confirms outputs across sample, edge-case, and empty-input scenarios.
|
|
10
|
+
|
|
11
|
+
## When to Use
|
|
12
|
+
|
|
13
|
+
- After prompt-engineering improvements
|
|
14
|
+
- When no tests exist for the workspace
|
|
15
|
+
- Before claiming delivery
|
|
16
|
+
- When benchmark score is strictly between 80 and 85 (`80 < score < 85`)
|
|
17
|
+
- When score is 85 or higher and final evidence is still required
|
|
18
|
+
|
|
19
|
+
## When Not to Use
|
|
20
|
+
|
|
21
|
+
- Before workspace build is complete (run scaffold.ts first)
|
|
22
|
+
- For structural validation (use validation sub-skill)
|
|
23
|
+
- When applying direct fixes to failures (use fixer sub-skill)
|
|
24
|
+
- When benchmark score is 80 or lower (`score <= 80`) (use prompt-engineering first)
|
|
25
|
+
|
|
26
|
+
## The Iron Law
|
|
27
|
+
|
|
28
|
+
NO SKIPPING TEST GENERATION
|
|
29
|
+
NO IGNORING FAILED TESTS
|
|
30
|
+
NO CLAIMING QUALITY WITHOUT EVIDENCE
|
|
31
|
+
NO TESTING WITHOUT TEST CASES
|
|
32
|
+
|
|
33
|
+
## The Process
|
|
34
|
+
|
|
35
|
+
1. **Generate test cases** - Run `node scripts/generate-tests.ts --workspace <path> --output ./tests.json`.
|
|
36
|
+
2. **Read test cases** - Parse generated test cases and expected outcomes.
|
|
37
|
+
3. **Run generation tests** - Produce sample content each stage should output.
|
|
38
|
+
4. **Run evaluation tests** - Review CONTEXT.md files against expected behavior.
|
|
39
|
+
5. **Aggregate results** - Identify recurring patterns and quality gaps.
|
|
40
|
+
6. **Document findings** - Create a pass/fail report per test case.
|
|
41
|
+
|
|
42
|
+
## Red Flags
|
|
43
|
+
|
|
44
|
+
- Test generation is skipped
|
|
45
|
+
- Generation tests run without evaluation tests
|
|
46
|
+
- Failed test cases are ignored
|
|
47
|
+
- Failure patterns are undocumented
|
|
48
|
+
|
|
49
|
+
## Anti-Rationalization Table
|
|
50
|
+
|
|
51
|
+
| Thought | Reality |
|
|
52
|
+
|---------|---------|
|
|
53
|
+
| "The workspace looks fine, no need to test" | Looks can deceive. Tests reveal behavior. |
|
|
54
|
+
| "One failed test is a fluke" | Failed tests are signals. Investigate each one. |
|
|
55
|
+
| "I will test after delivery" | Untested delivery is a gamble. Test first. |
|
|
56
|
+
|
|
57
|
+
## Sub-Skill Dispatch
|
|
58
|
+
|
|
59
|
+
- `status = passed` (all required tests pass and `benchmarkScore >= 85`) -> `nextSkill = none`.
|
|
60
|
+
- `status = failed` (any required test fails or `benchmarkScore < 85`) -> `nextSkill = iteration`.
|
|
61
|
+
- `status = escalated` (testing cannot run reliably due to blockers) -> `nextSkill = none`.
|
|
62
|
+
|
|
63
|
+
## Report Format
|
|
64
|
+
|
|
65
|
+
```json
|
|
66
|
+
{
|
|
67
|
+
"skill": "testing",
|
|
68
|
+
"status": "failed",
|
|
69
|
+
"timestamp": "2026-04-08T00:00:00Z",
|
|
70
|
+
"findings": ["Two edge-case outputs failed acceptance checks"],
|
|
71
|
+
"recommendations": ["Run iteration to address repeated edge-case defects"],
|
|
72
|
+
"metrics": {
|
|
73
|
+
"benchmarkScore": 82,
|
|
74
|
+
"testCasesGenerated": 9,
|
|
75
|
+
"testCasesPassed": 7,
|
|
76
|
+
"testCasesFailed": 2
|
|
77
|
+
},
|
|
78
|
+
"nextSkill": "iteration"
|
|
79
|
+
}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Allowed `status` values: `passed`, `failed`, `escalated`.
|
|
83
|
+
|
|
84
|
+
Allowed `nextSkill` values: `iteration`, `none`.
|
|
85
|
+
|
|
86
|
+
## Integration
|
|
87
|
+
|
|
88
|
+
- Uses generate-tests.ts output as primary test input.
|
|
89
|
+
- Supplies pass/fail evidence for iteration and final verification.
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: tooling
|
|
3
|
+
description: "Assesses, installs, and configures tools for the workspace. Use when tools are missing, tool inventory needs updating, or workspace requires specific dependencies."
|
|
4
|
+
triggers: ["install tools", "assess tooling", "update tool inventory", "configure dependencies"]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Overview
|
|
8
|
+
|
|
9
|
+
Ensure the workspace has the right tools installed and configured. Tooling manages the dependency layer of the workspace.
|
|
10
|
+
|
|
11
|
+
## When to Use
|
|
12
|
+
|
|
13
|
+
- Tool inventory is empty or incomplete
|
|
14
|
+
- Workspace requires specific dependencies
|
|
15
|
+
- Architecture identifies missing tooling needs
|
|
16
|
+
- User requests specific tool installation
|
|
17
|
+
|
|
18
|
+
## When Not to Use
|
|
19
|
+
|
|
20
|
+
- For non-tool structural changes (use architecture)
|
|
21
|
+
- For content quality improvements (use prompt-engineering)
|
|
22
|
+
- When no additional tools are needed
|
|
23
|
+
|
|
24
|
+
## The Iron Law
|
|
25
|
+
|
|
26
|
+
NO INSTALLING TOOLS WITHOUT USER APPROVAL
|
|
27
|
+
NO SKIPPING TOOL INVENTORY UPDATES
|
|
28
|
+
NO INSTALLING UNNECESSARY TOOLS
|
|
29
|
+
NO SKIPPING VERIFICATION AFTER INSTALLATION
|
|
30
|
+
|
|
31
|
+
## The Process
|
|
32
|
+
|
|
33
|
+
1. **Scan current tools** - Read SYSTEM.md tool inventory.
|
|
34
|
+
2. **Identify missing tools** - Compare requirements against installed tools.
|
|
35
|
+
3. **Propose tools** - Provide recommended tools with justification.
|
|
36
|
+
4. **Get approval** - Present the tool plan before installing.
|
|
37
|
+
5. **Install tools** - Run `node scripts/install-tool.ts --tool <name> --manager <mgr> --workspace <path>`.
|
|
38
|
+
6. **Update inventory** - Confirm SYSTEM.md or inventory section is updated.
|
|
39
|
+
7. **Verify installation** - Confirm each installed tool is accessible.
|
|
40
|
+
|
|
41
|
+
## Red Flags
|
|
42
|
+
|
|
43
|
+
- Tools installed without approval
|
|
44
|
+
- Inventory not updated after install
|
|
45
|
+
- Unnecessary tools installed
|
|
46
|
+
- Installation not verified
|
|
47
|
+
|
|
48
|
+
## Anti-Rationalization Table
|
|
49
|
+
|
|
50
|
+
| Thought | Reality |
|
|
51
|
+
|---------|---------|
|
|
52
|
+
| "This tool might be useful" | "Might" is not enough. Every tool needs explicit justification. |
|
|
53
|
+
| "I will install now and tell the user later" | Approval must come before installation. |
|
|
54
|
+
| "The install probably worked" | Probably is not verified. Validate each install. |
|
|
55
|
+
|
|
56
|
+
## Sub-Skill Dispatch
|
|
57
|
+
|
|
58
|
+
- `status = passed` (approved tooling installed and verified) -> `nextSkill = none`.
|
|
59
|
+
- `status = failed` (installation incomplete or verification failed) -> `nextSkill = none`.
|
|
60
|
+
- `status = escalated` (blocked by permissions, policy, or unresolved conflicts) -> `nextSkill = none`.
|
|
61
|
+
|
|
62
|
+
## Report Format
|
|
63
|
+
|
|
64
|
+
```json
|
|
65
|
+
{
|
|
66
|
+
"skill": "tooling",
|
|
67
|
+
"status": "passed",
|
|
68
|
+
"timestamp": "2026-04-08T00:00:00Z",
|
|
69
|
+
"findings": ["Installed two approved dependencies"],
|
|
70
|
+
"recommendations": ["Run validation to confirm inventory consistency"],
|
|
71
|
+
"metrics": {
|
|
72
|
+
"toolsInstalled": 2,
|
|
73
|
+
"toolsProposed": 2,
|
|
74
|
+
"toolsFailed": 0
|
|
75
|
+
},
|
|
76
|
+
"nextSkill": "none"
|
|
77
|
+
}
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Allowed `status` values: `passed`, `failed`, `escalated`.
|
|
81
|
+
|
|
82
|
+
Allowed `nextSkill` values: `none`.
|
|
83
|
+
|
|
84
|
+
## Integration
|
|
85
|
+
|
|
86
|
+
- Consumes architecture and requirement signals to propose tools.
|
|
87
|
+
- Produces verified dependency state for downstream validation.
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: validation
|
|
3
|
+
description: "Checks workspace ICM compliance and benchmarks batch outputs. Use when validating a workspace, checking compliance, running validation, benchmarking batch results, or after making changes to workspace structure."
|
|
4
|
+
triggers: ["validate batch", "check results", "run validation", "benchmark outputs", "check compliance"]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Overview
|
|
8
|
+
|
|
9
|
+
Ensure the workspace meets ICM standards, and benchmark batch outputs, through systematic validation. Validate both the workspace structure and the worker or fixer outputs before making any completion claim.
|
|
10
|
+
|
|
11
|
+
## When to Use
|
|
12
|
+
|
|
13
|
+
- After workspace scaffolding
|
|
14
|
+
- After any structural change
|
|
15
|
+
- After worker batch completes
|
|
16
|
+
- After fixer applies fixes
|
|
17
|
+
- Before claiming delivery
|
|
18
|
+
- When score drops below threshold
|
|
19
|
+
|
|
20
|
+
## When Not to Use
|
|
21
|
+
|
|
22
|
+
- Generating outputs (use worker sub-skill)
|
|
23
|
+
- Fixing failures (use fixer sub-skill)
|
|
24
|
+
- Researching patterns (use research sub-skill)
|
|
25
|
+
|
|
26
|
+
## The Iron Law
|
|
27
|
+
|
|
28
|
+
NO SCORE INFLATION
|
|
29
|
+
NO SKIPPING FAILURES
|
|
30
|
+
NO VALIDATING WITHOUT BENCHMARK
|
|
31
|
+
NO PASSING WITHOUT EVIDENCE
|
|
32
|
+
|
|
33
|
+
## The Process
|
|
34
|
+
|
|
35
|
+
1. **Run validate.ts** - Execute `node scripts/validate.ts --workspace <path>`
|
|
36
|
+
2. **Parse validation results** - Read exit code and output; collect structural findings
|
|
37
|
+
3. **Check batch outputs** - For each test case in `.agents/iteration/batch-<N>/`, verify `output.md` and `report.json` exist
|
|
38
|
+
4. **Run benchmark.ts** - Execute `node scripts/benchmark.ts --workspace <path>` to compute benchmark scoring
|
|
39
|
+
5. **Aggregate scores** - Combine structural validation score and benchmark score into a single batch score
|
|
40
|
+
6. **Generate findings** - List failures with concrete fix suggestions mapped to each failing test case
|
|
41
|
+
7. **Write batch-report.json** - Structured JSON with `{skill, status, timestamp, batchId, findings, fixSuggestions, recommendations, metrics, nextSkill}` where `nextSkill` is one of `fixer`, `orchestrator`, or `none`
|
|
42
|
+
|
|
43
|
+
## Batch-Level Validation
|
|
44
|
+
|
|
45
|
+
When validating a batch:
|
|
46
|
+
- Read all `report.json` files in `.agents/iteration/batch-<N>/`
|
|
47
|
+
- Verify each worker or fixer output matches its test case expectations
|
|
48
|
+
- Calculate per-test-case pass/fail status
|
|
49
|
+
- Calculate overall batch score using benchmark weights
|
|
50
|
+
- If score < threshold, dispatch fixer with findings
|
|
51
|
+
|
|
52
|
+
## Red Flags
|
|
53
|
+
|
|
54
|
+
- Reporting inflated scores to force a pass
|
|
55
|
+
- Skipping failing findings because they look minor
|
|
56
|
+
- Running validation without benchmark evidence
|
|
57
|
+
- Returning a passing status without per-case verification
|
|
58
|
+
|
|
59
|
+
## Anti-Rationalization Table
|
|
60
|
+
|
|
61
|
+
| Thought | Reality |
|
|
62
|
+
|---------|---------|
|
|
63
|
+
| "This workspace looks good enough" | Good enough is the enemy of excellent. Run validation. |
|
|
64
|
+
| "The score is close, I will round up" | Score inflation hides real problems. Report the true score. |
|
|
65
|
+
| "One failure does not matter" | Every failure matters. Report it and route it to fixer. |
|
|
66
|
+
| "I already validated this" | Validation is a snapshot. Re-validate after every change. |
|
|
67
|
+
| "The benchmark is too strict" | The benchmark is the standard. Meet it or escalate. |
|
|
68
|
+
|
|
69
|
+
## Sub-Skill Dispatch
|
|
70
|
+
|
|
71
|
+
- If batch score < threshold -> fixer sub-skill (`nextSkill = fixer`)
|
|
72
|
+
- If batch score >= threshold -> orchestrator (batch complete, `nextSkill = orchestrator`)
|
|
73
|
+
- If critical failures occur (for example, a missing SYSTEM.md) -> escalate to human and set `nextSkill = none`
|
|
74
|
+
|
|
75
|
+
## Report Format
|
|
76
|
+
|
|
77
|
+
```json
|
|
78
|
+
{
|
|
79
|
+
"skill": "validation",
|
|
80
|
+
"status": "passed",
|
|
81
|
+
"timestamp": "2026-04-08T00:00:00Z",
|
|
82
|
+
"batchId": 1,
|
|
83
|
+
"findings": ["All required files present"],
|
|
84
|
+
"fixSuggestions": ["No fixes required"],
|
|
85
|
+
"recommendations": ["Proceed to next batch"],
|
|
86
|
+
"metrics": {
|
|
87
|
+
"score": 94,
|
|
88
|
+
"benchmarkScore": 92,
|
|
89
|
+
"itemsChecked": 18,
|
|
90
|
+
"itemsPassed": 17,
|
|
91
|
+
"testCasesPassed": 7,
|
|
92
|
+
"testCasesFailed": 1
|
|
93
|
+
},
|
|
94
|
+
"nextSkill": "orchestrator"
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Allowed `nextSkill` values: `fixer`, `orchestrator`, `none`.
|
|
99
|
+
|
|
100
|
+
## Integration
|
|
101
|
+
|
|
102
|
+
- Consumes worker and fixer reports from `.agents/iteration/batch-<N>/`.
|
|
103
|
+
- Produces `batch-report.json` that drives fixer routing or orchestrator continuation.
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: worker
|
|
3
|
+
description: "Executes a single test case against the workspace and produces output. Use when running test cases, executing workspace tasks, or processing stage-specific work."
|
|
4
|
+
triggers: ["run test case", "execute workspace task", "process stage", "generate output"]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Overview
|
|
8
|
+
|
|
9
|
+
Execute a single test case by reading the relevant workspace sections, performing the required work, and producing structured output. Each worker runs with fresh context - no assumptions about prior runs.
|
|
10
|
+
|
|
11
|
+
## When to Use
|
|
12
|
+
|
|
13
|
+
- Dispatched by orchestrator as part of a batch
|
|
14
|
+
- User asks to run a specific test case
|
|
15
|
+
- User asks to execute a workspace stage task
|
|
16
|
+
|
|
17
|
+
## When Not to Use
|
|
18
|
+
|
|
19
|
+
- Validating outputs (use validation sub-skill)
|
|
20
|
+
- Fixing failed outputs (use fixer sub-skill)
|
|
21
|
+
- Planning workspace structure (use architecture sub-skill)
|
|
22
|
+
|
|
23
|
+
## The Iron Law
|
|
24
|
+
|
|
25
|
+
NO SKIPPING TEST CASE STEPS
|
|
26
|
+
NO MODIFYING WORKSPACE STRUCTURE
|
|
27
|
+
NO CLAIMING DONE WITHOUT OUTPUT
|
|
28
|
+
NO ASSUMING PRIOR CONTEXT
|
|
29
|
+
|
|
30
|
+
## The Process
|
|
31
|
+
|
|
32
|
+
1. **Read test case** - Load the test case JSON from `.agents/iteration/batch-<N>/<testCaseId>/` or orchestrator input
|
|
33
|
+
2. **Load workspace context** - Read `SYSTEM.md` and relevant stage `CONTEXT.md` files
|
|
34
|
+
3. **Execute the task** - Follow the test case's input and expected-output instructions
|
|
35
|
+
4. **Write output.md** - Human-readable output in `.agents/iteration/batch-<N>/<testCaseId>/output.md`
|
|
36
|
+
5. **Write report.json** - Structured JSON with `{skill, status, timestamp, testCaseId, batchId, findings, recommendations, metrics, nextSkill}`
|
|
37
|
+
6. **Dispatch validation** - Signal that output is ready for validation
|
|
38
|
+
|
|
39
|
+
## External Runner Contract
|
|
40
|
+
|
|
41
|
+
- Worker execution in autonomous iteration is orchestrated via `--subagent-runner`.
|
|
42
|
+
- Direct worker dispatch must provide `--runner-command` with placeholders `{skill}`, `{workspace}`, `{batchId}`, `{testCaseId}`.
|
|
43
|
+
- Runner output must be JSON and include `report.json`-compatible fields:
|
|
44
|
+
- `skill`, `status`, `timestamp`, `findings`, `recommendations`, `metrics`, `nextSkill`
|
|
45
|
+
- Missing/invalid runner output is a failure, not a simulated success path.
|
|
46
|
+
- Use `.agents/iteration/runs/*.json` telemetry to debug command rendering and runner payload issues.
|
|
47
|
+
|
|
48
|
+
## Anti-Rationalization Table
|
|
49
|
+
|
|
50
|
+
| Thought | Reality |
|
|
51
|
+
|---------|---------|
|
|
52
|
+
| "I already know what this stage does" | Read the CONTEXT.md. Assumptions cause failures. |
|
|
53
|
+
| "The output is good enough" | Good enough fails validation. Follow the test case exactly. |
|
|
54
|
+
| "I'll modify the workspace structure to make this easier" | Workers don't modify structure. That's the fixer's job. |
|
|
55
|
+
| "This test case is redundant" | Every test case exists for a reason. Execute it. |
|
|
56
|
+
| "I'll skip writing report.json" | Validation depends on report.json. It's mandatory. |
|
|
57
|
+
|
|
58
|
+
## Sub-Skill Dispatch
|
|
59
|
+
|
|
60
|
+
- After output is complete -> validation sub-skill (`nextSkill = validation`)
|
|
61
|
+
|
|
62
|
+
## Report Format
|
|
63
|
+
|
|
64
|
+
```json
|
|
65
|
+
{
|
|
66
|
+
"skill": "worker",
|
|
67
|
+
"status": "passed|failed|escalated",
|
|
68
|
+
"timestamp": "2026-04-08T00:00:00Z",
|
|
69
|
+
"testCaseId": "tc-001",
|
|
70
|
+
"batchId": 1,
|
|
71
|
+
"findings": ["Output generated with required sections"],
|
|
72
|
+
"recommendations": ["Proceed to validation"],
|
|
73
|
+
"metrics": {
|
|
74
|
+
"executionTimeMs": 120,
|
|
75
|
+
"outputLength": 640
|
|
76
|
+
},
|
|
77
|
+
"nextSkill": "validation"
|
|
78
|
+
}
|
|
79
|
+
```
|
package/dist/index.js
CHANGED
|
@@ -34,12 +34,41 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
34
34
|
};
|
|
35
35
|
})();
|
|
36
36
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
37
|
+
const fs = __importStar(require("fs"));
|
|
37
38
|
const path = __importStar(require("path"));
|
|
38
39
|
const install_1 = require("./install");
|
|
39
40
|
const scaffold_1 = require("./scripts/scaffold");
|
|
40
41
|
const agent_creator_1 = require("./agent-creator");
|
|
41
42
|
const agent_iterator_1 = require("./agent-iterator");
|
|
42
43
|
const platforms_1 = require("./platforms");
|
|
44
|
+
/**
 * Copy the bundled sub-skills into the workspace's .agents/skills/ folder.
 * This enables /skill research, /skill tooling, etc. inside the workspace.
 *
 * The source is `<templatesDir>/.workspace-templates/skills`. When that path is
 * missing, or exists but is not a directory, a warning is logged and nothing is
 * created or copied, so workspace creation can continue without sub-skills.
 * Files already present at the destination under the same name are overwritten.
 *
 * @param templatesDir Root of the packaged templates directory.
 * @param workspaceDir Root of the workspace that receives the sub-skills.
 */
function copySubSkillsToWorkspace(templatesDir, workspaceDir) {
    const skillsSrc = path.join(templatesDir, '.workspace-templates', 'skills');
    const skillsDest = path.join(workspaceDir, '.agents', 'skills');
    // existsSync alone is not enough: a regular file at this path would make
    // readdirSync throw ENOTDIR after the destination had already been created.
    const hasSkillsDir = fs.existsSync(skillsSrc) && fs.statSync(skillsSrc).isDirectory();
    if (!hasSkillsDir) {
        console.log('Warning: No sub-skills found in templates');
        return;
    }
    // Recursively mirror `src` into `dest`, creating directories as needed.
    function copyDir(src, dest) {
        fs.mkdirSync(dest, { recursive: true });
        const entries = fs.readdirSync(src, { withFileTypes: true });
        for (const entry of entries) {
            const srcPath = path.join(src, entry.name);
            const destPath = path.join(dest, entry.name);
            if (entry.isDirectory()) {
                copyDir(srcPath, destPath);
            }
            else {
                fs.copyFileSync(srcPath, destPath);
            }
        }
    }
    copyDir(skillsSrc, skillsDest);
    console.log(`Copied sub-skills to: ${skillsDest}`);
}
|
|
43
72
|
function showHelp() {
|
|
44
73
|
console.log(`
|
|
45
74
|
workspace-maxxing — npx-installable skill for AI agents
|
|
@@ -89,7 +118,7 @@ function extractOption(args, option) {
|
|
|
89
118
|
function hasFlag(args, flag) {
|
|
90
119
|
return args.includes(flag);
|
|
91
120
|
}
|
|
92
|
-
async function createWorkspace(args) {
|
|
121
|
+
async function createWorkspace(args, templatesDir) {
|
|
93
122
|
const workspaceName = extractOption(args, '--workspace-name') ?? 'My Workspace';
|
|
94
123
|
const stagesStr = extractOption(args, '--stages') ?? '01-input,02-process,03-output';
|
|
95
124
|
const stages = stagesStr.split(',').map(s => s.trim()).filter(Boolean);
|
|
@@ -114,9 +143,12 @@ async function createWorkspace(args) {
|
|
|
114
143
|
output: outputDir,
|
|
115
144
|
force: true,
|
|
116
145
|
});
|
|
117
|
-
// Step 2:
|
|
146
|
+
// Step 2: Copy sub-skills to workspace for /skill commands
|
|
147
|
+
console.log('\nStep 2: Installing sub-skills to workspace...');
|
|
148
|
+
copySubSkillsToWorkspace(templatesDir, outputDir);
|
|
149
|
+
// Step 3: Create agent if enabled
|
|
118
150
|
if (withAgent) {
|
|
119
|
-
console.log('\nStep
|
|
151
|
+
console.log('\nStep 3: Creating invokable agent...');
|
|
120
152
|
// Generate agent name from workspace name if not provided
|
|
121
153
|
const agentName = agentNameOption ?? (0, agent_creator_1.generateAgentName)(workspaceName);
|
|
122
154
|
const agentOptions = {
|
|
@@ -125,8 +157,8 @@ async function createWorkspace(args) {
|
|
|
125
157
|
workspacePath: outputDir,
|
|
126
158
|
};
|
|
127
159
|
(0, agent_creator_1.createAgent)(agentOptions);
|
|
128
|
-
// Step
|
|
129
|
-
console.log('\nStep
|
|
160
|
+
// Step 4: Run agent self-improvement loop
|
|
161
|
+
console.log('\nStep 4: Running agent self-improvement...');
|
|
130
162
|
const agentDirName = agentName.startsWith('@') ? agentName.slice(1) : agentName;
|
|
131
163
|
const agentPath = path.join(outputDir, '.agents', 'skills', agentDirName);
|
|
132
164
|
const iterationResult = await (0, agent_iterator_1.iterateAgent)({
|
|
@@ -135,8 +167,8 @@ async function createWorkspace(args) {
|
|
|
135
167
|
threshold,
|
|
136
168
|
maxIterations,
|
|
137
169
|
});
|
|
138
|
-
// Step
|
|
139
|
-
console.log('\nStep
|
|
170
|
+
// Step 5: Install for detected platform
|
|
171
|
+
console.log('\nStep 5: Installing for platform...');
|
|
140
172
|
const platform = (0, platforms_1.detectPlatform)();
|
|
141
173
|
console.log(`Detected platform: ${platform}`);
|
|
142
174
|
const installer = (0, platforms_1.getPlatformInstaller)(platform);
|
|
@@ -164,7 +196,8 @@ async function main() {
|
|
|
164
196
|
if (args.includes('init') || args.includes('--create-workspace')) {
|
|
165
197
|
// Remove 'init' from args if present, keep other flags
|
|
166
198
|
const cleanArgs = args.filter(a => a !== 'init' && a !== '--create-workspace');
|
|
167
|
-
|
|
199
|
+
const templatesDir = process.env.WORKSPACE_MAXXING_TEMPLATES ?? path.join(__dirname, '..', 'templates');
|
|
200
|
+
await createWorkspace(cleanArgs, templatesDir);
|
|
168
201
|
return;
|
|
169
202
|
}
|
|
170
203
|
// Check for install command
|
|
@@ -212,7 +245,8 @@ async function main() {
|
|
|
212
245
|
return;
|
|
213
246
|
}
|
|
214
247
|
// Default: treat as workspace creation (backward compatible)
|
|
215
|
-
|
|
248
|
+
const templatesDir = process.env.WORKSPACE_MAXXING_TEMPLATES ?? path.join(__dirname, '..', 'templates');
|
|
249
|
+
await createWorkspace(args, templatesDir);
|
|
216
250
|
}
|
|
217
251
|
main().catch((error) => {
|
|
218
252
|
console.error(error);
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEA,2CAA6B;AAC7B,uCAAyE;AACzE,iDAAuD;AACvD,mDAA+E;AAC/E,qDAAgD;AAChD,2CAAmE;AAEnE,SAAS,QAAQ;IACf,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAuCb,CAAC,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,IAAc,EAAE,MAAc;IACnD,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IACjC,OAAO,GAAG,KAAK,CAAC,CAAC,IAAI,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AACzE,CAAC;AAED,SAAS,OAAO,CAAC,IAAc,EAAE,IAAY;IAC3C,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AAC7B,CAAC;AAED,KAAK,UAAU,eAAe,CAAC,IAAc;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEA,uCAAyB;AACzB,2CAA6B;AAC7B,uCAAyE;AACzE,iDAAuD;AACvD,mDAA+E;AAC/E,qDAAgD;AAChD,2CAAmE;AAEnE;;;GAGG;AACH,SAAS,wBAAwB,CAAC,YAAoB,EAAE,YAAoB;IAC1E,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,sBAAsB,EAAE,QAAQ,CAAC,CAAC;IAC5E,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;IAEhE,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;QACzD,OAAO;IACT,CAAC;IAED,SAAS,OAAO,CAAC,GAAW,EAAE,IAAY;QACxC,EAAE,CAAC,SAAS,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACxC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QAC7D,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;YAC3C,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;YAC7C,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;gBACxB,OAAO,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;YAC7B,CAAC;iBAAM,CAAC;gBACN,EAAE,CAAC,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;YACrC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;IAC/B,OAAO,CAAC,GAAG,CAAC,yBAAyB,UAAU,EAAE,CAAC,CAAC;AACrD,CAAC;AAED,SAAS,QAAQ;IACf,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAuCb,CAAC,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,IAAc,EAAE,MAAc;IACnD,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IACjC,OAAO,GAAG,KAAK,CAAC,CAAC,IAAI,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AACzE,CAAC;AAED,SAAS,OAAO,CAAC,IAAc,EAAE,IAAY;IAC3C,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AAC7B,CAAC;AAED,KAAK,UAAU,eAAe,CAAC,IAAc,EAAE,YAAoB;IACjE,MAAM,aAAa,GAAG,aAAa,CAAC,IAAI,EAAE,kBAAkB,CAAC,IAAI,cAAc,CAAC;IAChF,MAAM,SAAS,GAAG,aAAa,CAAC,IAAI,EAAE,UAAU,CAAC,IAAI,+BAA+B,CAAC;IACrF,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAEvE,MAAM,SAAS
,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;IAC/C,MAAM,eAAe,GAAG,aAAa,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;IAC5D,MAAM,SAAS,GAAG,aAAa,CAAC,IAAI,EAAE,UAAU,CAAC;QAC/C,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,aAAa,CAAC,IAAI,EAAE,UAAU,CAAE,CAAC;QAC/D,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,WAAW,CAAC,CAAC;IAC7C,MAAM,SAAS,GAAG,aAAa,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,EAAE,aAAa,CAAE,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9G,MAAM,aAAa,GAAG,aAAa,CAAC,IAAI,EAAE,kBAAkB,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,EAAE,kBAAkB,CAAE,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAE3H,OAAO,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC;IACzC,OAAO,CAAC,GAAG,CAAC,uBAAuB,aAAa,EAAE,CAAC,CAAC;IACpD,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC5C,OAAO,CAAC,GAAG,CAAC,WAAW,SAAS,EAAE,CAAC,CAAC;IACpC,OAAO,CAAC,GAAG,CAAC,eAAe,SAAS,EAAE,CAAC,CAAC;IACxC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAEhB,4CAA4C;IAC5C,OAAO,CAAC,GAAG,CAAC,gDAAgD,CAAC,CAAC;IAC9D,IAAA,4BAAiB,EAAC;QAChB,IAAI,EAAE,aAAa;QACnB,MAAM;QACN,MAAM,EAAE,SAAS;QACjB,KAAK,EAAE,IAAI;KACZ,CAAC,CAAC;IAEH,2DAA2D;IAC3D,OAAO,CAAC,GAAG,CAAC,iDAAiD,CAAC,CAAC;IAC/D,wBAAwB,CAAC,YAAY,EAAE,SAAS,CAAC,CAAC;IAElD,kCAAkC;IAClC,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,uCAAuC,CAAC,CAAC;QAErD,0DAA0D;QAC1D,MAAM,SAAS,GAAG,eAAe,IAAI,IAAA,iCAAiB,EAAC,aAAa,CAAC,CAAC;QAEtE,MAAM,YAAY,GAAiB;YACjC,IAAI,EAAE,SAAS;YACf,OAAO,EAAE,WAAW,aAAa,WAAW;YAC5C,aAAa,EAAE,SAAS;SACzB,CAAC;QAEF,IAAA,2BAAW,EAAC,YAAY,CAAC,CAAC;QAE1B,0CAA0C;QAC1C,OAAO,CAAC,GAAG,CAAC,6CAA6C,CAAC,CAAC;QAC3D,MAAM,YAAY,GAAG,SAAS,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAChF,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,SAAS,EAAE,QAAQ,EAAE,YAAY,CAAC,CAAC;QAE1E,MAAM,eAAe,GAAG,MAAM,IAAA,6BAAY,EAAC;YACzC,SAAS;YACT,aAAa,EAAE,SAAS;YACxB,SAAS;YACT,aAAa;SACd,CAAC,CAAC;QAEH,wCAAwC;QACxC,OAAO,CAAC,GAAG,CAAC,sCAAsC,CAAC,CAAC;QACpD,MAAM,QAAQ,GAAG,IAAA,0BAAc,GAAE,CAAC;QAClC,OAAO,CAAC,GAAG,CAAC,sBAAsB
,QAAQ,EAAE,CAAC,CAAC;QAE9C,MAAM,SAAS,GAAG,IAAA,gCAAoB,EAAC,QAAQ,CAAC,CAAC;QACjD,SAAS,CAAC,OAAO,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;QAExC,OAAO,CAAC,GAAG,CAAC,uCAAuC,CAAC,CAAC;QACrD,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,EAAE,CAAC,CAAC;QACvC,OAAO,CAAC,GAAG,CAAC,UAAU,SAAS,EAAE,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,UAAU,eAAe,CAAC,KAAK,IAAI,SAAS,EAAE,CAAC,CAAC;QAC5D,OAAO,CAAC,GAAG,CAAC,eAAe,eAAe,CAAC,UAAU,EAAE,CAAC,CAAC;QACzD,OAAO,CAAC,GAAG,CAAC,gCAAgC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IACpE,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,uCAAuC,CAAC,CAAC;QACrD,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,EAAE,CAAC,CAAC;QACvC,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;IAC3D,CAAC;AACH,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAEnC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACjD,QAAQ,EAAE,CAAC;QACX,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,uDAAuD;IACvD,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,oBAAoB,CAAC,EAAE,CAAC;QACjE,uDAAuD;QACvD,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,MAAM,IAAI,CAAC,KAAK,oBAAoB,CAAC,CAAC;QAC/E,MAAM,YAAY,GAAG,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;QACxG,MAAM,eAAe,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;QAC/C,OAAO;IACT,CAAC;IAED,4BAA4B;IAC5B,IAAI,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QAC7B,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;QAC1B,MAAM,WAAW,GAAG,IAAA,2BAAiB,EAAC,GAAG,CAAC,CAAC;QAE3C,IAAI,WAAW,KAAK,GAAG,EAAE,CAAC;YACxB,OAAO,CAAC,GAAG,CAAC,0BAA0B,WAAW,EAAE,CAAC,CAAC;QACvD,CAAC;QAED,MAAM,YAAY,GAChB,OAAO,CAAC,GAAG,CAAC,2BAA2B;YACvC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;QAE1C,OAAO,CAAC,GAAG,CAAC,uCAAuC,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,MAAM,IAAA,sBAAY,EAAC,WAAW,EAAE,YAAY,EAAE,UAAU,CAAC,CAAC;QAEzE,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACnB,OAAO,CAAC,GAAG,CAAC,uBAAuB,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YACvD,OAAO,CAAC,GAAG,CAAC,oFAAoF,CAAC,CAAC;QACpG,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,wBAAwB,MAAM,CAAC,KAAK,EAAE,C
AAC,CAAC;YACtD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO;IACT,CAAC;IAED,yDAAyD;IACzD,MAAM,UAAU,GAAkB,CAAC,UAAU,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;IAC9E,MAAM,aAAa,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,CAAC;IAE5E,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;QAC1B,MAAM,WAAW,GAAG,IAAA,2BAAiB,EAAC,GAAG,CAAC,CAAC;QAE3C,IAAI,WAAW,KAAK,GAAG,EAAE,CAAC;YACxB,OAAO,CAAC,GAAG,CAAC,0BAA0B,WAAW,EAAE,CAAC,CAAC;QACvD,CAAC;QAED,MAAM,YAAY,GAChB,OAAO,CAAC,GAAG,CAAC,2BAA2B;YACvC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;QAE1C,OAAO,CAAC,GAAG,CAAC,0CAA0C,aAAa,KAAK,CAAC,CAAC;QAC1E,MAAM,MAAM,GAAG,MAAM,IAAA,sBAAY,EAAC,WAAW,EAAE,YAAY,EAAE,aAAa,CAAC,CAAC;QAE5E,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACnB,OAAO,CAAC,GAAG,CAAC,uBAAuB,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YACvD,OAAO,CAAC,GAAG,CAAC,cAAc,aAAa,iEAAiE,CAAC,CAAC;QAC5G,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,wBAAwB,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;YACtD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO;IACT,CAAC;IAED,6DAA6D;IAC7D,MAAM,YAAY,GAAG,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;IACxG,MAAM,eAAe,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;AAC5C,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACrB,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACrB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "workspace-maxxing",
|
|
3
|
-
"version": "0.6.0",
|
|
3
|
+
"version": "0.6.1",
|
|
4
4
|
"description": "npx-installable skill for AI agents to create structured workspaces using ICM methodology",
|
|
5
5
|
"bin": {
|
|
6
6
|
"workspace-maxxing": "dist/index.js"
|
|
@@ -12,7 +12,13 @@
|
|
|
12
12
|
"test:watch": "jest --watch",
|
|
13
13
|
"prepublishOnly": "npm run build"
|
|
14
14
|
},
|
|
15
|
-
"keywords": [
|
|
15
|
+
"keywords": [
|
|
16
|
+
"ai-agent",
|
|
17
|
+
"workspace",
|
|
18
|
+
"skill",
|
|
19
|
+
"opencode",
|
|
20
|
+
"icm"
|
|
21
|
+
],
|
|
16
22
|
"author": "Eric Julian Deguzman <ericjuliandeguzman77@gmail.com>",
|
|
17
23
|
"license": "MIT",
|
|
18
24
|
"repository": {
|
package/src/index.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
|
+
import * as fs from 'fs';
|
|
3
4
|
import * as path from 'path';
|
|
4
5
|
import { detectProjectRoot, installSkill, AgentTarget } from './install';
|
|
5
6
|
import { scaffoldWorkspace } from './scripts/scaffold';
|
|
@@ -7,6 +8,37 @@ import { createAgent, generateAgentName, AgentOptions } from './agent-creator';
|
|
|
7
8
|
import { iterateAgent } from './agent-iterator';
|
|
8
9
|
import { detectPlatform, getPlatformInstaller } from './platforms';
|
|
9
10
|
|
|
11
|
+
/**
 * Copy the bundled sub-skills into the workspace's .agents/skills/ folder.
 * This enables /skill research, /skill tooling, etc. inside the workspace.
 *
 * The source is `<templatesDir>/.workspace-templates/skills`. When that path is
 * missing, or exists but is not a directory, a warning is logged and nothing is
 * created or copied, so workspace creation can continue without sub-skills.
 * Files already present at the destination under the same name are overwritten.
 *
 * @param templatesDir - Root of the packaged templates directory.
 * @param workspaceDir - Root of the workspace that receives the sub-skills.
 */
function copySubSkillsToWorkspace(templatesDir: string, workspaceDir: string): void {
  const skillsSrc = path.join(templatesDir, '.workspace-templates', 'skills');
  const skillsDest = path.join(workspaceDir, '.agents', 'skills');

  // existsSync alone is not enough: a regular file at this path would make
  // readdirSync throw ENOTDIR after the destination had already been created.
  const hasSkillsDir = fs.existsSync(skillsSrc) && fs.statSync(skillsSrc).isDirectory();
  if (!hasSkillsDir) {
    console.log('Warning: No sub-skills found in templates');
    return;
  }

  // Recursively mirror `src` into `dest`, creating directories as needed.
  function copyDir(src: string, dest: string): void {
    fs.mkdirSync(dest, { recursive: true });
    const entries = fs.readdirSync(src, { withFileTypes: true });
    for (const entry of entries) {
      const srcPath = path.join(src, entry.name);
      const destPath = path.join(dest, entry.name);
      if (entry.isDirectory()) {
        copyDir(srcPath, destPath);
      } else {
        fs.copyFileSync(srcPath, destPath);
      }
    }
  }

  copyDir(skillsSrc, skillsDest);
  console.log(`Copied sub-skills to: ${skillsDest}`);
}
|
|
41
|
+
|
|
10
42
|
function showHelp(): void {
|
|
11
43
|
console.log(`
|
|
12
44
|
workspace-maxxing — npx-installable skill for AI agents
|
|
@@ -59,7 +91,7 @@ function hasFlag(args: string[], flag: string): boolean {
|
|
|
59
91
|
return args.includes(flag);
|
|
60
92
|
}
|
|
61
93
|
|
|
62
|
-
async function createWorkspace(args: string[]): Promise<void> {
|
|
94
|
+
async function createWorkspace(args: string[], templatesDir: string): Promise<void> {
|
|
63
95
|
const workspaceName = extractOption(args, '--workspace-name') ?? 'My Workspace';
|
|
64
96
|
const stagesStr = extractOption(args, '--stages') ?? '01-input,02-process,03-output';
|
|
65
97
|
const stages = stagesStr.split(',').map(s => s.trim()).filter(Boolean);
|
|
@@ -88,9 +120,13 @@ async function createWorkspace(args: string[]): Promise<void> {
|
|
|
88
120
|
force: true,
|
|
89
121
|
});
|
|
90
122
|
|
|
91
|
-
// Step 2:
|
|
123
|
+
// Step 2: Copy sub-skills to workspace for /skill commands
|
|
124
|
+
console.log('\nStep 2: Installing sub-skills to workspace...');
|
|
125
|
+
copySubSkillsToWorkspace(templatesDir, outputDir);
|
|
126
|
+
|
|
127
|
+
// Step 3: Create agent if enabled
|
|
92
128
|
if (withAgent) {
|
|
93
|
-
console.log('\nStep
|
|
129
|
+
console.log('\nStep 3: Creating invokable agent...');
|
|
94
130
|
|
|
95
131
|
// Generate agent name from workspace name if not provided
|
|
96
132
|
const agentName = agentNameOption ?? generateAgentName(workspaceName);
|
|
@@ -103,8 +139,8 @@ async function createWorkspace(args: string[]): Promise<void> {
|
|
|
103
139
|
|
|
104
140
|
createAgent(agentOptions);
|
|
105
141
|
|
|
106
|
-
// Step
|
|
107
|
-
console.log('\nStep
|
|
142
|
+
// Step 4: Run agent self-improvement loop
|
|
143
|
+
console.log('\nStep 4: Running agent self-improvement...');
|
|
108
144
|
const agentDirName = agentName.startsWith('@') ? agentName.slice(1) : agentName;
|
|
109
145
|
const agentPath = path.join(outputDir, '.agents', 'skills', agentDirName);
|
|
110
146
|
|
|
@@ -115,8 +151,8 @@ async function createWorkspace(args: string[]): Promise<void> {
|
|
|
115
151
|
maxIterations,
|
|
116
152
|
});
|
|
117
153
|
|
|
118
|
-
// Step
|
|
119
|
-
console.log('\nStep
|
|
154
|
+
// Step 5: Install for detected platform
|
|
155
|
+
console.log('\nStep 5: Installing for platform...');
|
|
120
156
|
const platform = detectPlatform();
|
|
121
157
|
console.log(`Detected platform: ${platform}`);
|
|
122
158
|
|
|
@@ -148,7 +184,8 @@ async function main(): Promise<void> {
|
|
|
148
184
|
if (args.includes('init') || args.includes('--create-workspace')) {
|
|
149
185
|
// Remove 'init' from args if present, keep other flags
|
|
150
186
|
const cleanArgs = args.filter(a => a !== 'init' && a !== '--create-workspace');
|
|
151
|
-
|
|
187
|
+
const templatesDir = process.env.WORKSPACE_MAXXING_TEMPLATES ?? path.join(__dirname, '..', 'templates');
|
|
188
|
+
await createWorkspace(cleanArgs, templatesDir);
|
|
152
189
|
return;
|
|
153
190
|
}
|
|
154
191
|
|
|
@@ -210,7 +247,8 @@ async function main(): Promise<void> {
|
|
|
210
247
|
}
|
|
211
248
|
|
|
212
249
|
// Default: treat as workspace creation (backward compatible)
|
|
213
|
-
|
|
250
|
+
const templatesDir = process.env.WORKSPACE_MAXXING_TEMPLATES ?? path.join(__dirname, '..', 'templates');
|
|
251
|
+
await createWorkspace(args, templatesDir);
|
|
214
252
|
}
|
|
215
253
|
|
|
216
254
|
main().catch((error) => {
|