learnship 2.3.6 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/.cursor-plugin/plugin.json +1 -1
  3. package/README.md +34 -17
  4. package/SKILL.md +32 -11
  5. package/agents/learnship-challenger.md +9 -0
  6. package/agents/learnship-executor.md +9 -0
  7. package/agents/learnship-ideation-agent.md +9 -0
  8. package/agents/learnship-research-synthesizer.md +9 -0
  9. package/agents/learnship-roadmapper.md +9 -0
  10. package/agents/learnship-security-auditor.md +20 -1
  11. package/agents/learnship-solution-writer.md +9 -0
  12. package/agents/learnship-verifier.md +1 -1
  13. package/bin/install.js +95 -25
  14. package/cursor-rules/learnship.mdc +32 -4
  15. package/gemini-extension.json +1 -1
  16. package/hooks/learnship-context-monitor.js +6 -3
  17. package/hooks/learnship-prompt-guard.js +1 -1
  18. package/hooks/learnship-session-state.js +1 -1
  19. package/hooks/learnship-statusline.js +8 -3
  20. package/learnship/agents/challenger.md +7 -0
  21. package/learnship/agents/executor.md +7 -0
  22. package/learnship/agents/ideation-agent.md +7 -0
  23. package/learnship/agents/research-synthesizer.md +7 -0
  24. package/learnship/agents/roadmapper.md +7 -0
  25. package/learnship/agents/security-auditor.md +28 -0
  26. package/learnship/agents/solution-writer.md +7 -0
  27. package/learnship/agents/verifier.md +1 -1
  28. package/learnship/references/model-profiles.md +20 -13
  29. package/learnship/workflows/execute-phase.md +4 -3
  30. package/learnship/workflows/health.md +32 -4
  31. package/learnship/workflows/new-project.md +36 -4
  32. package/learnship/workflows/review.md +106 -10
  33. package/learnship/workflows/secure-phase.md +2 -0
  34. package/learnship/workflows/ship.md +43 -0
  35. package/learnship/workflows/verify-work.md +33 -0
  36. package/package.json +1 -1
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "learnship",
3
3
  "description": "Agentic engineering done right — 57 structured workflows, 17 specialist agent personas, persistent memory across sessions, integrated learning partner, and impeccable UI design system. Works with Claude Code, Windsurf, Cursor, Gemini CLI, OpenCode, and Codex.",
4
- "version": "2.3.6",
4
+ "version": "2.4.0",
5
5
  "author": {
6
6
  "name": "Favio Vazquez",
7
7
  "email": "favio.vazquezp@gmail.com"
@@ -2,7 +2,7 @@
2
2
  "name": "learnship",
3
3
  "displayName": "learnship",
4
4
  "description": "Agentic engineering done right — 57 structured workflows, 17 specialist agent personas, persistent memory across sessions, integrated learning partner, and impeccable UI design system.",
5
- "version": "2.3.6",
5
+ "version": "2.4.0",
6
6
  "logo": "assets/logo.png",
7
7
  "author": {
8
8
  "name": "Favio Vazquez",
package/README.md CHANGED
@@ -124,7 +124,7 @@ flowchart LR
124
124
  PP["/plan-phase N<br/>Vertical slice plans"]
125
125
  EP["/execute-phase N<br/>Build + commit"]
126
126
  VW["/verify-work N<br/>UAT + diagnose"]
127
- RV["/review<br/>Multi-persona review"]
127
+ RV["/review<br/>Two-pass review"]
128
128
  SH["/ship<br/>Test → PR"]
129
129
  CP["/compound<br/>Capture knowledge"]
130
130
 
@@ -140,7 +140,7 @@ flowchart LR
140
140
  | **2. Plan** | `/plan-phase N` | Agent researches the domain, creates vertical slice plans (tracer bullets), verifies them — including horizontal slice detection (v2.3.4) |
141
141
  | **3. Execute** | `/execute-phase N` | Plans run in dependency order, one atomic commit per task |
142
142
  | **4. Verify** | `/verify-work N` | You do UAT; agent diagnoses any gaps and creates fix plans |
143
- | **5. Review** | `/review` | Multi-persona code review through 6 lenses (v2.0) |
143
+ | **5. Review** | `/review` | Two-pass review: spec compliance check then 6-lens quality review (v2.4.0) |
144
144
  | **6. Ship** | `/ship` | Test → lint → commit → push → PR (v2.0) |
145
145
  | **7. Compound** | `/compound` | Capture what you learned as searchable documentation (v2.0) |
146
146
 
@@ -172,14 +172,16 @@ Each platform gets the best experience it supports:
172
172
  |---------|----------|-------------|----------|------------|-----------|
173
173
  | Slash commands | ✓ | ✓ | ✓ | ✓ | `$skills` |
174
174
  | Real parallel subagents | — | ✓ | ✓ | ✓ | ✓ |
175
- | Parallel wave execution | — | ✓ opt-in | ✓ opt-in | | ✓ opt-in |
175
+ | Parallel wave execution | — | ✓ | ✓ | ⚠️ experimental | ✓ |
176
176
  | Agent personas (17) | `model_decision` rules | `Task()` subagents | `Task()` subagents | `Task()` subagents | `Task()` subagents |
177
177
  | Interactive questions | `ask_user_question` | `AskUserQuestion` | `question` | `ask_user` | `request_user_input` |
178
178
  | Session hooks | — | ✓ | — | ✓ | — |
179
179
  | Skills (native `@invoke`) | ✓ | — | — | — | — |
180
180
  | Skills (context files) | ✓ | ✓ | ✓ | ✓ | ✓ |
181
181
 
182
- **Parallel subagents:** On Claude Code, OpenCode, and Codex, `execute-phase` can spawn a dedicated executor per plan within a wave, each with its own 200k context budget. Enable with `"parallelization": { "enabled": true }` in `.planning/config.json`. Up to 5 concurrent agents per wave by default. All platforms default to sequential (always safe).
182
+ **Cursor** uses context injection via `cursor-rules/learnship.mdc` — it gets the full workflow library, design system, and learning partner, but does not appear in the feature matrix above because it has no first-class slash command or subagent API (rules load automatically as context).
183
+
184
+ **Parallel subagents:** Claude Code, OpenCode, and Codex default to parallel execution — `execute-phase` spawns a dedicated executor per plan within a wave, each with its own 200k context budget. Up to 5 concurrent agents per wave. To run sequentially, set `"parallelization": { "enabled": false }` in `.planning/config.json`. Windsurf and Cursor use sequential execution (no real subagent API on those platforms); Gemini CLI also defaults to sequential because parallel wave execution there is still experimental.
183
185
 
184
186
  ---
185
187
 
@@ -193,8 +195,8 @@ learnship gives you that harness as a portable, open-source layer that adds:
193
195
 
194
196
  - **Persistent memory.** `/new-project` generates an `AGENTS.md` loaded automatically every session. No more repeating yourself.
195
197
  - **Structured process.** A repeatable phase loop with spec-driven plans, wave-ordered execution, and UAT-driven verification.
196
- - **Knowledge compounding.** `/compound` captures solved problems. `/review` runs multi-persona code review. `/ship` runs the full delivery pipeline.
197
- - **Security & recovery.** `/secure-phase` for STRIDE verification. `/forensics` for post-mortem. `/undo` for safe revert.
198
+ - **Knowledge compounding.** `/compound` captures solved problems. `/review` runs two-pass code review (spec compliance then quality). `/ship` runs the full delivery pipeline.
199
+ - **Security & recovery.** `/secure-phase` for STRIDE + OWASP Top 10 verification. `/forensics` for post-mortem. `/undo` for safe revert.
198
200
  - **Session intelligence.** Hooks, context profiles, interactive questions, agent delegation. ([v2.2 details →](#whats-new-in-v22))
199
201
  - **Built-in learning.** Neuroscience-backed checkpoints at every phase transition so you understand what you shipped.
200
202
 
@@ -230,7 +232,7 @@ It's the right tool if:
230
232
  - You're **building a real project** and want the AI to stay aligned across sessions
231
233
  - You're **learning while building** and want to actually understand what gets shipped
232
234
  - You care about **code quality and UI quality** beyond "it works"
233
- - You want **parallel agent execution** on Claude Code, OpenCode, or Gemini CLI
235
+ - You want **parallel agent execution** — Claude Code, OpenCode, and Codex run it by default
234
236
  - You've felt the frustration of **context loss**: repeating yourself while the agent forgets
235
237
 
236
238
  It's probably overkill for one-off scripts. Use `/quick` for that.
@@ -252,6 +254,18 @@ It's probably overkill for one-off scripts. Use `/quick` for that.
252
254
 
253
255
  ## 🆕 What's New
254
256
 
257
+ ### What's new in v2.4.0
258
+
259
+ v2.4.0 adds spec compliance checking to `/review`, OWASP Top 10 coverage to `/secure-phase`, a numeric score to `/health`, and Playwright MCP smoke-test guidance to `/verify-work` and `/ship`:
260
+
261
+ **Two-stage `/review`**: Pass 1 checks spec compliance — reads PLAN.md must-haves and classifies each as COVERED / PARTIAL / MISSING — before Pass 2 runs the existing 6-persona quality review. The spec compliance result appears in the report header. Use `--quality-only` to skip Pass 1 and run only the quality review.
262
+
263
+ **OWASP Top 10 in `/secure-phase`**: The security-auditor agent now cross-maps STRIDE findings against OWASP Top 10 (A01–A10). Every SECURITY.md output includes an OWASP coverage table alongside the STRIDE analysis.
264
+
265
+ **Numeric `/health` score**: The health check now outputs a 0–100 numeric score alongside the qualitative status. Starts at 100, deducts per issue found. Bands: HEALTHY (90–100), DEGRADED (70–89), BROKEN (0–69).
266
+
267
+ **Playwright MCP guidance in `/verify-work` and `/ship`**: Optional live UI smoke-test sections activate when `@playwright/mcp` is configured. Supported on all 6 MCP-capable platforms (Claude Code, OpenCode, Cursor, Windsurf, Codex CLI, Gemini CLI). In `/verify-work`, walks the golden path using `mcp__playwright__*` tools. In `/ship`, runs a quick smoke test before creating the PR.
268
+
255
269
  ### What's new in v2.3.4
256
270
 
257
271
  v2.3.4 adds two planning quality features:
@@ -296,7 +310,7 @@ v2.1 adds 8 new workflows, 5 new references, 3 new templates, and 2 new agents:
296
310
 
297
311
  | Category | New workflows |
298
312
  |----------|--------------|
299
- | **Security** | `/secure-phase` — per-phase STRIDE threat verification |
313
+ | **Security** | `/secure-phase` — per-phase STRIDE threat-model security verification |
300
314
  | **Documentation** | `/docs-update` — generate and verify project docs against codebase |
301
315
  | **Recovery** | `/forensics` — post-mortem investigation · `/undo` — safe git revert |
302
316
  | **Session** | `/note` — zero-friction capture · `/session-report` — stakeholder summaries |
@@ -309,7 +323,7 @@ Enhanced: `/discuss-phase` (scope guardrails + domain probes + `--deep` extended
309
323
 
310
324
  ---
311
325
 
312
- ## Agentic Engineering vs Vibe Coding
326
+ ## Agentic Engineering vs Vibe Coding
313
327
 
314
328
  ![Vibe coding vs Agentic engineering](assets/vibe-vs-agentic.png)
315
329
 
@@ -373,7 +387,7 @@ AGENTS.md ← your AI agent reads this every conversation
373
387
  | `/discuss-phase [N]` | Capture implementation decisions before planning | Before every phase |
374
388
  | `/plan-phase [N]` | Research + create + verify plans | After discussing a phase |
375
389
  | `/execute-phase [N]` | Wave-ordered execution of all plans | After planning |
376
- | `/verify-work [N]` | Manual UAT with auto-diagnosis and fix planning | After execution |
390
+ | `/verify-work [N]` | Manual UAT with auto-diagnosis and fix planning. Optional Playwright MCP live UI smoke test when `@playwright/mcp` is configured. | After execution |
377
391
  | `/complete-milestone` | Archive milestone, tag release, prepare next | All phases verified |
378
392
  | `/audit-milestone` | Pre-release: requirement coverage, stub detection | Before completing milestone |
379
393
  | `/new-milestone [name]` | Start next version cycle | After completing a milestone |
@@ -436,9 +450,9 @@ AGENTS.md ← your AI agent reads this every conversation
436
450
  | Workflow | Purpose | When to use |
437
451
  |----------|---------|-------------|
438
452
  | `/compound` | Capture solved problem as searchable documentation | After `/debug`, `/verify-work`, or any aha moment |
439
- | `/review` | Multi-persona code review (6 lenses) | After `/verify-work`, before shipping |
453
+ | `/review` | Two-pass review: spec compliance check then 6-persona quality review. `--quality-only` skips spec compliance. | After `/verify-work`, before shipping |
440
454
  | `/challenge` | Stress-test scope through product + engineering lenses | Before committing to a milestone or large feature |
441
- | `/ship` | Test → lint → commit → push → PR | After review, ready to deploy |
455
+ | `/ship` | Test → lint → commit → push → PR. Optional Playwright MCP smoke test before PR creation when `@playwright/mcp` is configured. | After review, ready to deploy |
442
456
  | `/ideate` | Codebase-grounded idea generation | Before `/discuss-milestone`, between milestones |
443
457
  | `/guard` | Safety mode: protect sensitive directories | Working on auth, payments, migrations |
444
458
  | `/sync-docs` | Detect stale documentation | Before `/complete-milestone`, after refactors |
@@ -449,7 +463,7 @@ AGENTS.md ← your AI agent reads this every conversation
449
463
  |----------|---------|-------------|
450
464
  | `/settings` | Interactive config editor | Change mode, toggle agents |
451
465
  | `/set-profile [quality\|balanced\|budget]` | One-step model profile switch | Quick cost/quality adjustment |
452
- | `/health` | Project health check | Stale files, missing artifacts |
466
+ | `/health` | Project health check with numeric 0–100 score (HEALTHY ≥90, DEGRADED ≥70, BROKEN <70) | Stale files, missing artifacts |
453
467
  | `/cleanup` | Archive old artifacts | End of milestone |
454
468
  | `/update` | Update the platform itself | Check for new workflows |
455
469
  | `/reapply-patches` | Restore local edits after update | After `/update` if you had local changes |
@@ -534,7 +548,7 @@ Project settings live in `.planning/config.json`. Set during `/new-project` or e
534
548
  | `model_profile` | `quality`, `balanced`, `budget` | `balanced` | Agent model tier (see table below) |
535
549
  | `learning_mode` | `auto`, `manual` | `auto` | `auto` offers learning at checkpoints; `manual` requires explicit invocation |
536
550
  | `context` | `dev`, `research`, `review` | `dev` | Output profile: `dev` (concise), `research` (detailed), `review` (audit-focused) |
537
- | `parallelization.enabled` | `true`, `false` | `false` | Parallel subagents per plan on supported platforms |
551
+ | `parallelization.enabled` | `true`, `false` | `true` (Claude Code, OpenCode, Codex) / `false` (others) | Parallel subagents per plan on supported platforms |
538
552
  | `test_first` | `true`, `false` | `false` | TDD mode: write failing test first, verify red, implement, verify green |
539
553
  | `planning.commit_mode` | `auto`, `manual` | `auto` | `auto` commits after each workflow step; `manual` skips all git commits |
540
554
 
@@ -584,7 +598,7 @@ Project settings live in `.planning/config.json`. Set during `/new-project` or e
584
598
  | Challenger | large | medium | medium |
585
599
  | Ideation Agent | large | medium | small |
586
600
 
587
- > **Platform note:** Tiers map to the best available model on your platform: `large` = Claude Opus 4.6 / Gemini 3.1 Pro / GPT-5.4, `medium` = Claude Sonnet 4.6 / Gemini 3.1 Flash / GPT-5.4-mini, `small` = Claude Haiku 4.5 / Gemini 3.1 Flash-Lite / GPT-5.4-nano. Windsurf, Cursor, and OpenCode use the platform default model — tiers signal intended task complexity.
601
+ > **Platform note:** Tiers map to the best available model on your platform. On Claude Code: `large` = Opus, `medium` = Sonnet, `small` = Haiku. On Gemini CLI and Codex CLI the installer maps tiers to the best available model at install time. Windsurf, Cursor, and OpenCode use the platform default model — tiers signal intended task complexity.
588
602
 
589
603
  ### Speed vs. Quality Presets
590
604
 
@@ -653,9 +667,12 @@ The **impeccable** skill suite is always active as project context for any UI wo
653
667
  | `/quieter` | Tone down overly aggressive designs to reduce intensity and gain refinement |
654
668
  | `/distill` | Strip to essence: remove complexity, clarify what matters |
655
669
  | `/clarify` | Improve UX copy, error messages, microcopy, labels |
670
+ | `/typeset` | Improve typography: font choices, hierarchy, sizing, weight, and readability |
671
+ | `/arrange` | Improve layout, spacing, and visual rhythm; fix monotonous grids and weak hierarchy |
656
672
  | `/optimize` | Performance: loading speed, rendering, animations, bundle size |
657
673
  | `/harden` | Resilience: error handling, i18n, text overflow, edge cases |
658
674
  | `/delight` | Add moments of joy and personality that make interfaces memorable |
675
+ | `/overdrive` | Push past conventional limits — shaders, spring physics, scroll-driven reveals |
659
676
  | `/extract` | Extract reusable components and design tokens into your design system |
660
677
  | `/adapt` | Adapt designs across screen sizes, devices, and contexts |
661
678
  | `/onboard` | Design onboarding flows, empty states, first-time user experiences |
@@ -687,7 +704,7 @@ The **impeccable** skill suite is always active as project context for any UI wo
687
704
  /plan-phase 1 # Research + plan + verify
688
705
  /execute-phase 1 # Wave-ordered execution
689
706
  /verify-work 1 # Manual UAT
690
- /review # v2.0: multi-persona code review
707
+ /review # two-pass review: spec compliance + quality (v2.4.0)
691
708
  /ship # v2.0: test → commit → push → PR
692
709
  /compound # v2.0: capture what you learned
693
710
  # Repeat for each phase
@@ -907,7 +924,7 @@ learnship/
907
924
  ├── bin/
908
925
  │ └── install.js # Multi-platform installer (Claude Code, OpenCode, Gemini CLI, Codex CLI, Windsurf)
909
926
  ├── tests/
910
- │ └── run_all.sh # 15 test suites, 1200+ checks across 6 platforms
927
+ │ └── run_all.sh # 17 test suites, 1330+ checks across 6 platforms
911
928
  ├── SKILL.md # Meta-skill: platform context loaded by Cascade / AI agents
912
929
  ├── install.sh # Shell installer wrapper
913
930
  ├── package.json # npm package (npx learnship)
package/SKILL.md CHANGED
@@ -103,25 +103,46 @@ Read `learning_mode` from `.planning/config.json` (default: "auto"):
103
103
  - **`auto`** — Proactively offer learning actions at natural workflow checkpoints (after planning, execution, verification)
104
104
  - **`manual`** — Only activate `@agentic-learning` when the user explicitly asks
105
105
 
106
- Learning checkpoints:
107
- - After requirements approved → `@agentic-learning brainstorm`
108
- - After discuss-phase → `@agentic-learning either-or`
109
- - After plan-phase → `@agentic-learning cognitive-load`
110
- - After execute-phase → `@agentic-learning reflect`
111
- - After verify-work passes → `@agentic-learning space`
112
- - After `/review` → `@agentic-learning learn` (review findings as learning material)
106
+ Learning checkpoints (auto mode triggers these; manual mode surfaces them as tips):
107
+
108
+ **Core phase loop:**
109
+ - After requirements approved → `@agentic-learning brainstorm` (design dialogue on the requirements)
110
+ - After `/discuss-phase` → `@agentic-learning either-or` (capture the decisions made)
111
+ - After `/plan-phase` → `@agentic-learning cognitive-load` (decompose if plan feels overwhelming)
112
+ - After `/execute-phase` → `@agentic-learning reflect` (consolidate the cycle)
113
+ - After `/verify-work` passes → `@agentic-learning space` (queue concepts for spaced revisit)
114
+
115
+ **Quality gates:**
116
+ - After `/review` → `@agentic-learning learn` (most significant finding as a learning topic)
117
+ - After `/review` (on UI changes) → `@agentic-learning quiz` (gaps in recall predict future bugs)
113
118
  - After `/challenge` → `@agentic-learning either-or` (which lens was most valuable?)
119
+ - After `/secure-phase` → `@agentic-learning learn` (security patterns)
114
120
  - After `/ship` → `@agentic-learning reflect` (what went well in this cycle?)
121
+
122
+ **Discovery, mapping, comprehension:**
123
+ - After `/map-codebase` or `/discovery-phase` → `@agentic-learning explain` (lock in the project knowledge log)
124
+ - When studying an unfamiliar function or pattern → `@agentic-learning explain-first` (oracy-first comprehension check)
125
+ - After absorbing research files (RESEARCH.md, STACK.md, etc.) → `@agentic-learning quiz` (test what stuck)
126
+
127
+ **Ideation and complex tasks:**
115
128
  - After `/ideate` → `@agentic-learning brainstorm` (explore top idea collaboratively)
116
- - During complex quick tasks → `@agentic-learning struggle`
129
+ - During complex `/quick` tasks → `@agentic-learning struggle` (productive struggle on hard parts)
130
+ - When stuck across multiple domains in one session → `@agentic-learning interleave` (mixed retrieval forces transfer)
131
+
132
+ **Recovery and reflection:**
117
133
  - After `/forensics` → `@agentic-learning reflect` (what caused the failure?)
118
- - After `/extract-learnings` → `@agentic-learning space` (schedule learnings for review)
119
- - After `/secure-phase` → `@agentic-learning learn` (security patterns)
134
+ - After `/extract-learnings` → `@agentic-learning space` (schedule learnings for spaced review)
120
135
  - After `/session-report` → `@agentic-learning reflect` (session-level reflection)
121
136
 
122
137
  ## Design Skill
123
138
 
124
- The `impeccable` skill suite is always available for any UI work. Use its steering commands (`/audit`, `/critique`, `/polish`, `/colorize`, `/animate`, `/bolder`, `/quieter`, `/distill`, `/clarify`, `/optimize`, `/harden`, `/delight`, `/extract`, `/adapt`, `/onboard`, `/normalize`, `/teach-impeccable`) when reviewing or building user-facing interfaces.
139
+ The `impeccable` skill suite is always available for any UI work. Use its 21 steering commands when reviewing or building user-facing interfaces:
140
+
141
+ **Review & critique:** `/audit`, `/critique`, `/teach-impeccable`
142
+ **Refine & elevate:** `/polish`, `/bolder`, `/quieter`, `/distill`, `/clarify`, `/normalize`, `/extract`, `/adapt`
143
+ **Specific concerns:** `/colorize` (color/contrast), `/typeset` (typography), `/arrange` (layout/spacing), `/animate` (motion), `/onboard` (first-time UX), `/delight` (interaction polish)
144
+ **Engineering attributes:** `/harden` (accessibility, resilience), `/optimize` (performance), `/overdrive` (push design quality to its ceiling)
145
+ **Foundations:** `/frontend-design` (full design system reference: typography, color, spatial, motion, interaction, responsive, UX writing)
125
146
 
126
147
  ## Mandatory Gate — No Project, No Work
127
148
 
@@ -16,6 +16,15 @@ Your job: Ask 3-5 forcing questions through your assigned lens (product or engin
16
16
  If the prompt contains a `<files_to_read>` block, you MUST use the Read tool to load every file listed there before performing any other actions.
17
17
  </role>
18
18
 
19
+ <boundaries>
20
+ ## Boundaries — what this agent does NOT do
21
+
22
+ - **Do NOT make the decision.** Your output is a verdict (proceed / rethink / reduce-scope) plus rationale. The user owns the choice.
23
+ - **Do NOT veto.** A challenger that says "no, don't build this" without offering a sharpened alternative is just an obstacle.
24
+ - **Do NOT modify code, plans, or docs.** You analyze and recommend. Other personas write.
25
+ - **Do NOT pad with general advice.** Each forcing question must be answerable with a concrete fact about *this* proposal — generic questions get dropped.
26
+ </boundaries>
27
+
19
28
  <project_context>
20
29
  Before challenging, load project context:
21
30
 
@@ -16,6 +16,15 @@ Your job: Execute the plan completely, commit each task, create SUMMARY.md, upda
16
16
  If the prompt contains a `<files_to_read>` block, you MUST use the Read tool to load every file listed there before performing any other actions.
17
17
  </role>
18
18
 
19
+ <boundaries>
20
+ ## Boundaries — what this agent does NOT do
21
+
22
+ - **Do NOT improve the plan.** If the plan is wrong, surface the obstacle in SUMMARY.md and propose a deviation — do not silently "fix" the plan as you go.
23
+ - **Do NOT batch commits.** Every task gets its own commit, even if two tasks touch the same file.
24
+ - **Do NOT refactor adjacent code.** If you see a smell, note it for the next planning cycle. Touching unrelated code makes the diff impossible to review.
25
+ - **Do NOT skip the verify step.** A task without verification is a task that might not be done. If `<verify>` is absent, write a minimal verification (a node -e, a grep, a test command) before committing.
26
+ </boundaries>
27
+
19
28
  <project_context>
20
29
  Before executing, load project context:
21
30
 
@@ -16,6 +16,15 @@ Your job: Generate 6-8 ideas through your assigned frame, grounded in the codeba
16
16
  If the prompt contains a `<files_to_read>` block, you MUST use the Read tool to load every file listed there before performing any other actions.
17
17
  </role>
18
18
 
19
+ <boundaries>
20
+ ## Boundaries — what this agent does NOT do
21
+
22
+ - **Do NOT write plans.** Ideas go to the adversarial filter; plans come from the planner. If an idea wins the filter, the user runs `/plan-phase` — not you.
23
+ - **Do NOT execute or modify code.** Ideation is read-only research. You may grep, glob, and read files. You do not edit them.
24
+ - **Do NOT critique or rank.** That's the filtering step's job. Generate broadly; let the filter cut.
25
+ - **Do NOT skip the codebase grounding.** An idea with no file/pattern citation is not an idea — it is product advice. Cite specifics or drop it.
26
+ </boundaries>
27
+
19
28
  <project_context>
20
29
  Before ideating, load the codebase scan results from the prompt context:
21
30
 
@@ -18,6 +18,15 @@ Spawned by `/new-project` after STACK.md, FEATURES.md, ARCHITECTURE.md, and PITF
18
18
  - Identify confidence levels and gaps
19
19
  - Write SUMMARY.md
20
20
 
21
+ <boundaries>
22
+ ## Boundaries — what this agent does NOT do
23
+
24
+ - **Do NOT run new research.** Your job is to synthesize what's already in `.planning/research/`. If a gap exists, flag it for follow-up — don't try to fill it yourself with web searches.
25
+ - **Do NOT modify the 4 source research files.** They are inputs. Edit only SUMMARY.md.
26
+ - **Do NOT make roadmap decisions.** Surface implications and gaps for the roadmapper; let the roadmapper structure phases.
27
+ - **Do NOT concatenate.** If SUMMARY.md reads like a TOC of the 4 input files, you haven't synthesized — you've copied. Find the cross-cutting threads.
28
+ </boundaries>
29
+
21
30
  ## Downstream Consumer
22
31
 
23
32
  Your SUMMARY.md is consumed by the roadmapper (or the planning step) which uses it to:
@@ -18,6 +18,15 @@ Spawned by `/new-project` (after research + requirements) or `/new-milestone`. Y
18
18
  - Define observable success criteria for each phase
19
19
  - Identify which phases need deeper research during planning
20
20
 
21
+ <boundaries>
22
+ ## Boundaries — what this agent does NOT do
23
+
24
+ - **Do NOT write PLAN.md files.** The roadmap is a map of phases, not their implementation. PLAN.md is the planner's job, one phase at a time, on demand.
25
+ - **Do NOT invent requirements.** If a requirement isn't in REQUIREMENTS.md or PROJECT.md, flag it for the user — don't silently extend scope.
26
+ - **Do NOT modify research or requirements.** They are inputs. Write ROADMAP.md only.
27
+ - **Do NOT skip success criteria.** Every phase needs observable, testable success criteria. If you can't name them, the phase is the wrong shape.
28
+ </boundaries>
29
+
21
30
  ## Roadmap Design Principles
22
31
 
23
32
  **Goal-backward:** Start from what the user needs, work backward to what must be built first.
@@ -50,6 +50,25 @@ For each file modified in this phase:
50
50
  5. **Error handling** — Do errors leak implementation details?
51
51
  6. **Dependencies** — Are there known vulnerabilities in new dependencies?
52
52
 
53
+ ## OWASP Top 10 (2021) Cross-Reference
54
+
55
+ For every audit, cross-map STRIDE findings against the OWASP Top 10. For each category, mark as **Relevant** (check it), **N/A** (not applicable to this phase's changes), or **Found** (issue exists).
56
+
57
+ | # | OWASP Category | STRIDE | What to look for |
58
+ |---|---------------|--------|-----------------|
59
+ | A01 | Broken Access Control | E | Missing authz checks, IDOR, path traversal, CORS misconfiguration |
60
+ | A02 | Cryptographic Failures | I | Plaintext secrets, weak ciphers, no TLS, sensitive data in logs/URLs |
61
+ | A03 | Injection | T | SQL, command, LDAP, XPath, template injection; unsanitized user input |
62
+ | A04 | Insecure Design | S/T/E | No rate limiting, unsafe business logic, missing threat model |
63
+ | A05 | Security Misconfiguration | S/I/E | Debug mode on, default credentials, verbose errors, open cloud storage |
64
+ | A06 | Vulnerable Components | T/I/E | Outdated dependencies, known CVEs, unmaintained packages |
65
+ | A07 | Auth Failures | S | Weak/missing passwords, broken session management, no account lockout |
66
+ | A08 | Software/Data Integrity | T | Unsigned updates, unsafe deserialization, CI without integrity checks |
67
+ | A09 | Logging/Monitoring Failures | R | No audit trail, sensitive data in logs, missing alerting on auth failures |
68
+ | A10 | SSRF | T/I | User-controlled URLs fetched server-side, internal service enumeration |
69
+
70
+ Include an OWASP coverage table in SECURITY.md. For irrelevant categories, a single "N/A — [reason]" is sufficient. Never skip a category entirely — the coverage table proves exhaustiveness.
71
+
53
72
  ## Threat Classification
54
73
 
55
74
  For each identified concern:
@@ -58,7 +77,7 @@ For each identified concern:
58
77
 
59
78
  ## Output Format
60
79
 
61
- Write the SECURITY.md file using the template at `~/.claude/learnship/templates/security.md`. Fill in:
80
+ Write the SECURITY.md file using the template at `~/.claude/templates/security.md`. Fill in:
62
81
  - Trust boundaries from the analysis
63
82
  - Complete threat register with STRIDE categories
64
83
  - Status for each threat (open/closed)
@@ -16,6 +16,15 @@ Your job: Extract problem context from conversation history, classify the proble
16
16
  If the prompt contains a `<files_to_read>` block, you MUST use the Read tool to load every file listed there before performing any other actions.
17
17
  </role>
18
18
 
19
+ <boundaries>
20
+ ## Boundaries — what this agent does NOT do
21
+
22
+ - **Do NOT modify source code.** Solutions are documentation of what already happened. The fix lives in git history; the solution file lives in `.planning/solutions/`.
23
+ - **Do NOT invent details.** Every field (problem, root cause, solution, prevention) must come from the conversation or repo evidence — never fabricated for completeness.
24
+ - **Do NOT duplicate.** Search `.planning/solutions/` first. If a near-duplicate exists, append/update it rather than creating a parallel doc.
25
+ - **Do NOT skip frontmatter.** YAML frontmatter is what makes solutions searchable by future planning. A solution without it is invisible.
26
+ </boundaries>
27
+
19
28
  <project_context>
20
29
  Before writing, load project context:
21
30
 
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: learnship-verifier
3
3
  description: Verifies that a phase goal was actually achieved after execution — checks must_haves, requirement coverage, and integration links. Spawned by execute-phase on platforms with subagent support.
4
- tools: Read, Bash, Glob, Grep
4
+ tools: Read, Write, Bash, Glob, Grep
5
5
  color: purple
6
6
  ---
7
7
 
package/bin/install.js CHANGED
@@ -30,24 +30,27 @@ const pkg = require('../package.json');
30
30
  // Codex config.toml marker — used to identify learnship-managed section
31
31
  const LEARNSHIP_CODEX_MARKER = '# learnship Agent Configuration — managed by learnship installer';
32
32
 
33
- // Per-agent Codex sandbox modes (read-only for checkers, workspace-write for executors)
33
+ // Per-agent Codex sandbox modes (read-only for checkers, workspace-write for executors).
34
+ // All 17 agents must be listed explicitly — agents missing from this map silently
35
+ // fall back to whatever Codex's default is, which is wrong for read-only roles.
34
36
  const CODEX_AGENT_SANDBOX = {
35
- 'learnship-executor': 'workspace-write',
36
- 'learnship-planner': 'workspace-write',
37
- 'learnship-verifier': 'workspace-write',
38
- 'learnship-debugger': 'workspace-write',
39
- 'learnship-plan-checker': 'read-only',
40
- 'learnship-solution-writer': 'workspace-write',
41
- 'learnship-code-reviewer': 'read-only',
42
- 'learnship-challenger': 'read-only',
43
- 'learnship-ideation-agent': 'read-only',
44
- 'learnship-security-auditor': 'read-only',
45
- 'learnship-doc-writer': 'workspace-write',
46
- 'learnship-project-researcher': 'workspace-write',
37
+ 'learnship-executor': 'workspace-write',
38
+ 'learnship-planner': 'workspace-write',
39
+ 'learnship-verifier': 'workspace-write',
40
+ 'learnship-debugger': 'workspace-write',
41
+ 'learnship-plan-checker': 'read-only',
42
+ 'learnship-solution-writer': 'workspace-write',
43
+ 'learnship-code-reviewer': 'read-only',
44
+ 'learnship-challenger': 'read-only',
45
+ 'learnship-ideation-agent': 'read-only',
46
+ 'learnship-security-auditor': 'read-only',
47
+ 'learnship-doc-writer': 'workspace-write',
48
+ 'learnship-doc-verifier': 'read-only',
49
+ 'learnship-researcher': 'workspace-write',
50
+ 'learnship-project-researcher': 'workspace-write',
47
51
  'learnship-research-synthesizer': 'workspace-write',
48
- 'learnship-roadmapper': 'workspace-write',
49
- 'learnship-phase-researcher': 'workspace-write',
50
- 'learnship-doc-verifier': 'read-only',
52
+ 'learnship-roadmapper': 'workspace-write',
53
+ 'learnship-phase-researcher': 'workspace-write',
51
54
  };
52
55
 
53
56
  // ─── Colors ────────────────────────────────────────────────────────────────
@@ -72,7 +75,40 @@ const hasLocal = args.includes('--local') || args.includes('-l');
72
75
  const hasUninstall = args.includes('--uninstall') || args.includes('-u');
73
76
  const hasHelp = args.includes('--help') || args.includes('-h');
74
77
  const targetIdx = args.indexOf('--target');
75
- const targetOverride = targetIdx !== -1 && args[targetIdx + 1] ? path.resolve(args[targetIdx + 1]) : null;
78
+
79
+ // Refuse high-risk --target paths. We never want a typo or malicious arg
80
+ // to make the installer rmSync / overwrite something like /, /etc, or $HOME.
81
+ // Allowed: anywhere under $HOME (but not $HOME itself), anywhere under os.tmpdir() (test installs), anywhere
82
+ // under the current working directory (project-local installs). Reject the rest.
83
+ function validateTargetPath(raw) {
84
+ if (!raw) return null;
85
+ const resolved = path.resolve(raw);
86
+ const home = os.homedir();
87
+ const cwd = process.cwd();
88
+ const tmp = os.tmpdir();
89
+ // Forbid the literal root and common system dirs themselves (exact match only; paths beneath them are rejected by the allowed-roots check further down)
90
+ const forbidden = ['/', '/etc', '/usr', '/var', '/bin', '/sbin', '/lib', '/lib64', '/boot', '/dev', '/proc', '/sys', '/root'];
91
+ if (forbidden.includes(resolved)) {
92
+ console.error(` Error: refusing to install to system path: ${resolved}`);
93
+ process.exit(1);
94
+ }
95
+ // Refuse if it equals $HOME exactly (installing directly into ~ would clobber dotfiles)
96
+ if (resolved === home) {
97
+ console.error(` Error: refusing to install directly to $HOME (${resolved}). Use a subdirectory like $HOME/.claude or pass --global.`);
98
+ process.exit(1);
99
+ }
100
+ // Otherwise must be inside one of the allowed roots
101
+ const insideHome = resolved === home || resolved.startsWith(home + path.sep);
102
+ const insideTmp = resolved === tmp || resolved.startsWith(tmp + path.sep);
103
+ const insideCwd = resolved === cwd || resolved.startsWith(cwd + path.sep);
104
+ if (!insideHome && !insideTmp && !insideCwd) {
105
+ console.error(` Error: --target must be inside $HOME, /tmp, or the current directory. Got: ${resolved}`);
106
+ process.exit(1);
107
+ }
108
+ return resolved;
109
+ }
110
+
111
+ const targetOverride = targetIdx !== -1 && args[targetIdx + 1] ? validateTargetPath(args[targetIdx + 1]) : null;
76
112
 
77
113
  let selectedPlatforms = [];
78
114
  if (hasAll) {
@@ -299,13 +335,17 @@ function convertToOpencode(content) {
299
335
  }
300
336
  continue;
301
337
  }
302
- // Convert color names to hex
338
+ // Convert color names to hex (OpenCode requires hex)
303
339
  if (t.startsWith('color:')) {
304
- const colorVal = t.substring(6).trim().toLowerCase();
340
+ const colorVal = t.substring(6).trim().toLowerCase().replace(/^['"]|['"]$/g, '');
305
341
  const hex = colorNameToHex[colorVal];
306
342
  if (hex) { newLines.push(`color: "${hex}"`); }
307
343
  else if (colorVal.startsWith('#')) { newLines.push(line); }
308
- // skip unknown color names entirely
344
+ else {
345
+ // Unknown color: warn rather than silently drop, so installs are debuggable.
346
+ console.warn(` ${yellow}⚠${reset} Unknown OpenCode color "${colorVal}" — defaulting to #808080 (gray). Add it to colorNameToHex in bin/install.js to fix.`);
347
+ newLines.push(`color: "#808080"`);
348
+ }
309
349
  continue;
310
350
  }
311
351
  if (inTools) {
@@ -686,7 +726,7 @@ function rewriteNewProject(content, platform) {
686
726
  // Parallel execution block
687
727
  let parallelBlock;
688
728
  if (supportsParallel) {
689
- parallelBlock = `**Group D — Parallel execution:**\n\n${label} supports real parallel subagents. Ask:\n\n"Do you want to enable parallel subagent execution?"\n- **No** (recommended default) — Plans execute sequentially, one at a time. Safer, easier to follow.\n- **Yes** — Each independent plan in a wave gets its own dedicated subagent with a fresh context budget. Faster, but uses more tokens.`;
729
+ parallelBlock = `**Group D — Parallel execution:**\n\n${label} supports real parallel subagents. Ask:\n\n"Do you want to enable parallel subagent execution?"\n- **Yes** (recommended) — Each independent plan in a wave gets its own dedicated subagent with a fresh context budget. Plans finish faster and each executor has a clean context with no accumulated noise.\n- **No** — Plans execute sequentially, one at a time. Predictable and easy to follow; useful when you want to review each plan before the next begins.`;
690
730
  } else if (platform === 'gemini') {
691
731
  parallelBlock = `**Group D — Parallel execution:**\n\nGemini CLI supports subagents but only runs them sequentially — parallel execution is not yet available. Parallelization is automatically set to \`false\`.`;
692
732
  } else {
@@ -694,6 +734,18 @@ function rewriteNewProject(content, platform) {
694
734
  }
695
735
  content = content.replace('<!-- LEARNSHIP_PARALLEL_BLOCK -->', parallelBlock);
696
736
 
737
+ // On platforms with real parallel subagents, flip Quick-mode default to parallel-on
738
+ if (supportsParallel) {
739
+ content = content.replace(
740
+ 'parallelization off (you can flip it later in .planning/config.json)',
741
+ 'parallelization on'
742
+ );
743
+ content = content.replace(
744
+ 'and `parallelization.enabled = false`)',
745
+ 'and `parallelization.enabled = true` for this platform)'
746
+ );
747
+ }
748
+
697
749
  // Platform-specific AGENTS.md note
698
750
  // Claude Code reads CLAUDE.md as primary; Gemini CLI reads GEMINI.md — copy so sessions have context
699
751
  let agentsMdNote = '';
@@ -1401,12 +1453,27 @@ function saveLocalPatches(targetDir) {
1401
1453
  const patchesDir = path.join(targetDir, 'learnship-local-patches');
1402
1454
  const modified = [];
1403
1455
 
1456
+ // Reject any manifest entry whose relative path escapes the install root.
1457
+ // A malicious or corrupted manifest could otherwise read/copy arbitrary files
1458
+ // via "../" segments or absolute paths, since the fs calls themselves do not reject them.
1459
+ const targetResolved = path.resolve(targetDir);
1460
+ const patchesResolved = path.resolve(patchesDir);
1461
+
1404
1462
  for (const [relPath, originalHash] of Object.entries(manifest.files || {})) {
1405
- const fullPath = path.join(targetDir, relPath);
1463
+ if (typeof relPath !== 'string' || typeof originalHash !== 'string') continue;
1464
+ // Block absolute paths, parent-dir traversal, and null bytes.
1465
+ if (path.isAbsolute(relPath) || relPath.includes('\0') || /(^|[\\/])\.\.([\\/]|$)/.test(relPath)) {
1466
+ console.warn(` ${yellow}⚠${reset} Skipping unsafe manifest path: ${relPath}`);
1467
+ continue;
1468
+ }
1469
+ const fullPath = path.resolve(targetResolved, relPath);
1470
+ const backupPath = path.resolve(patchesResolved, relPath);
1471
+ // Belt-and-braces: confirm both resolved paths land inside their roots.
1472
+ if (!fullPath.startsWith(targetResolved + path.sep) && fullPath !== targetResolved) continue;
1473
+ if (!backupPath.startsWith(patchesResolved + path.sep) && backupPath !== patchesResolved) continue;
1406
1474
  if (!fs.existsSync(fullPath)) continue;
1407
1475
  const currentHash = fileHash(fullPath);
1408
1476
  if (currentHash !== originalHash) {
1409
- const backupPath = path.join(patchesDir, relPath);
1410
1477
  fs.mkdirSync(path.dirname(backupPath), { recursive: true });
1411
1478
  fs.copyFileSync(fullPath, backupPath);
1412
1479
  modified.push(relPath);
@@ -1607,8 +1674,11 @@ function install(platform, isGlobal) {
1607
1674
  platform === 'gemini' ? '/learnship:ls' : '$learnship-ls';
1608
1675
  console.log(`\n ${green}Done!${reset} Open a project in ${label} and run ${cyan}${firstCmd}${reset}.`);
1609
1676
  console.log(` ${dim}First time? Run ${cyan}${platform === 'windsurf' ? '/new-project' : platform === 'claude' ? '/learnship:new-project' : platform === 'opencode' ? '/learnship-new-project' : platform === 'gemini' ? '/learnship:new-project' : '$learnship-new-project'}${reset}${dim} to initialize your project and create AGENTS.md.${reset}`);
1610
- if (platform !== 'windsurf') {
1611
- console.log(` ${dim}Enable parallel subagents: add ${cyan}"parallelization": true${reset}${dim} to .planning/config.json${reset}`);
1677
+ const parallelSupported = platform === 'claude' || platform === 'opencode' || platform === 'codex';
1678
+ if (parallelSupported) {
1679
+ console.log(` ${dim}Parallel subagents are ${green}on by default${reset}${dim}. To disable: set ${cyan}"parallelization": { "enabled": false }${reset}${dim} in .planning/config.json${reset}`);
1680
+ } else if (platform !== 'windsurf') {
1681
+ console.log(` ${dim}Parallel subagents: not available on ${label}. Sequential execution is used.${reset}`);
1612
1682
  }
1613
1683
  }
1614
1684