npm - learnship - Versions diffs - 2.3.6 → 2.4.0 - Mend

learnship 2.3.6 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

package/.claude-plugin/plugin.json +1 -1
package/.cursor-plugin/plugin.json +1 -1
package/README.md +34 -17
package/SKILL.md +32 -11
package/agents/learnship-challenger.md +9 -0
package/agents/learnship-executor.md +9 -0
package/agents/learnship-ideation-agent.md +9 -0
package/agents/learnship-research-synthesizer.md +9 -0
package/agents/learnship-roadmapper.md +9 -0
package/agents/learnship-security-auditor.md +20 -1
package/agents/learnship-solution-writer.md +9 -0
package/agents/learnship-verifier.md +1 -1
package/bin/install.js +95 -25
package/cursor-rules/learnship.mdc +32 -4
package/gemini-extension.json +1 -1
package/hooks/learnship-context-monitor.js +6 -3
package/hooks/learnship-prompt-guard.js +1 -1
package/hooks/learnship-session-state.js +1 -1
package/hooks/learnship-statusline.js +8 -3
package/learnship/agents/challenger.md +7 -0
package/learnship/agents/executor.md +7 -0
package/learnship/agents/ideation-agent.md +7 -0
package/learnship/agents/research-synthesizer.md +7 -0
package/learnship/agents/roadmapper.md +7 -0
package/learnship/agents/security-auditor.md +28 -0
package/learnship/agents/solution-writer.md +7 -0
package/learnship/agents/verifier.md +1 -1
package/learnship/references/model-profiles.md +20 -13
package/learnship/workflows/execute-phase.md +4 -3
package/learnship/workflows/health.md +32 -4
package/learnship/workflows/new-project.md +36 -4
package/learnship/workflows/review.md +106 -10
package/learnship/workflows/secure-phase.md +2 -0
package/learnship/workflows/ship.md +43 -0
package/learnship/workflows/verify-work.md +33 -0
package/package.json +1 -1

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "learnship",
   "description": "Agentic engineering done right — 57 structured workflows, 17 specialist agent personas, persistent memory across sessions, integrated learning partner, and impeccable UI design system. Works with Claude Code, Windsurf, Cursor, Gemini CLI, OpenCode, and Codex.",
-  "version": "2.3.6",
+  "version": "2.4.0",
   "author": {
     "name": "Favio Vazquez",
     "email": "favio.vazquezp@gmail.com"

package/.cursor-plugin/plugin.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "learnship",
   "displayName": "learnship",
   "description": "Agentic engineering done right — 57 structured workflows, 17 specialist agent personas, persistent memory across sessions, integrated learning partner, and impeccable UI design system.",
-  "version": "2.3.6",
+  "version": "2.4.0",
   "logo": "assets/logo.png",
   "author": {
     "name": "Favio Vazquez",

package/README.md CHANGED Viewed

@@ -124,7 +124,7 @@ flowchart LR
     PP["/plan-phase N<br/>Vertical slice plans"]
     EP["/execute-phase N<br/>Build + commit"]
     VW["/verify-work N<br/>UAT + diagnose"]
-    RV["/review<br/>Multi-persona review"]
+    RV["/review<br/>Two-pass review"]
     SH["/ship<br/>Test → PR"]
     CP["/compound<br/>Capture knowledge"]
@@ -140,7 +140,7 @@ flowchart LR
 | **2. Plan** | `/plan-phase N` | Agent researches the domain, creates vertical slice plans (tracer bullets), verifies them — including horizontal slice detection (v2.3.4) |
 | **3. Execute** | `/execute-phase N` | Plans run in dependency order, one atomic commit per task |
 | **4. Verify** | `/verify-work N` | You do UAT; agent diagnoses any gaps and creates fix plans |
-| **5. Review** | `/review` | Multi-persona code review through 6 lenses (v2.0) |
+| **5. Review** | `/review` | Two-pass review: spec compliance check then 6-lens quality review (v2.4.0) |
 | **6. Ship** | `/ship` | Test → lint → commit → push → PR (v2.0) |
 | **7. Compound** | `/compound` | Capture what you learned as searchable documentation (v2.0) |
@@ -172,14 +172,16 @@ Each platform gets the best experience it supports:
 |---------|----------|-------------|----------|------------|-----------|
 | Slash commands | ✓ | ✓ | ✓ | ✓ | `$skills` |
 | Real parallel subagents | — | ✓ | ✓ | ✓ | ✓ |
-| Parallel wave execution | — | ✓ opt-in | ✓ opt-in | ✓ | ✓ opt-in |
+| Parallel wave execution | — | ✓ | ✓ | ⚠️ experimental | ✓ |
 | Agent personas (17) | `model_decision` rules | `Task()` subagents | `Task()` subagents | `Task()` subagents | `Task()` subagents |
 | Interactive questions | `ask_user_question` | `AskUserQuestion` | `question` | `ask_user` | `request_user_input` |
 | Session hooks | — | ✓ | — | ✓ | — |
 | Skills (native `@invoke`) | ✓ | — | — | — | — |
 | Skills (context files) | ✓ | ✓ | ✓ | ✓ | ✓ |
-**Parallel subagents:** On Claude Code, OpenCode, and Codex, `execute-phase` can spawn a dedicated executor per plan within a wave, each with its own 200k context budget. Enable with `"parallelization": { "enabled": true }` in `.planning/config.json`. Up to 5 concurrent agents per wave by default. All platforms default to sequential (always safe).
+**Cursor** uses context injection via `cursor-rules/learnship.mdc` — it gets the full workflow library, design system, and learning partner, but does not appear in the feature matrix above because it has no first-class slash command or subagent API (rules load automatically as context).
+**Parallel subagents:** Claude Code, OpenCode, and Codex default to parallel execution — `execute-phase` spawns a dedicated executor per plan within a wave, each with its own 200k context budget. Up to 5 concurrent agents per wave. To run sequentially, set `"parallelization": { "enabled": false }` in `.planning/config.json`. Windsurf and Cursor use sequential execution (no real subagent API on those platforms). Gemini CLI also defaults to sequential execution; parallel wave execution there is experimental (see the feature matrix above).
 ---
@@ -193,8 +195,8 @@ learnship gives you that harness as a portable, open-source layer that adds:
 - **Persistent memory.** `/new-project` generates an `AGENTS.md` loaded automatically every session. No more repeating yourself.
 - **Structured process.** A repeatable phase loop with spec-driven plans, wave-ordered execution, and UAT-driven verification.
-- **Knowledge compounding.** `/compound` captures solved problems. `/review` runs multi-persona code review. `/ship` runs the full delivery pipeline.
-- **Security & recovery.** `/secure-phase` for STRIDE verification. `/forensics` for post-mortem. `/undo` for safe revert.
+- **Knowledge compounding.** `/compound` captures solved problems. `/review` runs two-pass code review (spec compliance then quality). `/ship` runs the full delivery pipeline.
+- **Security & recovery.** `/secure-phase` for STRIDE + OWASP Top 10 verification. `/forensics` for post-mortem. `/undo` for safe revert.
 - **Session intelligence.** Hooks, context profiles, interactive questions, agent delegation. ([v2.2 details →](#whats-new-in-v22))
 - **Built-in learning.** Neuroscience-backed checkpoints at every phase transition so you understand what you shipped.
@@ -230,7 +232,7 @@ It's the right tool if:
 - You're **building a real project** and want the AI to stay aligned across sessions
 - You're **learning while building** and want to actually understand what gets shipped
 - You care about **code quality and UI quality** beyond "it works"
-- You want **parallel agent execution** on Claude Code, OpenCode, or Gemini CLI
+- You want **parallel agent execution** — Claude Code, OpenCode, and Codex run it by default
 - You've felt the frustration of **context loss**: repeating yourself while the agent forgets
 It's probably overkill for one-off scripts. Use `/quick` for that.
@@ -252,6 +254,18 @@ It's probably overkill for one-off scripts. Use `/quick` for that.
 ## 🆕 What's New
+### What's new in v2.4.0
+v2.4.0 adds spec compliance checking to `/review`, OWASP Top 10 coverage to `/secure-phase`, a numeric score to `/health`, and Playwright MCP smoke-test guidance to `/verify-work` and `/ship`:
+**Two-stage `/review`**: Pass 1 checks spec compliance — reads PLAN.md must-haves and classifies each as COVERED / PARTIAL / MISSING — before Pass 2 runs the existing 6-persona quality review. The spec compliance result appears in the report header. Use `--quality-only` to skip Pass 1 and run only the quality review.
+**OWASP Top 10 in `/secure-phase`**: The security-auditor agent now cross-maps STRIDE findings against OWASP Top 10 (A01–A10). Every SECURITY.md output includes an OWASP coverage table alongside the STRIDE analysis.
+**Numeric `/health` score**: The health check now outputs a 0–100 numeric score alongside the qualitative status. The score starts at 100, and points are deducted for each issue found. Bands: HEALTHY (90–100), DEGRADED (70–89), BROKEN (0–69).
+**Playwright MCP guidance in `/verify-work` and `/ship`**: Optional live UI smoke-test sections activate when `@playwright/mcp` is configured. Supported on all 6 MCP-capable platforms (Claude Code, OpenCode, Cursor, Windsurf, Codex CLI, Gemini CLI). In `/verify-work`, the agent walks the golden path using `mcp__playwright__*` tools. In `/ship`, it runs a quick smoke test before creating the PR.
 ### What's new in v2.3.4
 v2.3.4 adds two planning quality features:
@@ -296,7 +310,7 @@ v2.1 adds 8 new workflows, 5 new references, 3 new templates, and 2 new agents:
 | Category | New workflows |
 |----------|--------------|
-| **Security** | `/secure-phase` — per-phase STRIDE threat verification |
+| **Security** | `/secure-phase` — per-phase STRIDE threat-model security verification |
 | **Documentation** | `/docs-update` — generate and verify project docs against codebase |
 | **Recovery** | `/forensics` — post-mortem investigation · `/undo` — safe git revert |
 | **Session** | `/note` — zero-friction capture · `/session-report` — stakeholder summaries |
@@ -309,7 +323,7 @@ Enhanced: `/discuss-phase` (scope guardrails + domain probes + `--deep` extended
 ---
-##  Agentic Engineering vs Vibe Coding
+## ⚡ Agentic Engineering vs Vibe Coding
 ![Vibe coding vs Agentic engineering](assets/vibe-vs-agentic.png)
@@ -373,7 +387,7 @@ AGENTS.md                   ← your AI agent reads this every conversation
 | `/discuss-phase [N]` | Capture implementation decisions before planning | Before every phase |
 | `/plan-phase [N]` | Research + create + verify plans | After discussing a phase |
 | `/execute-phase [N]` | Wave-ordered execution of all plans | After planning |
-| `/verify-work [N]` | Manual UAT with auto-diagnosis and fix planning | After execution |
+| `/verify-work [N]` | Manual UAT with auto-diagnosis and fix planning. Optional Playwright MCP live UI smoke test when `@playwright/mcp` is configured. | After execution |
 | `/complete-milestone` | Archive milestone, tag release, prepare next | All phases verified |
 | `/audit-milestone` | Pre-release: requirement coverage, stub detection | Before completing milestone |
 | `/new-milestone [name]` | Start next version cycle | After completing a milestone |
@@ -436,9 +450,9 @@ AGENTS.md                   ← your AI agent reads this every conversation
 | Workflow | Purpose | When to use |
 |----------|---------|-------------|
 | `/compound` | Capture solved problem as searchable documentation | After `/debug`, `/verify-work`, or any aha moment |
-| `/review` | Multi-persona code review (6 lenses) | After `/verify-work`, before shipping |
+| `/review` | Two-pass review: spec compliance check then 6-persona quality review. `--quality-only` skips spec compliance. | After `/verify-work`, before shipping |
 | `/challenge` | Stress-test scope through product + engineering lenses | Before committing to a milestone or large feature |
-| `/ship` | Test → lint → commit → push → PR | After review, ready to deploy |
+| `/ship` | Test → lint → commit → push → PR. Optional Playwright MCP smoke test before PR creation when `@playwright/mcp` is configured. | After review, ready to deploy |
 | `/ideate` | Codebase-grounded idea generation | Before `/discuss-milestone`, between milestones |
 | `/guard` | Safety mode: protect sensitive directories | Working on auth, payments, migrations |
 | `/sync-docs` | Detect stale documentation | Before `/complete-milestone`, after refactors |
@@ -449,7 +463,7 @@ AGENTS.md                   ← your AI agent reads this every conversation
 |----------|---------|-------------|
 | `/settings` | Interactive config editor | Change mode, toggle agents |
 | `/set-profile [quality\|balanced\|budget]` | One-step model profile switch | Quick cost/quality adjustment |
-| `/health` | Project health check | Stale files, missing artifacts |
+| `/health` | Project health check with numeric 0–100 score (HEALTHY ≥90, DEGRADED ≥70, BROKEN <70) | Stale files, missing artifacts |
 | `/cleanup` | Archive old artifacts | End of milestone |
 | `/update` | Update the platform itself | Check for new workflows |
 | `/reapply-patches` | Restore local edits after update | After `/update` if you had local changes |
@@ -534,7 +548,7 @@ Project settings live in `.planning/config.json`. Set during `/new-project` or e
 | `model_profile` | `quality`, `balanced`, `budget` | `balanced` | Agent model tier (see table below) |
 | `learning_mode` | `auto`, `manual` | `auto` | `auto` offers learning at checkpoints; `manual` requires explicit invocation |
 | `context` | `dev`, `research`, `review` | `dev` | Output profile: `dev` (concise), `research` (detailed), `review` (audit-focused) |
-| `parallelization.enabled` | `true`, `false` | `false` | Parallel subagents per plan on supported platforms |
+| `parallelization.enabled` | `true`, `false` | `true` (Claude Code, OpenCode, Codex) / `false` (others) | Parallel subagents per plan on supported platforms |
 | `test_first` | `true`, `false` | `false` | TDD mode: write failing test first, verify red, implement, verify green |
 | `planning.commit_mode` | `auto`, `manual` | `auto` | `auto` commits after each workflow step; `manual` skips all git commits |
@@ -584,7 +598,7 @@ Project settings live in `.planning/config.json`. Set during `/new-project` or e
 | Challenger | large | medium | medium |
 | Ideation Agent | large | medium | small |
-> **Platform note:** Tiers map to the best available model on your platform: `large` = Claude Opus 4.6 / Gemini 3.1 Pro / GPT-5.4, `medium` = Claude Sonnet 4.6 / Gemini 3.1 Flash / GPT-5.4-mini, `small` = Claude Haiku 4.5 / Gemini 3.1 Flash-Lite / GPT-5.4-nano. Windsurf, Cursor, and OpenCode use the platform default model — tiers signal intended task complexity.
+> **Platform note:** Tiers map to the best available model on your platform. On Claude Code: `large` = Opus, `medium` = Sonnet, `small` = Haiku. On Gemini CLI and Codex CLI the installer maps tiers to the best available model at install time. Windsurf, Cursor, and OpenCode use the platform default model — tiers signal intended task complexity.
 ### Speed vs. Quality Presets
@@ -653,9 +667,12 @@ The **impeccable** skill suite is always active as project context for any UI wo
 | `/quieter` | Tone down overly aggressive designs to reduce intensity and gain refinement |
 | `/distill` | Strip to essence: remove complexity, clarify what matters |
 | `/clarify` | Improve UX copy, error messages, microcopy, labels |
+| `/typeset` | Improve typography: font choices, hierarchy, sizing, weight, and readability |
+| `/arrange` | Improve layout, spacing, and visual rhythm; fix monotonous grids and weak hierarchy |
 | `/optimize` | Performance: loading speed, rendering, animations, bundle size |
 | `/harden` | Resilience: error handling, i18n, text overflow, edge cases |
 | `/delight` | Add moments of joy and personality that make interfaces memorable |
+| `/overdrive` | Push past conventional limits — shaders, spring physics, scroll-driven reveals |
 | `/extract` | Extract reusable components and design tokens into your design system |
 | `/adapt` | Adapt designs across screen sizes, devices, and contexts |
 | `/onboard` | Design onboarding flows, empty states, first-time user experiences |
@@ -687,7 +704,7 @@ The **impeccable** skill suite is always active as project context for any UI wo
 /plan-phase 1             # Research + plan + verify
 /execute-phase 1          # Wave-ordered execution
 /verify-work 1            # Manual UAT
-/review                   # v2.0: multi-persona code review
+/review                   # two-pass review: spec compliance + quality (v2.4.0)
 /ship                     # v2.0: test → commit → push → PR
 /compound                 # v2.0: capture what you learned
                           # Repeat for each phase
@@ -907,7 +924,7 @@ learnship/
 ├── bin/
 │   └── install.js          # Multi-platform installer (Claude Code, OpenCode, Gemini CLI, Codex CLI, Windsurf)
 ├── tests/
-│   └── run_all.sh               # 15 test suites, 1200+ checks across 6 platforms
+│   └── run_all.sh               # 17 test suites, 1330+ checks across 6 platforms
 ├── SKILL.md                # Meta-skill: platform context loaded by Cascade / AI agents
 ├── install.sh              # Shell installer wrapper
 ├── package.json            # npm package (npx learnship)

package/SKILL.md CHANGED Viewed

@@ -103,25 +103,46 @@ Read `learning_mode` from `.planning/config.json` (default: "auto"):
 - **`auto`** — Proactively offer learning actions at natural workflow checkpoints (after planning, execution, verification)
 - **`manual`** — Only activate `@agentic-learning` when the user explicitly asks
-Learning checkpoints:
-- After requirements approved → `@agentic-learning brainstorm`
-- After discuss-phase → `@agentic-learning either-or`
-- After plan-phase → `@agentic-learning cognitive-load`
-- After execute-phase → `@agentic-learning reflect`
-- After verify-work passes → `@agentic-learning space`
-- After `/review` → `@agentic-learning learn` (review findings as learning material)
+Learning checkpoints (auto mode triggers these; manual mode surfaces them as tips):
+**Core phase loop:**
+- After requirements approved → `@agentic-learning brainstorm` (design dialogue on the requirements)
+- After `/discuss-phase` → `@agentic-learning either-or` (capture the decisions made)
+- After `/plan-phase` → `@agentic-learning cognitive-load` (decompose if plan feels overwhelming)
+- After `/execute-phase` → `@agentic-learning reflect` (consolidate the cycle)
+- After `/verify-work` passes → `@agentic-learning space` (queue concepts for spaced revisit)
+**Quality gates:**
+- After `/review` → `@agentic-learning learn` (most significant finding as a learning topic)
+- After `/review` (on UI changes) → `@agentic-learning quiz` (gaps in recall predict future bugs)
 - After `/challenge` → `@agentic-learning either-or` (which lens was most valuable?)
+- After `/secure-phase` → `@agentic-learning learn` (security patterns)
 - After `/ship` → `@agentic-learning reflect` (what went well in this cycle?)
+**Discovery, mapping, comprehension:**
+- After `/map-codebase` or `/discovery-phase` → `@agentic-learning explain` (lock in the project knowledge log)
+- When studying an unfamiliar function or pattern → `@agentic-learning explain-first` (oracy-first comprehension check)
+- After absorbing research files (RESEARCH.md, STACK.md, etc.) → `@agentic-learning quiz` (test what stuck)
+**Ideation and complex tasks:**
 - After `/ideate` → `@agentic-learning brainstorm` (explore top idea collaboratively)
-- During complex quick tasks → `@agentic-learning struggle`
+- During complex `/quick` tasks → `@agentic-learning struggle` (productive struggle on hard parts)
+- When stuck across multiple domains in one session → `@agentic-learning interleave` (mixed retrieval forces transfer)
+**Recovery and reflection:**
 - After `/forensics` → `@agentic-learning reflect` (what caused the failure?)
-- After `/extract-learnings` → `@agentic-learning space` (schedule learnings for review)
-- After `/secure-phase` → `@agentic-learning learn` (security patterns)
+- After `/extract-learnings` → `@agentic-learning space` (schedule learnings for spaced review)
 - After `/session-report` → `@agentic-learning reflect` (session-level reflection)
 ## Design Skill
-The `impeccable` skill suite is always available for any UI work. Use its steering commands (`/audit`, `/critique`, `/polish`, `/colorize`, `/animate`, `/bolder`, `/quieter`, `/distill`, `/clarify`, `/optimize`, `/harden`, `/delight`, `/extract`, `/adapt`, `/onboard`, `/normalize`, `/teach-impeccable`) when reviewing or building user-facing interfaces.
+The `impeccable` skill suite is always available for any UI work. Use its 21 steering commands when reviewing or building user-facing interfaces:
+**Review & critique:** `/audit`, `/critique`, `/teach-impeccable`
+**Refine & elevate:** `/polish`, `/bolder`, `/quieter`, `/distill`, `/clarify`, `/normalize`, `/extract`, `/adapt`
+**Specific concerns:** `/colorize` (color/contrast), `/typeset` (typography), `/arrange` (layout/spacing), `/animate` (motion), `/onboard` (first-time UX), `/delight` (interaction polish)
+**Engineering attributes:** `/harden` (accessibility, resilience), `/optimize` (performance), `/overdrive` (push design quality to its ceiling)
+**Foundations:** `/frontend-design` (full design system reference: typography, color, spatial, motion, interaction, responsive, UX writing)
 ## Mandatory Gate — No Project, No Work

package/agents/learnship-challenger.md CHANGED Viewed

@@ -16,6 +16,15 @@ Your job: Ask 3-5 forcing questions through your assigned lens (product or engin
 If the prompt contains a `<files_to_read>` block, you MUST use the Read tool to load every file listed there before performing any other actions.
 </role>
+<boundaries>
+## Boundaries — what this agent does NOT do
+- **Do NOT make the decision.** Your output is a verdict (proceed / rethink / reduce-scope) plus rationale. The user owns the choice.
+- **Do NOT veto.** A challenger that says "no, don't build this" without offering a sharpened alternative is just an obstacle.
+- **Do NOT modify code, plans, or docs.** You analyze and recommend. Other personas write.
+- **Do NOT pad with general advice.** Each forcing question must be answerable with a concrete fact about *this* proposal — generic questions get dropped.
+</boundaries>
 <project_context>
 Before challenging, load project context:

package/agents/learnship-executor.md CHANGED Viewed

@@ -16,6 +16,15 @@ Your job: Execute the plan completely, commit each task, create SUMMARY.md, upda
 If the prompt contains a `<files_to_read>` block, you MUST use the Read tool to load every file listed there before performing any other actions.
 </role>
+<boundaries>
+## Boundaries — what this agent does NOT do
+- **Do NOT improve the plan.** If the plan is wrong, surface the obstacle in SUMMARY.md and propose a deviation — do not silently "fix" the plan as you go.
+- **Do NOT batch commits.** Every task gets its own commit, even if two tasks touch the same file.
+- **Do NOT refactor adjacent code.** If you see a smell, note it for the next planning cycle. Touching unrelated code makes the diff impossible to review.
+- **Do NOT skip the verify step.** A task without verification is a task that might not be done. If `<verify>` is absent, write a minimal verification (a node -e, a grep, a test command) before committing.
+</boundaries>
 <project_context>
 Before executing, load project context:

package/agents/learnship-ideation-agent.md CHANGED Viewed

@@ -16,6 +16,15 @@ Your job: Generate 6-8 ideas through your assigned frame, grounded in the codeba
 If the prompt contains a `<files_to_read>` block, you MUST use the Read tool to load every file listed there before performing any other actions.
 </role>
+<boundaries>
+## Boundaries — what this agent does NOT do
+- **Do NOT write plans.** Ideas go to the adversarial filter; plans come from the planner. If an idea wins the filter, the user runs `/plan-phase` — not you.
+- **Do NOT execute or modify code.** Ideation is read-only research. You may grep, glob, and read files. You do not edit them.
+- **Do NOT critique or rank.** That's the filtering step's job. Generate broadly; let the filter cut.
+- **Do NOT skip the codebase grounding.** An idea with no file/pattern citation is not an idea — it is product advice. Cite specifics or drop it.
+</boundaries>
 <project_context>
 Before ideating, load the codebase scan results from the prompt context:

package/agents/learnship-research-synthesizer.md CHANGED Viewed

@@ -18,6 +18,15 @@ Spawned by `/new-project` after STACK.md, FEATURES.md, ARCHITECTURE.md, and PITF
 - Identify confidence levels and gaps
 - Write SUMMARY.md
+<boundaries>
+## Boundaries — what this agent does NOT do
+- **Do NOT run new research.** Your job is to synthesize what's already in `.planning/research/`. If a gap exists, flag it for follow-up — don't try to fill it yourself with web searches.
+- **Do NOT modify the 4 source research files.** They are inputs. Edit only SUMMARY.md.
+- **Do NOT make roadmap decisions.** Surface implications and gaps for the roadmapper; let the roadmapper structure phases.
+- **Do NOT concatenate.** If SUMMARY.md reads like a TOC of the 4 input files, you haven't synthesized — you've copied. Find the cross-cutting threads.
+</boundaries>
 ## Downstream Consumer
 Your SUMMARY.md is consumed by the roadmapper (or the planning step) which uses it to:

package/agents/learnship-roadmapper.md CHANGED Viewed

@@ -18,6 +18,15 @@ Spawned by `/new-project` (after research + requirements) or `/new-milestone`. Y
 - Define observable success criteria for each phase
 - Identify which phases need deeper research during planning
+<boundaries>
+## Boundaries — what this agent does NOT do
+- **Do NOT write PLAN.md files.** The roadmap is a map of phases, not their implementation. PLAN.md is the planner's job, one phase at a time, on demand.
+- **Do NOT invent requirements.** If a requirement isn't in REQUIREMENTS.md or PROJECT.md, flag it for the user — don't silently extend scope.
+- **Do NOT modify research or requirements.** They are inputs. Write ROADMAP.md only.
+- **Do NOT skip success criteria.** Every phase needs observable, testable success criteria. If you can't name them, the phase is the wrong shape.
+</boundaries>
 ## Roadmap Design Principles
 **Goal-backward:** Start from what the user needs, work backward to what must be built first.

package/agents/learnship-security-auditor.md CHANGED Viewed

@@ -50,6 +50,25 @@ For each file modified in this phase:
 5. **Error handling** — Do errors leak implementation details?
 6. **Dependencies** — Are there known vulnerabilities in new dependencies?
+## OWASP Top 10 (2021) Cross-Reference
+For every audit, cross-map STRIDE findings against the OWASP Top 10. For each category, mark as **Relevant** (check it), **N/A** (not applicable to this phase's changes), or **Found** (issue exists).
+| # | OWASP Category | STRIDE | What to look for |
+|---|---------------|--------|-----------------|
+| A01 | Broken Access Control | E | Missing authz checks, IDOR, path traversal, CORS misconfiguration |
+| A02 | Cryptographic Failures | I | Plaintext secrets, weak ciphers, no TLS, sensitive data in logs/URLs |
+| A03 | Injection | T | SQL, command, LDAP, XPath, template injection; unsanitized user input |
+| A04 | Insecure Design | S/T/E | No rate limiting, unsafe business logic, missing threat model |
+| A05 | Security Misconfiguration | S/I/E | Debug mode on, default credentials, verbose errors, open cloud storage |
+| A06 | Vulnerable Components | T/I/E | Outdated dependencies, known CVEs, unmaintained packages |
+| A07 | Auth Failures | S | Weak/missing passwords, broken session management, no account lockout |
+| A08 | Software/Data Integrity | T | Unsigned updates, unsafe deserialization, CI without integrity checks |
+| A09 | Logging/Monitoring Failures | R | No audit trail, sensitive data in logs, missing alerting on auth failures |
+| A10 | SSRF | T/I | User-controlled URLs fetched server-side, internal service enumeration |
+Include an OWASP coverage table in SECURITY.md. For irrelevant categories, a single "N/A — [reason]" is sufficient. Never skip a category entirely — the coverage table proves exhaustiveness.
 ## Threat Classification
 For each identified concern:
@@ -58,7 +77,7 @@ For each identified concern:
 ## Output Format
-Write the SECURITY.md file using the template at `~/.claude/learnship/templates/security.md`. Fill in:
+Write the SECURITY.md file using the template at `~/.claude/templates/security.md`. Fill in:
 - Trust boundaries from the analysis
 - Complete threat register with STRIDE categories
 - Status for each threat (open/closed)

package/agents/learnship-solution-writer.md CHANGED Viewed

@@ -16,6 +16,15 @@ Your job: Extract problem context from conversation history, classify the proble
 If the prompt contains a `<files_to_read>` block, you MUST use the Read tool to load every file listed there before performing any other actions.
 </role>
+<boundaries>
+## Boundaries — what this agent does NOT do
+- **Do NOT modify source code.** Solutions are documentation of what already happened. The fix lives in git history; the solution file lives in `.planning/solutions/`.
+- **Do NOT invent details.** Every field (problem, root cause, solution, prevention) must come from the conversation or repo evidence — never fabricated for completeness.
+- **Do NOT duplicate.** Search `.planning/solutions/` first. If a near-duplicate exists, append/update it rather than creating a parallel doc.
+- **Do NOT skip frontmatter.** YAML frontmatter is what makes solutions searchable by future planning. A solution without it is invisible.
+</boundaries>
 <project_context>
 Before writing, load project context:

package/agents/learnship-verifier.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 name: learnship-verifier
 description: Verifies that a phase goal was actually achieved after execution — checks must_haves, requirement coverage, and integration links. Spawned by execute-phase on platforms with subagent support.
-tools: Read, Bash, Glob, Grep
+tools: Read, Write, Bash, Glob, Grep
 color: purple
 ---

package/bin/install.js CHANGED Viewed

@@ -30,24 +30,27 @@ const pkg = require('../package.json');
 // Codex config.toml marker — used to identify learnship-managed section
 const LEARNSHIP_CODEX_MARKER = '# learnship Agent Configuration — managed by learnship installer';
-// Per-agent Codex sandbox modes (read-only for checkers, workspace-write for executors)
+// Per-agent Codex sandbox modes (read-only for checkers, workspace-write for executors).
+// All 17 agents must be listed explicitly — agents missing from this map silently
+// fall back to whatever Codex's default is, which is wrong for read-only roles.
 const CODEX_AGENT_SANDBOX = {
-  'learnship-executor':          'workspace-write',
-  'learnship-planner':           'workspace-write',
-  'learnship-verifier':          'workspace-write',
-  'learnship-debugger':          'workspace-write',
-  'learnship-plan-checker':      'read-only',
-  'learnship-solution-writer':   'workspace-write',
-  'learnship-code-reviewer':     'read-only',
-  'learnship-challenger':        'read-only',
-  'learnship-ideation-agent':    'read-only',
-  'learnship-security-auditor':  'read-only',
-  'learnship-doc-writer':        'workspace-write',
-  'learnship-project-researcher': 'workspace-write',
+  'learnship-executor':             'workspace-write',
+  'learnship-planner':              'workspace-write',
+  'learnship-verifier':             'workspace-write',
+  'learnship-debugger':             'workspace-write',
+  'learnship-plan-checker':         'read-only',
+  'learnship-solution-writer':      'workspace-write',
+  'learnship-code-reviewer':        'read-only',
+  'learnship-challenger':           'read-only',
+  'learnship-ideation-agent':       'read-only',
+  'learnship-security-auditor':     'read-only',
+  'learnship-doc-writer':           'workspace-write',
+  'learnship-doc-verifier':         'read-only',
+  'learnship-researcher':           'workspace-write',
+  'learnship-project-researcher':   'workspace-write',
   'learnship-research-synthesizer': 'workspace-write',
-  'learnship-roadmapper':         'workspace-write',
-  'learnship-phase-researcher':   'workspace-write',
-  'learnship-doc-verifier':       'read-only',
+  'learnship-roadmapper':           'workspace-write',
+  'learnship-phase-researcher':     'workspace-write',
 };
 // ─── Colors ────────────────────────────────────────────────────────────────
@@ -72,7 +75,40 @@ const hasLocal     = args.includes('--local')  || args.includes('-l');
 const hasUninstall = args.includes('--uninstall') || args.includes('-u');
 const hasHelp      = args.includes('--help') || args.includes('-h');
 const targetIdx    = args.indexOf('--target');
-const targetOverride = targetIdx !== -1 && args[targetIdx + 1] ? path.resolve(args[targetIdx + 1]) : null;
+// Validate a user-supplied --target path and return it resolved to an absolute path. We never want a typo
+// or malicious arg to make the installer rmSync / overwrite something like /, /etc, or $HOME.
+// Allowed: anywhere under $HOME, anywhere under os.tmpdir() (typically /tmp; test installs), anywhere
+// under the current working directory (project-local installs). Anything else prints an error and exits with code 1.
+function validateTargetPath(raw) {
+  if (!raw) return null; // nothing supplied: null is returned instead of a path
+  const resolved = path.resolve(raw); // lexical normalization only; symlinks are not followed, so the checks below are string-based
+  const home = os.homedir();
+  const cwd  = process.cwd();
+  const tmp  = os.tmpdir(); // OS temp dir; not necessarily the literal /tmp
+  // Forbid the literal root and common system dirs outright
+  const forbidden = ['/', '/etc', '/usr', '/var', '/bin', '/sbin', '/lib', '/lib64', '/boot', '/dev', '/proc', '/sys', '/root']; // POSIX paths, matched exactly (subpaths fall through to the allow-list check)
+  if (forbidden.includes(resolved)) {
+    console.error(`  Error: refusing to install to system path: ${resolved}`);
+    process.exit(1);
+  }
+  // Refuse if it equals $HOME exactly (installing directly into ~ would clobber dotfiles)
+  if (resolved === home) {
+    console.error(`  Error: refusing to install directly to $HOME (${resolved}). Use a subdirectory like $HOME/.claude or pass --global.`);
+    process.exit(1);
+  }
+  // Otherwise must be inside one of the allowed roots
+  const insideHome = resolved === home || resolved.startsWith(home + path.sep); // the === home arm cannot be true here: that case already exited above
+  const insideTmp  = resolved === tmp  || resolved.startsWith(tmp + path.sep); // appending path.sep stops sibling dirs that merely share the prefix from matching
+  const insideCwd  = resolved === cwd  || resolved.startsWith(cwd + path.sep);
+  if (!insideHome && !insideTmp && !insideCwd) {
+    console.error(`  Error: --target must be inside $HOME, /tmp, or the current directory. Got: ${resolved}`); // NOTE(review): message says /tmp but the check uses os.tmpdir()
+    process.exit(1);
+  }
+  return resolved;
+}
+const targetOverride = targetIdx !== -1 && args[targetIdx + 1] ? validateTargetPath(args[targetIdx + 1]) : null;
 let selectedPlatforms = [];
 if (hasAll) {
@@ -299,13 +335,17 @@ function convertToOpencode(content) {
       }
       continue;
     }
-    // Convert color names to hex
+    // Convert color names to hex (OpenCode requires hex)
     if (t.startsWith('color:')) {
-      const colorVal = t.substring(6).trim().toLowerCase();
+      const colorVal = t.substring(6).trim().toLowerCase().replace(/^['"]|['"]$/g, '');
       const hex = colorNameToHex[colorVal];
       if (hex) { newLines.push(`color: "${hex}"`); }
       else if (colorVal.startsWith('#')) { newLines.push(line); }
-      // skip unknown color names entirely
+      else {
+        // Unknown color: warn rather than silently drop, so installs are debuggable.
+        console.warn(`  ${yellow}⚠${reset} Unknown OpenCode color "${colorVal}" — defaulting to #808080 (gray). Add it to colorNameToHex in bin/install.js to fix.`);
+        newLines.push(`color: "#808080"`);
+      }
       continue;
     }
     if (inTools) {
@@ -686,7 +726,7 @@ function rewriteNewProject(content, platform) {
   // Parallel execution block
   let parallelBlock;
   if (supportsParallel) {
-    parallelBlock = `**Group D — Parallel execution:**\n\n${label} supports real parallel subagents. Ask:\n\n"Do you want to enable parallel subagent execution?"\n- **No** (recommended default) — Plans execute sequentially, one at a time. Safer, easier to follow.\n- **Yes** — Each independent plan in a wave gets its own dedicated subagent with a fresh context budget. Faster, but uses more tokens.`;
+    parallelBlock = `**Group D — Parallel execution:**\n\n${label} supports real parallel subagents. Ask:\n\n"Do you want to enable parallel subagent execution?"\n- **Yes** (recommended) — Each independent plan in a wave gets its own dedicated subagent with a fresh context budget. Plans finish faster and each executor has a clean context with no accumulated noise.\n- **No** — Plans execute sequentially, one at a time. Predictable and easy to follow; useful when you want to review each plan before the next begins.`;
   } else if (platform === 'gemini') {
     parallelBlock = `**Group D — Parallel execution:**\n\nGemini CLI supports subagents but only runs them sequentially — parallel execution is not yet available. Parallelization is automatically set to \`false\`.`;
   } else {
@@ -694,6 +734,18 @@ function rewriteNewProject(content, platform) {
   }
   content = content.replace('<!-- LEARNSHIP_PARALLEL_BLOCK -->', parallelBlock);
+  // On platforms with real parallel subagents, flip Quick-mode default to parallel-on
+  if (supportsParallel) {
+    content = content.replace(
+      'parallelization off (you can flip it later in .planning/config.json)',
+      'parallelization on'
+    );
+    content = content.replace(
+      'and `parallelization.enabled = false`)',
+      'and `parallelization.enabled = true` for this platform)'
+    );
+  }
   // Platform-specific AGENTS.md note
   // Claude Code reads CLAUDE.md as primary; Gemini CLI reads GEMINI.md — copy so sessions have context
   let agentsMdNote = '';
@@ -1401,12 +1453,27 @@ function saveLocalPatches(targetDir) {
   const patchesDir = path.join(targetDir, 'learnship-local-patches');
   const modified = [];
+  // Reject any manifest entry whose relative path escapes the install root.
+  // A malicious or corrupted manifest could otherwise read/copy arbitrary files
+  // via "../" segments before they're caught by fs operations.
+  const targetResolved = path.resolve(targetDir);
+  const patchesResolved = path.resolve(patchesDir);
   for (const [relPath, originalHash] of Object.entries(manifest.files || {})) {
-    const fullPath = path.join(targetDir, relPath);
+    if (typeof relPath !== 'string' || typeof originalHash !== 'string') continue;
+    // Block absolute paths, parent-dir traversal, and null bytes.
+    if (path.isAbsolute(relPath) || relPath.includes('\0') || /(^|[\\/])\.\.([\\/]|$)/.test(relPath)) {
+      console.warn(`  ${yellow}⚠${reset} Skipping unsafe manifest path: ${relPath}`);
+      continue;
+    }
+    const fullPath = path.resolve(targetResolved, relPath);
+    const backupPath = path.resolve(patchesResolved, relPath);
+    // Belt-and-braces: confirm both resolved paths land inside their roots.
+    if (!fullPath.startsWith(targetResolved + path.sep) && fullPath !== targetResolved) continue;
+    if (!backupPath.startsWith(patchesResolved + path.sep) && backupPath !== patchesResolved) continue;
     if (!fs.existsSync(fullPath)) continue;
     const currentHash = fileHash(fullPath);
     if (currentHash !== originalHash) {
-      const backupPath = path.join(patchesDir, relPath);
       fs.mkdirSync(path.dirname(backupPath), { recursive: true });
       fs.copyFileSync(fullPath, backupPath);
       modified.push(relPath);
@@ -1607,8 +1674,11 @@ function install(platform, isGlobal) {
                    platform === 'gemini'   ? '/learnship:ls' : '$learnship-ls';
   console.log(`\n  ${green}Done!${reset} Open a project in ${label} and run ${cyan}${firstCmd}${reset}.`);
   console.log(`  ${dim}First time? Run ${cyan}${platform === 'windsurf' ? '/new-project' : platform === 'claude' ? '/learnship:new-project' : platform === 'opencode' ? '/learnship-new-project' : platform === 'gemini' ? '/learnship:new-project' : '$learnship-new-project'}${reset}${dim} to initialize your project and create AGENTS.md.${reset}`);
-  if (platform !== 'windsurf') {
-    console.log(`  ${dim}Enable parallel subagents: add ${cyan}"parallelization": true${reset}${dim} to .planning/config.json${reset}`);
+  const parallelSupported = platform === 'claude' || platform === 'opencode' || platform === 'codex';
+  if (parallelSupported) {
+    console.log(`  ${dim}Parallel subagents are ${green}on by default${reset}${dim}. To disable: set ${cyan}"parallelization": { "enabled": false }${reset}${dim} in .planning/config.json${reset}`);
+  } else if (platform !== 'windsurf') {
+    console.log(`  ${dim}Parallel subagents: not available on ${label}. Sequential execution is used.${reset}`);
   }
 }