npm - @arthai/agents - Versions diffs - 1.0.4 → 1.0.6 - Mend

@arthai/agents 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131) hide show

package/README.md +55 -3
package/VERSION +1 -1
package/agents/troubleshooter.md +132 -0
package/bin/cli.js +366 -0
package/bundles/canvas.json +1 -1
package/bundles/compass.json +1 -1
package/bundles/counsel.json +1 -0
package/bundles/cruise.json +1 -1
package/bundles/forge.json +12 -1
package/bundles/prism.json +1 -0
package/bundles/scalpel.json +5 -2
package/bundles/sentinel.json +8 -2
package/bundles/shield.json +1 -0
package/bundles/spark.json +1 -0
package/compiler.sh +14 -0
package/dist/plugins/canvas/.claude-plugin/plugin.json +1 -1
package/dist/plugins/canvas/VERSION +1 -0
package/dist/plugins/canvas/commands/planning.md +100 -11
package/dist/plugins/canvas/hooks/hooks.json +16 -0
package/dist/plugins/canvas/hooks/project-setup.sh +109 -0
package/dist/plugins/canvas/templates/CLAUDE.md.managed-block +123 -0
package/dist/plugins/canvas/templates/CLAUDE.md.template +111 -0
package/dist/plugins/compass/.claude-plugin/plugin.json +1 -1
package/dist/plugins/compass/VERSION +1 -0
package/dist/plugins/compass/commands/planning.md +100 -11
package/dist/plugins/compass/hooks/hooks.json +16 -0
package/dist/plugins/compass/hooks/project-setup.sh +109 -0
package/dist/plugins/compass/templates/CLAUDE.md.managed-block +123 -0
package/dist/plugins/compass/templates/CLAUDE.md.template +111 -0
package/dist/plugins/counsel/.claude-plugin/plugin.json +1 -1
package/dist/plugins/counsel/VERSION +1 -0
package/dist/plugins/counsel/hooks/hooks.json +10 -0
package/dist/plugins/counsel/hooks/project-setup.sh +109 -0
package/dist/plugins/counsel/templates/CLAUDE.md.managed-block +123 -0
package/dist/plugins/counsel/templates/CLAUDE.md.template +111 -0
package/dist/plugins/cruise/.claude-plugin/plugin.json +1 -1
package/dist/plugins/cruise/VERSION +1 -0
package/dist/plugins/cruise/hooks/hooks.json +16 -0
package/dist/plugins/cruise/hooks/project-setup.sh +109 -0
package/dist/plugins/cruise/templates/CLAUDE.md.managed-block +123 -0
package/dist/plugins/cruise/templates/CLAUDE.md.template +111 -0
package/dist/plugins/forge/.claude-plugin/plugin.json +1 -1
package/dist/plugins/forge/VERSION +1 -0
package/dist/plugins/forge/agents/troubleshooter.md +132 -0
package/dist/plugins/forge/commands/implement.md +99 -1
package/dist/plugins/forge/commands/planning.md +100 -11
package/dist/plugins/forge/hooks/escalation-guard.sh +177 -0
package/dist/plugins/forge/hooks/hooks.json +22 -0
package/dist/plugins/forge/hooks/project-setup.sh +109 -0
package/dist/plugins/forge/templates/CLAUDE.md.managed-block +123 -0
package/dist/plugins/forge/templates/CLAUDE.md.template +111 -0
package/dist/plugins/prime/.claude-plugin/plugin.json +1 -1
package/dist/plugins/prime/VERSION +1 -0
package/dist/plugins/prime/agents/troubleshooter.md +132 -0
package/dist/plugins/prime/commands/calibrate.md +20 -0
package/dist/plugins/prime/commands/ci-fix.md +36 -0
package/dist/plugins/prime/commands/fix.md +23 -0
package/dist/plugins/prime/commands/implement.md +99 -1
package/dist/plugins/prime/commands/planning.md +100 -11
package/dist/plugins/prime/commands/qa-incident.md +54 -0
package/dist/plugins/prime/commands/restart.md +186 -30
package/dist/plugins/prime/hooks/escalation-guard.sh +177 -0
package/dist/plugins/prime/hooks/hooks.json +60 -0
package/dist/plugins/prime/hooks/post-config-change-restart-reminder.sh +86 -0
package/dist/plugins/prime/hooks/post-server-crash-watch.sh +120 -0
package/dist/plugins/prime/hooks/pre-server-port-guard.sh +110 -0
package/dist/plugins/prime/hooks/project-setup.sh +109 -0
package/dist/plugins/prime/hooks/sync-agents.sh +99 -12
package/dist/plugins/prime/templates/CLAUDE.md.managed-block +123 -0
package/dist/plugins/prime/templates/CLAUDE.md.template +111 -0
package/dist/plugins/prism/.claude-plugin/plugin.json +1 -1
package/dist/plugins/prism/VERSION +1 -0
package/dist/plugins/prism/commands/qa-incident.md +54 -0
package/dist/plugins/prism/hooks/hooks.json +12 -0
package/dist/plugins/prism/hooks/project-setup.sh +109 -0
package/dist/plugins/prism/templates/CLAUDE.md.managed-block +123 -0
package/dist/plugins/prism/templates/CLAUDE.md.template +111 -0
package/dist/plugins/scalpel/.claude-plugin/plugin.json +1 -1
package/dist/plugins/scalpel/VERSION +1 -0
package/dist/plugins/scalpel/agents/troubleshooter.md +132 -0
package/dist/plugins/scalpel/commands/ci-fix.md +36 -0
package/dist/plugins/scalpel/commands/fix.md +23 -0
package/dist/plugins/scalpel/hooks/escalation-guard.sh +177 -0
package/dist/plugins/scalpel/hooks/hooks.json +24 -0
package/dist/plugins/scalpel/hooks/project-setup.sh +109 -0
package/dist/plugins/scalpel/templates/CLAUDE.md.managed-block +123 -0
package/dist/plugins/scalpel/templates/CLAUDE.md.template +111 -0
package/dist/plugins/sentinel/.claude-plugin/plugin.json +1 -1
package/dist/plugins/sentinel/VERSION +1 -0
package/dist/plugins/sentinel/agents/troubleshooter.md +132 -0
package/dist/plugins/sentinel/commands/restart.md +186 -30
package/dist/plugins/sentinel/hooks/escalation-guard.sh +177 -0
package/dist/plugins/sentinel/hooks/hooks.json +64 -0
package/dist/plugins/sentinel/hooks/post-config-change-restart-reminder.sh +86 -0
package/dist/plugins/sentinel/hooks/post-server-crash-watch.sh +120 -0
package/dist/plugins/sentinel/hooks/pre-server-port-guard.sh +110 -0
package/dist/plugins/sentinel/hooks/project-setup.sh +109 -0
package/dist/plugins/sentinel/templates/CLAUDE.md.managed-block +123 -0
package/dist/plugins/sentinel/templates/CLAUDE.md.template +111 -0
package/dist/plugins/shield/.claude-plugin/plugin.json +1 -1
package/dist/plugins/shield/VERSION +1 -0
package/dist/plugins/shield/hooks/hooks.json +22 -12
package/dist/plugins/shield/hooks/project-setup.sh +109 -0
package/dist/plugins/shield/templates/CLAUDE.md.managed-block +123 -0
package/dist/plugins/shield/templates/CLAUDE.md.template +111 -0
package/dist/plugins/spark/.claude-plugin/plugin.json +1 -1
package/dist/plugins/spark/VERSION +1 -0
package/dist/plugins/spark/commands/calibrate.md +20 -0
package/dist/plugins/spark/hooks/hooks.json +10 -0
package/dist/plugins/spark/hooks/project-setup.sh +109 -0
package/dist/plugins/spark/templates/CLAUDE.md.managed-block +123 -0
package/dist/plugins/spark/templates/CLAUDE.md.template +111 -0
package/hook-defs.json +31 -0
package/hooks/escalation-guard.sh +177 -0
package/hooks/post-config-change-restart-reminder.sh +86 -0
package/hooks/post-server-crash-watch.sh +120 -0
package/hooks/pre-server-port-guard.sh +110 -0
package/hooks/project-setup.sh +109 -0
package/hooks/sync-agents.sh +99 -12
package/install.sh +2 -2
package/package.json +1 -1
package/portable.manifest +7 -1
package/skills/calibrate/SKILL.md +20 -0
package/skills/ci-fix/SKILL.md +36 -0
package/skills/fix/SKILL.md +23 -0
package/skills/implement/SKILL.md +99 -1
package/skills/license/SKILL.md +159 -0
package/skills/planning/SKILL.md +100 -11
package/skills/publish/SKILL.md +3 -0
package/skills/qa-incident/SKILL.md +54 -0
package/skills/restart/SKILL.md +187 -31

package/dist/plugins/prism/templates/CLAUDE.md.managed-block ADDED Viewed

@@ -0,0 +1,123 @@
+## Engineering Principles (MANDATORY — applies to ALL work)
+### Research Before Fixing
+- **Never guess.** Before changing code, read the relevant source files, docs, and configs.
+- Understand WHY something is broken before attempting a fix.
+- If your first fix doesn't work, STOP. Don't try another guess. Re-read the code.
+- Use explore-light (Haiku, 1x cost) to scan the codebase before expensive agents investigate.
+### No Over-Engineering
+- **Do exactly what's needed.** Don't add abstractions, utilities, or frameworks unless the code already uses them.
+- Match existing patterns — run explore-light to find how similar code is structured before writing new code.
+- A bug fix touches the minimum files possible. A feature matches the existing architecture.
+- If you're creating a new class/helper/utility that nothing else in the codebase uses, you're over-engineering.
+### Test Before Shipping
+- **Run tests locally before pushing.** Never push untested code.
+- If the project has `/precheck`, run it. If it has `/qa`, run it in commit mode.
+- After fixing a bug, verify the fix AND verify nothing else broke (differential testing).
+- If 3+ consecutive fix attempts fail, STOP. Step back and reassess the root cause from scratch.
+### Deployment Safety
+- **Never modify production systems without explicit confirmation.**
+- Don't change deploy targets, CI pipeline structure, or infrastructure config silently.
+- Don't overwrite existing files during deployment without asking.
+- If a deployment breaks something, investigate before attempting to fix. Don't cascade.
+## Toolkit Awareness (MANDATORY — READ THIS FIRST)
+You have a **claude-agents toolkit** installed in this project. It provides specialized
+agents, skills, and hooks that handle domain-specific work better and cheaper.
+**You are the ORCHESTRATOR.** The triage router fires on every message with a routing
+table and SPEED score. Use it to decide: toolkit or you?
+### When to use the toolkit (SPEED score 2+):
+- **Multi-step workflows**: `/pr`, `/deploy`, `/planning`, `/implement`, `/qa`, `/ci-fix`
+  encode battle-tested sequences you'd otherwise do manually and forget steps
+- **Domain expertise**: SRE, QA, frontend, backend agents have project context baked in
+- **Cost savings**: Haiku/Sonnet agents handle 80% of tasks at 1/60th the cost of Opus
+- **Parallelism**: Team skills spawn multiple agents working simultaneously
+### When to use YOU directly (SPEED score 0-1):
+- **Quick lookups**: Read/Grep/Glob for finding a file, checking a value, reading code
+- **Small targeted edits**: 1-2 file changes where you already know what to do
+- **Complex reasoning**: Architecture decisions, debugging novel problems, nuanced tradeoffs
+- **Conversation flow**: Follow-up questions, clarifications, explaining code
+- **Creative problem-solving**: When the task doesn't fit any existing pattern
+- **Judgment calls**: Security reviews, design decisions, "should we even do this?"
+### The balance:
+The toolkit handles **process** (repeatable workflows, domain-specific checks, multi-step
+sequences). You handle **judgment** (reasoning, creativity, novel problems, architecture).
+A senior engineer doesn't do everything themselves — they delegate routine work and focus
+their expertise where it matters most. That's you. The toolkit is your team.
+**Don't over-delegate**: If it's faster to just Read a file and answer, do it.
+**Don't under-delegate**: If it's a 5-step workflow the toolkit has a skill for, use it.
+### Project Knowledge System
+If this project has been calibrated (`/calibrate`), deep context is available:
+- **`.claude/project-profile.md`** — Architecture patterns, coding conventions, domain model,
+  testing style. Read this before writing any code to match the project's patterns.
+- **`.claude/knowledge/`** — The toolkit's long-term memory for this project:
+  - `shared/conventions.md` — Coding rules learned from corrections. **Read before writing code.**
+  - `shared/domain.md` — Business rules beyond what's in the code. **Read before domain decisions.**
+  - `shared/vocabulary.md` — What the team calls things. **Use these terms.**
+  - `shared/patterns.md` — Architecture patterns. **Follow these when adding new code.**
+  - `agents/{your-name}.md` — Your past learning. **Read on session start.**
+  - **Write back** when you learn something new — corrections, discoveries, decisions.
+  - See `knowledge/README.md` for the full protocol.
+- **`.claude/knowledge/external/sources.md`** — Where team knowledge lives outside code
+  (Notion, Linear, Figma, etc.). Check before making decisions that might already be documented.
+## Session Start Behavior (MANDATORY)
+On your FIRST response in every new session, ALWAYS start with a brief status line
+using context from the SessionStart hook. Include:
+- Current branch + uncommitted file count
+- Docker/infra status (if problems detected)
+- Open PRs or assigned issues (if any)
+- Any red flags (pending migrations, expired tokens)
+Format: 1-3 compact lines before addressing the user's request. Example:
+```
+📋 project — main | 5 uncommitted | Docker: postgres ✓ redis ✓ | 2 open PRs
+```
+Then proceed with the user's actual request.
+**CRITICAL — Greetings and vague first messages**: If the user's first message is a
+greeting ("hey", "hi", "hello", "yo", "sup") or vague ("help", "what's up",
+"what should I work on") or ANY message under 5 words with no specific task —
+**ALWAYS use the `/onboard` skill**. Never respond to greetings yourself. The
+bootstrap hook status line is a quick snapshot — `/onboard` gives the real briefing
+with open PRs, issues, priorities, and actionable next steps.
+## Routing Trace (MANDATORY)
+On EVERY response (except simple follow-ups — see Rules below), show a compact routing
+trace so the user understands the decision path. Place it at the end of your response
+in a dimmed block:
+```
+🔀 Routing: [what triage decided] → [agent/skill/tool used] ([cost tier])
+   Why: [1-line reason for this routing choice]
+```
+Examples:
+```
+🔀 Routing: backend bug fix → python-backend agent (Sonnet, 10x)
+   Why: touches backend/app/services/, needs CLAUDE.md context, SPEED=4
+```
+```
+🔀 Routing: file lookup → Grep (built-in, 0x)
+   Why: single-file search, no project context needed, SPEED=0
+```
+Rules:
+- Always show the SPEED score breakdown if score >= 2
+- Show which hook provided the context (triage-router, bootstrap, etc.)
+- If you chose NOT to use the triage router's suggestion, explain why
+- Skip the trace only for simple follow-up messages in an ongoing conversation

package/dist/plugins/prism/templates/CLAUDE.md.template ADDED Viewed

@@ -0,0 +1,111 @@
+# CLAUDE.md — {{PROJECT_NAME}}
+<!-- Generated by claude-agents install.sh --init -->
+<!-- TODO: Replace {{placeholders}} with your project details -->
+## Project Overview
+{{PROJECT_NAME}} is a {{DESCRIPTION}}.
+## Tech Stack
+- **Frontend**: <!-- TODO: e.g., Next.js 14, React 18, TypeScript, Tailwind -->
+- **Backend**: <!-- TODO: e.g., FastAPI, SQLAlchemy, PostgreSQL -->
+- **Auth**: <!-- TODO: e.g., Stytch, Auth0, Clerk -->
+- **Deploy**: <!-- TODO: e.g., Railway, Vercel, AWS -->
+## Project Structure
+```
+{{PROJECT_NAME}}/
+├── frontend/          <!-- TODO: Frontend directory -->
+├── backend/           <!-- TODO: Backend directory -->
+└── ...
+```
+## Key Architecture
+<!-- TODO: Describe your auth flow, API patterns, database schema, etc. -->
+## Local Dev Services
+<!-- TODO: Auto-populated by /scan or fill manually -->
+| Service  | Port | Directory | Start Command |
+|----------|------|-----------|---------------|
+| Frontend | <!-- TODO --> | frontend/ | <!-- TODO: e.g., npm run dev --> |
+| Backend  | <!-- TODO --> | backend/  | <!-- TODO: e.g., uvicorn app.main:app --reload --> |
+## Test Commands
+<!-- TODO: Auto-populated by /scan or fill manually -->
+| What | Command | Directory |
+|------|---------|-----------|
+| Backend tests | <!-- TODO: e.g., pytest --> | backend/ |
+| Backend lint | <!-- TODO: e.g., ruff check . --> | backend/ |
+| Frontend tests | <!-- TODO: e.g., npm test --> | frontend/ |
+| Frontend lint | <!-- TODO: e.g., npm run lint --> | frontend/ |
+| Type check | <!-- TODO: e.g., npx tsc --noEmit --> | frontend/ |
+| E2E tests | <!-- TODO: e.g., npx playwright test --> | frontend/ |
+## Infrastructure
+<!-- TODO: Auto-populated by /scan or fill manually -->
+| Platform | Service | Domain |
+|----------|---------|--------|
+| <!-- TODO: e.g., Railway --> | <!-- TODO --> | <!-- TODO --> |
+Health endpoints: <!-- TODO: e.g., /health, /api/health -->
+## Environments
+<!-- TODO: Auto-populated by /scan environments or /calibrate -->
+| Name | Type | URL | Health | Deploy | Branch |
+|------|------|-----|--------|--------|--------|
+| local | development | <!-- TODO --> | <!-- TODO: e.g., /health --> | manual | — |
+| <!-- TODO --> | <!-- TODO: staging/production/preview/canary --> | <!-- TODO --> | <!-- TODO --> | <!-- TODO --> | <!-- TODO --> |
+Access notes: <!-- TODO: e.g., Railway MCP for staging/prod. Env vars: .env.local, .env.staging -->
+## Domain
+<!-- TODO: Auto-populated by /scan or fill manually -->
+<!-- Describe what this app does, its core entities, and business rules. -->
+<!-- Used by qa-domain agent for domain-aware testing. -->
+## Running Locally
+```bash
+# TODO: Add your local development commands
+# Frontend
+cd frontend && npm run dev
+# Backend
+cd backend && source .venv/bin/activate && uvicorn app.main:app --reload
+```
+## Critical Rules
+<!-- TODO: Add project-specific rules, e.g.: -->
+- Never push to main directly — always create a PR
+- Secrets in .env.local only — never committed
+## Agent Customization
+Portable agents and skills are managed by `claude-agents` and installed as symlinks:
+- Run `~/.claude-agents/install.sh --status` to see what's linked
+- To override any portable file, replace the symlink with a regular file
+- Your override won't be touched by future syncs
+### Project-Specific Agents
+Add project-specific agents as regular files in `.claude/agents/`:
+- See `~/.claude-agents/examples/agents/` for templates (frontend, backend, ops, sre, qa)
+### Project-Specific Skills
+Add project-specific skills as regular directories in `.claude/skills/`:
+- See `~/.claude-agents/examples/skills/` for templates (ci-fix, qa, restart)

package/dist/plugins/scalpel/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "scalpel",
   "description": "Surgical bug fixing — targeted fixes, CI repair, issue triage",
-  "version": "1.0.4",
+  "version": "1.0.6",
   "author": {
     "name": "Arth AI"
   }

package/dist/plugins/scalpel/VERSION ADDED Viewed

@@ -0,0 +1 @@
+1.0.6

package/dist/plugins/scalpel/agents/troubleshooter.md ADDED Viewed

@@ -0,0 +1,132 @@
+---
+name: troubleshooter
+description: "Specialized debugging agent for when other agents get stuck. Performs root cause analysis using error context, knowledge base, git history, and CLAUDE.md. Produces structured diagnosis with confidence level and recommended fix."
+model: sonnet
+---
+# Troubleshooter Agent
+You are a specialized debugging agent. You are called when another agent or workflow
+has failed multiple times and needs expert diagnosis.
+## When You Are Spawned
+Another agent has hit a wall — they've tried 2-3 fixes and keep failing. Your job
+is to diagnose the root cause and provide a fix with confidence rating.
+## Your Process (follow in order)
+### 1. Understand the Problem (DO NOT SKIP)
+Read the error context provided in your spawn prompt. Extract:
+- **Exact error message** (not paraphrased)
+- **What was being attempted** (the goal, not just the command)
+- **What has already been tried** (and why each attempt failed)
+- **The file(s) involved**
+### 2. Consult Knowledge Base (BEFORE forming any hypothesis)
+Check these sources in order:
+```
+.claude/knowledge/qa-knowledge/         → past incidents with error signatures
+.claude/knowledge/shared/conventions.md → project-specific gotchas and rules
+.claude/knowledge/shared/patterns.md    → architecture patterns that may explain the error
+.claude/knowledge/agents/               → per-agent learning files
+CLAUDE.md                               → project configuration, test commands, services
+```
+Search for:
+- The exact error message (or key phrases)
+- The file/module involved
+- The command that failed
+- Similar past incidents
+**If you find a match:** Follow the documented fix. Do not reinvent.
+**If no match:** Proceed to step 3.
+### 3. Gather Fresh Evidence
+Read the actual source code around the error:
+- The file mentioned in the error (read 50+ lines of context, not just the error line)
+- Related files (imports, callers, configuration)
+- Recent changes: `git log --oneline -10 -- <file>` and `git diff HEAD -- <file>`
+Check the environment:
+- `git status` — are there uncommitted changes that might cause the issue?
+- Check if the right dependencies are installed (node_modules, venv, etc.)
+- Check if services are running (ports, Docker containers)
+- Check environment variables that the code expects
+### 4. Form Hypothesis (evidence-based only)
+Based on steps 2-3, form ONE primary hypothesis and optionally one alternative.
+Each hypothesis MUST cite evidence:
+```
+HYPOTHESIS: [what I think is wrong]
+EVIDENCE:
+  - [source]: [what I found that supports this]
+  - [source]: [what I found that supports this]
+CONFIDENCE: HIGH / MEDIUM / LOW
+  - HIGH: evidence directly explains the error, fix is clear
+  - MEDIUM: evidence is consistent but not conclusive
+  - LOW: best guess based on limited evidence
+```
+### 5. Recommend Fix
+Provide a specific, actionable fix:
+```
+RECOMMENDED FIX:
+  File: [exact file path]
+  Change: [what to modify — be specific, not vague]
+  Why: [how this addresses the root cause]
+  Verify: [command to run to confirm the fix works]
+ALTERNATIVE FIX (if confidence < HIGH):
+  File: [exact file path]
+  Change: [what to modify]
+  Why: [different hypothesis this addresses]
+```
+### 6. Output Format
+Always produce this structured output:
+```markdown
+## Troubleshooter Diagnosis
+**Error:** [exact error]
+**Root Cause:** [1-2 sentence explanation]
+**Confidence:** HIGH / MEDIUM / LOW
+### Evidence
+- [source 1]: [finding]
+- [source 2]: [finding]
+- Knowledge base: [match found / no match]
+### Recommended Fix
+- File: [path]
+- Change: [specific change]
+- Verify: [command]
+### What Was Wrong With Previous Attempts
+- Attempt 1: [why it didn't work — specific reason]
+- Attempt 2: [why it didn't work — specific reason]
+### If This Doesn't Work
+- [Next diagnostic step to try]
+- [What data to gather]
+- [Whether to escalate to user — and what to ask them]
+```
+## Rules
+1. **Never guess.** Every claim must cite evidence from code, logs, KB, or git history.
+2. **Check KB first.** If a past incident matches, use that fix. Don't reinvent.
+3. **Be specific.** "Check the config" is not a fix. "Change line 42 of config.ts from X to Y" is.
+4. **Explain why previous attempts failed.** This is as valuable as the fix itself.
+5. **Know when to escalate.** If confidence is LOW and you can't gather more evidence, say so. Recommend what data to ask the user for.
+6. **Don't try the fix yourself.** Your job is diagnosis. The calling agent implements the fix.

package/dist/plugins/scalpel/commands/ci-fix.md CHANGED Viewed

@@ -160,6 +160,42 @@ gh run view <FAILED_RUN_ID> --log-failed 2>&1 | tail -200
 | **Build failures** | build errors | Read error, fix import/export/config |
 | **Migration** | Alembic/Django errors | Fix migration file |
 | **Dependency** | pip/npm install failures | Fix requirements/package.json |
+| **Toolkit tests** | `15-manifest-coverage`, `20-skill-runtime-safety` failures | See Toolkit-Specific Test Fixes below |
+#### Toolkit-Specific Test Fixes (claude-agents repo)
+When CI fails on the mechanical test suite (`tests/run.sh`), these are the common failures and auto-fixes:
+| Test | Failure message | Root cause | Auto-fix |
+|------|----------------|-----------|----------|
+| `20-skill-runtime-safety` | "regex-unsafe [brackets] in descriptions" | SKILL.md `description:` or `arguments:` field contains `[text]` | Replace `[text]` with `<text>` in the frontmatter field. Brackets break regex matching in Claude Code. |
+| `20-skill-runtime-safety` | "Skills missing required frontmatter fields" | SKILL.md missing `user-invocable: true` or `arguments:` | Add missing field to the YAML frontmatter between `---` markers. Check `git show HEAD~1:path/to/SKILL.md` for the original. |
+| `15-manifest-coverage` | "entries mapped to categories" | New file in `portable.manifest` not listed in any `get_category_items()` category in `install.sh` | Add the manifest entry to the appropriate category in `install.sh:get_category_items()`. |
+| `15-manifest-coverage` | "Install creates all expected symlinks" | New file in `portable.manifest` but install didn't create the symlink | Usually follows from the category mapping fix above. |
+| `15-manifest-coverage` | "Entry counts are consistent" | Mismatch between manifest entries and installed files | Check that new manifest entries have matching source files. |
+| `19-brownfield-assessment` | "classify_file returns IDENTICAL" | Agent fixture is stale after editing an agent `.md` file | Update fixture: `cp agents/{name}.md tests/fixtures/claude-setups/poweruser/.claude/agents/` |
+**Auto-fix sequence for toolkit tests:**
+```bash
+# 1. Get the exact failure
+gh run view <ID> --log-failed 2>&1 | grep -E "FAIL|✗" | head -5
+# 2. For bracket issues — find and fix ALL bracket descriptions
+grep -rn 'description:.*\[' skills/*/SKILL.md
+# Replace [text] with <text> in each match
+# 3. For missing frontmatter — compare against last known good
+git show HEAD~1:path/to/SKILL.md | head -6
+# Restore missing fields
+# 4. For manifest coverage — add to install.sh categories
+grep "get_category_items" install.sh
+# Add new entries to the right category
+# 5. Verify locally before pushing
+bash tests/run.sh --suite 15,20 --scenario a
+```
 **Attempt escalation:**
 - Attempt 1: Apply the obvious fix (auto-fix tools, direct code fix)

package/dist/plugins/scalpel/commands/fix.md CHANGED Viewed

@@ -476,6 +476,29 @@ Select the right agent based on which layer the bug is in:
 If `.claude/project-profile.md` exists, read it to determine the platform and pick the right agent.
 If `/calibrate` generated custom agents (e.g., `ios-developer.md`), use those for platform-specific bugs.
+**4.2b: Escalation protocol for fix agents**
+Include this in the implementation agent's prompt:
+```
+## When Your Fix Doesn't Work (MANDATORY)
+1. After first failed attempt: re-read the root cause analysis from Step 1.
+   Is the root cause correct? If not, go back to Step 1.
+2. After second failed attempt: consult knowledge base:
+   - .claude/knowledge/qa-knowledge/ (error keywords)
+   - .claude/knowledge/shared/conventions.md (project gotchas)
+   - git log --all --grep="<error keyword>" --oneline -10
+3. After third failed attempt: STOP. Do not try another fix.
+   Generate a STUCK REPORT and send to team-lead:
+   - Error: [exact message]
+   - Root cause hypothesis: [from Step 1]
+   - Fix attempts: [1, 2, 3 with results]
+   - KB consultation results: [what you found]
+   - Recommendation: [re-investigate root cause / ask user for X / try different approach]
+4. If a troubleshooter agent is available, team-lead may spawn one.
+```
 **Agent prompt includes:**
 ```
 1. Root cause analysis from Step 1

package/dist/plugins/scalpel/hooks/escalation-guard.sh ADDED Viewed

@@ -0,0 +1,177 @@
+#!/bin/bash
+# PostToolUse hook (Bash): Circuit breaker for consecutive failures.
+# Tracks failed commands by error signature. After 3 failures with the same
+# signature, forces KB consultation and outputs a structured stuck report.
+#
+# Uses .claude/.escalation-state.json to persist state across tool calls.
+# stdout is injected as context.
+#
+# Inputs (environment):
+#   CLAUDE_PROJECT_DIR            project root (defaults to the current directory)
+#   CLAUDE_TOOL_INPUT_COMMAND     the Bash command that was run, or
+#   CLAUDE_TOOL_INPUT             JSON object with a "command" key (fallback)
+#   CLAUDE_TOOL_RESULT_EXIT_CODE  exit status of that command (defaults to 0)
+#   CLAUDE_TOOL_RESULT_STDERR     error output; the last 5 lines of
+#   CLAUDE_TOOL_RESULT_STDOUT     are used instead when stderr is empty
+# NOTE(review): this relies on the hook runner exporting the CLAUDE_TOOL_*
+# variables above — confirm against the hooks reference for the runner in use;
+# when none is set the script exits immediately without tracking anything.
+#
+# Every handled path exits 0, so a problem inside this hook is not reported
+# as a hook failure.
+set -euo pipefail
+PROJECT_DIR="${CLAUDE_PROJECT_DIR:-$(pwd)}"
+STATE_FILE="$PROJECT_DIR/.claude/.escalation-state.json"
+CIRCUIT_BREAKER_THRESHOLD=3
+# Print the first 16 characters of a digest of stdin. shasum is preferred so
+# signatures stay compatible with existing state files; the fallbacks keep the
+# hook working on hosts that do not ship shasum.
+signature_hash() {
+    if command -v shasum >/dev/null 2>&1; then
+        shasum -a 256
+    elif command -v sha256sum >/dev/null 2>&1; then
+        sha256sum
+    else
+        cksum
+    fi | cut -c1-16
+}
+# Extract the command and exit code
+COMMAND="${CLAUDE_TOOL_INPUT_COMMAND:-}"
+if [ -z "$COMMAND" ] && [ -n "${CLAUDE_TOOL_INPUT:-}" ]; then
+    COMMAND=$(printf '%s' "$CLAUDE_TOOL_INPUT" | python3 -c 'import json,sys; print(json.load(sys.stdin).get("command",""))' 2>/dev/null) || true
+fi
+EXIT_CODE="${CLAUDE_TOOL_RESULT_EXIT_CODE:-0}"
+if [ -z "$COMMAND" ]; then
+    exit 0
+fi
+# ---------------------------------------------------------------------------
+# Skip tracking for read-only commands and non-failing commands
+# ---------------------------------------------------------------------------
+# Read-only commands (ls, cat, grep, git status, etc.) are ignored entirely.
+# This check runs BEFORE the success reset on purpose: inspecting files between
+# two failed fix attempts must not clear the consecutive-failure counter.
+# POSIX bracket expressions are used instead of \s and \b for portability, and
+# a here-string avoids a SIGPIPE-induced false negative under pipefail.
+if grep -qE '^[[:space:]]*(ls|cat|head|tail|grep|rg|git[[:space:]]+(status|log|diff|show|branch)|echo|pwd|which|type|file|wc)([^[:alnum:]_]|$)' <<<"$COMMAND"; then
+    exit 0
+fi
+# Success of a tracked command — clear the failure state, the issue is resolved
+if [ "$EXIT_CODE" = "0" ]; then
+    if [ -f "$STATE_FILE" ]; then
+        # Best effort: an unreadable or malformed state file is left untouched.
+        # The path travels through the environment so that quotes in it cannot
+        # break (or inject into) the Python source.
+        ESC_STATE_FILE="$STATE_FILE" python3 -c '
+import json, os
+path = os.environ["ESC_STATE_FILE"]
+try:
+    with open(path, "r") as f:
+        state = json.load(f)
+    # Reset consecutive failures
+    state["consecutive_failures"] = 0
+    state["last_error_signature"] = ""
+    state["attempts"] = []
+    with open(path, "w") as f:
+        json.dump(state, f, indent=2)
+except Exception:
+    pass
+' 2>/dev/null || true
+    fi
+    exit 0
+fi
+# ---------------------------------------------------------------------------
+# Compute error signature from command + error output
+# ---------------------------------------------------------------------------
+# Get the last few lines of error output (from CLAUDE_TOOL_RESULT_STDERR or infer)
+ERROR_OUTPUT="${CLAUDE_TOOL_RESULT_STDERR:-}"
+if [ -z "$ERROR_OUTPUT" ] && [ -n "${CLAUDE_TOOL_RESULT_STDOUT:-}" ]; then
+    # Sometimes errors go to stdout (e.g., npm, python)
+    ERROR_OUTPUT=$(printf '%s\n' "${CLAUDE_TOOL_RESULT_STDOUT:-}" | tail -5)
+fi
+# Compute signature: hash of (command_base + error_pattern)
+# Strip variable parts (paths, timestamps, PIDs) for stable signatures
+COMMAND_BASE=$(printf '%s\n' "$COMMAND" | awk '{print $1, $2}')
+# A single sed keeps the first 3 lines while still reading all input, so the
+# writer never receives SIGPIPE (which pipefail would turn into a failure).
+ERROR_PATTERN=$(printf '%s\n' "$ERROR_OUTPUT" | sed -n -e 's/[0-9]\{4,\}//g' -e 's|/[^ ]*||g' -e '1,3p')
+SIGNATURE=$(printf '%s\n' "${COMMAND_BASE}:${ERROR_PATTERN}" | signature_hash)
+# ---------------------------------------------------------------------------
+# Update state and check circuit breaker
+# ---------------------------------------------------------------------------
+# All values are handed to Python through the environment rather than being
+# spliced into its source text: commands and error output routinely contain
+# quotes and backslashes, which would otherwise corrupt the program or execute
+# as code. Values are pre-truncated to keep the environment small; Python
+# applies the exact 200/500 character limits.
+RESULT=$(
+    ESC_STATE_FILE="$STATE_FILE" \
+    ESC_SIGNATURE="$SIGNATURE" \
+    ESC_COMMAND="${COMMAND:0:2000}" \
+    ESC_ERROR="${ERROR_OUTPUT:0:2000}" \
+    ESC_THRESHOLD="$CIRCUIT_BREAKER_THRESHOLD" \
+    python3 -c '
+import json, os, time
+env = os.environ
+state_file = env["ESC_STATE_FILE"]
+signature = env["ESC_SIGNATURE"]
+command = env.get("ESC_COMMAND", "")[:200]
+error = env.get("ESC_ERROR", "")[:500]
+threshold = int(env["ESC_THRESHOLD"])
+# Load or create state; a missing, unreadable or malformed file starts fresh
+state = {"consecutive_failures": 0, "last_error_signature": "", "attempts": [], "total_circuits_tripped": 0}
+try:
+    if os.path.exists(state_file):
+        with open(state_file, "r") as f:
+            loaded = json.load(f)
+        if isinstance(loaded, dict):
+            state = loaded
+except (OSError, ValueError):
+    pass
+# Check if same signature as last failure
+if state.get("last_error_signature") == signature:
+    state["consecutive_failures"] = state.get("consecutive_failures", 0) + 1
+else:
+    # New error signature: reset counter
+    state["consecutive_failures"] = 1
+    state["attempts"] = []
+state["last_error_signature"] = signature
+attempts = state.get("attempts")
+if not isinstance(attempts, list):
+    attempts = []
+# Keep only the five most recent attempts
+state["attempts"] = (attempts + [{"command": command, "error": error[:200], "time": time.time()}])[-5:]
+# Check threshold
+tripped = state["consecutive_failures"] >= threshold
+if tripped:
+    state["total_circuits_tripped"] = state.get("total_circuits_tripped", 0) + 1
+# Save state
+os.makedirs(os.path.dirname(state_file), exist_ok=True)
+with open(state_file, "w") as f:
+    json.dump(state, f, indent=2)
+# Output: tripped|count|attempts_summary
+# Whitespace (including newlines) is collapsed so the result stays on one line
+summary = " / ".join(
+    " ".join(str(a.get("command", ""))[:60].split())
+    for a in state["attempts"] if isinstance(a, dict)
+)
+print("{}|{}|{}".format("TRIPPED" if tripped else "OK", state["consecutive_failures"], summary))
+' 2>/dev/null
+) || exit 0
+# RESULT is a single line "STATUS|COUNT|ATTEMPTS"; ATTEMPTS keeps any further "|"
+IFS='|' read -r STATUS COUNT ATTEMPTS <<<"$RESULT"
+# ---------------------------------------------------------------------------
+# If not tripped, show warning at count 2
+# ---------------------------------------------------------------------------
+if [ "$STATUS" = "OK" ] && [ "$COUNT" = "2" ]; then
+    echo "ESCALATION WARNING: 2 consecutive failures with same error pattern."
+    echo "One more failure triggers the circuit breaker."
+    echo "Before retrying: check .claude/knowledge/ for known solutions."
+    exit 0
+fi
+# ---------------------------------------------------------------------------
+# Circuit breaker tripped — inject structured stuck report
+# ---------------------------------------------------------------------------
+if [ "$STATUS" = "TRIPPED" ]; then
+    echo "CIRCUIT BREAKER TRIPPED — $COUNT consecutive failures with same error signature."
+    echo ""
+    echo "STOP. Do not attempt another fix without completing these steps:"
+    echo ""
+    echo "1. CONSULT KNOWLEDGE BASE:"
+    echo "   - Read .claude/knowledge/shared/conventions.md"
+    echo "   - Read .claude/knowledge/qa-knowledge/ (search for error keywords)"
+    echo "   - Run: git log --all --grep='fix:' --oneline -10"
+    echo ""
+    echo "2. GATHER EVIDENCE (if not already done):"
+    echo "   - Read full error output (not just the last line)"
+    echo "   - Check environment: env vars, ports (lsof), processes (ps), disk (df)"
+    echo "   - Check dependencies: node_modules, venv, Docker containers"
+    echo ""
+    echo "3. IF STILL STUCK — use this template to ask for help:"
+    echo "   ┌─────────────────────────────────────────────────────────┐"
+    echo "   │ STUCK REPORT                                           │"
+    echo "   │                                                        │"
+    echo "   │ Error: [exact error message]                           │"
+    echo "   │ Context: [what I was doing]                            │"
+    echo "   │ Attempts:                                              │"
+    echo "   │   1. [what I tried] -> [result]                        │"
+    echo "   │   2. [what I tried] -> [result]                        │"
+    echo "   │   3. [what I tried] -> [result]                        │"
+    echo "   │ Evidence: [logs, state, KB search results]             │"
+    echo "   │ What I need: [access/data/decision]                    │"
+    echo "   │ My recommendation: [option A because X]                │"
+    echo "   └─────────────────────────────────────────────────────────┘"
+    echo ""
+    echo "4. IF ON A TEAM — escalate to troubleshooter agent or ask a teammate."
+    echo "   If solo — present the stuck report to the user with options."
+    echo ""
+    printf 'Previous attempts: %s\n' "$ATTEMPTS"
+fi
+exit 0

package/dist/plugins/scalpel/hooks/hooks.json CHANGED Viewed

@@ -1,5 +1,17 @@
 {
   "hooks": {
+    "SessionStart": [
+      {
+        "matcher": "",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "${CLAUDE_PLUGIN_ROOT}/hooks/project-setup.sh",
+            "timeout": 10
+          }
+        ]
+      }
+    ],
     "PreToolUse": [
       {
         "matcher": "Edit",
@@ -21,6 +33,18 @@
           }
         ]
       }
+    ],
+    "PostToolUse": [
+      {
+        "matcher": "Bash",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "${CLAUDE_PLUGIN_ROOT}/hooks/escalation-guard.sh",
+            "timeout": 5
+          }
+        ]
+      }
     ]
   }
 }