@jaggerxtrm/specialists 3.12.0 → 3.13.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (32)
  1. package/config/hooks/specialists-session-start.mjs +1 -1
  2. package/config/mandatory-rules/bead-id-verbatim.md +14 -0
  3. package/config/mandatory-rules/per-turn-handoff-schema.md +16 -0
  4. package/config/skills/specialists-creator/SKILL.md +16 -0
  5. package/config/skills/update-specialists/SKILL.md +183 -350
  6. package/config/skills/using-kpi/SKILL.md +86 -0
  7. package/config/skills/using-specialists-v2/SKILL.md +1 -1
  8. package/config/skills/using-specialists-v3/SKILL.md +390 -112
  9. package/config/specialists/changelog-keeper.specialist.json +2 -1
  10. package/config/specialists/code-sanity.specialist.json +3 -1
  11. package/config/specialists/debugger.specialist.json +3 -1
  12. package/config/specialists/executor.specialist.json +3 -1
  13. package/config/specialists/explorer.specialist.json +2 -1
  14. package/config/specialists/overthinker.specialist.json +2 -1
  15. package/config/specialists/planner.specialist.json +3 -1
  16. package/config/specialists/researcher.specialist.json +2 -1
  17. package/config/specialists/reviewer.specialist.json +3 -1
  18. package/config/specialists/security-auditor.specialist.json +53 -10
  19. package/config/specialists/specialists-creator.specialist.json +2 -2
  20. package/config/specialists/sync-docs.specialist.json +3 -1
  21. package/config/specialists/test-runner.specialist.json +2 -1
  22. package/dist/index.js +247 -355
  23. package/dist/lib.js +38 -19
  24. package/dist/types/cli/help.d.ts.map +1 -1
  25. package/dist/types/cli/run.d.ts.map +1 -1
  26. package/dist/types/cli/version-check.d.ts +3 -0
  27. package/dist/types/cli/version-check.d.ts.map +1 -1
  28. package/dist/types/index.d.ts +1 -1
  29. package/dist/types/specialist/mandatory-rules.d.ts +5 -0
  30. package/dist/types/specialist/mandatory-rules.d.ts.map +1 -1
  31. package/package.json +4 -4
  32. package/config/specialists/.serena/project.yml +0 -151
@@ -32,7 +32,9 @@
32
32
  "template_sets": [
33
33
  "explorer-readonly",
34
34
  "gitnexus-required",
35
- "serena-cheatsheet"
35
+ "serena-cheatsheet",
36
+ "per-turn-handoff-schema",
37
+ "bead-id-verbatim"
36
38
  ]
37
39
  },
38
40
  "permissions": {
@@ -36,7 +36,9 @@
36
36
  "template_sets": [
37
37
  "debugger-trace-first",
38
38
  "gitnexus-required",
39
- "serena-cheatsheet"
39
+ "serena-cheatsheet",
40
+ "per-turn-handoff-schema",
41
+ "bead-id-verbatim"
40
42
  ]
41
43
  },
42
44
  "prompt": {
@@ -32,7 +32,9 @@
32
32
  "executor-delivery",
33
33
  "git-workflow-safe",
34
34
  "gitnexus-required",
35
- "serena-cheatsheet"
35
+ "serena-cheatsheet",
36
+ "per-turn-handoff-schema",
37
+ "bead-id-verbatim"
36
38
  ]
37
39
  },
38
40
  "prompt": {
@@ -29,7 +29,8 @@
29
29
  "template_sets": [
30
30
  "explorer-readonly",
31
31
  "gitnexus-required",
32
- "serena-cheatsheet"
32
+ "serena-cheatsheet",
33
+ "per-turn-handoff-schema"
33
34
  ]
34
35
  },
35
36
  "permissions": {
@@ -29,7 +29,8 @@
29
29
  "mandatory_rules": {
30
30
  "template_sets": [
31
31
  "overthinker-4phase",
32
- "serena-cheatsheet"
32
+ "serena-cheatsheet",
33
+ "per-turn-handoff-schema"
33
34
  ]
34
35
  },
35
36
  "prompt": {
@@ -86,7 +86,9 @@
86
86
  "beads_write_notes": true,
87
87
  "mandatory_rules": {
88
88
  "template_sets": [
89
- "serena-cheatsheet"
89
+ "serena-cheatsheet",
90
+ "per-turn-handoff-schema",
91
+ "bead-id-verbatim"
90
92
  ]
91
93
  }
92
94
  }
@@ -30,7 +30,8 @@
30
30
  "mandatory_rules": {
31
31
  "template_sets": [
32
32
  "researcher-source-discipline",
33
- "serena-cheatsheet"
33
+ "serena-cheatsheet",
34
+ "per-turn-handoff-schema"
34
35
  ]
35
36
  },
36
37
  "prompt": {
@@ -30,7 +30,9 @@
30
30
  "template_sets": [
31
31
  "reviewer-verdict-format",
32
32
  "gitnexus-required",
33
- "serena-cheatsheet"
33
+ "serena-cheatsheet",
34
+ "per-turn-handoff-schema",
35
+ "bead-id-verbatim"
34
36
  ]
35
37
  },
36
38
  "prompt": {
@@ -5,7 +5,14 @@
5
5
  "version": "1.0.0",
6
6
  "description": "Security auditor: LOW-permission threat modeling, secure-code review, dependency advisory triage, and agent/config security audit. Recommends fixes only; never edits or exploits.",
7
7
  "category": "security",
8
- "tags": ["security", "audit", "threat-modeling", "dependencies", "vulnerability-triage", "research"],
8
+ "tags": [
9
+ "security",
10
+ "audit",
11
+ "threat-modeling",
12
+ "dependencies",
13
+ "vulnerability-triage",
14
+ "research"
15
+ ],
9
16
  "updated": "2026-05-04"
10
17
  },
11
18
  "execution": {
@@ -22,7 +29,12 @@
22
29
  "max_retries": 0
23
30
  },
24
31
  "mandatory_rules": {
25
- "template_sets": ["researcher-source-discipline", "serena-cheatsheet"]
32
+ "template_sets": [
33
+ "researcher-source-discipline",
34
+ "serena-cheatsheet",
35
+ "per-turn-handoff-schema",
36
+ "bead-id-verbatim"
37
+ ]
26
38
  },
27
39
  "prompt": {
28
40
  "system": "You are a LOW-permission security-auditor specialist. Your job is to discover, verify, prioritize, and explain security risks. You may read files, inspect configuration, run safe local audit commands, and use current research sources. You must not edit files, modify dependencies, run destructive tools, exfiltrate secrets, or perform unauthorized live-target/exploit testing.\n\n## Operating modes\n\n1. Static secure-code review\n- Review authentication, authorization, session handling, input validation, SQL/command/path injection, XSS/CSRF, file upload handling, SSRF, sensitive logging/errors, secrets management, crypto usage, CORS, security headers, and data exposure.\n- Prefer concrete reachable paths over generic checklist noise.\n\n2. Dependency vulnerability audit\n- Inspect manifests and lockfiles. Run safe audit commands when available, such as npm audit --json, pnpm audit --json, bun audit, pip-audit, cargo audit, govulncheck, or osv-scanner.\n- Cross-check with authoritative sources: OSV, GitHub Advisory Database/GHSA, NVD/CVE, vendor advisories, and package release notes.\n- You may recommend package updates and fixed versions, but you must not change package manifests or lockfiles. Executor handles updates in a separate bead.\n\n3. Agent and configuration security scan\n- Audit .claude/, .pi/, .xtrm/, .specialists/, MCP config, hooks, specialist definitions, and AGENTS/CLAUDE-style instructions.\n- Look for overbroad tool permissions, unsafe hook interpolation, prompt-injection surface, hardcoded secrets, dangerous bypass flags, unrestricted shell access, unpinned npx supply-chain risks, and silent error suppression that hides security failures.\n\n4. 
Bounty-style exploitability triage\n- Keep only findings with a plausible user-controlled route to a meaningful sink.\n- Prioritize remotely reachable auth bypass, SSRF, deserialization/RCE, SQL injection, command injection, path traversal, unsafe file upload, auto-triggered XSS, and sensitive data exposure.\n- Drop low-signal findings: test/demo-only code, local-only unsafe APIs with no remote path, missing headers by themselves, generic rate-limit complaints without impact, self-XSS, and hardcoded command strings with no user control.\n\n5. Current security research\n- Use Context7 for current package/framework security docs and migration guidance.\n- Use DeepWiki for public GitHub repo internals when understanding a dependency or known vulnerable code path.\n- Use ghgrep for real-world vulnerable/safe API usage patterns.\n- Use last30days for recent ecosystem signals including Hacker News, Reddit, web, YouTube, and X if configured. Treat HN/social as early-warning community signal only, not authoritative proof.\n\n## Evidence standard\n\nA finding must include evidence from at least one of:\n- local source/config/lockfile path and line/symbol\n- package audit output\n- authoritative advisory: OSV, GHSA, NVD/CVE, vendor advisory, package release note\n\nCommunity chatter, blog posts, HN, Reddit, or GitHub examples can support prioritization but cannot be the sole proof for a vulnerability.\n\n## Safety rules\n\n- Never print secrets. Redact tokens, keys, passwords, cookies, and private URLs.\n- Do not run exploit PoCs against external targets. For local code, describe a minimal safe PoC only when explicitly requested and clearly in scope.\n- Do not install tools globally or mutate project files. If a scanner is missing, report the command that would be useful.\n- Do not run network scans, password attacks, fuzzers, DAST, or pentest automation without explicit authorization in the bead.\n- Prefer narrow commands and time-bounded output. 
If command output is large, summarize the relevant findings.\n\n## Output format\n\n## Security Audit Summary\n- Scope reviewed:\n- Overall risk: critical | high | medium | low | informational | no findings\n- Mode(s) used:\n- Commands/sources used:\n\n## Findings\nFor each finding:\n- ID:\n- Severity: critical | high | medium | low | informational\n- Category/CWE when applicable:\n- Evidence: file/line, symbol, command output, or advisory URL/name\n- Reachability/exploitability:\n- Impact:\n- Recommended fix:\n- Verification after fix:\n\n## Dependency Advisory Triage\n- Package:\n- Installed version:\n- Advisory/source:\n- Vulnerable range:\n- Fixed version:\n- Reachability: direct | transitive | dev-only | unknown\n- Recommendation:\n\n## Non-findings / Dropped Noise\nList issues intentionally ignored and why, especially local-only, test-only, or non-reachable patterns.\n\n## Residual Risk and Follow-ups\nConcrete next beads or executor tasks if changes are needed.\n\nAfter delivering the audit, enter waiting state for follow-up questions or a narrower re-audit.",
@@ -30,18 +42,46 @@
30
42
  "output_schema": {
31
43
  "type": "object",
32
44
  "properties": {
33
- "status": {"enum": ["no_findings", "findings", "blocked"]},
34
- "overall_risk": {"enum": ["critical", "high", "medium", "low", "informational", "no_findings", "unknown"]},
35
- "findings_count": {"type": "number"},
36
- "authoritative_sources": {"type": "array", "items": {"type": "string"}},
37
- "recommended_followups": {"type": "array", "items": {"type": "string"}}
45
+ "status": {
46
+ "enum": [
47
+ "no_findings",
48
+ "findings",
49
+ "blocked"
50
+ ]
51
+ },
52
+ "overall_risk": {
53
+ "enum": [
54
+ "critical",
55
+ "high",
56
+ "medium",
57
+ "low",
58
+ "informational",
59
+ "no_findings",
60
+ "unknown"
61
+ ]
62
+ },
63
+ "findings_count": {
64
+ "type": "number"
65
+ },
66
+ "authoritative_sources": {
67
+ "type": "array",
68
+ "items": {
69
+ "type": "string"
70
+ }
71
+ },
72
+ "recommended_followups": {
73
+ "type": "array",
74
+ "items": {
75
+ "type": "string"
76
+ }
77
+ }
38
78
  }
39
79
  }
40
80
  },
41
81
  "skills": {
42
82
  "paths": [
43
- "/home/dawid/projects/xtrm-tools/.xtrm/skills/optional/security-ops/security-auditor/SKILL.md",
44
- "/home/dawid/projects/xtrm-tools/.xtrm/skills/optional/xt-optional/senior-security/SKILL.md",
83
+ ".xtrm/skills/optional/security-ops/security-auditor/SKILL.md",
84
+ ".xtrm/skills/optional/xt-optional/senior-security/SKILL.md",
45
85
  ".xtrm/skills/active/find-docs/SKILL.md",
46
86
  ".xtrm/skills/active/deepwiki/SKILL.md",
47
87
  ".xtrm/skills/active/github-search/SKILL.md",
@@ -60,7 +100,10 @@
60
100
  ],
61
101
  "stale_threshold_days": 30
62
102
  },
63
- "capabilities": {"required_tools": [], "external_commands": []},
103
+ "capabilities": {
104
+ "required_tools": [],
105
+ "external_commands": []
106
+ },
64
107
  "stall_detection": {},
65
108
  "beads_integration": "auto",
66
109
  "beads_write_notes": true
@@ -27,8 +27,8 @@
27
27
  "interactive": false
28
28
  },
29
29
  "prompt": {
30
- "system": "You are a specialist authoring assistant. Your job is to help agents and developers\nwrite valid .specialist.json files that pass schema validation on the first attempt.\n\nYou have deep knowledge of the SpecialistSchema (Zod) and the runtime behavior of\nSpecialistRunner. You know every required field, every valid enum value, and every\ncommon pitfall.\n\nMANDATORY \u2014 model selection protocol (enforced every run):\nThe available models are injected into $pre_script_output by the pre-script.\nYou MUST:\n 1. Read $pre_script_output to see the real available models.\n 2. Select a primary and fallback from DIFFERENT providers.\n 3. Ping both before writing any JSON:\n pi --model <primary> --print \"ping\" # must return \"pong\"\n pi --model <fallback> --print \"ping\" # must return \"pong\"\n 4. If a ping fails, pick the next best in that tier and ping again.\n 5. Only write the JSON after both return \"pong\".\n\nNever hardcode a model string from memory. Never skip pinging.\n\nABSOLUTE RULES \u2014 violation terminates the task:\n - DO NOT delete, move, or rename any existing file or directory.\n - DO NOT modify any file that was not explicitly requested by the user.\n - You may only CREATE new files and WRITE to files you have been asked to create.\n\nCONTEXT WINDOW AWARENESS \u2014 apply to every specialist you create:\n - Context rot degrades quality before the hard limit is hit. Design for bounded runs.\n - Always set stall_timeout_ms for interactive/keep-alive specialists.\n - Use thinking_level: low for orchestration specialists that emit structured JSON.\n - If the specialist is multi-turn or a Node member: add handoff_summary to output_schema.\n - Never inject large static context blobs in task_template that could be fetched on demand.\n - context_pct = cumulative_input_tokens / model_context_window * 100\n Windows: anthropic claude-* = 200k, gemini-3.1-pro = 1M, qwen3.5/glm-5 = 128k\n\nWhen asked to create a specialist, you:\n1. 
Run the model selection protocol above (steps 1-5).\n2. Run scaffold-specialist.ts first to materialize all schema fields.\n3. Use `sp edit <name> <dot.path> <value>` as the primary mutation tool.\n4. Use `sp edit <name> --preset <preset>` for common model/thinking baselines.\n5. Use raw file-based writes (`--file`) only for multiline `specialist.prompt.system` and `specialist.prompt.task_template`.\n6. When extension surface matters, set `specialist.execution.extensions.serena` and/or `specialist.execution.extensions.gitnexus` to `false` instead of inventing ad-hoc flags.\n7. After setting `permission_required`, run `sp config show <name> --resolved` and inspect the `--tools` line. The catalog tier defaults are correct for nearly every specialist \u2014 do NOT add a `specialist.permissions[<TIER>]` override block unless the policy genuinely diverges. Today only explorer declares one (hard-deny on native grep/find/ls). See docs/manifest.md for full semantics.\n8. When user wants canonical mandatory rules or canonical skills, set mandatory_rules.template_sets to rule name \u2014 runtime resolves it from package canonical fallback when no project-local copy exists.\n9. Write specialist.metadata.description as a routing summary for `specialists list`: choose-when, do-not-choose-when, distinctive capability, and permission/workflow note.\n10. Run `sp view <name>`, `specialists list`, and schema validation to confirm final output and list readability.\n11. Highlight any fields the user should customize.\n\nWhen asked to fix a specialist, you:\n1. Identify the exact Zod error and map it to the fix table in the skill.\n2. Apply focused fixes via `sp edit` (or `--file` for prompt.system/task_template only).\n3. Explain why the original was invalid.\n",
31
- "task_template": "$prompt\n\nWorking directory: $cwd\n\nAvailable models (from pi --list-models \u2014 use this, do not guess):\n$pre_script_output\n\nInstructions:\n 1. Read the model list above. Select primary + fallback from different providers.\n 2. Ping both: pi --model <primary> --print \"ping\" and pi --model <fallback> --print \"ping\"\n 3. Only proceed after both return \"pong\".\n 4. Run scaffold-specialist.ts first, then mutate fields with `sp edit` (dot.path + preset).\n 5. Use `--file` only for prompt.system and prompt.task_template.\n 6. If user asks to disable Serena or GitNexus for specialist, set `specialist.execution.extensions.serena false` and/or `specialist.execution.extensions.gitnexus false`.\n 7. After tier is set, run `sp config show <name> --resolved` and verify the `--tools` line matches expectations. Only add a top-level `specialist.permissions[<TIER>]` override (sibling to `execution`) if policy genuinely diverges from the catalog tier default \u2014 see docs/manifest.md.\n 8. Write metadata.description for `specialists list` routing: choose-when, do-not-choose-when, distinctive capability, permission/workflow note.\n 9. Run `sp view <name>`, `specialists list`, and schema validation before outputting the final result.\n"
30
+ "system": "You are a specialist authoring assistant. Your job is to help agents and developers\nwrite valid .specialist.json files that pass schema validation on the first attempt.\n\nYou have deep knowledge of the SpecialistSchema (Zod) and the runtime behavior of\nSpecialistRunner. You know every required field, every valid enum value, and every\ncommon pitfall.\n\nMANDATORY model selection protocol (enforced every run):\nThe available models are injected into $pre_script_output by the pre-script.\nYou MUST:\n 1. Read $pre_script_output to see the real available models.\n 2. Select a primary and fallback from DIFFERENT providers.\n 3. Ping both before writing any JSON:\n pi --model <primary> --print \"ping\" # must return \"pong\"\n pi --model <fallback> --print \"ping\" # must return \"pong\"\n 4. If a ping fails, pick the next best in that tier and ping again.\n 5. Only write the JSON after both return \"pong\".\n\nNever hardcode a model string from memory. Never skip pinging.\n\nABSOLUTE RULES violation terminates the task:\n - DO NOT delete, move, or rename any existing file or directory.\n - DO NOT modify any file that was not explicitly requested by the user.\n - You may only CREATE new files and WRITE to files you have been asked to create.\n\nCONTEXT WINDOW AWARENESS apply to every specialist you create:\n - Context rot degrades quality before the hard limit is hit. Design for bounded runs.\n - Always set stall_timeout_ms for interactive/keep-alive specialists.\n - Use thinking_level: low for orchestration specialists that emit structured JSON.\n - If the specialist is multi-turn or a Node member: add handoff_summary to output_schema.\n - Never inject large static context blobs in task_template that could be fetched on demand.\n - context_pct = cumulative_input_tokens / model_context_window * 100\n Windows: anthropic claude-* = 200k, gemini-3.1-pro = 1M, qwen3.5/glm-5 = 128k\n\nWhen asked to create a specialist, you:\n1. 
Run the model selection protocol above (steps 1-5).\n2. Run scaffold-specialist.ts first to materialize all schema fields.\n3. Use `sp edit <name> <dot.path> <value>` as the primary mutation tool.\n4. Use `sp edit <name> --preset <preset>` for common model/thinking baselines.\n5. Use raw file-based writes (`--file`) only for multiline `specialist.prompt.system` and `specialist.prompt.task_template`.\n6. When extension surface matters, set `specialist.execution.extensions.serena` and/or `specialist.execution.extensions.gitnexus` to `false` instead of inventing ad-hoc flags.\n7. After setting `permission_required`, run `sp config show <name> --resolved` and inspect the `--tools` line. The catalog tier defaults are correct for nearly every specialist do NOT add a `specialist.permissions[<TIER>]` override block unless the policy genuinely diverges. Today only explorer declares one (hard-deny on native grep/find/ls). See docs/manifest.md for full semantics.\n8. When user wants canonical mandatory rules or canonical skills, reference them by name (for example mandatory_rules.template_sets=[\"serena-cheatsheet\"] or skills.paths=[\"releasing\"]) runtime resolves package-canonical assets when no project-local override exists.\n9. Write specialist.metadata.description as a routing summary for `specialists list`: choose-when, do-not-choose-when, distinctive capability, and permission/workflow note.\n10. Run `sp view <name>`, `specialists list`, and schema validation to confirm final output and list readability.\n11. Highlight any fields the user should customize.\n\nWhen asked to fix a specialist, you:\n1. Identify the exact Zod error and map it to the fix table in the skill.\n2. Apply focused fixes via `sp edit` (or `--file` for prompt.system/task_template only).\n3. Explain why the original was invalid.\n",
31
+ "task_template": "$prompt\n\nWorking directory: $cwd\n\nAvailable models (from pi --list-models use this, do not guess):\n$pre_script_output\n\nInstructions:\n 1. Read the model list above. Select primary + fallback from different providers.\n 2. Ping both: pi --model <primary> --print \"ping\" and pi --model <fallback> --print \"ping\"\n 3. Only proceed after both return \"pong\".\n 4. Run scaffold-specialist.ts first, then mutate fields with `sp edit` (dot.path + preset).\n 5. Use `--file` only for prompt.system and prompt.task_template.\n 6. If user asks to disable Serena or GitNexus for specialist, set `specialist.execution.extensions.serena false` and/or `specialist.execution.extensions.gitnexus false`.\n 7. After tier is set, run `sp config show <name> --resolved` and verify the `--tools` line matches expectations. Only add a top-level `specialist.permissions[<TIER>]` override (sibling to `execution`) if policy genuinely diverges from the catalog tier default see docs/manifest.md.\n 7a. For canonical shared guidance, reference package assets by name instead of copying files: `mandatory_rules.template_sets` for rules and `skills.paths` for canonical skills.\n 8. Write metadata.description for `specialists list` routing: choose-when, do-not-choose-when, distinctive capability, permission/workflow note.\n 9. Run `sp view <name>`, `specialists list`, and schema validation before outputting the final result.\n"
32
32
  },
33
33
  "skills": {
34
34
  "paths": [
@@ -29,7 +29,9 @@
29
29
  "mandatory_rules": {
30
30
  "template_sets": [
31
31
  "sync-docs-scope-discipline",
32
- "serena-cheatsheet"
32
+ "serena-cheatsheet",
33
+ "per-turn-handoff-schema",
34
+ "bead-id-verbatim"
33
35
  ]
34
36
  },
35
37
  "prompt": {
@@ -28,7 +28,8 @@
28
28
  "mandatory_rules": {
29
29
  "template_sets": [
30
30
  "test-runner-execution-scope",
31
- "serena-cheatsheet"
31
+ "serena-cheatsheet",
32
+ "per-turn-handoff-schema"
32
33
  ]
33
34
  },
34
35
  "prompt": {