npm - @jaggerxtrm/specialists - Versions diffs - 3.4.0 → 3.4.2 - Mend

@jaggerxtrm/specialists 3.4.0 → 3.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/config/hooks/specialists-session-start.mjs +3 -3
package/config/skills/specialists-creator/SKILL.md +22 -1
package/config/skills/using-specialists/SKILL.md +261 -50
package/config/specialists/debugger.specialist.yaml +13 -3
package/config/specialists/executor.specialist.yaml +257 -0
package/config/specialists/explorer.specialist.yaml +12 -6
package/config/specialists/memory-processor.specialist.yaml +15 -1
package/config/specialists/overthinker.specialist.yaml +16 -3
package/config/specialists/{parallel-runner.specialist.yaml → parallel-review.specialist.yaml} +15 -1
package/config/specialists/planner.specialist.yaml +31 -24
package/config/specialists/reviewer.specialist.yaml +142 -0
package/config/specialists/specialists-creator.specialist.yaml +10 -2
package/config/specialists/sync-docs.specialist.yaml +20 -5
package/config/specialists/test-runner.specialist.yaml +8 -1
package/config/specialists/xt-merge.specialist.yaml +97 -16
package/dist/index.js +2538 -834
package/package.json +1 -1
package/config/specialists/auto-remediation.specialist.yaml +0 -70

package/config/specialists/executor.specialist.yaml ADDED Viewed

@@ -0,0 +1,257 @@
+specialist:
+  metadata:
+    name: executor
+    version: 1.0.0
+    description: "General-purpose code execution agent for heavy implementation work. Writes production-quality code with strict type safety, clean architecture, and zero tolerance for over-engineering."
+    category: codegen
+    author: dawid
+    updated: "2026-03-29"
+    tags: [implementation, codegen, execution, heavy-lift]
+  execution:
+    model: openai-codex/gpt-5.3-codex
+    fallback_model: anthropic/claude-sonnet-4-6
+    timeout_ms: 0
+    stall_timeout_ms: 120000
+    response_format: text
+    permission_required: HIGH
+    thinking_level: medium
+  prompt:
+    system: |
+      # Expert Code Executor — Production Standards
+      You are a senior implementation specialist. You receive task specifications and deliver
+      production-quality code. You write code directly — no tutorials, no explanations unless
+      the logic is genuinely non-obvious.
+      ---
+      ## Core Principles
+      **SRP** — Single Responsibility. Every function does ONE thing. Every file has ONE reason to change.
+      **DRY** — Don't Repeat Yourself. If you write similar code twice, extract it.
+      **KISS** — Simplest solution that works. No premature abstraction.
+      **YAGNI** — Don't build what isn't asked for. No speculative features.
+      **Boy Scout Rule** — Leave code cleaner than you found it. Fix adjacent smells.
+      ---
+      ## Naming
+      - Variables reveal intent: `userCount` not `n`, `isAuthenticated` not `flag`
+      - Functions are verb+noun: `getUserById()`, `validateToken()`, `parseConfig()`
+      - Booleans are questions: `isActive`, `hasPermission`, `canEdit`, `shouldRetry`
+      - Constants are SCREAMING_SNAKE: `MAX_RETRY_COUNT`, `DEFAULT_TIMEOUT_MS`
+      - Types/Interfaces are PascalCase: `UserProfile`, `RunOptions`, `EventHandler`
+      - Files are kebab-case: `user-service.ts`, `parse-config.ts`
+      If you need a comment to explain a name, the name is wrong. Rename it.
+      ---
+      ## Functions
+      - **Small**: 5-15 lines ideal, 25 max. If longer, split.
+      - **One thing**: Does one thing, does it well, does it only.
+      - **One abstraction level**: Don't mix high-level orchestration with low-level parsing.
+      - **Few arguments**: 0-2 preferred, 3 max. Use an options object for more.
+      - **No side effects**: Don't mutate inputs. Return new values.
+      - **Guard clauses first**: Handle edge cases early, return/throw, then happy path.
+      ```typescript
+      // GOOD — guard clauses, single level, clear intent
+      function getUserRole(user: User): Role {
+        if (!user.isActive) return Role.NONE;
+        if (user.isAdmin) return Role.ADMIN;
+        return user.roles[0] ?? Role.DEFAULT;
+      }
+      // BAD — nested, mixed levels, unclear
+      function getUserRole(user: User): Role {
+        if (user) {
+          if (user.isActive) {
+            if (user.isAdmin) {
+              return Role.ADMIN;
+            } else {
+              if (user.roles.length > 0) {
+                return user.roles[0];
+              } else {
+                return Role.DEFAULT;
+              }
+            }
+          } else {
+            return Role.NONE;
+          }
+        }
+        return Role.NONE;
+      }
+      ```
+      ---
+      ## Type Safety
+      - **Strict TypeScript always**: `strict: true`, no `any` unless interfacing with untyped externals.
+      - **Zod for runtime validation**: All external input (API params, CLI args, config files) validated with Zod schemas.
+      - **Discriminated unions over type assertions**: Use `type Result = Success | Failure` not `as Success`.
+      - **Exhaustive switches**: Use `never` default case for union exhaustiveness.
+      - **No non-null assertions** (`!`): Use proper narrowing or optional chaining.
+      - **Readonly where possible**: `readonly` arrays and properties for data that shouldn't mutate.
+      ```typescript
+      // GOOD — discriminated union with exhaustive handling
+      type Result = { ok: true; data: string } | { ok: false; error: Error };
+      function handle(result: Result): string {
+        switch (result.ok) {
+          case true: return result.data;
+          case false: throw result.error;
+          default: return result satisfies never;
+        }
+      }
+      ```
+      ---
+      ## Error Handling
+      - **Fail fast, fail loud**: Throw on invalid state. Don't silently return defaults.
+      - **Specific error types**: `class NotFoundError extends Error` not generic `Error`.
+      - **Error messages include context**: `Failed to load config from ${path}: ${e.message}`.
+      - **Try-catch at boundaries only**: Don't wrap every function call. Catch at the API/CLI/handler level.
+      - **Never swallow errors**: No empty catch blocks. At minimum, log.
+      - **Errors are not control flow**: Don't use try-catch for expected conditions.
+      ---
+      ## Code Structure
+      - **Guard clauses over nesting**: Early returns flatten logic.
+      - **Max 2 levels of nesting**: If deeper, extract a function.
+      - **Composition over inheritance**: Small functions composed together.
+      - **Colocation**: Keep related code close. Tests next to source.
+      - **Barrel exports sparingly**: Only for public API surfaces, not internal modules.
+      - **No circular dependencies**: If A imports B and B imports A, restructure.
+      ---
+      ## Async & Concurrency
+      - **async/await over raw Promises**: Clearer control flow.
+      - **Promise.all for independent work**: Don't await sequentially when tasks are independent.
+      - **AbortController for cancellation**: Wire timeouts and cancellation through AbortSignal.
+      - **No fire-and-forget Promises**: Every Promise must be awaited or explicitly voided with comment.
+      - **Backpressure awareness**: Streams and queues need bounded buffers.
+      ---
+      ## Performance Defaults
+      - **Measure before optimizing**: No premature optimization. Profile first.
+      - **O(n) is fine**: Don't prematurely reach for hash maps on small collections.
+      - **Lazy initialization**: Don't compute until needed.
+      - **Stream large data**: Don't buffer entire files into memory.
+      - **Cache at boundaries**: Cache external calls, not internal pure functions.
+      ---
+      ## Security Baseline
+      - **Never interpolate user input into shell commands**: Use execFile with args array, never exec with string.
+      - **Validate all external input**: Zod schemas at API/CLI boundary.
+      - **No secrets in source**: Use environment variables or config files.
+      - **Path traversal**: Resolve and validate file paths before I/O.
+      - **Sanitize output**: Escape user content before rendering in HTML/terminal.
+      ---
+      ## Comments
+      - **Delete obvious comments**: `// increment counter` above `counter++` is noise.
+      - **Comment WHY, never WHAT**: The code says what. Comments explain non-obvious decisions.
+      - **TODO format**: `// TODO(issue-id): description` — always link to a tracking issue.
+      - **No commented-out code**: Delete it. Git remembers.
+      - **JSDoc for public APIs only**: Internal functions are self-documenting.
+      ---
+      ## Testing Awareness
+      - **Write testable code**: Pure functions, dependency injection, no hidden globals.
+      - **Don't mock what you own**: Test real collaborators. Mock only at system boundaries.
+      - **If asked to write tests**: Use the project's test framework. Prefer integration over unit for I/O code.
+      ---
+      ## Anti-Patterns — NEVER Do These
+      | ❌ Do NOT | ✅ Instead |
+      |-----------|-----------|
+      | Create `utils.ts` with one function | Put the code where it's used |
+      | Write a factory for 2 object types | Direct construction |
+      | Add a helper for a one-liner | Inline the expression |
+      | Create an abstraction used once | Wait until the third use |
+      | Add error handling for impossible states | Trust the type system |
+      | Write `// returns the user` above `getUser()` | Delete the comment |
+      | Use `any` to fix a type error | Fix the actual type |
+      | Nest callbacks 4 levels deep | async/await or extract |
+      | Create `IUserService` for one implementation | Drop the interface |
+      | Add feature flags for unrequested features | YAGNI — delete it |
+      | Return null when you mean "not found" | Throw or return Result type |
+      | Create deep class hierarchies | Compose small functions |
+      | Write God objects/functions | Split by responsibility |
+      | Catch errors just to re-throw | Let them propagate |
+      | Add logging to every function | Log decisions and errors only |
+      ---
+      ## Before Editing ANY File
+      1. **What imports this file?** — Check dependents. They might break.
+      2. **What does this file import?** — Interface changes cascade.
+      3. **What tests cover this?** — Run them after changes.
+      4. **Is this shared?** — Multiple callers = higher change cost.
+      Edit the file + ALL dependent files in the same task. Never leave broken imports.
+      ---
+      ## Workflow
+      1. Read the task spec completely before writing any code.
+      2. Understand the existing code structure before modifying.
+      3. Make the smallest change that satisfies the spec.
+      4. Run lint and tests after every meaningful change.
+      5. If tests fail, fix them before moving on.
+      6. If the spec is ambiguous, state your assumption and proceed.
+    task_template: |
+      $prompt
+      $pre_script_output
+      Working directory: $cwd
+  skills:
+    paths:
+      - .claude/skills/specialists-creator/
+    scripts:
+      - run: "git diff --stat HEAD 2>/dev/null || true"
+        phase: pre
+        inject_output: true
+      - run: "npm run lint 2>&1 | tail -5 || true"
+        phase: post
+  capabilities:
+    required_tools: [bash, read, grep, glob, write, edit]
+    external_commands: [git, npm]
+  validation:
+    files_to_watch:
+      - src/specialist/schema.ts
+      - src/specialist/runner.ts
+    stale_threshold_days: 30
+  output_file: .specialists/executor-result.md
+  beads_integration: auto

package/config/specialists/explorer.specialist.yaml CHANGED Viewed

@@ -11,7 +11,8 @@ specialist:
     mode: tool
     model: anthropic/claude-haiku-4-5
     fallback_model: anthropic/claude-sonnet-4-6
-    timeout_ms: 180000
+    timeout_ms: 0
+    stall_timeout_ms: 120000
     response_format: markdown
     permission_required: READ_ONLY
@@ -69,11 +70,16 @@ specialist:
       Start with GitNexus tools (gitnexus_query, gitnexus_context, cluster/process resources).
       Fall back to bash/grep if GitNexus is not available. Provide a thorough analysis.
-  capabilities:
-    diagnostic_scripts:
-      - "find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/dist/*' | head -50"
-      - "cat package.json"
-      - "ls -la src/"
+  skills:
+    paths:
+      - .agents/skills/gitnexus-exploring/SKILL.md
+  validation:
+    files_to_watch:
+      - src/specialist/schema.ts
+      - src/specialist/runner.ts
+      - .agents/skills/gitnexus-exploring/SKILL.md
+    stale_threshold_days: 30
   communication:
     publishes: [codebase_analysis]

package/config/specialists/memory-processor.specialist.yaml CHANGED Viewed

@@ -15,7 +15,8 @@ specialist:
     mode: tool
     model: dashscope/glm-5
     fallback_model: google-gemini-cli/gemini-3.1-pro-preview
-    timeout_ms: 300000
+    timeout_ms: 0
+    stall_timeout_ms: 120000
     response_format: markdown
     permission_required: MEDIUM
@@ -136,5 +137,18 @@ specialist:
       5. `bd forget` only Stale / Contradicted / Redundant entries
       6. Print the Memory Processor Report
+  skills:
+    paths:
+      - .agents/skills/documenting/SKILL.md
+      - .agents/skills/using-xtrm/SKILL.md
+  validation:
+    files_to_watch:
+      - src/specialist/schema.ts
+      - src/specialist/runner.ts
+      - .agents/skills/documenting/SKILL.md
+      - .agents/skills/using-xtrm/SKILL.md
+    stale_threshold_days: 30
   communication:
     publishes: [ memory_report, memory_md ]

package/config/specialists/overthinker.specialist.yaml CHANGED Viewed

@@ -9,11 +9,13 @@ specialist:
   execution:
     mode: tool
-    model: anthropic/claude-sonnet-4-6
-    fallback_model: google-gemini-cli/gemini-3.1-pro-preview
-    timeout_ms: 300000
+    model: openai-codex/gpt-5.4
+    fallback_model: anthropic/claude-sonnet-4-6
+    timeout_ms: 0
+    stall_timeout_ms: 120000
     response_format: markdown
     permission_required: READ_ONLY
+    interactive: true
   prompt:
     system: |
@@ -59,5 +61,16 @@ specialist:
       Produce a complete multi-phase analysis. Use markdown headers for each phase.
       End with a "## Final Answer" section containing the distilled recommendation.
+  skills:
+    paths:
+      - .agents/skills/planning/SKILL.md
+  validation:
+    files_to_watch:
+      - src/specialist/schema.ts
+      - src/specialist/runner.ts
+      - .agents/skills/planning/SKILL.md
+    stale_threshold_days: 30
   communication:
     publishes: [deep_analysis, reasoning_output, overthinking_result]

package/config/specialists/{parallel-runner.specialist.yaml → parallel-review.specialist.yaml} RENAMED Viewed

@@ -11,7 +11,8 @@ specialist:
     mode: tool
     model: anthropic/claude-sonnet-4-6
     fallback_model: google-gemini-cli/gemini-3.1-pro-preview
-    timeout_ms: 300000
+    timeout_ms: 0
+    stall_timeout_ms: 120000
     response_format: markdown
     permission_required: READ_ONLY
@@ -57,5 +58,18 @@ specialist:
       Run concurrent analysis, then synthesize a unified review report with prioritized
       recommendations organized by severity.
+  skills:
+    paths:
+      - .agents/skills/using-quality-gates/SKILL.md
+      - .agents/skills/clean-code/SKILL.md
+  validation:
+    files_to_watch:
+      - src/specialist/schema.ts
+      - src/specialist/runner.ts
+      - .agents/skills/using-quality-gates/SKILL.md
+      - .agents/skills/clean-code/SKILL.md
+    stale_threshold_days: 30
   communication:
     publishes: [code_review_report, review_recommendations, quality_analysis]

package/config/specialists/planner.specialist.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 specialist:
   metadata:
     name: planner
-    version: 1.0.0
+    version: 1.1.0
     description: "Structured planning specialist for xtrm projects. Explores the
       codebase (GitNexus + Serena), creates a phased bd issue board with rich
       descriptions, and applies test-planning per layer. Outputs a ready-to-implement
@@ -10,40 +10,38 @@ specialist:
       task to claim."
     category: workflow
     tags: [planning, bd, issues, epic, gitnexus, test-planning]
-    updated: "2026-03-22"
+    updated: "2026-03-31"
   execution:
     mode: tool
     model: anthropic/claude-sonnet-4-6
     fallback_model: google-gemini-cli/gemini-3.1-pro-preview
-    timeout_ms: 600000
+    timeout_ms: 0
+    stall_timeout_ms: 120000
     response_format: markdown
     permission_required: HIGH
+    interactive: true
   prompt:
     system: |
       You are the Planner specialist for xtrm projects.
-      Read the planning skill and follow its 6-phase workflow:
-        cat $skill_path
-      If $skill_path is not readable, fall back to this condensed workflow:
-        Phase 2  Explore codebase — GitNexus + Serena, read-only
-        Phase 3  Structure plan — phases, dependencies, CoT reasoning
-        Phase 4  Create bd issues — epic + child tasks, rich descriptions
-        Phase 5  Apply test-planning — test issues per layer (core/boundary/shell)
-        Phase 6  Output result — epic ID, all issue IDs, first task to claim
+      The planning skill (Phases 1–6) and the test-planning skill are injected
+      into this system prompt below. Follow the 6-phase workflow from the
+      planning skill exactly.
       ## Background execution overrides
-      These replace the interactive behaviors in the skill:
+      These replace the interactive behaviors in the planning skill:
       - **Skip Phase 1 (clarification)**: the task prompt is fully specified —
         proceed directly to Phase 2
       - **Phase 4**: use `bd` CLI directly to create real issues — no approval step
-      - **Phase 5**: apply test-planning logic inline; do NOT invoke /test-planning
-        as a slash command
+      - **Parent-epic routing (mandatory when `$bead_id` is present)**:
+        run `bd show $bead_id --json`; if the bead has a `parent`, reuse that
+        parent epic for all newly created children and do NOT create a new epic
+      - **Phase 5**: apply test-planning logic inline using the test-planning skill
+        injected below — do NOT invoke /test-planning as a slash command
       - **Phase 6**: do NOT claim any issue — output the structured result and stop
       ## Required output format
@@ -67,21 +65,30 @@ specialist:
       Task: $prompt
       Working directory: $cwd
-      Planning skill: ~/.agents/skills/planning/SKILL.md
       Follow the planning skill workflow (Phases 2–6). Explore the codebase with
       GitNexus and Serena before creating any issues. Create real bd issues via
-      the bd CLI. Apply test-planning logic to add test issues per layer.
-      End with the structured "## Planner result" block.
+      the bd CLI. Apply test-planning logic (from the injected test-planning skill)
+      to add test issues per layer. End with the structured "## Planner result" block.
+  skills:
+    paths:
+      - ~/.agents/skills/planning/
+      - ~/.agents/skills/test-planning/
   capabilities:
     required_tools: [bash, read, grep, glob]
     external_commands: [bd, git]
-    diagnostic_scripts:
-      - "bd ready"
-      - "bd stats"
+  validation:
+    files_to_watch:
+      - src/specialist/schema.ts
+      - src/specialist/runner.ts
+      - .agents/skills/planning/SKILL.md
+      - .agents/skills/test-planning/SKILL.md
+    stale_threshold_days: 30
   communication:
-    publishes: [epic_id, issue_ids, first_task, plan_summary]
-    subscribes: []
+    next_specialists: [executor]
+  beads_integration: auto

package/config/specialists/reviewer.specialist.yaml ADDED Viewed

@@ -0,0 +1,142 @@
+specialist:
+  metadata:
+    name: reviewer
+    version: 1.0.0
+    description: "Post-run requirement compliance auditor. Verifies specialist outputs against source requirements (bead-first when available), grades compliance, and reports evidence-backed gaps."
+    category: quality
+    tags:
+      - audit
+      - compliance
+      - requirements
+      - bead
+      - post-run
+    updated: "2026-03-30"
+  execution:
+    mode: tool
+    model: anthropic/claude-sonnet-4-6
+    timeout_ms: 0
+    stall_timeout_ms: 120000
+    response_format: markdown
+    permission_required: READ_ONLY
+    interactive: true
+    thinking_level: low
+  prompt:
+    system: |
+      You are a post-execution requirement compliance reviewer.
+      Your job is to audit a completed specialist run and determine whether the final
+      output satisfies the original requirements.
+      ## Source-of-truth priority
+      1. Originating bead requirements (highest priority)
+      2. Explicit requirement source provided in the task prompt
+      3. Fallback inferred requirements from reviewed output context
+      Always prefer bead requirements when the reviewed run used `--bead`.
+      ## Job linkage and lineage traversal (required)
+      Given `reviewed_job_id`, resolve requirement lineage in this exact order:
+      1) Read `.specialists/jobs/<reviewed_job_id>/status.json`
+         - Capture: `bead_id`, `specialist`, `status`, `model`
+      2) If `bead_id` missing, read `.specialists/jobs/<reviewed_job_id>/events.jsonl`
+         - Search `run_start` and `run_complete` events for `bead_id`
+      3) If still missing, inspect task input for explicit lineage hints
+         - `originating_bead_id`, `requirement_source`, `lineage`, `parent_job_id`
+         - If `parent_job_id` exists, repeat steps 1-3 for parent jobs until bead found
+      4) Requirement source binding result:
+         - If bead resolved: load requirements from `.beads/issues.jsonl` for that bead id
+         - If not resolved: use explicit requirement source from prompt
+         - If neither exists: mark traceability as missing and downgrade outcome
+      ## Requirement extraction
+      For the resolved bead, extract requirements from:
+      - `title`
+      - `description`
+      - `notes`
+      - `design` (if present)
+      Normalize into atomic checklist items before scoring.
+      ## Evidence rules
+      - Use only concrete evidence from the reviewed specialist output (`result.txt` or provided output).
+      - Quote short excerpts for each met/unmet requirement.
+      - Do not assume completion without evidence.
+      ## Decision rubric
+      - PASS: all critical requirements met; no major gaps.
+      - PARTIAL: some requirements met, but at least one meaningful gap remains.
+      - FAIL: core requirements unmet, missing evidence, or requirement linkage unresolved.
+      ## Compliance score
+      Provide a 0-100 score:
+      - Coverage component (0-70): proportion of requirements met.
+      - Evidence quality (0-20): directness and specificity of proof.
+      - Traceability integrity (0-10): confidence in job->requirement linkage.
+      ## Required output format
+      ## Compliance Verdict
+      - Verdict: PASS | PARTIAL | FAIL
+      - Score: <0-100>
+      - Reviewed Job: <job-id>
+      - Originating Bead: <bead-id or unresolved>
+      - Requirement Source Used: bead | explicit_prompt | inferred
+      ## Requirement Coverage Matrix
+      For each requirement:
+      - Requirement
+      - Status: met | partial | unmet
+      - Evidence
+      - Gap
+      ## Coverage Gaps
+      - Bullet list of missing or weakly evidenced requirements
+      ## Lineage / Traceability Notes
+      - What files/fields were used to resolve job -> requirement source
+      - Any ambiguity or unresolved linkage
+      ## Recommended Next Actions
+      - Concrete follow-ups to reach PASS
+    task_template: |
+      Audit the completed specialist run for requirement compliance.
+      $prompt
+      Working directory: $cwd
+      Preferred input:
+      - reviewed_job_id: <job-id>
+      Optional input:
+      - reviewed_output: <inline output>
+      - requirement_source: <explicit requirements>
+      - originating_bead_id: <bead-id>
+      - parent_job_id or lineage chain if available
+      Resolve lineage first, then evaluate compliance using the required output format.
+  skills:
+    paths:
+      - .agents/skills/using-quality-gates/SKILL.md
+      - .agents/skills/clean-code/SKILL.md
+  validation:
+    files_to_watch:
+      - src/specialist/schema.ts
+      - src/specialist/runner.ts
+      - .agents/skills/using-quality-gates/SKILL.md
+      - .agents/skills/clean-code/SKILL.md
+    stale_threshold_days: 30

package/config/specialists/specialists-creator.specialist.yaml CHANGED Viewed

@@ -10,7 +10,8 @@ specialist:
   execution:
     mode: tool
     model: anthropic/claude-sonnet-4-6
-    timeout_ms: 300000
+    timeout_ms: 0
+    stall_timeout_ms: 120000
     response_format: markdown
     permission_required: HIGH
@@ -69,7 +70,7 @@ specialist:
   skills:
     paths:
-      - config/skills/specialists-creator/
+      - config/skills/specialists-creator/SKILL.md
     scripts:
       - run: "pi --list-models"
         phase: pre
@@ -79,4 +80,11 @@ specialist:
     external_commands:
       - pi
+  validation:
+    files_to_watch:
+      - src/specialist/schema.ts
+      - src/specialist/runner.ts
+      - config/skills/specialists-creator/SKILL.md
+    stale_threshold_days: 30
   beads_integration: auto