ma-agents 3.5.6 → 3.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.ma-agents.json +10 -0
- package/AGENTS.md +97 -0
- package/MANIFEST.yaml +3 -0
- package/README.md +17 -0
- package/_bmad-output/implementation-artifacts/21-10-profile-reconfigure.md +30 -6
- package/_bmad-output/implementation-artifacts/21-11-profile-uninstall.md +2 -1
- package/_bmad-output/implementation-artifacts/21-2-universal-instruction-block-expansion.md +217 -62
- package/_bmad-output/implementation-artifacts/21-3-roomodes-template-bmad-modes.md +196 -73
- package/_bmad-output/implementation-artifacts/21-4-agents-md-template-opencode.md +242 -53
- package/_bmad-output/implementation-artifacts/21-5-clinerules-template-extension.md +180 -41
- package/_bmad-output/implementation-artifacts/21-6-onprem-layered-guardrails.md +250 -75
- package/_bmad-output/implementation-artifacts/21-7-bmad-persona-phase-prefix.md +221 -89
- package/_bmad-output/implementation-artifacts/21-8-vllm-reference-doc-readme.md +121 -63
- package/_bmad-output/implementation-artifacts/21-9-tests-validation.md +332 -61
- package/_bmad-output/implementation-artifacts/bug-bmad-recompile-fails-on-airgapped-network.md +112 -0
- package/_bmad-output/implementation-artifacts/sprint-status.yaml +3 -2
- package/bin/cli.js +59 -0
- package/docs/deployment/vllm-nemotron.md +130 -0
- package/lib/agents.js +17 -2
- package/lib/bmad-customize/bmm-analyst.customize.yaml +8 -0
- package/lib/bmad-customize/bmm-architect.customize.yaml +2 -0
- package/lib/bmad-customize/bmm-dev.customize.yaml +2 -0
- package/lib/bmad-customize/bmm-pm.customize.yaml +2 -0
- package/lib/bmad-customize/bmm-qa.customize.yaml +2 -0
- package/lib/bmad-customize/bmm-quick-flow-solo-dev.customize.yaml +8 -0
- package/lib/bmad-customize/bmm-sm.customize.yaml +2 -0
- package/lib/bmad-customize/bmm-tech-writer.customize.yaml +2 -0
- package/lib/bmad-customize/bmm-ux-designer.customize.yaml +2 -0
- package/lib/bmad.js +293 -1
- package/lib/installer.js +617 -43
- package/lib/merge/roomodes.js +125 -0
- package/lib/profile.js +25 -2
- package/lib/reconfigure.js +334 -0
- package/lib/templates/agents-md.template.md +67 -0
- package/lib/templates/clinerules.template.md +13 -0
- package/lib/templates/instruction-block-onprem.template.md +86 -0
- package/lib/templates/instruction-block-universal.template.md +29 -0
- package/lib/templates/roomodes.template.yaml +96 -0
- package/lib/uninstall.js +314 -0
- package/package.json +4 -3
- package/test/agents-md.test.js +398 -0
- package/test/bmad-extension.test.js +2 -2
- package/test/bmad-persona-phase-prefix.test.js +271 -0
- package/test/clinerules.test.js +339 -0
- package/test/instruction-block.test.js +388 -0
- package/test/integration-verification.test.js +2 -2
- package/test/migration-validation.test.js +2 -2
- package/test/offline-recompile.test.js +237 -0
- package/test/onprem-injection.test.js +425 -32
- package/test/onprem-layer.test.js +419 -0
- package/test/reconfigure.test.js +436 -0
- package/test/roomodes.test.js +343 -0
- package/test/uninstall.test.js +402 -0
package/bin/cli.js
CHANGED
|
@@ -35,6 +35,8 @@ const fs = require('fs');
|
|
|
35
35
|
const { execFileSync } = require('child_process');
|
|
36
36
|
const { installSkill, uninstallSkill, getStatus, listSkills, listAgents, updateProjectContextRepoLayout } = require('../lib/installer');
|
|
37
37
|
const { getProfile, setProfile, resolveProfile } = require('../lib/profile');
|
|
38
|
+
const { reconfigure: runReconfigure, ReconfigureYesRejectedError, ManifestNotFoundError, RoomodesSlugDivergenceError } = require('../lib/reconfigure');
|
|
39
|
+
const { uninstallProfileArtifacts } = require('../lib/uninstall');
|
|
38
40
|
const bmad = require('../lib/bmad');
|
|
39
41
|
const { getBmadPlatformCode } = require('../lib/agents');
|
|
40
42
|
const { handleCreateSkill, handleValidateSkill, handleSetMandatory, handleCustomizeAgent, handleCreateAgent } = require('../lib/skill-authoring');
|
|
@@ -92,6 +94,7 @@ ${chalk.bold('Usage:')}
|
|
|
92
94
|
${chalk.cyan(`npx ${NAME} install`)} Interactive install wizard
|
|
93
95
|
${chalk.cyan(`npx ${NAME} install`)} <skill> <agents...> Install directly
|
|
94
96
|
${chalk.cyan(`npx ${NAME} uninstall`)} <skill> <agents...> Uninstall a skill
|
|
97
|
+
${chalk.cyan(`npx ${NAME} uninstall --profile-artifacts`)} Remove ma-agents-owned profile content from all injection files
|
|
95
98
|
${chalk.cyan(`npx ${NAME} status`)} Show installed skills
|
|
96
99
|
${chalk.cyan(`npx ${NAME} list`)} List available skills
|
|
97
100
|
${chalk.cyan(`npx ${NAME} agents`)} List supported agents
|
|
@@ -100,6 +103,7 @@ ${chalk.bold('Usage:')}
|
|
|
100
103
|
${chalk.cyan(`npx ${NAME} set-mandatory`)} <name> [--off] Mark a skill as always-load (or remove)
|
|
101
104
|
${chalk.cyan(`npx ${NAME} customize-agent`)} <agent> Customize a BMAD agent persona and actions
|
|
102
105
|
${chalk.cyan(`npx ${NAME} create-agent`)} <name> Create a new specialized BMAD agent
|
|
106
|
+
${chalk.cyan(`npx ${NAME} reconfigure`)} Re-run the profile prompt and re-stamp artifacts
|
|
103
107
|
${chalk.cyan(`npx ${NAME} config layout`)} Reconfigure repository layout
|
|
104
108
|
${chalk.cyan(`npx ${NAME} config layout --show`)} Show current layout (read-only)
|
|
105
109
|
${chalk.cyan(`npx ${NAME} help`)} Show this help
|
|
@@ -112,6 +116,18 @@ ${chalk.bold('Install options:')}
|
|
|
112
116
|
${chalk.cyan('--agent <name>')} Target a specific agent (skip agent selection)
|
|
113
117
|
${chalk.cyan('--log')} Log all console output to install_<datetime>.log
|
|
114
118
|
|
|
119
|
+
${chalk.bold('Reconfigure options:')}
|
|
120
|
+
${chalk.cyan('--force-roomodes-overwrite')} Accept slug-stomp on user-edited ma-agents slugs
|
|
121
|
+
${chalk.gray('(reconfigure rejects --yes by design — it is interactive only)')}
|
|
122
|
+
|
|
123
|
+
${chalk.bold('Uninstall --profile-artifacts options:')}
|
|
124
|
+
${chalk.cyan('--yes')} Skip confirmation prompt (supported for CI decommissioning)
|
|
125
|
+
${chalk.gray('(asymmetry note: uninstall --profile-artifacts accepts --yes; reconfigure does not)')}
|
|
126
|
+
|
|
127
|
+
${chalk.bold('Notes:')}
|
|
128
|
+
Divergent \`.cline/clinerules.md\` vs. \`.clinerules\` marker blocks require
|
|
129
|
+
manual reconciliation — \`--yes\` does not bypass this check.
|
|
130
|
+
|
|
115
131
|
${chalk.bold('Examples:')}
|
|
116
132
|
npx ${NAME} install
|
|
117
133
|
npx ${NAME} install code-review claude-code
|
|
@@ -1197,6 +1213,20 @@ async function handleInstall(args) {
|
|
|
1197
1213
|
}
|
|
1198
1214
|
|
|
1199
1215
|
async function handleUninstall(args) {
|
|
1216
|
+
// Story 21.11 — if --profile-artifacts flag is present, route to uninstallProfileArtifacts.
|
|
1217
|
+
// --yes IS supported for profile-artifact uninstall (CI decommissioning use-case).
|
|
1218
|
+
// This is an intentional asymmetry vs. `reconfigure` which REJECTS --yes.
|
|
1219
|
+
if (args.includes('--profile-artifacts')) {
|
|
1220
|
+
const yesFlag = args.includes('--yes');
|
|
1221
|
+
try {
|
|
1222
|
+
await uninstallProfileArtifacts(process.cwd(), { yes: yesFlag });
|
|
1223
|
+
} catch (error) {
|
|
1224
|
+
console.error(chalk.red('\n Profile artifact uninstall failed:'), error.message);
|
|
1225
|
+
process.exit(1);
|
|
1226
|
+
}
|
|
1227
|
+
return;
|
|
1228
|
+
}
|
|
1229
|
+
|
|
1200
1230
|
const { globalFlag, customPath, scope, positional } = parseFlags(args);
|
|
1201
1231
|
|
|
1202
1232
|
const skillId = positional[0];
|
|
@@ -1281,6 +1311,32 @@ async function handleConfigLayout(args) {
|
|
|
1281
1311
|
console.log(chalk.bold.green('\n Layout reconfigured!\n'));
|
|
1282
1312
|
}
|
|
1283
1313
|
|
|
1314
|
+
// --- Profile reconfigure (Story 21.10) ---
|
|
1315
|
+
async function handleReconfigure(args) {
|
|
1316
|
+
try {
|
|
1317
|
+
await runReconfigure({ projectRoot: process.cwd(), argv: args });
|
|
1318
|
+
} catch (err) {
|
|
1319
|
+
if (err instanceof ReconfigureYesRejectedError) {
|
|
1320
|
+
console.error(chalk.red(err.message));
|
|
1321
|
+
process.exit(1);
|
|
1322
|
+
}
|
|
1323
|
+
if (err instanceof ManifestNotFoundError) {
|
|
1324
|
+
console.error(chalk.red(err.message));
|
|
1325
|
+
console.error(chalk.gray(' Hint: run `npx ma-agents install` before `reconfigure`.'));
|
|
1326
|
+
process.exit(1);
|
|
1327
|
+
}
|
|
1328
|
+
if (err instanceof RoomodesSlugDivergenceError) {
|
|
1329
|
+
console.error(chalk.red(err.message));
|
|
1330
|
+
process.exit(1);
|
|
1331
|
+
}
|
|
1332
|
+
if (err && err.name === 'ClinerulesDualFileDriftError') {
|
|
1333
|
+
console.error(chalk.red(err.message));
|
|
1334
|
+
process.exit(1);
|
|
1335
|
+
}
|
|
1336
|
+
throw err;
|
|
1337
|
+
}
|
|
1338
|
+
}
|
|
1339
|
+
|
|
1284
1340
|
// --- Interactive mode ---
|
|
1285
1341
|
|
|
1286
1342
|
async function interactiveMode() {
|
|
@@ -1385,6 +1441,9 @@ async function main() {
|
|
|
1385
1441
|
process.exit(1);
|
|
1386
1442
|
}
|
|
1387
1443
|
break;
|
|
1444
|
+
case 'reconfigure':
|
|
1445
|
+
await handleReconfigure(args.slice(1));
|
|
1446
|
+
break;
|
|
1388
1447
|
case 'help':
|
|
1389
1448
|
case '--help':
|
|
1390
1449
|
case '-h':
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# vLLM Deployment Reference: Nemotron Super 49B
|
|
2
|
+
|
|
3
|
+
This document covers the recommended vLLM server configuration for running **Nvidia Nemotron Super 49B v1** (or similar Nemotron-class reasoning models) as the local LLM backend for `ma-agents` on-prem installations.
|
|
4
|
+
|
|
5
|
+
> **Scope:** This document is reference documentation for the human operator who runs the vLLM inference server. It is not stamped into target projects by the `ma-agents` installer (FR179). Agent-side guardrails are delivered by the on-prem instruction block (`lib/templates/instruction-block-onprem.template.md`) installed into each project's agent files.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Recommended vLLM Flags
|
|
10
|
+
|
|
11
|
+
All five flags below are recommended for Nemotron Super 49B. Omitting any of them will degrade either correctness or stability.
|
|
12
|
+
|
|
13
|
+
### `--enable-auto-tool-choice`
|
|
14
|
+
|
|
15
|
+
Enables automatic tool/function call parsing for supported models. Without this flag, vLLM treats model output as plain text and never extracts structured tool calls, causing the agent runtime to receive raw JSON embedded in prose rather than a parsed tool invocation. Required for any coding agent workflow that relies on file-editing or shell-execution tools.
|
|
16
|
+
|
|
17
|
+
### `--tool-call-parser qwen3_coder`
|
|
18
|
+
|
|
19
|
+
Selects the Qwen3-Coder-compatible parser that correctly extracts structured tool calls from Nemotron's output format. Nemotron Super 49B uses the same tool-call serialization convention as the Qwen3-Coder family. Using the wrong parser (e.g., `hermes`) causes malformed or silently dropped tool calls.
|
|
20
|
+
|
|
21
|
+
### `--max-model-len 32768`
|
|
22
|
+
|
|
23
|
+
Caps the context window at 32K tokens. This balances the KV-cache VRAM budget against coding task context requirements — sufficient for full repository context plus long multi-turn chats. Without an explicit cap, vLLM allocates KV-cache for the model's maximum trained length (often 128K+), exhausting VRAM before the first request completes on single-GPU configurations.
|
|
24
|
+
|
|
25
|
+
### `--enforce-eager`
|
|
26
|
+
|
|
27
|
+
Disables CUDA graph capture. This eliminates the first-request latency spike (CUDA graph warm-up can take 30–90 seconds on large models) and resolves some multi-turn inference stability issues observed with Nemotron-class models. The trade-off is slightly lower sustained throughput on back-to-back short requests. For interactive coding agent use where latency-per-request matters more than throughput, `--enforce-eager` is the correct default.
|
|
28
|
+
|
|
29
|
+
### `--trust-remote-code`
|
|
30
|
+
|
|
31
|
+
Required by the Nemotron Super 49B model class to load custom model code from the HuggingFace snapshot. Nemotron ships custom attention and RoPE scaling implementations that are not yet merged into the vLLM mainline. **Only set this flag when the model source is trusted** — for Nvidia's official HuggingFace repository (`nvidia/Nemotron-Super-49B-v1`) this is safe; for third-party forks, verify the model code before enabling.
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Sample `vllm serve` Command
|
|
36
|
+
|
|
37
|
+
Copy-paste-runnable launch command composing all recommended flags:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
vllm serve nvidia/Nemotron-Super-49B-v1 \
|
|
41
|
+
--enable-auto-tool-choice \
|
|
42
|
+
--tool-call-parser qwen3_coder \
|
|
43
|
+
--max-model-len 32768 \
|
|
44
|
+
--enforce-eager \
|
|
45
|
+
--trust-remote-code \
|
|
46
|
+
--dtype bfloat16
|
|
47
|
+
# Per-request sampling parameters are set client-side (see sampling table below):
|
|
48
|
+
# Planning phase: temperature=0.0, top_p=1.0
|
|
49
|
+
# Implementation phase: temperature=0.6, top_p=0.95
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
> **Note on `--dtype`:** Use `bfloat16` for full-precision inference (requires 2× H100 80GB). For single-GPU deployments, substitute with `--quantization fp8` or `--quantization nvfp4` — see the quantization table below. Remove `--dtype bfloat16` when using a quantization flag, as they are mutually exclusive.
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Quantization Tradeoffs
|
|
57
|
+
|
|
58
|
+
| Quantization | Approx. VRAM (49B, single H100 80GB) | Instruction-Following Quality |
|
|
59
|
+
|---|---|---|
|
|
60
|
+
| BF16 | ~98 GB (requires 2× H100) | Best — full precision weights |
|
|
61
|
+
| FP8 | ~49 GB (fits 1× H100 80GB) | Near-BF16 — minor degradation on complex reasoning |
|
|
62
|
+
| NVFP4 | ~25 GB (fits 1× A100 40GB) | Acceptable — recommended starting point for constrained hardware; test instruction-following quality for your use-case |
|
|
63
|
+
|
|
64
|
+
> **Assumptions:** VRAM figures assume no KV-cache at model load time with `--max-model-len 32768`. Actual VRAM usage is higher at inference as KV-cache fills. Tensor-parallel configurations (e.g., `--tensor-parallel-size 2`) can distribute the model across multiple GPUs, enabling BF16 on 2× H100 80GB.
|
|
65
|
+
|
|
66
|
+
**Recommended starting point for constrained hardware:** NVFP4 on a single A100 40GB. Validate instruction-following quality for your specific task mix before using in production — planning-phase prompts (deterministic, `/no_think`) are typically less sensitive to quantization than implementation-phase prompts (reasoning ON).
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Reasoning Mode and `/no_think`
|
|
71
|
+
|
|
72
|
+
Nemotron Super 49B is a reasoning model. **Thinking mode is ON by default** — the model prefixes its response with an extended internal monologue (`<think>...</think>`) before producing its answer. For coding tasks, this improves output quality at the cost of higher token consumption and latency.
|
|
73
|
+
|
|
74
|
+
### Disabling Reasoning for Planning Personas
|
|
75
|
+
|
|
76
|
+
Planning-phase agents (PM, Architect, Scrum Master, Analyst, Tech Writer, UX, QA) produce structured documents where extended reasoning adds latency without improving output quality — and where deterministic output (`temperature=0.0`) is preferred. To disable reasoning for these personas, prepend `/no_think` to the system prompt.
|
|
77
|
+
|
|
78
|
+
The `ma-agents` on-prem installer delivers `/no_think` in two places:
|
|
79
|
+
|
|
80
|
+
1. **Per-agent system prompt prefix** — the BMAD persona phase-aware prefix (Story 21.7) prepends `/no_think` to the system prompt of all planning-phase personas at install time.
|
|
81
|
+
2. **Per-tool instruction blocks** — `lib/templates/instruction-block-onprem.template.md` also injects `/no_think` into the per-tool instruction blocks stamped into `CLAUDE.md`, `.roo/rules/`, `.clinerules`, and `AGENTS.md` for planning-mode agents.
|
|
82
|
+
|
|
83
|
+
### Retaining Reasoning for Implementation Personas
|
|
84
|
+
|
|
85
|
+
Reasoning mode is intentionally **left ON** for implementation personas (`bmm-dev`, `bmm-quick-flow-solo-dev`). The extended chain-of-thought improves code generation quality, catches edge cases, and produces more careful reasoning about algorithmic choices. The higher token cost is accepted for implementation tasks.
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Per-Phase Sampling Parameters
|
|
90
|
+
|
|
91
|
+
Sampling parameters are set client-side on each request (not in the `vllm serve` command). Configure your agent client or OpenAI-compatible API call accordingly.
|
|
92
|
+
|
|
93
|
+
| Phase | Temperature | top_p | Notes |
|
|
94
|
+
|---|---|---|---|
|
|
95
|
+
| Planning (PM, Architect, SM, Analyst, Tech Writer, UX, QA) | 0.0 | 1.0 | Deterministic — avoids hallucinated artifacts; use with `/no_think` |
|
|
96
|
+
| Implementation (Dev, Quick-Flow-Solo-Dev) | 0.6 | 0.95 | Allows creative solutions; reasoning ON |
|
|
97
|
+
|
|
98
|
+
> **Why `temperature=0.0` for planning?** Planning artifacts (PRDs, architecture docs, stories) must be reproducible across re-runs. Non-deterministic output at planning phase leads to artifact drift between sessions, which breaks BMAD-METHOD's assumption that prior context documents are stable. Setting `temperature=0.0` makes the model greedy and fully deterministic given the same prompt.
|
|
99
|
+
|
|
100
|
+
> **Why `temperature=0.6` for implementation?** Implementation tasks benefit from creative exploration — slightly non-zero temperature allows the model to consider multiple candidate approaches rather than always taking the locally-greedy token path. Combined with reasoning ON, this produces higher-quality code than either `temperature=0.0` or pure sampling alone.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## `str_replace_editor` Hallucination Warning
|
|
105
|
+
|
|
106
|
+
### Failure Mode
|
|
107
|
+
|
|
108
|
+
Local LLMs — including Nemotron-class models — may hallucinate invocations of `str_replace_editor`. This tool exists only in Claude Code (Anthropic's hosted Claude environment) and is not available in any locally-hosted LLM agent runtime. When a local model generates a `str_replace_editor` tool call:
|
|
109
|
+
|
|
110
|
+
- The agent runtime fails silently or returns a tool-not-found error.
|
|
111
|
+
- No actual file is edited.
|
|
112
|
+
- The model may then proceed as if the edit succeeded, producing divergent state between the model's internal belief and the actual file system.
|
|
113
|
+
- Downstream tool calls (e.g., `read_file`, `bash`) may then operate on stale content, compounding the error.
|
|
114
|
+
|
|
115
|
+
This failure mode is insidious because it is often silent — the model does not retry, and the user may not notice until the generated code is tested.
|
|
116
|
+
|
|
117
|
+
### Mitigations Delivered by `ma-agents`
|
|
118
|
+
|
|
119
|
+
`ma-agents` addresses this at two layers:
|
|
120
|
+
|
|
121
|
+
**Layer 1 — Instruction-block prohibition (Story 21.6)**
|
|
122
|
+
|
|
123
|
+
The on-prem instruction block (`lib/templates/instruction-block-onprem.template.md`) explicitly forbids invoking `str_replace_editor` in the per-tool rules injected into every agent's instruction file (`CLAUDE.md`, `.roo/rules/`, `.clinerules`, `AGENTS.md`). This rule is present in every on-prem agent installation and is re-stamped on each `ma-agents` update.
|
|
124
|
+
|
|
125
|
+
**Layer 2 — Application-layer path restriction (Stories 21.3, 21.4)**
|
|
126
|
+
|
|
127
|
+
- **Roo Code:** `.roomodes` `fileRegex` patterns (Story 21.3) constrain planning personas to documentation-only paths (e.g., `_bmad-output/**`, `docs/**`). Even if a planning persona hallucinates a `str_replace_editor` call targeting a source file, the Roo Code application layer will block the tool call before it reaches the filesystem.
|
|
128
|
+
- **OpenCode / AGENTS.md:** Story 21.4 similarly scopes planning personas in `AGENTS.md` to documentation paths, reducing the surface where a misrouted tool call could corrupt code files.
|
|
129
|
+
|
|
130
|
+
> **Operator action required:** Neither mitigation eliminates the underlying model behavior. Monitor agent session logs for `str_replace_editor` in model outputs — occurrences indicate the model is operating outside its instruction-following envelope and may require prompt tuning or a different quantization level.
|
package/lib/agents.js
CHANGED
|
@@ -159,7 +159,13 @@ const agents = [
|
|
|
159
159
|
template: 'generic',
|
|
160
160
|
bmadPlatformCode: 'roo',
|
|
161
161
|
instructionFiles: ['.roo/rules/00-ma-agents.md'],
|
|
162
|
-
injectionStrategy: { position: 'top', skipPatterns: ['---'] }
|
|
162
|
+
injectionStrategy: { position: 'top', skipPatterns: ['---'] },
|
|
163
|
+
// Story 21.3 AC #3 — extra YAML template stamped at project root so Roo
|
|
164
|
+
// Code's FileRestrictionError (NFR47) gates BMAD-mode edits at the IDE
|
|
165
|
+
// layer. Processed by applyExtraInstructionTemplates in lib/installer.js.
|
|
166
|
+
extraInstructionTemplates: [
|
|
167
|
+
{ template: 'roomodes.template.yaml', target: '.roomodes', merger: 'yaml-customModes' }
|
|
168
|
+
]
|
|
163
169
|
},
|
|
164
170
|
{
|
|
165
171
|
id: 'cursor',
|
|
@@ -242,7 +248,16 @@ const agents = [
|
|
|
242
248
|
fileExtension: '.md',
|
|
243
249
|
template: 'generic',
|
|
244
250
|
instructionFiles: ['opencode.json'],
|
|
245
|
-
injectionStrategy: { position: 'json-merge', targetKey: 'instructions' }
|
|
251
|
+
injectionStrategy: { position: 'json-merge', targetKey: 'instructions' },
|
|
252
|
+
// Story 21.4 — OpenCode auto-loads AGENTS.md when the file is listed in
|
|
253
|
+
// opencode.json::instructions[]. The extraInstructionTemplates field names
|
|
254
|
+
// the template shipped at lib/templates/<template>, the target path
|
|
255
|
+
// (relative to project root) to stamp, and the merger strategy. Processed
|
|
256
|
+
// by updateAgentInstructions in lib/installer.js in addition to the JSON
|
|
257
|
+
// merge above.
|
|
258
|
+
extraInstructionTemplates: [
|
|
259
|
+
{ template: 'agents-md.template.md', target: 'AGENTS.md', merger: 'markdown-markers' }
|
|
260
|
+
]
|
|
246
261
|
},
|
|
247
262
|
{
|
|
248
263
|
id: 'bmm-devops',
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# MA-AGENTS: skill enforcement for bmm-analyst
|
|
2
|
+
phase: planning
|
|
3
|
+
on_prem_phase_prefix: "/no_think You are in a planning phase — respond in text for questions; create files only when explicitly asked."
|
|
4
|
+
critical_actions:
|
|
5
|
+
- "Read the skills MANIFEST at {project-root}/skills/MANIFEST.yaml"
|
|
6
|
+
- "For each skill marked always_load: true, read the skill file completely"
|
|
7
|
+
- "If _bmad-output/project-context.md exists, read it completely"
|
|
8
|
+
- "Follow all skill directives and project-context rules during this session"
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
# MA-AGENTS: skill enforcement for bmm-architect
|
|
2
|
+
phase: planning
|
|
3
|
+
on_prem_phase_prefix: "/no_think You are in a planning phase — respond in text for questions; create files only when explicitly asked."
|
|
2
4
|
critical_actions:
|
|
3
5
|
- "Read the skills MANIFEST at {project-root}/skills/MANIFEST.yaml"
|
|
4
6
|
- "For each skill marked always_load: true, read the skill file completely"
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
# MA-AGENTS: skill enforcement for bmm-dev
|
|
2
|
+
phase: implementation
|
|
3
|
+
on_prem_phase_prefix: "Think carefully before writing code. Always reference the story you are implementing before making edits."
|
|
2
4
|
critical_actions:
|
|
3
5
|
- "Read the skills MANIFEST at {project-root}/skills/MANIFEST.yaml"
|
|
4
6
|
- "For each skill marked always_load: true, read the skill file completely"
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
# MA-AGENTS: skill enforcement for bmm-pm
|
|
2
|
+
phase: planning
|
|
3
|
+
on_prem_phase_prefix: "/no_think You are in a planning phase — respond in text for questions; create files only when explicitly asked."
|
|
2
4
|
critical_actions:
|
|
3
5
|
- "Read the skills MANIFEST at {project-root}/skills/MANIFEST.yaml"
|
|
4
6
|
- "For each skill marked always_load: true, read the skill file completely"
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# MA-AGENTS: skill enforcement for bmm-quick-flow-solo-dev
|
|
2
|
+
phase: implementation
|
|
3
|
+
on_prem_phase_prefix: "Think carefully before writing code. Always reference the story you are implementing before making edits."
|
|
4
|
+
critical_actions:
|
|
5
|
+
- "Read the skills MANIFEST at {project-root}/skills/MANIFEST.yaml"
|
|
6
|
+
- "For each skill marked always_load: true, read the skill file completely"
|
|
7
|
+
- "If _bmad-output/project-context.md exists, read it completely"
|
|
8
|
+
- "Follow all skill directives and project-context rules during this session"
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
# MA-AGENTS: skill enforcement for bmm-sm
|
|
2
|
+
phase: planning
|
|
3
|
+
on_prem_phase_prefix: "/no_think You are in a planning phase — respond in text for questions; create files only when explicitly asked."
|
|
2
4
|
critical_actions:
|
|
3
5
|
- "Read the skills MANIFEST at {project-root}/skills/MANIFEST.yaml"
|
|
4
6
|
- "For each skill marked always_load: true, read the skill file completely"
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
# MA-AGENTS: skill enforcement for bmm-tech-writer
|
|
2
|
+
phase: planning
|
|
3
|
+
on_prem_phase_prefix: "/no_think You are in a planning phase — respond in text for questions; create files only when explicitly asked."
|
|
2
4
|
critical_actions:
|
|
3
5
|
- "Read the skills MANIFEST at {project-root}/skills/MANIFEST.yaml"
|
|
4
6
|
- "For each skill marked always_load: true, read the skill file completely"
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
# MA-AGENTS: skill enforcement for bmm-ux-designer
|
|
2
|
+
phase: planning
|
|
3
|
+
on_prem_phase_prefix: "/no_think You are in a planning phase — respond in text for questions; create files only when explicitly asked."
|
|
2
4
|
critical_actions:
|
|
3
5
|
- "Read the skills MANIFEST at {project-root}/skills/MANIFEST.yaml"
|
|
4
6
|
- "For each skill marked always_load: true, read the skill file completely"
|