npm - compound-agent - Versions diffs - 1.4.3 → 1.4.4 - Mend

compound-agent 1.4.3 → 1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/CHANGELOG.md +39 -1
package/dist/cli.js +543 -49
package/dist/cli.js.map +1 -1
package/dist/index.d.ts +1 -1
package/dist/index.js +34 -5
package/dist/index.js.map +1 -1
package/docs/research/index.md +12 -0
package/docs/research/security/auth-patterns.md +138 -0
package/docs/research/security/data-exposure.md +185 -0
package/docs/research/security/dependency-security.md +91 -0
package/docs/research/security/injection-patterns.md +249 -0
package/docs/research/security/overview.md +81 -0
package/docs/research/security/secrets-checklist.md +92 -0
package/docs/research/security/secure-coding-failure.md +297 -0
package/package.json +3 -1
package/scripts/postinstall.mjs +102 -0

package/dist/cli.js CHANGED

@@ -6,7 +6,7 @@ import { createHash } from 'crypto';
 import * as fs from 'fs/promises';
 import { readFile, mkdir, appendFile, writeFile, chmod, rm, rename, readdir } from 'fs/promises';
 import { Command } from 'commander';
-import { getLlama, resolveModelFile } from 'node-llama-cpp';
+import { getLlama, LlamaLogLevel, resolveModelFile } from 'node-llama-cpp';
 import { homedir, tmpdir } from 'os';
 import { z } from 'zod';
 import { execSync, execFileSync, spawn } from 'child_process';
@@ -584,7 +584,15 @@ async function isModelUsable() {
   let context = null;
   try {
     const modelPath = join(DEFAULT_MODEL_DIR, MODEL_FILENAME);
-    llama = await getLlama();
+    llama = await getLlama({
+      build: "never",
+      // Never compile from source in a deployed tool
+      progressLogs: false,
+      // Suppress prebuilt binary fallback warnings
+      logLevel: LlamaLogLevel.error
+      // Only surface real errors from C++ backend
+      // Set NODE_LLAMA_CPP_DEBUG=true to re-enable all output for troubleshooting
+    });
     model = await llama.loadModel({ modelPath });
     context = await model.createEmbeddingContext();
     cachedUsability = { usable: true };
@@ -600,7 +608,19 @@ async function isModelUsable() {
   } finally {
     if (context) {
       try {
-        context.dispose();
+        await context.dispose();
+      } catch {
+      }
+    }
+    if (model) {
+      try {
+        await model.dispose();
+      } catch {
+      }
+    }
+    if (llama) {
+      try {
+        await llama.dispose();
       } catch {
       }
     }
@@ -622,7 +642,15 @@ async function getEmbedding() {
   pendingInit = (async () => {
     try {
       const modelPath = await resolveModel({ cli: true });
-      llamaInstance = await getLlama();
+      llamaInstance = await getLlama({
+        build: "never",
+        // Never compile from source in a deployed tool
+        progressLogs: false,
+        // Suppress prebuilt binary fallback warnings
+        logLevel: LlamaLogLevel.error
+        // Only surface real errors from C++ backend
+        // Set NODE_LLAMA_CPP_DEBUG=true to re-enable all output for troubleshooting
+      });
       modelInstance = await llamaInstance.loadModel({ modelPath });
       embeddingContext = await modelInstance.createEmbeddingContext();
       return embeddingContext;
@@ -4869,39 +4897,59 @@ Return a list of relevant memory items:
 `,
   "security-reviewer": `---
 name: Security Reviewer
-description: Reviews code for security vulnerabilities
+description: Mandatory core-4 reviewer with P0-P3 severity classification and specialist escalation
 ---
 # Security Reviewer
 ## Role
-Review code changes for security vulnerabilities including OWASP top 10, injection attacks, authentication issues, and data exposure risks.
+Mandatory core-4 reviewer responsible for identifying security vulnerabilities using P0-P3 severity classification. Has authority to escalate findings to specialist security skills for deep analysis.
 ## Instructions
-1. Read the changed files completely
-2. Check for injection vulnerabilities (SQL, command, XSS)
-3. Verify input validation and sanitization
-4. Review authentication and authorization logic
-5. Check for hardcoded secrets or credentials
-6. Verify error messages do not leak sensitive info
-7. Check dependency versions for known CVEs
-8. For large diffs, spawn opus subagents to review different file groups in parallel (e.g., 1 per module). Merge findings and deduplicate.
+1. Read \`docs/compound/research/security/overview.md\` for severity classification and escalation triggers
+2. Read all changed files completely, focusing on:
+   - Input handling and data flow to interpreters (SQL, shell, HTML, templates)
+   - Secrets and credential management
+   - Authentication and authorization enforcement
+   - Logging and error handling for data exposure
+   - Dependency changes in lockfiles or manifests
+3. Classify each finding using P0-P3 severity:
+   - **P0**: Unauthenticated RCE, credential compromise, unauth data access (blocks merge)
+   - **P1**: Authenticated exploit, limited data breach, missing auth on sensitive routes (requires ack)
+   - **P2**: Medium impact, harder to exploit, missing hardening (should fix)
+   - **P3**: Best practice, defense in depth, code hygiene (nice to have)
+4. Escalate to specialist skills when deep analysis needed:
+   - SQL/command concat or template interpolation -> \`/security-injection\`
+   - Hardcoded strings matching key patterns, committed .env files -> \`/security-secrets\`
+   - Route handlers missing auth middleware, IDOR patterns -> \`/security-auth\`
+   - Logging calls with request objects, verbose error responses -> \`/security-data\`
+   - Lockfile changes, new dependencies, postinstall scripts -> \`/security-deps\`
+5. For large diffs, spawn opus subagents to review different file groups in parallel. Merge findings and deduplicate.
 ## Literature
-- Consult \`docs/compound/research/code-review/\` for systematic review methodology and severity classification
+- Consult \`docs/compound/research/security/overview.md\` for severity classification and OWASP mapping
+- Consult \`docs/compound/research/security/injection-patterns.md\` for injection detection heuristics
+- Consult \`docs/compound/research/security/secrets-checklist.md\` for secret format patterns
+- Consult \`docs/compound/research/security/auth-patterns.md\` for auth/authz audit methodology
+- Consult \`docs/compound/research/security/data-exposure.md\` for data leak detection
+- Consult \`docs/compound/research/security/dependency-security.md\` for dependency risk assessment
+- Consult \`docs/compound/research/security/secure-coding-failure.md\` for full theoretical foundation
 - Run \`npx ca knowledge "security review OWASP"\` for indexed security knowledge
 ## Collaboration
-Share cross-cutting findings via SendMessage: security issues impacting architecture go to architecture-reviewer; secrets in test fixtures go to test-coverage-reviewer.
+Share cross-cutting findings via SendMessage: security issues impacting architecture go to architecture-reviewer; secrets in test fixtures go to test-coverage-reviewer. Escalate to specialist skills via SendMessage when deep analysis needed.
 ## Deployment
 AgentTeam member in the **review** phase. Spawned via TeamCreate. Communicate with teammates via SendMessage.
 ## Output Format
-Return findings as:
-- **CRITICAL**: Must fix before merge
-- **WARNING**: Should fix, potential risk
-- **INFO**: Best practice suggestion
+Return findings classified by severity:
+- **P0** (BLOCKS MERGE): Must fix before merge, no exceptions
+- **P1** (REQUIRES ACK): Must acknowledge or fix before merge
+- **P2** (SHOULD FIX): Should fix, create beads issue if deferred
+- **P3** (NICE TO HAVE): Best practice suggestion, non-blocking
+If no findings at any severity: return "SECURITY REVIEW: CLEAR -- No findings at any severity level."
 `,
   "architecture-reviewer": `---
 name: Architecture Reviewer
@@ -5040,6 +5088,266 @@ AgentTeam member in the **review** phase. Spawned via TeamCreate. Communicate wi
 - **OVER-ENGINEERED**: Simpler solution exists
 - **YAGNI**: Feature not needed yet
 - **OK**: Appropriate complexity for the task
+`,
+  "security-injection": `---
+name: Security Injection Specialist
+description: Deep trace analysis for SQL, command, XSS, SSRF, and SSTI injection vulnerabilities
+---
+# Security Injection Specialist
+## Role
+On-demand specialist for deep injection vulnerability analysis. Traces data flow from untrusted input sources to interpreter sinks (SQL engines, shells, browsers, template engines, HTTP clients).
+## Instructions
+1. Read \`docs/compound/research/security/injection-patterns.md\` for detection heuristics and safe/unsafe patterns
+2. For each changed file, identify:
+   - **Input sources**: request params, body fields, headers, query strings, URL params, environment variables
+   - **Interpreter sinks**: SQL queries, shell commands, HTML output, template rendering, outbound HTTP requests
+3. Trace data flow from each source to each sink:
+   - Direct concatenation or template interpolation into sink -> P0/P1
+   - Flow through sanitization/validation before sink -> check if sanitization is adequate
+   - Parameterized/prepared statement usage -> safe, note as OK
+4. Classify by injection type:
+   - **SQL** (survey 4.1): \`db.query\` with template literals, f-strings in queries, raw SQL with string concat
+   - **Command** (survey 4.2): \`exec\`, \`system\`, \`popen\` with user input, \`shell=True\` with untrusted args
+   - **XSS** (survey 4.3): \`innerHTML\`, \`dangerouslySetInnerHTML\`, \`v-html\`, \`| safe\` filter on user input
+   - **SSRF** (survey 4.4): \`axios.get(userUrl)\`, \`requests.get(userUrl)\`, fetch with user-controlled URL
+   - **SSTI** (survey 4.5): \`Template(userString)\`, \`render_template_string(userInput)\`
+5. For large diffs, spawn opus subagents to trace different file groups in parallel. Merge findings.
+## Literature
+- Consult \`docs/compound/research/security/injection-patterns.md\` for unsafe/safe pattern pairs and detection heuristics
+- Consult \`docs/compound/research/security/secure-coding-failure.md\` sections 4.1-4.5 for theoretical foundation
+- Run \`npx ca knowledge "injection SQL command XSS SSRF SSTI"\` for indexed knowledge
+## Collaboration
+Report findings to security-reviewer via SendMessage with severity classification. Flag architecture-level injection risks (e.g., missing parameterization layer) to architecture-reviewer.
+## Deployment
+On-demand AgentTeam member in the **review** phase. Spawned by security-reviewer when injection patterns detected. Communicate with teammates via SendMessage.
+## Output Format
+Per finding:
+- **Type**: SQL / Command / XSS / SSRF / SSTI
+- **Severity**: P0-P3
+- **File:Line**: Location
+- **Source**: Where untrusted data enters
+- **Sink**: Where it reaches an interpreter
+- **Flow**: Brief trace description
+- **Fix**: Recommended safe pattern
+If no findings: return "INJECTION REVIEW: CLEAR -- No injection patterns found."
+For large diffs (500+ lines): prioritize files with interpreter sinks over pure data/config files.
+`,
+  "security-secrets": `---
+name: Security Secrets Specialist
+description: Credential and secrets scanning using pattern matching, entropy analysis, and git history checks
+---
+# Security Secrets Specialist
+## Role
+On-demand specialist for detecting hardcoded credentials, leaked secrets, and improper secret management in code and configuration.
+## Instructions
+1. Read \`docs/compound/research/security/secrets-checklist.md\` for key format patterns and detection heuristics
+2. Scan changed files for:
+   - **Variable name patterns**: password, secret, token, apiKey, api_key, auth, credential, private_key, connection_string
+   - **Known key formats**: AWS \`AKIA[0-9A-Z]{16}\`, GitHub \`ghp_[a-zA-Z0-9]{36}\`, Slack \`xoxb-\`/\`xoxp-\`, JWT signatures
+   - **High-entropy strings**: 20+ character strings with mixed case, digits, and special chars in assignment context
+3. Check for common hiding spots:
+   - Committed \`.env\` files or \`.env.local\` without gitignore
+   - Docker files with \`ENV SECRET=\` or \`ARG PASSWORD=\`
+   - CI config files (\`.github/workflows/\`, \`.gitlab-ci.yml\`) with inline secrets
+   - Test fixtures that use real-looking credentials instead of obvious fakes
+4. Check git history for previously committed secrets:
+   - \`git log --diff-filter=D -- '*.env'\` for deleted env files
+   - \`git log -p -- <file>\` for files that changed secret-like values
+5. Distinguish real secrets from safe patterns:
+   - Test fixtures prefixed with \`test_\`, \`fake_\`, \`mock_\` -> OK
+   - Placeholder values like \`YOUR_API_KEY_HERE\`, \`changeme\`, \`xxx\` -> OK
+   - Public keys (not private) -> OK
+   - Everything else -> flag for review
+## Literature
+- Consult \`docs/compound/research/security/secrets-checklist.md\` for format patterns and hiding spots
+- Consult \`docs/compound/research/security/secure-coding-failure.md\` section 4.6 for theoretical foundation
+- Run \`npx ca knowledge "secrets credentials hardcoded"\` for indexed knowledge
+## Collaboration
+Report findings to security-reviewer via SendMessage with severity classification. Flag secrets in test files to test-coverage-reviewer.
+## Deployment
+On-demand AgentTeam member in the **review** phase. Spawned by security-reviewer when secret patterns detected. Communicate with teammates via SendMessage.
+## Output Format
+Per finding:
+- **Severity**: P0 (real credential) / P1 (likely credential) / P2 (suspicious pattern) / P3 (missing .gitignore for secret files)
+- **File:Line**: Location
+- **Pattern**: What matched (variable name, key format, entropy)
+- **Value preview**: First/last 4 chars only (never full secret)
+- **Fix**: Use environment variable, secret manager, or .gitignore
+If no findings: return "SECRETS REVIEW: CLEAR -- No hardcoded secrets or credential patterns found."
+`,
+  "security-auth": `---
+name: Security Auth Specialist
+description: Route and endpoint audit for authentication, authorization, IDOR, JWT, and CORS vulnerabilities
+---
+# Security Auth Specialist
+## Role
+On-demand specialist for auditing authentication and authorization enforcement across routes, endpoints, and API handlers.
+## Instructions
+1. Read \`docs/compound/research/security/auth-patterns.md\` for common broken patterns and framework-specific checks
+2. Perform route audit:
+   - List all route/endpoint definitions in changed files
+   - For each route, verify auth middleware or guard is applied
+   - Flag routes that modify data (POST/PUT/DELETE) without auth
+   - Flag admin/privileged routes accessible without role checks
+3. Check for IDOR (Insecure Direct Object Reference):
+   - Find DB queries using user-supplied IDs from params/body
+   - Verify ownership checks exist (e.g., \`WHERE id = ? AND user_id = ?\`)
+   - Flag queries that fetch by ID alone without ownership verification
+4. Check JWT handling:
+   - Verify signature validation is not skipped
+   - Check for algorithm confusion vulnerabilities (\`alg: none\`)
+   - Verify expiry (\`exp\`) is checked
+   - Flag tokens stored in localStorage (prefer httpOnly cookies)
+5. Check CORS configuration:
+   - Flag \`Access-Control-Allow-Origin: *\` with credentials
+   - Flag overly permissive origin patterns
+   - Verify CORS is intentional and scoped appropriately
+6. Framework-specific checks:
+   - **Express/NestJS**: missing \`authMiddleware\`, missing \`@UseGuards()\`, routes outside auth scope
+   - **Django/FastAPI**: missing \`@login_required\`, missing \`Depends(get_current_user)\`, missing permission classes
+7. For non-web projects (CLI tools, libraries): limit scope to file permissions, API key handling, and privilege escalation
+## Literature
+- Consult \`docs/compound/research/security/auth-patterns.md\` for broken auth patterns and detection methodology
+- Consult \`docs/compound/research/security/secure-coding-failure.md\` section 4.7 for theoretical foundation
+- Run \`npx ca knowledge "authentication authorization IDOR"\` for indexed knowledge
+## Collaboration
+Report findings to security-reviewer via SendMessage with severity classification. Flag missing middleware patterns to architecture-reviewer.
+## Deployment
+On-demand AgentTeam member in the **review** phase. Spawned by security-reviewer when auth patterns need deep analysis. Communicate with teammates via SendMessage.
+## Output Format
+Per finding:
+- **Type**: Missing Auth / IDOR / Role Escalation / JWT / CORS
+- **Severity**: P0-P3
+- **File:Line**: Location
+- **Route/Endpoint**: The affected route
+- **Issue**: What is missing or broken
+- **Fix**: Specific middleware, guard, or check to add
+If no findings: return "AUTH REVIEW: CLEAR -- No authentication or authorization issues found."
+`,
+  "security-data": `---
+name: Security Data Specialist
+description: Audit for PII in logs, verbose error responses, sensitive data in URLs, and overly broad API responses
+---
+# Security Data Specialist
+## Role
+On-demand specialist for detecting sensitive data exposure through logging, error handling, URLs, and API responses.
+## Instructions
+1. Read \`docs/compound/research/security/data-exposure.md\` for exposure patterns and detection heuristics
+2. Audit logging calls:
+   - Flag \`console.log(req.body)\`, \`console.log(req.headers)\`, \`logger.info(user)\` -- unfiltered objects may contain passwords/tokens
+   - Flag logging of \`Authorization\` header values
+   - Flag logging of full error objects that may contain connection strings
+   - Check structured loggers for field-level filtering
+3. Audit error handlers:
+   - Flag \`res.status(500).json({ error: err.message })\` or \`err.stack\` sent to clients
+   - Flag DB connection strings, internal paths, or query details in error responses
+   - Verify production error handlers return generic messages
+4. Audit URLs and query parameters:
+   - Flag tokens, keys, or auth values in query strings (leaks via referrer, logs, browser history)
+   - Flag PII (email, name, SSN) in URL paths or query params
+   - Check redirect URLs for open redirect patterns
+5. Audit API responses:
+   - Flag endpoints returning full DB records instead of selected fields
+   - Flag responses containing \`password_hash\`, \`internal_id\`, \`secret\`, or similar internal fields
+   - Verify response serialization uses explicit field selection or DTOs
+## Literature
+- Consult \`docs/compound/research/security/data-exposure.md\` for exposure patterns and detection heuristics
+- Consult \`docs/compound/research/security/secure-coding-failure.md\` section 4.8 for theoretical foundation
+- Run \`npx ca knowledge "data exposure PII logging"\` for indexed knowledge
+## Collaboration
+Report findings to security-reviewer via SendMessage with severity classification. Flag logging architecture issues to architecture-reviewer.
+## Deployment
+On-demand AgentTeam member in the **review** phase. Spawned by security-reviewer when data exposure patterns detected. Communicate with teammates via SendMessage.
+## Output Format
+Per finding:
+- **Type**: PII in Logs / Verbose Error / URL Exposure / Broad API Response
+- **Severity**: P0 (credentials in logs/responses) / P1 (PII exposure) / P2 (internal details) / P3 (hardening)
+- **File:Line**: Location
+- **Data at risk**: What sensitive data is exposed
+- **Channel**: Log / Error response / URL / API response
+- **Fix**: Specific filtering, redaction, or restructuring needed
+If no findings: return "DATA EXPOSURE REVIEW: CLEAR -- No sensitive data exposure patterns found."
+`,
+  "security-deps": `---
+name: Security Deps Specialist
+description: Dependency audit for vulnerable packages, lockfile changes, postinstall scripts, and supply chain risks
+---
+# Security Deps Specialist
+## Role
+On-demand specialist for auditing dependency security, lockfile changes, and supply chain risks.
+## Instructions
+1. Read \`docs/compound/research/security/dependency-security.md\` for risk model and audit methodology
+2. Run audit tools on changed dependency files:
+   - **JS/TS**: \`pnpm audit\` or \`npm audit\` -- report critical and high vulnerabilities
+   - **Python**: \`pip-audit\` or \`safety check\` -- report known CVEs
+   - If audit tool is unavailable, note it and proceed with manual lockfile analysis
+3. Check lockfile changes (pnpm-lock.yaml, package-lock.json, poetry.lock, requirements.txt):
+   - **New direct deps**: Were they intentionally added? Check PR context
+   - **Version downgrades**: Suspicious -- may reintroduce vulnerabilities
+   - **New postinstall scripts**: Can execute arbitrary code during install
+   - **Removed integrity hashes**: May indicate tampering
+4. Evaluate new dependencies:
+   - Check maintenance status (last commit, open issues, bus factor)
+   - Flag packages with fewer than 100 weekly downloads (typosquat risk)
+   - Flag packages pinned 3+ major versions behind latest
+   - Check for known alternatives with better security track record
+5. For large dependency changes, spawn opus subagents to audit different package groups in parallel.
+## Literature
+- Consult \`docs/compound/research/security/dependency-security.md\` for risk assessment methodology
+- Consult \`docs/compound/research/security/secure-coding-failure.md\` section 4.9 for theoretical foundation
+- Run \`npx ca knowledge "dependency vulnerability supply chain"\` for indexed knowledge
+## Collaboration
+Report findings to security-reviewer via SendMessage with severity classification. Flag architecture-level dependency concerns (e.g., replacing a core library) to architecture-reviewer.
+## Deployment
+On-demand AgentTeam member in the **review** phase. Spawned by security-reviewer when dependency changes detected. Communicate with teammates via SendMessage.
+## Output Format
+Per finding:
+- **Package**: name@version
+- **Severity**: P0 (actively exploited CVE) / P1 (critical CVE) / P2 (high CVE, outdated) / P3 (maintenance concern)
+- **CVE**: ID if applicable
+- **Issue**: What the vulnerability enables
+- **Fix**: Update to version X, replace with Y, or accept risk with justification
+If no findings: return "DEPENDENCY REVIEW: CLEAR -- No vulnerable or suspicious dependencies found."
 `
 };
@@ -5514,8 +5822,8 @@ Multi-agent code review with severity classification.
 - Run quality gates: \`pnpm test && pnpm lint\`
 - Spawn specialized reviewers (security, architecture, performance, etc.)
-- Classify findings as P1/P2/P3
-- Fix all P1 findings before proceeding
+- Classify findings as P0 (blocks merge) / P1/P2/P3
+- Fix all P0/P1 findings before proceeding
 ## Phase 5: Compound
@@ -6196,7 +6504,7 @@ description: Multi-agent review with parallel specialized reviewers and severity
 # Review Skill
 ## Overview
-Perform thorough code review by spawning specialized reviewers in parallel, consolidating findings with severity classification (P1/P2/P3), and gating completion on implementation-reviewer approval.
+Perform thorough code review by spawning specialized reviewers in parallel, consolidating findings with severity classification (P0/P1/P2/P3), and gating completion on implementation-reviewer approval.
 ## Methodology
 1. Run quality gates first: \`pnpm test && pnpm lint\`
@@ -6207,10 +6515,11 @@ Perform thorough code review by spawning specialized reviewers in parallel, cons
    - **Large** (500+): all 11 reviewers including docs, consistency, error-handling, pattern-matcher
 4. Spawn reviewers in an **AgentTeam** (TeamCreate + Task with \`team_name\`):
    - Role skills: \`.claude/skills/compound/agents/{security-reviewer,architecture-reviewer,performance-reviewer,test-coverage-reviewer,simplicity-reviewer}/SKILL.md\`
+   - Security specialist skills (on-demand, spawned by security-reviewer): \`.claude/skills/compound/agents/{security-injection,security-secrets,security-auth,security-data,security-deps}/SKILL.md\`
    - For large diffs (500+), deploy MULTIPLE instances; split files across instances, coordinate via SendMessage
 5. Reviewers communicate findings to each other via \`SendMessage\`
 6. Collect, consolidate, and deduplicate all findings
-7. Classify by severity: P1 (critical/blocking), P2 (important), P3 (minor)
+7. Classify by severity: P0 (blocks merge), P1 (critical/blocking), P2 (important), P3 (minor)
 8. Use \`AskUserQuestion\` when severity is ambiguous or fix has multiple valid options
 9. Create beads issues for P1 findings: \`bd create --title="P1: ..."\`
 10. Fix all P1 findings before proceeding
@@ -6239,10 +6548,12 @@ Perform thorough code review by spawning specialized reviewers in parallel, cons
 ## Quality Criteria
 - All quality gates pass (\`pnpm test\`, lint)
 - All 11 reviewer perspectives were applied in parallel
-- Findings are classified P1/P2/P3 and deduplicated
+- Findings are classified P0/P1/P2/P3 and deduplicated
 - pattern-matcher checked memory and reinforced recurring issues
 - cct-reviewer checked against known Claude failure patterns
 - docs-reviewer confirmed docs/ADR alignment
+- security-reviewer P0 findings: none (blocks merge)
+- security-reviewer P1 findings: all acknowledged or resolved
 - All P1 findings fixed before \`/implementation-reviewer\` approval
 - \`/implementation-reviewer\` approved as mandatory gate
@@ -7870,19 +8181,12 @@ async function runDoctor(repoRoot) {
   } catch {
   }
   checks.push(hooksOk ? { name: "Claude hooks", status: "pass" } : { name: "Claude hooks", status: "fail", fix: "Run: npx ca setup" });
-  let modelOk = false;
-  try {
-    modelOk = isModelAvailable();
-  } catch {
+  checks.push(checkEmbeddingModel());
+  checks.push(checkSqliteHealth());
+  const pnpmCheck = checkPnpmBuildConfig(repoRoot);
+  if (pnpmCheck !== null) {
+    checks.push(pnpmCheck);
   }
-  checks.push(modelOk ? { name: "Embedding model", status: "pass" } : { name: "Embedding model", status: "warn", fix: "Run: npx ca download-model" });
-  let sqliteOk = false;
-  try {
-    ensureSqliteAvailable();
-    sqliteOk = true;
-  } catch {
-  }
-  checks.push(sqliteOk ? { name: "SQLite (better-sqlite3)", status: "pass" } : { name: "SQLite (better-sqlite3)", status: "fail", fix: "Run: pnpm rebuild better-sqlite3 (or npm rebuild better-sqlite3)" });
   const beadsResult = checkBeadsAvailable();
   checks.push(beadsResult.available ? { name: "Beads CLI", status: "pass" } : { name: "Beads CLI", status: "warn", fix: "Install beads: https://github.com/Nathandela/beads" });
   checks.push(checkGitignoreHealth(repoRoot) ? { name: ".gitignore health", status: "pass" } : { name: ".gitignore health", status: "warn", fix: "Run: npx ca setup --update" });
@@ -7902,6 +8206,46 @@ async function runDoctor(repoRoot) {
   checks.push(!scope.isUserScope ? { name: "Codebase scope", status: "pass" } : { name: "Codebase scope", status: "warn", fix: "Install in a specific repository, not home directory" });
   return checks;
 }
+function checkEmbeddingModel() { // Doctor check entry for the embedding model; always returns a { name, status, fix? } object.
+  try {
+    return isModelAvailable() ? { name: "Embedding model", status: "pass" } : { name: "Embedding model", status: "warn", fix: "Run: npx ca download-model" }; // "pass" when isModelAvailable() is truthy, otherwise "warn" with a download hint.
+  } catch { // An exception from isModelAvailable() is reported exactly like an unavailable model.
+    return { name: "Embedding model", status: "warn", fix: "Run: npx ca download-model" };
+  }
+}
+function checkSqliteHealth() { // Doctor check entry for better-sqlite3; always returns a { name, status, fix? } object.
+  try {
+    ensureSqliteAvailable(); // Return value is ignored; only whether the call throws matters here.
+    return { name: "SQLite (better-sqlite3)", status: "pass" };
+  } catch { // Any exception from ensureSqliteAvailable() becomes a "fail" entry with a rebuild hint.
+    return { name: "SQLite (better-sqlite3)", status: "fail", fix: "Run: pnpm rebuild better-sqlite3 (or npm rebuild better-sqlite3)" };
+  }
+}
+function checkPnpmBuildConfig(repoRoot) { // Doctor check for "pnpm.onlyBuiltDependencies" in <repoRoot>/package.json; returns a check object, or null when the check is skipped.
+  const lockPath = join(repoRoot, "pnpm-lock.yaml");
+  const pkgPath = join(repoRoot, "package.json");
+  let pkg;
+  try {
+    pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
+  } catch { // package.json could not be read or is not valid JSON: skip the check.
+    return null;
+  }
+  const hasLockfile = existsSync(lockPath);
+  const hasPmField = typeof pkg.packageManager === "string" && pkg.packageManager.startsWith("pnpm"); // NOTE(review): outside the try above; a package.json whose JSON value is null would throw here. Confirm whether a guard is wanted.
+  if (!hasLockfile && !hasPmField) return null; // Neither a pnpm-lock.yaml nor a packageManager value starting with "pnpm": skip the check.
+  const pnpmConfig = pkg.pnpm;
+  const deps = pnpmConfig?.onlyBuiltDependencies;
+  if (!Array.isArray(deps)) { // A missing or non-array onlyBuiltDependencies is reported as a failure.
+    return { name: "pnpm build config", status: "fail", fix: 'Run: npx ca setup (or add "pnpm.onlyBuiltDependencies" to package.json)' };
+  }
+  if (deps.includes("*")) return { name: "pnpm build config", status: "pass" }; // A "*" entry passes without checking individual package names.
+  const required = ["better-sqlite3", "node-llama-cpp"]; // Names that must each appear in the list when no "*" entry is present.
+  const missing = required.filter((d) => !deps.includes(d));
+  if (missing.length > 0) { // The fix message lists exactly the required names that are absent.
+    return { name: "pnpm build config", status: "fail", fix: `Missing from onlyBuiltDependencies: [${missing.join(", ")}]. Run: npx ca setup` };
+  }
+  return { name: "pnpm build config", status: "pass" };
+}
 var STATUS_ICONS = {
   pass: "OK",
   fail: "FAIL",
@@ -8686,7 +9030,42 @@ function registerVerifyGatesCommand(program2) {
 }
 // src/changelog-data.ts
-var CHANGELOG_RECENT = `## [1.4.3] - 2026-02-23
+var CHANGELOG_RECENT = `## [1.4.4] - 2026-02-23
+### Added
+- **Security arc with P0-P3 severity model**: Security-reviewer promoted from generic OWASP checker to mandatory core-4 reviewer with P0 (blocks merge), P1 (requires ack), P2 (should fix), P3 (nice to have) classification
+- **5 on-demand security specialist skills**: \`/security-injection\`, \`/security-secrets\`, \`/security-auth\`, \`/security-data\`, \`/security-deps\` -- spawned by security-reviewer via SendMessage within the review AgentTeam for deep trace analysis
+- **6 security reference docs** (\`docs/research/security/\`): overview, injection-patterns, secrets-checklist, auth-patterns, data-exposure, dependency-security -- distilled from the secure-coding-failure PhD survey into actionable agent guides
+- **Native addon build injection** (\`scripts/postinstall.mjs\`): Postinstall script auto-patches consumer \`package.json\` with \`pnpm.onlyBuiltDependencies\` config for \`better-sqlite3\` and \`node-llama-cpp\`. Handles indent preservation, BOM stripping, atomic writes
+- **CLI preflight diagnostics** (\`src/cli-preflight.ts\`): Catches native module load failures before commands run, prints PM-specific fix instructions (pnpm: 3 options; npm/yarn: rebuild + build tool hints)
+- **\`ca doctor\` pnpm check**: Verifies \`onlyBuiltDependencies\` is configured correctly for pnpm projects, recognizes wildcard \`["*"]\` as valid
+- **Escalation-wiring tests**: 7 new tests verifying security-reviewer mentions all 5 specialists, each specialist declares "Spawned by security-reviewer", P0 documented as merge-blocking, each specialist has \`npx ca knowledge\` and references correct research doc
+- **better-sqlite3 injection patterns**: Added project-specific \`db.exec()\` vs \`db.prepare().run()\` examples to \`injection-patterns.md\`
+### Fixed
+- **Noisy \`node-llama-cpp\` warnings on headless Linux**: Vulkan binary fallback and \`special_eos_id\` tokenizer warnings no longer print during \`ca search\` / \`ca knowledge\` -- GPU auto-detection preserved via \`progressLogs: false\` + \`logLevel: error\`
+- **Resource leak in \`isModelUsable()\`**: \`Llama\` and \`LlamaModel\` instances are now properly disposed after the preflight usability check
+- **Wildcard \`onlyBuiltDependencies\`**: Doctor and postinstall now recognize \`["*"]\` as fully configured (no false positive)
+- **Infinity loop marker injection**: \`--model\` validated against shell metacharacters; grep patterns anchored (\`^EPIC_COMPLETE\`, \`^EPIC_FAILED\`) to prevent false-positive matches from prompt echo in logs
+- **Template-to-deployed SKILL.md drift**: Backported all deployed specialist improvements (output fields, collaboration notes, \`npx ca knowledge\` lines) into source templates so \`ca setup --update\` no longer regresses
+- **SSRF citations**: 3 OWASP references in \`secure-coding-failure.md\` corrected from A01 (Broken Access Control) to A10 (SSRF)
+- **Stale verification docs**: Exit criteria updated from 6 to 8 categories (added Security Clear + Workflow Gates); closed-loop review process updated with security check in Stage 4 flowchart
+- **Broken dual-path reference** in \`subagent-pipeline.md\`: Now documents both \`docs/research/security/\` (source repo) and \`docs/compound/research/security/\` (consumer repos)
+- **Incomplete OWASP mapping** in \`overview.md\`: Completed from 5/10 to 10/10 (added A04, A05, A07, A08, A09)
+### Changed
+- **\`getLlama()\` initialization hardened**: Both call sites (\`nomic.ts\`, \`model.ts\`) now pass \`build: 'never'\` to prevent silent compilation from source on exotic platforms; set \`NODE_LLAMA_CPP_DEBUG=true\` to re-enable verbose output
+- **Review skill wired to security arc**: P0 added to severity overview, security specialist skills listed as on-demand members, quality criteria include P0/P1 checks
+- **WORKFLOW template**: Severity classification updated from P1/P2/P3 to P0-P3 with "Fix all P0/P1 findings"
+- **Zero-findings instruction**: All 6 security templates (reviewer + 5 specialists) now include "return CLEAR" instruction when no findings detected
+- **Scope-limiting instruction**: \`security-injection\` prioritizes files with interpreter sinks over pure data/config for large diffs (500+ lines)
+- **Non-web context**: \`security-auth\` includes step for CLI/API-only projects without web routes
+- **Graceful audit skip**: \`security-deps\` handles missing \`pnpm audit\` / \`pip-audit\` gracefully instead of failing
+## [1.4.3] - 2026-02-23
 ### Fixed
@@ -8705,13 +9084,7 @@ var CHANGELOG_RECENT = `## [1.4.3] - 2026-02-23
 - **Banner audio crash on headless Linux**: Async \`ENOENT\` error from missing \`aplay\` no longer crashes \`ca setup --update\`
 - **PowerShell path injection on Windows**: Temp paths containing apostrophes no longer break or inject commands in \`banner-audio.ts\`
-- **Banner audio test coverage**: Rewrote tests with proper mock isolation (\`vi.spyOn\` + file-scope \`vi.mock\`), covering async ENOENT, sync throw, stop() idempotency, and normal exit cleanup
-## [1.4.1] - 2026-02-22
-### Changed
-- **Broader retrieval messaging**: \`ca search\` and \`ca knowledge\` descriptions in prime output and AGENTS.md now encourage general-purpose use beyond mandatory architectural triggers`;
+- **Banner audio test coverage**: Rewrote tests with proper mock isolation (\`vi.spyOn\` + file-scope \`vi.mock\`), covering async ENOENT, sync throw, stop() idempotency, and normal exit cleanup`;
 // src/commands/about.ts
 function registerAboutCommand(program2) {
@@ -9350,6 +9723,7 @@ function registerCaptureCommands(program2) {
   });
 }
 var EPIC_ID_PATTERN2 = /^[a-zA-Z0-9_.-]+$/;
+// Whitelist for the loop's model option: one or more ASCII letters, digits,
+// or any of `_ . : / -`, anchored at both ends. validateOptions throws when
+// options.model does not match.
+var MODEL_PATTERN = /^[a-zA-Z0-9_.:/-]+$/;
 function buildScriptHeader(timestamp, maxRetries, model, epicIds) {
   return `#!/usr/bin/env bash
 # Infinity Loop - Generated by: ca loop
@@ -9550,18 +9924,18 @@ while true; do
            -p "$PROMPT" \\
            &> "$LOGFILE" || true
-    if grep -q "EPIC_COMPLETE" "$LOGFILE"; then
+    if grep -q "^EPIC_COMPLETE$" "$LOGFILE"; then
       log "Epic $EPIC_ID completed successfully"
       SUCCESS=true
       break
-    elif grep -q "HUMAN_REQUIRED" "$LOGFILE"; then
-      REASON=$(grep "HUMAN_REQUIRED:" "$LOGFILE" | head -1 | sed 's/.*HUMAN_REQUIRED: *//')
+    elif grep -q "^HUMAN_REQUIRED:" "$LOGFILE"; then
+      REASON=$(grep "^HUMAN_REQUIRED:" "$LOGFILE" | head -1 | sed 's/^HUMAN_REQUIRED: *//')
       log "Epic $EPIC_ID needs human action: $REASON"
       bd update "$EPIC_ID" --notes "Human required: $REASON" 2>/dev/null || true
       SKIPPED=$((SKIPPED + 1))
       SUCCESS=skip
       break
-    elif grep -q "EPIC_FAILED" "$LOGFILE"; then
+    elif grep -q "^EPIC_FAILED$" "$LOGFILE"; then
       log "Epic $EPIC_ID reported failure (attempt $ATTEMPT)"
     else
       log "Epic $EPIC_ID session ended without marker (attempt $ATTEMPT)"
@@ -9595,6 +9969,9 @@ function validateOptions(options) {
   if (!Number.isInteger(options.maxRetries) || options.maxRetries < 0) {
     throw new Error(`Invalid maxRetries: must be a non-negative integer, got ${options.maxRetries}`);
   }
+  if (!MODEL_PATTERN.test(options.model)) {
+    throw new Error(`Invalid model "${options.model}": must match ${MODEL_PATTERN}`);
+  }
   if (options.epics) {
     for (const id of options.epics) {
       if (!EPIC_ID_PATTERN2.test(id)) {
@@ -9942,6 +10319,109 @@ function registerManagementCommands(program2) {
   registerWorktreeCommands(program2);
 }
+// src/cli-preflight.ts
+// Command names that commandNeedsSqlite() treats as requiring SQLite.
+// One row per command family; insertion order is unchanged.
+var NEEDS_SQLITE = /* @__PURE__ */ new Set([
+  "learn", "capture", "detect", // capture
+  "search", "list", "load-session", "check-plan", // retrieval
+  "knowledge", "index-docs", // knowledge
+  "show", "update", "delete", // management: CRUD
+  "wrong", "validate", // management: invalidation
+  "compact", "rebuild", "stats", "prime", // management: maintenance
+  "export", "import", // management: IO
+  "audit", "compound" // audit & compound
+]);
+/**
+ * Report whether a command, or any command above it in the parent chain,
+ * is listed in NEEDS_SQLITE.
+ *
+ * @param cmd - Object exposing `name()` and `parent`; a falsy value yields false.
+ * @returns {boolean} true as soon as one name in the chain is in NEEDS_SQLITE.
+ */
+function commandNeedsSqlite(cmd) {
+  // Walk upward so a subcommand inherits the requirement from its ancestors.
+  for (let node = cmd; node; node = node.parent) {
+    if (NEEDS_SQLITE.has(node.name())) {
+      return true;
+    }
+  }
+  return false;
+}
+/**
+ * Guess which package manager manages the project rooted at `cwd`.
+ *
+ * Precedence:
+ *   1. a `pnpm-lock.yaml` file,
+ *   2. the `packageManager` field of `package.json` (pnpm, yarn or npm),
+ *   3. a `yarn.lock` file,
+ *   4. a `package-lock.json` file.
+ *
+ * @param {string} cwd - Directory to inspect.
+ * @returns {"pnpm" | "yarn" | "npm" | "unknown"} The detected manager, or
+ *   "unknown" when none of the signals is present.
+ */
+function detectPackageManager(cwd) {
+  if (existsSync(join(cwd, "pnpm-lock.yaml"))) return "pnpm";
+  try {
+    const raw = readFileSync(join(cwd, "package.json"), "utf-8");
+    const pkg = JSON.parse(raw);
+    if (typeof pkg.packageManager === "string") {
+      if (pkg.packageManager.startsWith("pnpm")) return "pnpm";
+      if (pkg.packageManager.startsWith("yarn")) return "yarn";
+      // An explicit npm declaration must win over a leftover yarn.lock,
+      // exactly as the pnpm/yarn declarations above do.
+      if (pkg.packageManager.startsWith("npm")) return "npm";
+    }
+  } catch {
+    // Deliberate best effort: a missing, unreadable or malformed package.json
+    // (including a non-object JSON value) just means we fall back to lockfiles.
+  }
+  if (existsSync(join(cwd, "yarn.lock"))) return "yarn";
+  if (existsSync(join(cwd, "package-lock.json"))) return "npm";
+  return "unknown";
+}
+/**
+ * Write a troubleshooting guide to stderr for a failed "better-sqlite3" load.
+ *
+ * pnpm projects get pnpm-specific remedies; every other result of
+ * detectPackageManager gets the `npm rebuild` advice plus the platform
+ * build-tool hint. When `err` is an Error with a truthy `cause`, that cause
+ * is printed last.
+ *
+ * @param {unknown} err - The failure raised while loading the native module.
+ * @param {string} [cwd=process.cwd()] - Project directory used for detection.
+ */
+function printNativeBuildDiagnostic(err, cwd = process.cwd()) {
+  const say = (line = "") => console.error(line);
+  const usesPnpm = detectPackageManager(cwd) === "pnpm";
+  say();
+  say('ERROR: Native module "better-sqlite3" failed to load.');
+  say();
+  if (usesPnpm) {
+    const pnpmGuide = [
+      "  pnpm v10+ blocks native addon builds by default.",
+      "",
+      "  Fix (choose one):",
+      "",
+      "    Option A -- Run setup (recommended):",
+      "      npx ca setup",
+      "",
+      "    Option B -- Manual patch:",
+      "      1. Add to package.json:",
+      '         "pnpm": { "onlyBuiltDependencies": ["better-sqlite3", "node-llama-cpp"] }',
+      "      2. Run: pnpm install && pnpm rebuild better-sqlite3",
+      "",
+      "    Option C -- Approve build scripts interactively:",
+      "      pnpm approve-builds",
+      ""
+    ];
+    for (const line of pnpmGuide) {
+      say(line);
+    }
+  } else {
+    say("  Fix: npm rebuild better-sqlite3");
+    say();
+    say("  If the error persists, ensure build tools are installed:");
+    printBuildToolsHint();
+    say();
+  }
+  // Only Error instances are inspected for a cause; anything else is skipped.
+  const cause = err instanceof Error ? err.cause : void 0;
+  if (cause) {
+    const detail = cause instanceof Error ? cause.message : String(cause);
+    console.error("  Underlying error:", detail);
+    say();
+  }
+}
+/**
+ * Print to stderr how to install a native build toolchain on the current
+ * platform. Platforms other than darwin, linux and win32 print nothing.
+ */
+function printBuildToolsHint() {
+  switch (process.platform) {
+    case "darwin":
+      console.error("    macOS: xcode-select --install");
+      break;
+    case "linux":
+      console.error("    Linux: sudo apt install build-essential python3  (Debian/Ubuntu)");
+      console.error('           sudo dnf groupinstall "Development Tools"  (Fedora)');
+      break;
+    case "win32":
+      console.error("    Windows: Install Visual Studio Build Tools");
+      console.error("             https://visualstudio.microsoft.com/visual-cpp-build-tools/");
+      break;
+    default:
+      break;
+  }
+}
 // src/cli.ts
 function cleanup() {
   try {
@@ -9967,6 +10447,20 @@ registerSetupCommands(program);
 registerCompoundCommands(program);
 registerLoopCommands(program);
 registerPhaseCheckCommand(program);
+// Before any action runs: for commands that commandNeedsSqlite() flags, call
+// ensureSqliteAvailable(); if it throws, print the native-build diagnostic and
+// exit with status 1.
+program.hook("preAction", (_thisCommand, actionCommand) => {
+  if (commandNeedsSqlite(actionCommand)) {
+    try {
+      ensureSqliteAvailable();
+    } catch (loadError) {
+      // Resolving the repo root is best effort; when it fails, the variable
+      // stays undefined and the diagnostic uses its own default directory.
+      let repoRoot;
+      try {
+        repoRoot = getRepoRoot();
+      } catch {
+      }
+      printNativeBuildDiagnostic(loadError, repoRoot);
+      process.exit(1);
+    }
+  }
+});
 program.parse();
 //# sourceMappingURL=cli.js.map
 //# sourceMappingURL=cli.js.map