npm - dual-brain - Versions diffs - 6.0.1 → 6.1.0 - Mend

dual-brain 6.0.1 → 6.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/bin/dual-brain.mjs +173 -15
package/hooks/head-guard.sh +6 -0
package/package.json +12 -4
package/playbooks/debug.json +49 -0
package/playbooks/refactor.json +57 -0
package/playbooks/security-audit.json +57 -0
package/playbooks/security.json +38 -0
package/playbooks/test-gen.json +48 -0
package/src/brief.mjs +266 -0
package/src/decide.mjs +162 -91
package/src/decompose.mjs +331 -0
package/src/detect.mjs +1 -1
package/src/dispatch.mjs +313 -19
package/src/health.mjs +253 -0
package/src/index.mjs +6 -0
package/src/playbook.mjs +257 -0
package/src/redact.mjs +192 -0
package/src/repo.mjs +292 -0
package/src/session.mjs +210 -0
package/src/test.mjs +568 -1

package/bin/dual-brain.mjs CHANGED Viewed

@@ -7,7 +7,7 @@ import { fileURLToPath } from 'node:url';
 import { execSync } from 'node:child_process';
 import {
-  ensureProfile, loadProfile, runOnboarding,
+  ensureProfile, loadProfile, saveProfile, runOnboarding,
   rememberPreference, forgetPreference, getActivePreferences,
   getAvailableProviders, isSoloBrain, getHeadModel,
 } from '../src/profile.mjs';
@@ -15,11 +15,18 @@ import {
 import { detectTask } from '../src/detect.mjs';
 import {
-  decideRoute, getAvailableModels, estimateBudgetPressure,
+  decideRoute, getAvailableModels,
 } from '../src/decide.mjs';
+import {
+  getHealth, markHot, markHealthy, remainingCooldownMinutes, getSessionStats,
+} from '../src/health.mjs';
 import { dispatch, detectRuntime, dispatchDualBrain } from '../src/dispatch.mjs';
+import { loadRepoCache } from '../src/repo.mjs';
+import { loadSession, saveSession, formatSessionCard } from '../src/session.mjs';
 // ─── Helpers ─────────────────────────────────────────────────────────────────
 const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -30,6 +37,7 @@ function readVersion() {
 }
 function flag(args, name) { const i = args.indexOf(name); return i !== -1 ? (args[i + 1] ?? true) : null; }
 function err(msg) { process.stderr.write(`Error: ${msg}\n`); process.exit(1); }
+function vtrace(msg) { process.stderr.write(`[verbose] ${msg}\n`); }
 function printHelp() {
   console.log(`
@@ -41,20 +49,37 @@ Commands:
   go "task description"     Detect → decide → dispatch a task
     --dry-run               Show routing decision without executing
     --files a.mjs,b.mjs     Provide file context for risk classification
-  status                    Provider health, budget pressure, available models
+    --verbose, -v           Print routing trace (intent, risk, health, model selection)
+  status                    Provider health, session stats, available models
+    --verbose, -v           Also print profile file path and raw profile object
+  hot <provider>            Manually mark all model classes for provider as hot
+  cool <provider>           Manually clear hot state for a provider
   remember "preference"     Save a project-scoped preference
   forget "preference"       Remove a preference by fuzzy match
 Options:
   --version                 Print version
   --help                    Show this help
+  --verbose, -v             Enable verbose routing trace output (stderr)
 `.trim());
 }
+// ─── Card command (default) ──────────────────────────────────────────────────
+async function cmdCard() {
+  const cwd     = process.cwd();
+  const repo    = loadRepoCache(cwd);
+  const session = loadSession(cwd);
+  const health  = getHealth(cwd);
+  const card    = formatSessionCard(session, repo, health);
+  console.log(card);
+}
 // ─── Commands ─────────────────────────────────────────────────────────────────
 async function cmdInit() {
   const profile = await runOnboarding({ interactive: true });
+  saveProfile(profile, { cwd: process.cwd() });
   const rt = await detectRuntime();
   const providers = getAvailableProviders(profile);
   const providerSummary = providers.length
@@ -65,14 +90,15 @@ async function cmdInit() {
 async function cmdGo(args) {
   const dryRun  = args.includes('--dry-run');
+  const verbose = args.includes('--verbose') || args.includes('-v');
   const filesRaw = flag(args, '--files');
   const files   = filesRaw && typeof filesRaw === 'string'
     ? filesRaw.split(',').map(f => f.trim()).filter(Boolean)
     : [];
   // prompt is the first non-flag argument (or value after --dry-run which is boolean)
-  const prompt = args.find(a => !a.startsWith('--') && a !== (filesRaw ?? ''));
-  if (!prompt) err('Usage: dual-brain go "task description" [--dry-run] [--files a,b]');
+  const prompt = args.find(a => !a.startsWith('--') && !a.startsWith('-') && a !== (filesRaw ?? ''));
+  if (!prompt) err('Usage: dual-brain go "task description" [--dry-run] [--files a,b] [--verbose]');
   const cwd     = process.cwd();
   const profile = await ensureProfile(cwd);
@@ -81,8 +107,44 @@ async function cmdGo(args) {
   // Print the one-sentence classification
   console.log(detection.explanation);
+  // Verbose: emit detection trace before routing decision
+  if (verbose) {
+    vtrace(`Intent: ${detection.intent} | Risk: ${detection.risk} | Complexity: ${detection.complexity} | Effort: ${detection.effort ?? 'n/a'}`);
+    vtrace(`Tier: ${detection.tier} | Files: ${detection.fileCount ?? files.length} | Requires write: ${detection.requiresWrite}`);
+  }
+  // Verbose: emit provider health scores before dispatch
+  if (verbose) {
+    const providers = getAvailableProviders(profile);
+    const { states } = getHealth(cwd);
+    const providerScores = ['claude', 'openai'].map(name => {
+      const enabled = providers.some(p => p.name === name);
+      if (!enabled) return `${name}=unavailable`;
+      // Find any state entry for this provider
+      const statuses = Object.entries(states)
+        .filter(([k]) => k.startsWith(`${name}:`))
+        .map(([, v]) => v.status);
+      const worst = statuses.includes('hot') ? 'hot'
+        : statuses.includes('probing') ? 'probing'
+        : statuses.includes('degraded') ? 'degraded'
+        : 'healthy';
+      return `${name}=${worst}`;
+    }).join(' ');
+    vtrace(`Provider health: ${providerScores}`);
+  }
   const decision = decideRoute({ profile, detection, cwd });
+  // Verbose: emit model selection and dual-brain rationale
+  if (verbose) {
+    const modelLabel = decision.effort ? `${decision.model} (${decision.effort})` : decision.model;
+    const modelStatus = getAvailableModels(profile)[decision.provider]?.includes(decision.model)
+      ? 'available, matches tier'
+      : 'selected';
+    vtrace(`Model selection: ${modelLabel} (${modelStatus})`);
+    vtrace(`Dual-brain: ${decision.dualBrain ? 'yes' : 'no'} (${isSoloBrain(profile) ? 'solo provider' : 'dual provider'}, ${detection.risk} risk)`);
+  }
   // Print routing table
   console.log(`  provider   : ${decision.provider}`);
   console.log(`  model      : ${decision.model}${decision.effort ? ' (' + decision.effort + ')' : ''}`);
@@ -102,38 +164,85 @@ async function cmdGo(args) {
     console.log(`\nConsensus: ${result.consensus}`);
     if (result.claude?.summary) console.log(`Claude : ${result.claude.summary}`);
     if (result.openai?.summary) console.log(`OpenAI : ${result.openai.summary}`);
+    // Save session state
+    saveSession({
+      objective:    prompt,
+      branch:       null,
+      filesChanged: files,
+      commandsRun:  [`dual-brain go "${prompt}"`],
+      lastResult:   { status: 'success', summary: result.consensus || 'dual-brain complete' },
+      provider:     decision.provider,
+      nextAction:   null,
+    }, cwd);
   } else {
     result = await dispatch({ decision, prompt, files, cwd });
     const statusLine = result.status === 'completed' ? 'Done' : `Failed (exit ${result.exitCode})`;
     console.log(`\n${statusLine} in ${(result.durationMs / 1000).toFixed(1)}s`);
     if (result.summary) console.log(result.summary);
     if (result.error)   process.stderr.write(`${result.error}\n`);
-    if (result.status !== 'completed') process.exit(1); }
+    // Save session state regardless of success/failure
+    saveSession({
+      objective:    prompt,
+      branch:       null,
+      filesChanged: files,
+      commandsRun:  [`dual-brain go "${prompt}"`],
+      lastResult:   {
+        status:  result.status === 'completed' ? 'success' : 'failure',
+        summary: result.summary || (result.status === 'completed' ? 'completed' : `exit ${result.exitCode}`),
+      },
+      provider:     decision.provider,
+      nextAction:   null,
+    }, cwd);
+    if (result.status !== 'completed') process.exit(1);
+  }
 }
-async function cmdStatus() {
+async function cmdStatus(args = []) {
+  const verbose = args.includes('--verbose') || args.includes('-v');
   const cwd     = process.cwd();
   const profile = loadProfile(cwd);
   const rt      = await detectRuntime();
   const providers = getAvailableProviders(profile);
-  const pressure  = estimateBudgetPressure(profile, cwd);
   const available = getAvailableModels(profile);
   const prefs     = getActivePreferences(cwd);
+  const { states } = getHealth(cwd);
+  const sessionStats = getSessionStats(cwd);
   console.log('=== Dual-Brain Status ===\n');
-  // Providers
+  // Providers + health
   console.log('Providers:');
   if (providers.length === 0) {
     console.log('  (none configured — run: dual-brain init)');
   } else {
     for (const p of providers) {
       const label = p.name === 'claude' ? 'Claude' : 'OpenAI';
-      const pct   = Math.round((pressure[p.name] ?? 0) * 100);
-      console.log(`  ${label}  plan=${p.plan}  budget=${pct}% used`);
+      // Collect all model-class states for this provider
+      const provStates = Object.entries(states)
+        .filter(([k]) => k.startsWith(`${p.name}:`));
+      const sess = sessionStats[p.name] ?? { calls: 0, tokens: 0 };
+      if (provStates.length === 0) {
+        console.log(`  ${label}  plan=${p.plan}  status=healthy  calls=${sess.calls}  tokens=${sess.tokens}`);
+      } else {
+        for (const [k, st] of provStates) {
+          const modelClass = k.split(':').slice(1).join(':');
+          let statusStr = st.status;
+          if (st.status === 'hot') {
+            const remaining = remainingCooldownMinutes(p.name, modelClass, cwd);
+            statusStr = remaining > 0 ? `hot (retry in ${remaining}m)` : 'hot (cooling)';
+          }
+          console.log(`  ${label}  plan=${p.plan}  model=${modelClass}  status=${statusStr}  calls=${sess.calls}  tokens=${sess.tokens}`);
+        }
+      }
     }
   }
+  // Session totals
+  const totalCalls  = Object.values(sessionStats).reduce((s, v) => s + v.calls, 0);
+  const totalTokens = Object.values(sessionStats).reduce((s, v) => s + v.tokens, 0);
+  console.log(`\nSession: ${totalCalls} dispatch${totalCalls !== 1 ? 'es' : ''}, ${totalTokens} tokens observed`);
   // Models
   console.log('\nAvailable models:');
   if (available.claude.length) console.log(`  Claude : ${available.claude.join(', ')}`);
@@ -154,16 +263,62 @@ async function cmdStatus() {
   console.log(`\nPreferences: ${prefs.length ? '' : '(none)'}`);
   for (const p of prefs) console.log(`  [${p.scope}] ${p.text}`);
+  // Verbose: profile file path and raw object
+  if (verbose) {
+    const { homedir } = await import('node:os');
+    const globalPath  = join(homedir(), '.config', 'dual-brain', 'profile.json');
+    const projectPath = join(cwd, '.dualbrain', 'profile.json');
+    const { existsSync } = await import('node:fs');
+    const loadedFrom = existsSync(projectPath) ? projectPath : existsSync(globalPath) ? globalPath : '(defaults)';
+    vtrace(`Profile file: ${loadedFrom}`);
+    vtrace(`Raw profile:\n${JSON.stringify(profile, null, 2)}`);
+  }
   // Update check
   try {
     const localVer  = readVersion();
     const remoteVer = execSync('npm view dual-brain version 2>/dev/null', { timeout: 5000 }).toString().trim();
-    if (remoteVer && remoteVer !== localVer) {
-      console.log(`\nUpdate available: npm i -g dual-brain@latest  (${localVer} → ${remoteVer})`);
+    if (remoteVer) {
+      const localParts  = localVer.split('.').map(Number);
+      const remoteParts = remoteVer.split('.').map(Number);
+      const updateAvailable =
+        remoteParts[0] > localParts[0]
+        || (remoteParts[0] === localParts[0] && remoteParts[1] > localParts[1])
+        || (remoteParts[0] === localParts[0] && remoteParts[1] === localParts[1] && remoteParts[2] > localParts[2]);
+      if (updateAvailable) {
+        console.log(`\nUpdate available: npm i -g dual-brain@latest  (${localVer} → ${remoteVer})`);
+      }
     }
   } catch { /* network unavailable — skip */ }
 }
+// ─── cmdHot / cmdCool ─────────────────────────────────────────────────────────
+const PROVIDER_MODEL_CLASSES = {
+  claude: ['haiku', 'sonnet', 'opus'],
+  openai: ['o4-mini', 'o3', 'gpt-4.1', 'gpt-4.1-mini', 'gpt-5.4', 'gpt-5.5'],
+};
+function cmdHot(providerArg) {
+  if (!providerArg) err('Usage: dual-brain hot <provider>  (claude | openai)');
+  const provider = providerArg.toLowerCase();
+  const classes  = PROVIDER_MODEL_CLASSES[provider];
+  if (!classes)  err(`Unknown provider: ${provider}. Use "claude" or "openai".`);
+  const cwd = process.cwd();
+  for (const mc of classes) markHot(provider, mc, cwd);
+  console.log(`Marked ${classes.length} model classes as hot for ${provider}.`);
+}
+function cmdCool(providerArg) {
+  if (!providerArg) err('Usage: dual-brain cool <provider>  (claude | openai)');
+  const provider = providerArg.toLowerCase();
+  const classes  = PROVIDER_MODEL_CLASSES[provider];
+  if (!classes)  err(`Unknown provider: ${provider}. Use "claude" or "openai".`);
+  const cwd = process.cwd();
+  for (const mc of classes) markHealthy(provider, mc, cwd);
+  console.log(`Cleared hot state for all ${provider} model classes.`);
+}
 async function cmdInstall() {
   const { spawnSync } = await import('child_process');
   const result = spawnSync('node', [join(__dirname, '..', 'install.mjs')], { stdio: 'inherit', cwd: process.cwd() });
@@ -188,13 +343,16 @@ async function main() {
   const args = process.argv.slice(2);
   const cmd  = args[0];
-  if (!cmd || cmd === '--help' || cmd === '-h') { printHelp(); return; }
+  if (cmd === '--help' || cmd === '-h') { printHelp(); return; }
+  if (!cmd) { await cmdCard(); return; }
   if (cmd === '--version' || cmd === '-v')      { console.log(readVersion()); return; }
   if (cmd === 'init')     { await cmdInit(); return; }
   if (cmd === 'install')  { await cmdInstall(); return; }
   if (cmd === 'go')       { await cmdGo(args.slice(1)); return; }
-  if (cmd === 'status')   { await cmdStatus(); return; }
+  if (cmd === 'status')   { await cmdStatus(args.slice(1)); return; }
+  if (cmd === 'hot')      { cmdHot(args[1]); return; }
+  if (cmd === 'cool')     { cmdCool(args[1]); return; }
   if (cmd === 'remember') { cmdRemember(args[1]); return; }
   if (cmd === 'forget')   { cmdForget(args[1]); return; }

package/hooks/head-guard.sh CHANGED Viewed

@@ -80,6 +80,12 @@ if [[ "${TOOL}" == "Bash" ]]; then
         exit 2
     fi
+    # Interpreter one-liners that can write files (node -e, python -c, perl -e, ruby -e)
+    if printf '%s' "${CMD}" | grep -qE '(^|[[:space:];|&])(node[[:space:]]+(--eval|-e)|python3?[[:space:]]+-c|perl[[:space:]]+-e|ruby[[:space:]]+-e)[[:space:]]'; then
+        echo "HEAD cannot implement directly (interpreter one-liner). Use: node hooks/dispatch.mjs --task \"description\"" >&2
+        exit 2
+    fi
     # mv / cp where the destination looks like a source code file
     if printf '%s' "${CMD}" | grep -qE '(^|[[:space:];|&])(mv|cp)[[:space:]].*\.(js|mjs|cjs|ts|tsx|py|sh|json|yaml|yml|toml|rb|go|rs|java|c|cpp|h|css|html|sql)([[:space:]]|$)'; then
         echo "HEAD cannot implement directly (mv/cp to source file). Use: node hooks/dispatch.mjs --task \"description\"" >&2

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "dual-brain",
-  "version": "6.0.1",
+  "version": "6.1.0",
   "description": "AI orchestration across Claude + OpenAI subscriptions — smart routing, budget awareness, and dual-brain collaboration",
   "type": "module",
   "bin": {
@@ -12,7 +12,14 @@
     "./profile": "./src/profile.mjs",
     "./detect": "./src/detect.mjs",
     "./decide": "./src/decide.mjs",
-    "./dispatch": "./src/dispatch.mjs"
+    "./dispatch": "./src/dispatch.mjs",
+    "./playbook": "./src/playbook.mjs",
+    "./health": "./src/health.mjs",
+    "./repo": "./src/repo.mjs",
+    "./session": "./src/session.mjs",
+    "./decompose": "./src/decompose.mjs",
+    "./brief": "./src/brief.mjs",
+    "./redact": "./src/redact.mjs"
   },
   "keywords": [
     "claude-code",
@@ -33,7 +40,7 @@
   "scripts": {
     "test": "node hooks/test-orchestrator.mjs",
     "test:core": "node --test src/test.mjs",
-    "postinstall": "node install.mjs"
+    "postinstall": "echo 'dual-brain installed. Run: dual-brain install (in your project) to set up hooks.'"
   },
   "engines": {
     "node": ">=20.0.0"
@@ -49,6 +56,7 @@
     "review-rules.md",
     "CLAUDE.md",
     "README.md",
-    "LICENSE"
+    "LICENSE",
+    "playbooks/*.json"
   ]
 }

package/playbooks/debug.json ADDED Viewed

@@ -0,0 +1,49 @@
+{
+  "name": "debug",
+  "description": "Structured bug resolution: reproduce, isolate, hypothesize root cause, fix minimally, verify with tests",
+  "matchIntents": ["debug", "fix"],
+  "steps": [
+    {
+      "id": "reproduce",
+      "title": "Reproduce the Failure",
+      "goal": "Find the failing code path. Identify the error message, stack trace, or unexpected behavior being reported. Locate the relevant source files, the entry point where the failure originates, and any existing tests that exercise this path. Confirm you understand the expected vs actual behavior.",
+      "tier": "search",
+      "consensus": false,
+      "output": { "kind": "analysis", "required": true }
+    },
+    {
+      "id": "isolate",
+      "title": "Isolate the Root Cause",
+      "goal": "Narrow down the root cause to a specific file, function, or line range. Trace the data flow from the failing callsite back to where the incorrect value or state originates. Check recent git changes to this code path. Identify the single most likely source of the problem before moving on.",
+      "tier": "search",
+      "consensus": false,
+      "output": { "kind": "analysis", "required": true }
+    },
+    {
+      "id": "hypothesize",
+      "title": "Form a Root Cause Hypothesis",
+      "goal": "Based on the isolated evidence, form a clear hypothesis about why the bug occurs. Consider: edge cases not handled, race conditions or ordering issues, incorrect assumptions about inputs or state, stale or shared mutable state, off-by-one errors, or API contract mismatches. State your hypothesis explicitly and explain what evidence supports it.",
+      "tier": "think",
+      "consensus": false,
+      "output": { "kind": "analysis", "required": true }
+    },
+    {
+      "id": "fix",
+      "title": "Implement the Minimal Fix",
+      "goal": "Implement the smallest change that fixes the bug according to the hypothesis. Do not refactor surrounding code, rename things, or improve unrelated areas. If a regression test for this bug does not exist, add one. The fix should be easy to review and easy to revert if wrong.",
+      "tier": "execute",
+      "consensus": false,
+      "gate": { "type": "diff-review", "requiredWhen": "high-risk" },
+      "output": { "kind": "patch", "required": true }
+    },
+    {
+      "id": "verify",
+      "title": "Verify Fix and Check for Regressions",
+      "goal": "Run the full test suite. Confirm the bug is no longer reproducible. Confirm no previously passing tests now fail. If regressions are found, determine whether they are related to the fix or pre-existing. Summarize: the root cause in one sentence, the fix applied, and the test evidence that the bug is resolved.",
+      "tier": "execute",
+      "consensus": false,
+      "gate": { "type": "test", "requiredWhen": "always" },
+      "output": { "kind": "summary", "required": true }
+    }
+  ]
+}

package/playbooks/refactor.json ADDED Viewed

@@ -0,0 +1,57 @@
+{
+  "name": "refactor",
+  "description": "Safe, verified refactoring: map callers, lock invariants, plan steps, apply, test, and confirm behavior preservation",
+  "matchIntents": ["refactor"],
+  "steps": [
+    {
+      "id": "understand",
+      "title": "Map the Target Code",
+      "goal": "Map the target code: find all callers and call sites, direct and transitive dependencies, existing tests that cover it, and any observable side effects. Document the current behavior contract — what it accepts, what it returns, what it mutates, and what errors it may throw.",
+      "tier": "search",
+      "consensus": false,
+      "output": { "kind": "analysis", "required": true }
+    },
+    {
+      "id": "invariants",
+      "title": "Identify Behavioral Invariants",
+      "goal": "Based on the code map, identify the behavioral invariants that MUST be preserved through the refactor: public API surface (function signatures, exported names), return types and shapes, error handling contracts, ordering guarantees, and any side effects callers depend on. This list is the acceptance criterion for the refactor.",
+      "tier": "think",
+      "consensus": false,
+      "output": { "kind": "checklist", "required": true }
+    },
+    {
+      "id": "plan",
+      "title": "Design the Refactor",
+      "goal": "Design the refactoring as a sequence of small, independently verifiable steps. For each step, describe: what changes, what stays the same, and how to verify it didn't break anything. Avoid big-bang rewrites. Each step should leave the codebase in a working state.",
+      "tier": "think",
+      "consensus": true,
+      "gate": { "type": "approval", "requiredWhen": "always" },
+      "output": { "kind": "plan", "required": true }
+    },
+    {
+      "id": "apply",
+      "title": "Apply the Refactoring",
+      "goal": "Implement the refactoring changes following the approved plan. Make minimal edits — do not improve unrelated code, fix unrelated bugs, or change formatting outside the target. Preserve all invariants identified in the invariants step. Commit or stage changes step by step if the plan has multiple stages.",
+      "tier": "execute",
+      "consensus": false,
+      "output": { "kind": "patch", "required": true }
+    },
+    {
+      "id": "verify",
+      "title": "Run Tests and Fix Regressions",
+      "goal": "Run the full existing test suite. For any failures, determine whether they are real regressions (the refactor broke behavior) or expected test updates (tests were asserting on internal structure that legitimately changed). Fix real regressions immediately. Update tests only where the old test was testing implementation detail, not behavior.",
+      "tier": "execute",
+      "consensus": false,
+      "gate": { "type": "test", "requiredWhen": "always" },
+      "output": { "kind": "test", "required": true }
+    },
+    {
+      "id": "confirm",
+      "title": "Confirm Behavior Preservation",
+      "goal": "Review the final diff against the invariants checklist. Confirm each invariant is still satisfied. Summarize: what structural changes were made, what behavioral aspects are provably unchanged, and whether there are any remaining risks or follow-up tasks.",
+      "tier": "think",
+      "consensus": false,
+      "output": { "kind": "summary", "required": true }
+    }
+  ]
+}

package/playbooks/security-audit.json ADDED Viewed

@@ -0,0 +1,57 @@
+{
+  "name": "security-audit",
+  "description": "Systematic security review: inventory, threat model, vulnerability scan, ranked findings, remediation plan",
+  "matchIntents": ["security"],
+  "steps": [
+    {
+      "id": "inventory",
+      "title": "Inventory Security-Sensitive Files",
+      "goal": "Identify all security-sensitive files in the codebase: auth modules, secret storage, .env files, API key usage, token handling, encryption routines, and permission checks. List each file with a one-line note on its security role.",
+      "tier": "search",
+      "consensus": false,
+      "output": { "kind": "checklist", "required": true }
+    },
+    {
+      "id": "threat-model",
+      "title": "Threat Model",
+      "goal": "Analyze the attack surface of this codebase. Identify threat actors (external users, internal users, third-party services), attack vectors (inputs, APIs, files, network), and trust boundaries. Categorize threats using STRIDE (Spoofing, Tampering, Repudiation, Information Disclosure, Denial of Service, Elevation of Privilege) or a similar framework. Produce a structured threat model.",
+      "tier": "think",
+      "consensus": true,
+      "gate": { "type": "risk", "requiredWhen": "always" },
+      "output": { "kind": "analysis", "required": true }
+    },
+    {
+      "id": "vulnerability-scan",
+      "title": "Vulnerability Scan",
+      "goal": "Check the identified security-sensitive files for common vulnerabilities: hardcoded secrets or API keys, SQL injection, cross-site scripting (XSS), cross-site request forgery (CSRF), insecure deserialization, path traversal, missing or insufficient input validation, and improper error handling that leaks internals. List each finding with file, line reference, and a brief description.",
+      "tier": "search",
+      "consensus": false,
+      "output": { "kind": "checklist", "required": true }
+    },
+    {
+      "id": "exploitability-rank",
+      "title": "Exploitability Ranking",
+      "goal": "Rank the findings from the vulnerability scan by exploitability and impact using CVSS-style severity (Critical, High, Medium, Low). For each finding, estimate attack complexity and potential blast radius. Filter out false positives and explain your reasoning. Produce a ranked list with severity labels.",
+      "tier": "think",
+      "consensus": true,
+      "gate": { "type": "risk", "requiredWhen": "always" },
+      "output": { "kind": "analysis", "required": true }
+    },
+    {
+      "id": "remediation-plan",
+      "title": "Remediation Plan",
+      "goal": "For each confirmed vulnerability, propose a specific fix: name the exact file and line range, describe the code change needed, and explain why it closes the vulnerability. Prioritize fixes by severity (Critical first). Where a fix introduces new risk, call it out.",
+      "tier": "think",
+      "consensus": false,
+      "output": { "kind": "plan", "required": true }
+    },
+    {
+      "id": "summary",
+      "title": "Risk Assessment Summary",
+      "goal": "Produce a final risk assessment: total counts by severity (Critical/High/Medium/Low), top 3 priorities that must be addressed before the next release, and an estimate of residual risk after all proposed fixes are applied. Keep it concise — this is the executive summary.",
+      "tier": "think",
+      "consensus": false,
+      "output": { "kind": "summary", "required": true }
+    }
+  ]
+}

package/playbooks/security.json ADDED Viewed

@@ -0,0 +1,38 @@
+{
+  "name": "security-audit",
+  "description": "Structured security audit workflow: inventory, threat model, vulnerability scan, remediation plan",
+  "matchIntents": ["security"],
+  "steps": [
+    {
+      "id": "inventory",
+      "title": "Asset Inventory",
+      "goal": "Identify all security-sensitive files, endpoints, auth flows, and secret storage locations in the codebase",
+      "tier": "search",
+      "output": { "kind": "analysis", "required": true }
+    },
+    {
+      "id": "threat-model",
+      "title": "Threat Model",
+      "goal": "Map attack surfaces identified in inventory to STRIDE threat categories; call out critical trust boundaries and privilege escalation paths",
+      "tier": "think",
+      "consensus": true,
+      "output": { "kind": "analysis", "required": true }
+    },
+    {
+      "id": "vuln-scan",
+      "title": "Vulnerability Scan",
+      "goal": "Review each high-risk file from inventory for concrete vulnerabilities: injection, insecure defaults, secret leakage, broken auth, missing input validation",
+      "tier": "execute",
+      "output": { "kind": "findings", "required": true }
+    },
+    {
+      "id": "remediation-plan",
+      "title": "Remediation Plan",
+      "goal": "Produce a prioritised remediation plan: severity, affected file/line, recommended fix, and estimated effort for each finding",
+      "tier": "think",
+      "consensus": true,
+      "gate": "human-review",
+      "output": { "kind": "plan", "required": true }
+    }
+  ]
+}

package/playbooks/test-gen.json ADDED Viewed

@@ -0,0 +1,48 @@
+{
+  "name": "test-gen",
+  "description": "Targeted test generation: analyze behavior, design test cases, write tests, run them, and report coverage gaps",
+  "matchIntents": ["test"],
+  "steps": [
+    {
+      "id": "analyze",
+      "title": "Analyze Target Behavior",
+      "goal": "Identify the target code's observable behavior: all inputs and their valid ranges, outputs and their shapes, side effects (writes, network calls, mutations), error paths and what triggers them, and edge cases implied by the logic. Check existing test coverage to avoid duplicating what already exists.",
+      "tier": "search",
+      "consensus": false,
+      "output": { "kind": "analysis", "required": true }
+    },
+    {
+      "id": "design",
+      "title": "Design the Test Cases",
+      "goal": "Choose the appropriate test strategy: unit tests for isolated logic, integration tests for module boundaries, or end-to-end tests for full flows. Enumerate the specific test cases to write: happy path scenarios, edge cases (empty input, max values, nulls), error and exception paths, and boundary value cases. Justify the strategy choice based on what will give the most signal per test.",
+      "tier": "think",
+      "consensus": false,
+      "output": { "kind": "plan", "required": true }
+    },
+    {
+      "id": "generate",
+      "title": "Write the Tests",
+      "goal": "Write the tests following the design plan. Match the existing test framework, file naming conventions, describe/it or test() structure, assertion style, and helper patterns already used in this project. Each test should have a clear name that describes the scenario, not the implementation. Do not test internal state — test observable behavior.",
+      "tier": "execute",
+      "consensus": false,
+      "output": { "kind": "test", "required": true }
+    },
+    {
+      "id": "run",
+      "title": "Run Tests and Fix Test Bugs",
+      "goal": "Run the newly generated tests. If any fail, determine whether the failure is a bug in the test (wrong assertion, bad setup, incorrect expectation) or a real bug in the code under test. Fix test bugs only — do not change production code here unless a genuine pre-existing bug is discovered (if so, note it and leave it for a separate fix step). Ensure all generated tests pass.",
+      "tier": "execute",
+      "consensus": false,
+      "gate": { "type": "test", "requiredWhen": "always" },
+      "output": { "kind": "test", "required": true }
+    },
+    {
+      "id": "coverage",
+      "title": "Coverage Summary and Gap Analysis",
+      "goal": "Summarize the test coverage added: how many new test cases, which behaviors are now verified, and which code paths are exercised. Identify remaining gaps: behaviors that are hard to test (external dependencies, time-dependent logic, non-determinism) and explain why. Recommend follow-up tests if any critical paths remain untested.",
+      "tier": "think",
+      "consensus": false,
+      "output": { "kind": "summary", "required": true }
+    }
+  ]
+}