dual-brain 6.0.1 → 6.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ import { fileURLToPath } from 'node:url';
7
7
  import { execSync } from 'node:child_process';
8
8
 
9
9
  import {
10
- ensureProfile, loadProfile, runOnboarding,
10
+ ensureProfile, loadProfile, saveProfile, runOnboarding,
11
11
  rememberPreference, forgetPreference, getActivePreferences,
12
12
  getAvailableProviders, isSoloBrain, getHeadModel,
13
13
  } from '../src/profile.mjs';
@@ -15,11 +15,18 @@ import {
15
15
  import { detectTask } from '../src/detect.mjs';
16
16
 
17
17
  import {
18
- decideRoute, getAvailableModels, estimateBudgetPressure,
18
+ decideRoute, getAvailableModels,
19
19
  } from '../src/decide.mjs';
20
20
 
21
+ import {
22
+ getHealth, markHot, markHealthy, remainingCooldownMinutes, getSessionStats,
23
+ } from '../src/health.mjs';
24
+
21
25
  import { dispatch, detectRuntime, dispatchDualBrain } from '../src/dispatch.mjs';
22
26
 
27
+ import { loadRepoCache } from '../src/repo.mjs';
28
+ import { loadSession, saveSession, formatSessionCard } from '../src/session.mjs';
29
+
23
30
  // ─── Helpers ─────────────────────────────────────────────────────────────────
24
31
 
25
32
  const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -30,6 +37,7 @@ function readVersion() {
30
37
  }
31
38
  function flag(args, name) { const i = args.indexOf(name); return i !== -1 ? (args[i + 1] ?? true) : null; }
32
39
  function err(msg) { process.stderr.write(`Error: ${msg}\n`); process.exit(1); }
40
+ function vtrace(msg) { process.stderr.write(`[verbose] ${msg}\n`); }
33
41
 
34
42
  function printHelp() {
35
43
  console.log(`
@@ -41,20 +49,37 @@ Commands:
41
49
  go "task description" Detect → decide → dispatch a task
42
50
  --dry-run Show routing decision without executing
43
51
  --files a.mjs,b.mjs Provide file context for risk classification
44
- status Provider health, budget pressure, available models
52
+ --verbose, -v Print routing trace (intent, risk, health, model selection)
53
+ status Provider health, session stats, available models
54
+ --verbose, -v Also print profile file path and raw profile object
55
+ hot <provider> Manually mark all model classes for provider as hot
56
+ cool <provider> Manually clear hot state for a provider
45
57
  remember "preference" Save a project-scoped preference
46
58
  forget "preference" Remove a preference by fuzzy match
47
59
 
48
60
  Options:
49
61
  --version Print version
50
62
  --help Show this help
63
+ --verbose, -v Enable verbose routing trace output (stderr)
51
64
  `.trim());
52
65
  }
53
66
 
67
+ // ─── Card command (default) ──────────────────────────────────────────────────
68
+
69
+ async function cmdCard() {
70
+ const cwd = process.cwd();
71
+ const repo = loadRepoCache(cwd);
72
+ const session = loadSession(cwd);
73
+ const health = getHealth(cwd);
74
+ const card = formatSessionCard(session, repo, health);
75
+ console.log(card);
76
+ }
77
+
54
78
  // ─── Commands ─────────────────────────────────────────────────────────────────
55
79
 
56
80
  async function cmdInit() {
57
81
  const profile = await runOnboarding({ interactive: true });
82
+ saveProfile(profile, { cwd: process.cwd() });
58
83
  const rt = await detectRuntime();
59
84
  const providers = getAvailableProviders(profile);
60
85
  const providerSummary = providers.length
@@ -65,14 +90,15 @@ async function cmdInit() {
65
90
 
66
91
  async function cmdGo(args) {
67
92
  const dryRun = args.includes('--dry-run');
93
+ const verbose = args.includes('--verbose') || args.includes('-v');
68
94
  const filesRaw = flag(args, '--files');
69
95
  const files = filesRaw && typeof filesRaw === 'string'
70
96
  ? filesRaw.split(',').map(f => f.trim()).filter(Boolean)
71
97
  : [];
72
98
 
73
99
  // prompt is the first non-flag argument (or value after --dry-run which is boolean)
74
- const prompt = args.find(a => !a.startsWith('--') && a !== (filesRaw ?? ''));
75
- if (!prompt) err('Usage: dual-brain go "task description" [--dry-run] [--files a,b]');
100
+ const prompt = args.find(a => !a.startsWith('--') && !a.startsWith('-') && a !== (filesRaw ?? ''));
101
+ if (!prompt) err('Usage: dual-brain go "task description" [--dry-run] [--files a,b] [--verbose]');
76
102
 
77
103
  const cwd = process.cwd();
78
104
  const profile = await ensureProfile(cwd);
@@ -81,8 +107,44 @@ async function cmdGo(args) {
81
107
  // Print the one-sentence classification
82
108
  console.log(detection.explanation);
83
109
 
110
+ // Verbose: emit detection trace before routing decision
111
+ if (verbose) {
112
+ vtrace(`Intent: ${detection.intent} | Risk: ${detection.risk} | Complexity: ${detection.complexity} | Effort: ${detection.effort ?? 'n/a'}`);
113
+ vtrace(`Tier: ${detection.tier} | Files: ${detection.fileCount ?? files.length} | Requires write: ${detection.requiresWrite}`);
114
+ }
115
+
116
+ // Verbose: emit provider health scores before dispatch
117
+ if (verbose) {
118
+ const providers = getAvailableProviders(profile);
119
+ const { states } = getHealth(cwd);
120
+ const providerScores = ['claude', 'openai'].map(name => {
121
+ const enabled = providers.some(p => p.name === name);
122
+ if (!enabled) return `${name}=unavailable`;
123
+ // Find any state entry for this provider
124
+ const statuses = Object.entries(states)
125
+ .filter(([k]) => k.startsWith(`${name}:`))
126
+ .map(([, v]) => v.status);
127
+ const worst = statuses.includes('hot') ? 'hot'
128
+ : statuses.includes('probing') ? 'probing'
129
+ : statuses.includes('degraded') ? 'degraded'
130
+ : 'healthy';
131
+ return `${name}=${worst}`;
132
+ }).join(' ');
133
+ vtrace(`Provider health: ${providerScores}`);
134
+ }
135
+
84
136
  const decision = decideRoute({ profile, detection, cwd });
85
137
 
138
+ // Verbose: emit model selection and dual-brain rationale
139
+ if (verbose) {
140
+ const modelLabel = decision.effort ? `${decision.model} (${decision.effort})` : decision.model;
141
+ const modelStatus = getAvailableModels(profile)[decision.provider]?.includes(decision.model)
142
+ ? 'available, matches tier'
143
+ : 'selected';
144
+ vtrace(`Model selection: ${modelLabel} (${modelStatus})`);
145
+ vtrace(`Dual-brain: ${decision.dualBrain ? 'yes' : 'no'} (${isSoloBrain(profile) ? 'solo provider' : 'dual provider'}, ${detection.risk} risk)`);
146
+ }
147
+
86
148
  // Print routing table
87
149
  console.log(` provider : ${decision.provider}`);
88
150
  console.log(` model : ${decision.model}${decision.effort ? ' (' + decision.effort + ')' : ''}`);
@@ -102,38 +164,85 @@ async function cmdGo(args) {
102
164
  console.log(`\nConsensus: ${result.consensus}`);
103
165
  if (result.claude?.summary) console.log(`Claude : ${result.claude.summary}`);
104
166
  if (result.openai?.summary) console.log(`OpenAI : ${result.openai.summary}`);
167
+ // Save session state
168
+ saveSession({
169
+ objective: prompt,
170
+ branch: null,
171
+ filesChanged: files,
172
+ commandsRun: [`dual-brain go "${prompt}"`],
173
+ lastResult: { status: 'success', summary: result.consensus || 'dual-brain complete' },
174
+ provider: decision.provider,
175
+ nextAction: null,
176
+ }, cwd);
105
177
  } else {
106
178
  result = await dispatch({ decision, prompt, files, cwd });
107
179
  const statusLine = result.status === 'completed' ? 'Done' : `Failed (exit ${result.exitCode})`;
108
180
  console.log(`\n${statusLine} in ${(result.durationMs / 1000).toFixed(1)}s`);
109
181
  if (result.summary) console.log(result.summary);
110
182
  if (result.error) process.stderr.write(`${result.error}\n`);
111
- if (result.status !== 'completed') process.exit(1); }
183
+ // Save session state regardless of success/failure
184
+ saveSession({
185
+ objective: prompt,
186
+ branch: null,
187
+ filesChanged: files,
188
+ commandsRun: [`dual-brain go "${prompt}"`],
189
+ lastResult: {
190
+ status: result.status === 'completed' ? 'success' : 'failure',
191
+ summary: result.summary || (result.status === 'completed' ? 'completed' : `exit ${result.exitCode}`),
192
+ },
193
+ provider: decision.provider,
194
+ nextAction: null,
195
+ }, cwd);
196
+ if (result.status !== 'completed') process.exit(1);
197
+ }
112
198
  }
113
199
 
114
- async function cmdStatus() {
200
+ async function cmdStatus(args = []) {
201
+ const verbose = args.includes('--verbose') || args.includes('-v');
115
202
  const cwd = process.cwd();
116
203
  const profile = loadProfile(cwd);
117
204
  const rt = await detectRuntime();
118
205
  const providers = getAvailableProviders(profile);
119
- const pressure = estimateBudgetPressure(profile, cwd);
120
206
  const available = getAvailableModels(profile);
121
207
  const prefs = getActivePreferences(cwd);
208
+ const { states } = getHealth(cwd);
209
+ const sessionStats = getSessionStats(cwd);
122
210
 
123
211
  console.log('=== Dual-Brain Status ===\n');
124
212
 
125
- // Providers
213
+ // Providers + health
126
214
  console.log('Providers:');
127
215
  if (providers.length === 0) {
128
216
  console.log(' (none configured — run: dual-brain init)');
129
217
  } else {
130
218
  for (const p of providers) {
131
219
  const label = p.name === 'claude' ? 'Claude' : 'OpenAI';
132
- const pct = Math.round((pressure[p.name] ?? 0) * 100);
133
- console.log(` ${label} plan=${p.plan} budget=${pct}% used`);
220
+ // Collect all model-class states for this provider
221
+ const provStates = Object.entries(states)
222
+ .filter(([k]) => k.startsWith(`${p.name}:`));
223
+ const sess = sessionStats[p.name] ?? { calls: 0, tokens: 0 };
224
+
225
+ if (provStates.length === 0) {
226
+ console.log(` ${label} plan=${p.plan} status=healthy calls=${sess.calls} tokens=${sess.tokens}`);
227
+ } else {
228
+ for (const [k, st] of provStates) {
229
+ const modelClass = k.split(':').slice(1).join(':');
230
+ let statusStr = st.status;
231
+ if (st.status === 'hot') {
232
+ const remaining = remainingCooldownMinutes(p.name, modelClass, cwd);
233
+ statusStr = remaining > 0 ? `hot (retry in ${remaining}m)` : 'hot (cooling)';
234
+ }
235
+ console.log(` ${label} plan=${p.plan} model=${modelClass} status=${statusStr} calls=${sess.calls} tokens=${sess.tokens}`);
236
+ }
237
+ }
134
238
  }
135
239
  }
136
240
 
241
+ // Session totals
242
+ const totalCalls = Object.values(sessionStats).reduce((s, v) => s + v.calls, 0);
243
+ const totalTokens = Object.values(sessionStats).reduce((s, v) => s + v.tokens, 0);
244
+ console.log(`\nSession: ${totalCalls} dispatch${totalCalls !== 1 ? 'es' : ''}, ${totalTokens} tokens observed`);
245
+
137
246
  // Models
138
247
  console.log('\nAvailable models:');
139
248
  if (available.claude.length) console.log(` Claude : ${available.claude.join(', ')}`);
@@ -154,16 +263,62 @@ async function cmdStatus() {
154
263
  console.log(`\nPreferences: ${prefs.length ? '' : '(none)'}`);
155
264
  for (const p of prefs) console.log(` [${p.scope}] ${p.text}`);
156
265
 
266
+ // Verbose: profile file path and raw object
267
+ if (verbose) {
268
+ const { homedir } = await import('node:os');
269
+ const globalPath = join(homedir(), '.config', 'dual-brain', 'profile.json');
270
+ const projectPath = join(cwd, '.dualbrain', 'profile.json');
271
+ const { existsSync } = await import('node:fs');
272
+ const loadedFrom = existsSync(projectPath) ? projectPath : existsSync(globalPath) ? globalPath : '(defaults)';
273
+ vtrace(`Profile file: ${loadedFrom}`);
274
+ vtrace(`Raw profile:\n${JSON.stringify(profile, null, 2)}`);
275
+ }
276
+
157
277
  // Update check
158
278
  try {
159
279
  const localVer = readVersion();
160
280
  const remoteVer = execSync('npm view dual-brain version 2>/dev/null', { timeout: 5000 }).toString().trim();
161
- if (remoteVer && remoteVer !== localVer) {
162
- console.log(`\nUpdate available: npm i -g dual-brain@latest (${localVer} → ${remoteVer})`);
281
+ if (remoteVer) {
282
+ const localParts = localVer.split('.').map(Number);
283
+ const remoteParts = remoteVer.split('.').map(Number);
284
+ const updateAvailable =
285
+ remoteParts[0] > localParts[0]
286
+ || (remoteParts[0] === localParts[0] && remoteParts[1] > localParts[1])
287
+ || (remoteParts[0] === localParts[0] && remoteParts[1] === localParts[1] && remoteParts[2] > localParts[2]);
288
+ if (updateAvailable) {
289
+ console.log(`\nUpdate available: npm i -g dual-brain@latest (${localVer} → ${remoteVer})`);
290
+ }
163
291
  }
164
292
  } catch { /* network unavailable — skip */ }
165
293
  }
166
294
 
295
+ // ─── cmdHot / cmdCool ─────────────────────────────────────────────────────────
296
+
297
+ const PROVIDER_MODEL_CLASSES = {
298
+ claude: ['haiku', 'sonnet', 'opus'],
299
+ openai: ['o4-mini', 'o3', 'gpt-4.1', 'gpt-4.1-mini', 'gpt-5.4', 'gpt-5.5'],
300
+ };
301
+
302
+ function cmdHot(providerArg) {
303
+ if (!providerArg) err('Usage: dual-brain hot <provider> (claude | openai)');
304
+ const provider = providerArg.toLowerCase();
305
+ const classes = PROVIDER_MODEL_CLASSES[provider];
306
+ if (!classes) err(`Unknown provider: ${provider}. Use "claude" or "openai".`);
307
+ const cwd = process.cwd();
308
+ for (const mc of classes) markHot(provider, mc, cwd);
309
+ console.log(`Marked ${classes.length} model classes as hot for ${provider}.`);
310
+ }
311
+
312
+ function cmdCool(providerArg) {
313
+ if (!providerArg) err('Usage: dual-brain cool <provider> (claude | openai)');
314
+ const provider = providerArg.toLowerCase();
315
+ const classes = PROVIDER_MODEL_CLASSES[provider];
316
+ if (!classes) err(`Unknown provider: ${provider}. Use "claude" or "openai".`);
317
+ const cwd = process.cwd();
318
+ for (const mc of classes) markHealthy(provider, mc, cwd);
319
+ console.log(`Cleared hot state for all ${provider} model classes.`);
320
+ }
321
+
167
322
  async function cmdInstall() {
168
323
  const { spawnSync } = await import('child_process');
169
324
  const result = spawnSync('node', [join(__dirname, '..', 'install.mjs')], { stdio: 'inherit', cwd: process.cwd() });
@@ -188,13 +343,16 @@ async function main() {
188
343
  const args = process.argv.slice(2);
189
344
  const cmd = args[0];
190
345
 
191
- if (!cmd || cmd === '--help' || cmd === '-h') { printHelp(); return; }
346
+ if (cmd === '--help' || cmd === '-h') { printHelp(); return; }
347
+ if (!cmd) { await cmdCard(); return; }
192
348
  if (cmd === '--version' || cmd === '-v') { console.log(readVersion()); return; }
193
349
 
194
350
  if (cmd === 'init') { await cmdInit(); return; }
195
351
  if (cmd === 'install') { await cmdInstall(); return; }
196
352
  if (cmd === 'go') { await cmdGo(args.slice(1)); return; }
197
- if (cmd === 'status') { await cmdStatus(); return; }
353
+ if (cmd === 'status') { await cmdStatus(args.slice(1)); return; }
354
+ if (cmd === 'hot') { cmdHot(args[1]); return; }
355
+ if (cmd === 'cool') { cmdCool(args[1]); return; }
198
356
  if (cmd === 'remember') { cmdRemember(args[1]); return; }
199
357
  if (cmd === 'forget') { cmdForget(args[1]); return; }
200
358
 
@@ -80,6 +80,12 @@ if [[ "${TOOL}" == "Bash" ]]; then
80
80
  exit 2
81
81
  fi
82
82
 
83
+ # Interpreter one-liners that can write files (node -e, python -c, perl -e, ruby -e)
84
+ if printf '%s' "${CMD}" | grep -qE '(^|[[:space:];|&])(node[[:space:]]+(--eval|-e)|python3?[[:space:]]+-c|perl[[:space:]]+-e|ruby[[:space:]]+-e)[[:space:]]'; then
85
+ echo "HEAD cannot implement directly (interpreter one-liner). Use: node hooks/dispatch.mjs --task \"description\"" >&2
86
+ exit 2
87
+ fi
88
+
83
89
  # mv / cp where the destination looks like a source code file
84
90
  if printf '%s' "${CMD}" | grep -qE '(^|[[:space:];|&])(mv|cp)[[:space:]].*\.(js|mjs|cjs|ts|tsx|py|sh|json|yaml|yml|toml|rb|go|rs|java|c|cpp|h|css|html|sql)([[:space:]]|$)'; then
85
91
  echo "HEAD cannot implement directly (mv/cp to source file). Use: node hooks/dispatch.mjs --task \"description\"" >&2
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dual-brain",
3
- "version": "6.0.1",
3
+ "version": "6.1.0",
4
4
  "description": "AI orchestration across Claude + OpenAI subscriptions — smart routing, budget awareness, and dual-brain collaboration",
5
5
  "type": "module",
6
6
  "bin": {
@@ -12,7 +12,14 @@
12
12
  "./profile": "./src/profile.mjs",
13
13
  "./detect": "./src/detect.mjs",
14
14
  "./decide": "./src/decide.mjs",
15
- "./dispatch": "./src/dispatch.mjs"
15
+ "./dispatch": "./src/dispatch.mjs",
16
+ "./playbook": "./src/playbook.mjs",
17
+ "./health": "./src/health.mjs",
18
+ "./repo": "./src/repo.mjs",
19
+ "./session": "./src/session.mjs",
20
+ "./decompose": "./src/decompose.mjs",
21
+ "./brief": "./src/brief.mjs",
22
+ "./redact": "./src/redact.mjs"
16
23
  },
17
24
  "keywords": [
18
25
  "claude-code",
@@ -33,7 +40,7 @@
33
40
  "scripts": {
34
41
  "test": "node hooks/test-orchestrator.mjs",
35
42
  "test:core": "node --test src/test.mjs",
36
- "postinstall": "node install.mjs"
43
+ "postinstall": "echo 'dual-brain installed. Run: dual-brain install (in your project) to set up hooks.'"
37
44
  },
38
45
  "engines": {
39
46
  "node": ">=20.0.0"
@@ -49,6 +56,7 @@
49
56
  "review-rules.md",
50
57
  "CLAUDE.md",
51
58
  "README.md",
52
- "LICENSE"
59
+ "LICENSE",
60
+ "playbooks/*.json"
53
61
  ]
54
62
  }
@@ -0,0 +1,49 @@
1
+ {
2
+ "name": "debug",
3
+ "description": "Structured bug resolution: reproduce, isolate, hypothesize root cause, fix minimally, verify with tests",
4
+ "matchIntents": ["debug", "fix"],
5
+ "steps": [
6
+ {
7
+ "id": "reproduce",
8
+ "title": "Reproduce the Failure",
9
+ "goal": "Find the failing code path. Identify the error message, stack trace, or unexpected behavior being reported. Locate the relevant source files, the entry point where the failure originates, and any existing tests that exercise this path. Confirm you understand the expected vs actual behavior.",
10
+ "tier": "search",
11
+ "consensus": false,
12
+ "output": { "kind": "analysis", "required": true }
13
+ },
14
+ {
15
+ "id": "isolate",
16
+ "title": "Isolate the Root Cause",
17
+ "goal": "Narrow down the root cause to a specific file, function, or line range. Trace the data flow from the failing callsite back to where the incorrect value or state originates. Check recent git changes to this code path. Identify the single most likely source of the problem before moving on.",
18
+ "tier": "search",
19
+ "consensus": false,
20
+ "output": { "kind": "analysis", "required": true }
21
+ },
22
+ {
23
+ "id": "hypothesize",
24
+ "title": "Form a Root Cause Hypothesis",
25
+ "goal": "Based on the isolated evidence, form a clear hypothesis about why the bug occurs. Consider: edge cases not handled, race conditions or ordering issues, incorrect assumptions about inputs or state, stale or shared mutable state, off-by-one errors, or API contract mismatches. State your hypothesis explicitly and explain what evidence supports it.",
26
+ "tier": "think",
27
+ "consensus": false,
28
+ "output": { "kind": "analysis", "required": true }
29
+ },
30
+ {
31
+ "id": "fix",
32
+ "title": "Implement the Minimal Fix",
33
+ "goal": "Implement the smallest change that fixes the bug according to the hypothesis. Do not refactor surrounding code, rename things, or improve unrelated areas. If a regression test for this bug does not exist, add one. The fix should be easy to review and easy to revert if wrong.",
34
+ "tier": "execute",
35
+ "consensus": false,
36
+ "gate": { "type": "diff-review", "requiredWhen": "high-risk" },
37
+ "output": { "kind": "patch", "required": true }
38
+ },
39
+ {
40
+ "id": "verify",
41
+ "title": "Verify Fix and Check for Regressions",
42
+ "goal": "Run the full test suite. Confirm the bug is no longer reproducible. Confirm no previously passing tests now fail. If regressions are found, determine whether they are related to the fix or pre-existing. Summarize: the root cause in one sentence, the fix applied, and the test evidence that the bug is resolved.",
43
+ "tier": "execute",
44
+ "consensus": false,
45
+ "gate": { "type": "test", "requiredWhen": "always" },
46
+ "output": { "kind": "summary", "required": true }
47
+ }
48
+ ]
49
+ }
@@ -0,0 +1,57 @@
1
+ {
2
+ "name": "refactor",
3
+ "description": "Safe, verified refactoring: map callers, lock invariants, plan steps, apply, test, and confirm behavior preservation",
4
+ "matchIntents": ["refactor"],
5
+ "steps": [
6
+ {
7
+ "id": "understand",
8
+ "title": "Map the Target Code",
9
+ "goal": "Map the target code: find all callers and call sites, direct and transitive dependencies, existing tests that cover it, and any observable side effects. Document the current behavior contract — what it accepts, what it returns, what it mutates, and what errors it may throw.",
10
+ "tier": "search",
11
+ "consensus": false,
12
+ "output": { "kind": "analysis", "required": true }
13
+ },
14
+ {
15
+ "id": "invariants",
16
+ "title": "Identify Behavioral Invariants",
17
+ "goal": "Based on the code map, identify the behavioral invariants that MUST be preserved through the refactor: public API surface (function signatures, exported names), return types and shapes, error handling contracts, ordering guarantees, and any side effects callers depend on. This list is the acceptance criterion for the refactor.",
18
+ "tier": "think",
19
+ "consensus": false,
20
+ "output": { "kind": "checklist", "required": true }
21
+ },
22
+ {
23
+ "id": "plan",
24
+ "title": "Design the Refactor",
25
+ "goal": "Design the refactoring as a sequence of small, independently verifiable steps. For each step, describe: what changes, what stays the same, and how to verify it didn't break anything. Avoid big-bang rewrites. Each step should leave the codebase in a working state.",
26
+ "tier": "think",
27
+ "consensus": true,
28
+ "gate": { "type": "approval", "requiredWhen": "always" },
29
+ "output": { "kind": "plan", "required": true }
30
+ },
31
+ {
32
+ "id": "apply",
33
+ "title": "Apply the Refactoring",
34
+ "goal": "Implement the refactoring changes following the approved plan. Make minimal edits — do not improve unrelated code, fix unrelated bugs, or change formatting outside the target. Preserve all invariants identified in the invariants step. Commit or stage changes step by step if the plan has multiple stages.",
35
+ "tier": "execute",
36
+ "consensus": false,
37
+ "output": { "kind": "patch", "required": true }
38
+ },
39
+ {
40
+ "id": "verify",
41
+ "title": "Run Tests and Fix Regressions",
42
+ "goal": "Run the full existing test suite. For any failures, determine whether they are real regressions (the refactor broke behavior) or expected test updates (tests were asserting on internal structure that legitimately changed). Fix real regressions immediately. Update tests only where the old test was testing implementation detail, not behavior.",
43
+ "tier": "execute",
44
+ "consensus": false,
45
+ "gate": { "type": "test", "requiredWhen": "always" },
46
+ "output": { "kind": "test", "required": true }
47
+ },
48
+ {
49
+ "id": "confirm",
50
+ "title": "Confirm Behavior Preservation",
51
+ "goal": "Review the final diff against the invariants checklist. Confirm each invariant is still satisfied. Summarize: what structural changes were made, what behavioral aspects are provably unchanged, and whether there are any remaining risks or follow-up tasks.",
52
+ "tier": "think",
53
+ "consensus": false,
54
+ "output": { "kind": "summary", "required": true }
55
+ }
56
+ ]
57
+ }
@@ -0,0 +1,57 @@
1
+ {
2
+ "name": "security-audit",
3
+ "description": "Systematic security review: inventory, threat model, vulnerability scan, ranked findings, remediation plan",
4
+ "matchIntents": ["security"],
5
+ "steps": [
6
+ {
7
+ "id": "inventory",
8
+ "title": "Inventory Security-Sensitive Files",
9
+ "goal": "Identify all security-sensitive files in the codebase: auth modules, secret storage, .env files, API key usage, token handling, encryption routines, and permission checks. List each file with a one-line note on its security role.",
10
+ "tier": "search",
11
+ "consensus": false,
12
+ "output": { "kind": "checklist", "required": true }
13
+ },
14
+ {
15
+ "id": "threat-model",
16
+ "title": "Threat Model",
17
+ "goal": "Analyze the attack surface of this codebase. Identify threat actors (external users, internal users, third-party services), attack vectors (inputs, APIs, files, network), and trust boundaries. Categorize threats using STRIDE (Spoofing, Tampering, Repudiation, Information Disclosure, Denial of Service, Elevation of Privilege) or a similar framework. Produce a structured threat model.",
18
+ "tier": "think",
19
+ "consensus": true,
20
+ "gate": { "type": "risk", "requiredWhen": "always" },
21
+ "output": { "kind": "analysis", "required": true }
22
+ },
23
+ {
24
+ "id": "vulnerability-scan",
25
+ "title": "Vulnerability Scan",
26
+ "goal": "Check the identified security-sensitive files for common vulnerabilities: hardcoded secrets or API keys, SQL injection, cross-site scripting (XSS), cross-site request forgery (CSRF), insecure deserialization, path traversal, missing or insufficient input validation, and improper error handling that leaks internals. List each finding with file, line reference, and a brief description.",
27
+ "tier": "search",
28
+ "consensus": false,
29
+ "output": { "kind": "checklist", "required": true }
30
+ },
31
+ {
32
+ "id": "exploitability-rank",
33
+ "title": "Exploitability Ranking",
34
+ "goal": "Rank the findings from the vulnerability scan by exploitability and impact using CVSS-style severity (Critical, High, Medium, Low). For each finding, estimate attack complexity and potential blast radius. Filter out false positives and explain your reasoning. Produce a ranked list with severity labels.",
35
+ "tier": "think",
36
+ "consensus": true,
37
+ "gate": { "type": "risk", "requiredWhen": "always" },
38
+ "output": { "kind": "analysis", "required": true }
39
+ },
40
+ {
41
+ "id": "remediation-plan",
42
+ "title": "Remediation Plan",
43
+ "goal": "For each confirmed vulnerability, propose a specific fix: name the exact file and line range, describe the code change needed, and explain why it closes the vulnerability. Prioritize fixes by severity (Critical first). Where a fix introduces new risk, call it out.",
44
+ "tier": "think",
45
+ "consensus": false,
46
+ "output": { "kind": "plan", "required": true }
47
+ },
48
+ {
49
+ "id": "summary",
50
+ "title": "Risk Assessment Summary",
51
+ "goal": "Produce a final risk assessment: total counts by severity (Critical/High/Medium/Low), top 3 priorities that must be addressed before the next release, and an estimate of residual risk after all proposed fixes are applied. Keep it concise — this is the executive summary.",
52
+ "tier": "think",
53
+ "consensus": false,
54
+ "output": { "kind": "summary", "required": true }
55
+ }
56
+ ]
57
+ }
@@ -0,0 +1,38 @@
1
+ {
2
+ "name": "security-audit",
3
+ "description": "Structured security audit workflow: inventory, threat model, vulnerability scan, remediation plan",
4
+ "matchIntents": ["security"],
5
+ "steps": [
6
+ {
7
+ "id": "inventory",
8
+ "title": "Asset Inventory",
9
+ "goal": "Identify all security-sensitive files, endpoints, auth flows, and secret storage locations in the codebase",
10
+ "tier": "search",
11
+ "output": { "kind": "analysis", "required": true }
12
+ },
13
+ {
14
+ "id": "threat-model",
15
+ "title": "Threat Model",
16
+ "goal": "Map attack surfaces identified in inventory to STRIDE threat categories; call out critical trust boundaries and privilege escalation paths",
17
+ "tier": "think",
18
+ "consensus": true,
19
+ "output": { "kind": "analysis", "required": true }
20
+ },
21
+ {
22
+ "id": "vuln-scan",
23
+ "title": "Vulnerability Scan",
24
+ "goal": "Review each high-risk file from inventory for concrete vulnerabilities: injection, insecure defaults, secret leakage, broken auth, missing input validation",
25
+ "tier": "execute",
26
+ "output": { "kind": "findings", "required": true }
27
+ },
28
+ {
29
+ "id": "remediation-plan",
30
+ "title": "Remediation Plan",
31
+ "goal": "Produce a prioritised remediation plan: severity, affected file/line, recommended fix, and estimated effort for each finding",
32
+ "tier": "think",
33
+ "consensus": true,
34
+ "gate": "human-review",
35
+ "output": { "kind": "plan", "required": true }
36
+ }
37
+ ]
38
+ }
@@ -0,0 +1,48 @@
1
+ {
2
+ "name": "test-gen",
3
+ "description": "Targeted test generation: analyze behavior, design test cases, write tests, run them, and report coverage gaps",
4
+ "matchIntents": ["test"],
5
+ "steps": [
6
+ {
7
+ "id": "analyze",
8
+ "title": "Analyze Target Behavior",
9
+ "goal": "Identify the target code's observable behavior: all inputs and their valid ranges, outputs and their shapes, side effects (writes, network calls, mutations), error paths and what triggers them, and edge cases implied by the logic. Check existing test coverage to avoid duplicating what already exists.",
10
+ "tier": "search",
11
+ "consensus": false,
12
+ "output": { "kind": "analysis", "required": true }
13
+ },
14
+ {
15
+ "id": "design",
16
+ "title": "Design the Test Cases",
17
+ "goal": "Choose the appropriate test strategy: unit tests for isolated logic, integration tests for module boundaries, or end-to-end tests for full flows. Enumerate the specific test cases to write: happy path scenarios, edge cases (empty input, max values, nulls), error and exception paths, and boundary value cases. Justify the strategy choice based on what will give the most signal per test.",
18
+ "tier": "think",
19
+ "consensus": false,
20
+ "output": { "kind": "plan", "required": true }
21
+ },
22
+ {
23
+ "id": "generate",
24
+ "title": "Write the Tests",
25
+ "goal": "Write the tests following the design plan. Match the existing test framework, file naming conventions, describe/it or test() structure, assertion style, and helper patterns already used in this project. Each test should have a clear name that describes the scenario, not the implementation. Do not test internal state — test observable behavior.",
26
+ "tier": "execute",
27
+ "consensus": false,
28
+ "output": { "kind": "test", "required": true }
29
+ },
30
+ {
31
+ "id": "run",
32
+ "title": "Run Tests and Fix Test Bugs",
33
+ "goal": "Run the newly generated tests. If any fail, determine whether the failure is a bug in the test (wrong assertion, bad setup, incorrect expectation) or a real bug in the code under test. Fix test bugs only — do not change production code here unless a genuine pre-existing bug is discovered (if so, note it and leave it for a separate fix step). Ensure all generated tests pass.",
34
+ "tier": "execute",
35
+ "consensus": false,
36
+ "gate": { "type": "test", "requiredWhen": "always" },
37
+ "output": { "kind": "test", "required": true }
38
+ },
39
+ {
40
+ "id": "coverage",
41
+ "title": "Coverage Summary and Gap Analysis",
42
+ "goal": "Summarize the test coverage added: how many new test cases, which behaviors are now verified, and which code paths are exercised. Identify remaining gaps: behaviors that are hard to test (external dependencies, time-dependent logic, non-determinism) and explain why. Recommend follow-up tests if any critical paths remain untested.",
43
+ "tier": "think",
44
+ "consensus": false,
45
+ "output": { "kind": "summary", "required": true }
46
+ }
47
+ ]
48
+ }