dual-brain 7.1.2 → 7.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/dual-brain.mjs +38 -28
- package/mcp-server/index.mjs +1 -1
- package/package.json +44 -4
- package/src/decide.mjs +32 -0
- package/src/index.mjs +1 -1
- package/src/profile.mjs +7 -4
- package/src/session.mjs +50 -10
- package/src/tui.mjs +10 -1
- package/hooks/agent-fleet.mjs +0 -659
- package/hooks/context-guard.mjs +0 -468
- package/hooks/dag-scheduler.mjs +0 -1249
- package/hooks/head-guard.sh +0 -41
- package/hooks/hook-dispatch.mjs +0 -254
- package/hooks/ledger-analysis.mjs +0 -337
- package/hooks/parallelism-scaler.mjs +0 -572
- package/hooks/quality-tiers.mjs +0 -642
- package/src/test.mjs +0 -1374
package/src/test.mjs
DELETED
|
@@ -1,1374 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* test.mjs — Test suite for core dual-brain modules.
|
|
4
|
-
* Run: node --test src/test.mjs
|
|
5
|
-
*
|
|
6
|
-
* Covers: profile, detect, decide, dispatch (+ CLI dry-run smoke tests).
|
|
7
|
-
* Uses node:test + node:assert only — no external dependencies.
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
|
-
import { describe, it, before, after, afterEach } from 'node:test';
|
|
11
|
-
import assert from 'node:assert/strict';
|
|
12
|
-
import { mkdirSync, mkdtempSync, rmSync, existsSync, readFileSync, writeFileSync } from 'node:fs';
|
|
13
|
-
import { join } from 'node:path';
|
|
14
|
-
import { tmpdir } from 'node:os';
|
|
15
|
-
import { spawn } from 'node:child_process';
|
|
16
|
-
import { fileURLToPath } from 'node:url';
|
|
17
|
-
import { dirname } from 'node:path';
|
|
18
|
-
|
|
19
|
-
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
20
|
-
const ROOT = join(__dirname, '..');
|
|
21
|
-
const BIN = join(ROOT, 'bin', 'dual-brain.mjs');
|
|
22
|
-
const PKG = join(ROOT, 'package.json');
|
|
23
|
-
|
|
24
|
-
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
|
25
|
-
|
|
26
|
-
/**
 * Create a fresh, uniquely named scratch directory under the OS temp dir.
 *
 * mkdtempSync picks the random suffix and creates the directory in one step,
 * and fails rather than reusing an existing directory. The previous hand-built
 * name (Date.now() + Math.random()) combined with a recursive mkdir would have
 * silently shared a directory between tests on a name collision.
 *
 * @returns {string} Path of the newly created directory.
 */
function makeTmp() {
  return mkdtempSync(join(tmpdir(), 'dual-brain-test-'));
}
|
|
31
|
-
|
|
32
|
-
/**
 * Recursively delete a scratch directory used by a test.
 *
 * Does nothing when `dir` is falsy (e.g. setup never assigned it). `force: true`
 * already makes rmSync ignore a path that does not exist, so no separate
 * existence check is performed; checking first only adds a gap between the
 * check and the removal.
 *
 * @param {string | null | undefined} dir - Directory to remove.
 * @returns {void}
 */
function removeTmp(dir) {
  if (!dir) return;
  rmSync(dir, { recursive: true, force: true });
}
|
|
35
|
-
|
|
36
|
-
/**
 * Spawn the current Node binary (`process.execPath`) with `args` and collect
 * everything it writes to stdout and stderr.
 *
 * @param {string[]} args - Arguments passed to the Node binary.
 * @param {object} [opts] - Extra options forwarded to `spawn`. They are spread
 *   after the default `stdio`, so a caller-supplied `stdio` takes precedence.
 * @returns {Promise<{ code: number | null, stdout: string, stderr: string }>}
 *   Resolves when the child's stdio has closed. Rejects if the child could not
 *   be spawned (for example when `opts.cwd` does not exist).
 */
function run(args, opts = {}) {
  return new Promise((resolve, reject) => {
    const proc = spawn(process.execPath, args, {
      stdio: ['ignore', 'pipe', 'pipe'],
      ...opts,
    });
    let stdout = '';
    let stderr = '';

    // Decode at the stream level so a multi-byte UTF-8 character that straddles
    // two chunks is not corrupted by per-chunk Buffer → string conversion.
    // The streams are null when the caller overrides `stdio`, hence the guards.
    if (proc.stdout) {
      proc.stdout.setEncoding('utf8');
      proc.stdout.on('data', (chunk) => { stdout += chunk; });
    }
    if (proc.stderr) {
      proc.stderr.setEncoding('utf8');
      proc.stderr.on('data', (chunk) => { stderr += chunk; });
    }

    // Without an 'error' listener a failed spawn surfaces as an uncaught
    // exception; route it into the returned promise instead. If 'close' fires
    // afterwards, the resolve() below is a no-op on the already-settled promise.
    proc.on('error', reject);
    proc.on('close', (code) => resolve({ code, stdout, stderr }));
  });
}
|
|
50
|
-
|
|
51
|
-
// ─── Import modules under test ────────────────────────────────────────────────
|
|
52
|
-
|
|
53
|
-
import {
|
|
54
|
-
loadProfile, saveProfile,
|
|
55
|
-
rememberPreference, forgetPreference,
|
|
56
|
-
getAvailableProviders, isSoloBrain, getHeadModel,
|
|
57
|
-
} from './profile.mjs';
|
|
58
|
-
|
|
59
|
-
import {
|
|
60
|
-
classifyIntent, classifyRisk, estimateComplexity,
|
|
61
|
-
detectTask, inferTier,
|
|
62
|
-
} from './detect.mjs';
|
|
63
|
-
|
|
64
|
-
import {
|
|
65
|
-
decideRoute, getAvailableModels, shouldDualBrain, explainDecision, parsePreferences,
|
|
66
|
-
} from './decide.mjs';
|
|
67
|
-
|
|
68
|
-
import {
|
|
69
|
-
buildCommand, compressResult, detectRuntime,
|
|
70
|
-
validateDispatch, checkWorktreeClean, getRetryBudget,
|
|
71
|
-
} from './dispatch.mjs';
|
|
72
|
-
|
|
73
|
-
import { redact } from './redact.mjs';
|
|
74
|
-
import { markHot, markHealthy } from './health.mjs';
|
|
75
|
-
import { decompose } from './decompose.mjs';
|
|
76
|
-
import { loadPlaybook } from './playbook.mjs';
|
|
77
|
-
import { formatSessionCard } from './session.mjs';
|
|
78
|
-
|
|
79
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
80
|
-
// PROFILE TESTS
|
|
81
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
82
|
-
|
|
83
|
-
describe('profile', () => {
|
|
84
|
-
let tmp;
|
|
85
|
-
before(() => { tmp = makeTmp(); });
|
|
86
|
-
after(() => removeTmp(tmp));
|
|
87
|
-
|
|
88
|
-
it('loadProfile returns defaults when no config exists', () => {
|
|
89
|
-
const profile = loadProfile(tmp);
|
|
90
|
-
assert.equal(profile.schemaVersion, 1);
|
|
91
|
-
assert.equal(profile.mode, 'auto');
|
|
92
|
-
assert.equal(profile.bias, 'balanced');
|
|
93
|
-
assert.ok(Array.isArray(profile.preferences));
|
|
94
|
-
assert.equal(profile.preferences.length, 0);
|
|
95
|
-
assert.ok(profile.providers);
|
|
96
|
-
assert.ok(profile.providers.claude);
|
|
97
|
-
assert.equal(profile.providers.claude.enabled, true);
|
|
98
|
-
assert.equal(profile.providers.openai.enabled, false);
|
|
99
|
-
});
|
|
100
|
-
|
|
101
|
-
it('saveProfile + loadProfile round-trips correctly', () => {
|
|
102
|
-
const dir = makeTmp();
|
|
103
|
-
try {
|
|
104
|
-
const profile = loadProfile(dir); // get defaults
|
|
105
|
-
profile.mode = 'dual';
|
|
106
|
-
profile.bias = 'quality-first';
|
|
107
|
-
profile.providers.openai.enabled = true;
|
|
108
|
-
profile.providers.openai.plan = '$100';
|
|
109
|
-
saveProfile(profile, { cwd: dir });
|
|
110
|
-
const loaded = loadProfile(dir);
|
|
111
|
-
assert.equal(loaded.mode, 'dual');
|
|
112
|
-
assert.equal(loaded.bias, 'quality-first');
|
|
113
|
-
assert.equal(loaded.providers.openai.enabled, true);
|
|
114
|
-
assert.equal(loaded.providers.openai.plan, '$100');
|
|
115
|
-
assert.equal(loaded.schemaVersion, 1);
|
|
116
|
-
} finally {
|
|
117
|
-
removeTmp(dir);
|
|
118
|
-
}
|
|
119
|
-
});
|
|
120
|
-
|
|
121
|
-
it('migrateProfile handles missing fields (schemaVersion 0 → 1)', () => {
|
|
122
|
-
// migrateProfile is not exported directly; test indirectly via loadProfile which
|
|
123
|
-
// calls migrateProfile internally when reading a saved profile.
|
|
124
|
-
const dir = makeTmp();
|
|
125
|
-
try {
|
|
126
|
-
// Write a raw v0-style profile (no schemaVersion, no mode/bias/preferences)
|
|
127
|
-
const raw = {
|
|
128
|
-
providers: {
|
|
129
|
-
claude: { plan: '$20', enabled: true },
|
|
130
|
-
openai: { plan: '$20', enabled: false },
|
|
131
|
-
},
|
|
132
|
-
};
|
|
133
|
-
const profileDir = join(dir, '.dualbrain');
|
|
134
|
-
mkdirSync(profileDir, { recursive: true });
|
|
135
|
-
// writeFileSync is already imported at the top of this file from 'node:fs'
|
|
136
|
-
writeFileSync(join(profileDir, 'profile.json'), JSON.stringify(raw));
|
|
137
|
-
const profile = loadProfile(dir);
|
|
138
|
-
assert.equal(profile.schemaVersion, 1);
|
|
139
|
-
assert.equal(profile.mode, 'auto');
|
|
140
|
-
assert.equal(profile.bias, 'balanced');
|
|
141
|
-
assert.ok(Array.isArray(profile.preferences));
|
|
142
|
-
} finally {
|
|
143
|
-
removeTmp(dir);
|
|
144
|
-
}
|
|
145
|
-
});
|
|
146
|
-
|
|
147
|
-
it('rememberPreference adds a preference', () => {
|
|
148
|
-
const dir = makeTmp();
|
|
149
|
-
try {
|
|
150
|
-
const profile = rememberPreference('always use strict TypeScript', { cwd: dir, scope: 'project' });
|
|
151
|
-
assert.equal(profile.preferences.length, 1);
|
|
152
|
-
assert.equal(profile.preferences[0].text, 'always use strict TypeScript');
|
|
153
|
-
assert.equal(profile.preferences[0].enabled, true);
|
|
154
|
-
assert.equal(profile.preferences[0].scope, 'project');
|
|
155
|
-
} finally {
|
|
156
|
-
removeTmp(dir);
|
|
157
|
-
}
|
|
158
|
-
});
|
|
159
|
-
|
|
160
|
-
it('rememberPreference deduplicates (updates existing match)', () => {
|
|
161
|
-
const dir = makeTmp();
|
|
162
|
-
try {
|
|
163
|
-
rememberPreference('use strict TypeScript', { cwd: dir, scope: 'project' });
|
|
164
|
-
const profile = rememberPreference('use strict TypeScript always', { cwd: dir, scope: 'project' });
|
|
165
|
-
// Should update, not append a second entry
|
|
166
|
-
assert.equal(profile.preferences.length, 1);
|
|
167
|
-
} finally {
|
|
168
|
-
removeTmp(dir);
|
|
169
|
-
}
|
|
170
|
-
});
|
|
171
|
-
|
|
172
|
-
it('forgetPreference removes by substring match', () => {
|
|
173
|
-
const dir = makeTmp();
|
|
174
|
-
try {
|
|
175
|
-
rememberPreference('always lint on save', { cwd: dir, scope: 'project' });
|
|
176
|
-
rememberPreference('prefer short functions', { cwd: dir, scope: 'project' });
|
|
177
|
-
const profile = forgetPreference('lint on save', dir);
|
|
178
|
-
assert.equal(profile.preferences.length, 1);
|
|
179
|
-
assert.equal(profile.preferences[0].text, 'prefer short functions');
|
|
180
|
-
} finally {
|
|
181
|
-
removeTmp(dir);
|
|
182
|
-
}
|
|
183
|
-
});
|
|
184
|
-
|
|
185
|
-
it('getAvailableProviders returns only enabled providers', () => {
|
|
186
|
-
const profile = {
|
|
187
|
-
providers: {
|
|
188
|
-
claude: { plan: '$20', enabled: true },
|
|
189
|
-
openai: { plan: '$100', enabled: false },
|
|
190
|
-
},
|
|
191
|
-
};
|
|
192
|
-
const providers = getAvailableProviders(profile);
|
|
193
|
-
assert.equal(providers.length, 1);
|
|
194
|
-
assert.equal(providers[0].name, 'claude');
|
|
195
|
-
});
|
|
196
|
-
|
|
197
|
-
it('getAvailableProviders returns both when both enabled', () => {
|
|
198
|
-
const profile = {
|
|
199
|
-
providers: {
|
|
200
|
-
claude: { plan: '$20', enabled: true },
|
|
201
|
-
openai: { plan: '$100', enabled: true },
|
|
202
|
-
},
|
|
203
|
-
};
|
|
204
|
-
const providers = getAvailableProviders(profile);
|
|
205
|
-
assert.equal(providers.length, 2);
|
|
206
|
-
});
|
|
207
|
-
|
|
208
|
-
it('isSoloBrain returns true with one provider', () => {
|
|
209
|
-
const profile = {
|
|
210
|
-
providers: {
|
|
211
|
-
claude: { plan: '$20', enabled: true },
|
|
212
|
-
openai: { plan: '$20', enabled: false },
|
|
213
|
-
},
|
|
214
|
-
};
|
|
215
|
-
assert.equal(isSoloBrain(profile), true);
|
|
216
|
-
});
|
|
217
|
-
|
|
218
|
-
it('isSoloBrain returns false with two providers', () => {
|
|
219
|
-
const profile = {
|
|
220
|
-
providers: {
|
|
221
|
-
claude: { plan: '$20', enabled: true },
|
|
222
|
-
openai: { plan: '$20', enabled: true },
|
|
223
|
-
},
|
|
224
|
-
};
|
|
225
|
-
assert.equal(isSoloBrain(profile), false);
|
|
226
|
-
});
|
|
227
|
-
|
|
228
|
-
it('getHeadModel returns sonnet for solo-claude', () => {
|
|
229
|
-
const profile = {
|
|
230
|
-
providers: {
|
|
231
|
-
claude: { plan: '$20', enabled: true },
|
|
232
|
-
openai: { plan: '$20', enabled: false },
|
|
233
|
-
},
|
|
234
|
-
};
|
|
235
|
-
assert.equal(getHeadModel(profile), 'sonnet');
|
|
236
|
-
});
|
|
237
|
-
|
|
238
|
-
it('getHeadModel returns gpt-5.4 for solo-openai', () => {
|
|
239
|
-
const profile = {
|
|
240
|
-
providers: {
|
|
241
|
-
claude: { plan: '$20', enabled: false },
|
|
242
|
-
openai: { plan: '$20', enabled: true },
|
|
243
|
-
},
|
|
244
|
-
};
|
|
245
|
-
assert.equal(getHeadModel(profile), 'gpt-5.4');
|
|
246
|
-
});
|
|
247
|
-
|
|
248
|
-
it('getHeadModel returns sonnet for dual profile (claude is default highest when ranks tie)', () => {
|
|
249
|
-
// Both at $20 rank 1 — reduce() keeps first when equal, which is claude → sonnet
|
|
250
|
-
const profile = {
|
|
251
|
-
providers: {
|
|
252
|
-
claude: { plan: '$20', enabled: true },
|
|
253
|
-
openai: { plan: '$20', enabled: true },
|
|
254
|
-
},
|
|
255
|
-
};
|
|
256
|
-
const model = getHeadModel(profile);
|
|
257
|
-
// sonnet (claude wins tie) or gpt-5.4 (openai) — both are valid depending on iteration order
|
|
258
|
-
assert.ok(['sonnet', 'gpt-5.4'].includes(model), `Unexpected model: ${model}`);
|
|
259
|
-
});
|
|
260
|
-
|
|
261
|
-
it('getHeadModel returns gpt-5.4 for dual profile when openai has higher plan', () => {
|
|
262
|
-
const profile = {
|
|
263
|
-
providers: {
|
|
264
|
-
claude: { plan: '$20', enabled: true }, // rank 1
|
|
265
|
-
openai: { plan: '$100', enabled: true }, // rank 2
|
|
266
|
-
},
|
|
267
|
-
};
|
|
268
|
-
assert.equal(getHeadModel(profile), 'gpt-5.4');
|
|
269
|
-
});
|
|
270
|
-
});
|
|
271
|
-
|
|
272
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
273
|
-
// DETECT TESTS
|
|
274
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
275
|
-
|
|
276
|
-
describe('detect', () => {
|
|
277
|
-
describe('classifyIntent', () => {
|
|
278
|
-
it('"fix the bug" → edit', () => {
|
|
279
|
-
assert.equal(classifyIntent('fix the bug'), 'edit');
|
|
280
|
-
});
|
|
281
|
-
|
|
282
|
-
it('"explain this function" → explain', () => {
|
|
283
|
-
assert.equal(classifyIntent('explain this function'), 'explain');
|
|
284
|
-
});
|
|
285
|
-
|
|
286
|
-
it('"refactor auth module" → security (security has higher priority than refactor)', () => {
|
|
287
|
-
// "auth" matches the security regex and security ranks above refactor in INTENT_PRIORITY.
|
|
288
|
-
assert.equal(classifyIntent('refactor auth module'), 'security');
|
|
289
|
-
});
|
|
290
|
-
|
|
291
|
-
it('"refactor the navigation component" → refactor', () => {
|
|
292
|
-
assert.equal(classifyIntent('refactor the navigation component'), 'refactor');
|
|
293
|
-
});
|
|
294
|
-
|
|
295
|
-
it('"review the PR" → review', () => {
|
|
296
|
-
assert.equal(classifyIntent('review the PR'), 'review');
|
|
297
|
-
});
|
|
298
|
-
|
|
299
|
-
it('"find where the logger is called" → search', () => {
|
|
300
|
-
assert.equal(classifyIntent('find where the logger is called'), 'search');
|
|
301
|
-
});
|
|
302
|
-
|
|
303
|
-
it('"design the system architecture" → architecture', () => {
|
|
304
|
-
// Note: "auth" keyword triggers security before architecture in priority order.
|
|
305
|
-
// Use a prompt without auth to reliably get architecture.
|
|
306
|
-
assert.equal(classifyIntent('design the system architecture'), 'architecture');
|
|
307
|
-
});
|
|
308
|
-
|
|
309
|
-
it('"auth" in prompt triggers security intent (higher priority than architecture)', () => {
|
|
310
|
-
// "security" has higher priority than "architecture" in INTENT_PRIORITY.
|
|
311
|
-
// "auth" matches the security regex, so it wins over "design".
|
|
312
|
-
assert.equal(classifyIntent('design the new auth system'), 'security');
|
|
313
|
-
});
|
|
314
|
-
});
|
|
315
|
-
|
|
316
|
-
describe('classifyRisk', () => {
|
|
317
|
-
it('returns low for empty paths', () => {
|
|
318
|
-
const { level } = classifyRisk([]);
|
|
319
|
-
assert.equal(level, 'low');
|
|
320
|
-
});
|
|
321
|
-
|
|
322
|
-
it('returns critical for auth paths', () => {
|
|
323
|
-
const { level } = classifyRisk(['src/auth/token.mjs']);
|
|
324
|
-
assert.equal(level, 'critical');
|
|
325
|
-
});
|
|
326
|
-
|
|
327
|
-
it('returns critical for secret/key paths', () => {
|
|
328
|
-
const { level } = classifyRisk(['config/secrets.env']);
|
|
329
|
-
assert.equal(level, 'critical');
|
|
330
|
-
});
|
|
331
|
-
|
|
332
|
-
it('returns high for billing paths', () => {
|
|
333
|
-
const { level } = classifyRisk(['src/billing/invoice.mjs']);
|
|
334
|
-
assert.equal(level, 'high');
|
|
335
|
-
});
|
|
336
|
-
|
|
337
|
-
it('returns high for migration paths', () => {
|
|
338
|
-
// The regex uses \b boundaries; "migration.sql" matches but "migrations/" does not
|
|
339
|
-
// because "migrations" adds an extra 's' that breaks the word boundary.
|
|
340
|
-
const { level } = classifyRisk(['db/migration.sql']);
|
|
341
|
-
assert.equal(level, 'high');
|
|
342
|
-
});
|
|
343
|
-
|
|
344
|
-
it('returns low for docs paths', () => {
|
|
345
|
-
const { level } = classifyRisk(['docs/README.md']);
|
|
346
|
-
assert.equal(level, 'low');
|
|
347
|
-
});
|
|
348
|
-
|
|
349
|
-
it('returns medium for test files', () => {
|
|
350
|
-
const { level } = classifyRisk(['src/utils.test.mjs']);
|
|
351
|
-
assert.equal(level, 'medium');
|
|
352
|
-
});
|
|
353
|
-
});
|
|
354
|
-
|
|
355
|
-
describe('estimateComplexity', () => {
|
|
356
|
-
it('returns trivial for simple low-risk single-file format', () => {
|
|
357
|
-
const c = estimateComplexity({ prompt: 'format this file', fileCount: 1, risk: 'low', intent: 'format' });
|
|
358
|
-
assert.equal(c, 'trivial');
|
|
359
|
-
});
|
|
360
|
-
|
|
361
|
-
it('returns complex for critical risk', () => {
|
|
362
|
-
const c = estimateComplexity({ prompt: 'fix the auth token', fileCount: 0, risk: 'critical', intent: 'edit' });
|
|
363
|
-
assert.equal(c, 'complex');
|
|
364
|
-
});
|
|
365
|
-
|
|
366
|
-
it('returns complex for 6+ files', () => {
|
|
367
|
-
const c = estimateComplexity({ prompt: 'update all services', fileCount: 6, risk: 'low', intent: 'edit' });
|
|
368
|
-
assert.equal(c, 'complex');
|
|
369
|
-
});
|
|
370
|
-
|
|
371
|
-
it('returns complex for architecture intent', () => {
|
|
372
|
-
const c = estimateComplexity({ prompt: 'design the cache layer', fileCount: 0, risk: 'low', intent: 'architecture' });
|
|
373
|
-
assert.equal(c, 'complex');
|
|
374
|
-
});
|
|
375
|
-
|
|
376
|
-
it('returns moderate for 3+ files', () => {
|
|
377
|
-
const c = estimateComplexity({ prompt: 'update three files', fileCount: 3, risk: 'low', intent: 'edit' });
|
|
378
|
-
assert.equal(c, 'moderate');
|
|
379
|
-
});
|
|
380
|
-
|
|
381
|
-
it('returns moderate for refactor intent', () => {
|
|
382
|
-
const c = estimateComplexity({ prompt: 'refactor nav', fileCount: 0, risk: 'low', intent: 'refactor' });
|
|
383
|
-
assert.equal(c, 'moderate');
|
|
384
|
-
});
|
|
385
|
-
|
|
386
|
-
it('returns complex with 2+ prior failures', () => {
|
|
387
|
-
const c = estimateComplexity({ prompt: 'fix the same bug again', fileCount: 1, risk: 'low', intent: 'edit', priorFailures: 2 });
|
|
388
|
-
assert.equal(c, 'complex');
|
|
389
|
-
});
|
|
390
|
-
});
|
|
391
|
-
|
|
392
|
-
describe('detectTask full pipeline', () => {
|
|
393
|
-
it('simple edit → {intent:edit, risk:low, complexity:simple, tier:execute}', () => {
|
|
394
|
-
// Use a plain edit prompt with no keywords that trigger higher-priority intents.
|
|
395
|
-
const result = detectTask({ prompt: 'add a new button to the settings page' });
|
|
396
|
-
assert.equal(result.intent, 'edit');
|
|
397
|
-
assert.ok(['low', 'medium'].includes(result.risk));
|
|
398
|
-
assert.equal(result.tier, 'execute');
|
|
399
|
-
});
|
|
400
|
-
|
|
401
|
-
it('security: "fix auth token leak in src/auth.mjs" → critical risk, think tier', () => {
|
|
402
|
-
const result = detectTask({ prompt: 'fix auth token leak in src/auth.mjs' });
|
|
403
|
-
assert.equal(result.risk, 'critical');
|
|
404
|
-
assert.equal(result.tier, 'think');
|
|
405
|
-
});
|
|
406
|
-
|
|
407
|
-
it('search: "find where logger is used" → intent:search, tier:search or execute', () => {
|
|
408
|
-
const result = detectTask({ prompt: 'find where logger is used in the codebase' });
|
|
409
|
-
assert.equal(result.intent, 'search');
|
|
410
|
-
// Low effort → search tier; effort depends on risk/complexity
|
|
411
|
-
assert.ok(['search', 'execute'].includes(result.tier));
|
|
412
|
-
});
|
|
413
|
-
|
|
414
|
-
it('result has all required fields', () => {
|
|
415
|
-
const result = detectTask({ prompt: 'add a new endpoint' });
|
|
416
|
-
assert.ok('intent' in result);
|
|
417
|
-
assert.ok('risk' in result);
|
|
418
|
-
assert.ok('complexity' in result);
|
|
419
|
-
assert.ok('effort' in result);
|
|
420
|
-
assert.ok('tier' in result);
|
|
421
|
-
assert.ok('fileCount' in result);
|
|
422
|
-
assert.ok('riskyFiles' in result);
|
|
423
|
-
assert.ok('requiresWrite' in result);
|
|
424
|
-
assert.ok('explanation' in result);
|
|
425
|
-
});
|
|
426
|
-
|
|
427
|
-
it('priorFailures escalates effort and complexity', () => {
  // With two prior failures on an otherwise plain prompt, detectTask is expected
  // to report complexity 'complex' and effort 'xhigh'.
  const failed = detectTask({ prompt: 'fix the bug', files: [], priorFailures: 2 });
  assert.equal(failed.complexity, 'complex');
  assert.equal(failed.effort, 'xhigh');
});
|
|
433
|
-
});
|
|
434
|
-
|
|
435
|
-
describe('inferTier', () => {
|
|
436
|
-
it('architecture intent → think', () => {
|
|
437
|
-
assert.equal(inferTier({ intent: 'architecture', risk: 'low', complexity: 'simple' }), 'think');
|
|
438
|
-
});
|
|
439
|
-
|
|
440
|
-
it('critical risk → think', () => {
|
|
441
|
-
assert.equal(inferTier({ intent: 'edit', risk: 'critical', complexity: 'moderate' }), 'think');
|
|
442
|
-
});
|
|
443
|
-
|
|
444
|
-
it('edit intent, low risk → execute', () => {
|
|
445
|
-
assert.equal(inferTier({ intent: 'edit', risk: 'low', complexity: 'simple' }), 'execute');
|
|
446
|
-
});
|
|
447
|
-
|
|
448
|
-
it('search intent, low effort → search', () => {
|
|
449
|
-
assert.equal(inferTier({ intent: 'search', risk: 'low', complexity: 'trivial', effort: 'low' }), 'search');
|
|
450
|
-
});
|
|
451
|
-
|
|
452
|
-
it('format intent, low effort → search', () => {
|
|
453
|
-
assert.equal(inferTier({ intent: 'format', risk: 'low', complexity: 'trivial', effort: 'low' }), 'search');
|
|
454
|
-
});
|
|
455
|
-
|
|
456
|
-
it('review intent → think', () => {
|
|
457
|
-
assert.equal(inferTier({ intent: 'review', risk: 'low', complexity: 'simple' }), 'think');
|
|
458
|
-
});
|
|
459
|
-
|
|
460
|
-
it('refactor intent → execute', () => {
|
|
461
|
-
assert.equal(inferTier({ intent: 'refactor', risk: 'low', complexity: 'moderate' }), 'execute');
|
|
462
|
-
});
|
|
463
|
-
});
|
|
464
|
-
});
|
|
465
|
-
|
|
466
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
467
|
-
// DECIDE TESTS
|
|
468
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
469
|
-
|
|
470
|
-
describe('decide', () => {
|
|
471
|
-
const soloClaude20 = {
|
|
472
|
-
providers: {
|
|
473
|
-
claude: { plan: '$20', enabled: true },
|
|
474
|
-
openai: { plan: '$20', enabled: false },
|
|
475
|
-
},
|
|
476
|
-
mode: 'solo-claude',
|
|
477
|
-
bias: 'balanced',
|
|
478
|
-
};
|
|
479
|
-
|
|
480
|
-
const soloClaude100 = {
|
|
481
|
-
providers: {
|
|
482
|
-
claude: { plan: '$100', enabled: true },
|
|
483
|
-
openai: { plan: '$20', enabled: false },
|
|
484
|
-
},
|
|
485
|
-
mode: 'solo-claude',
|
|
486
|
-
bias: 'balanced',
|
|
487
|
-
};
|
|
488
|
-
|
|
489
|
-
const dualProfile = {
|
|
490
|
-
providers: {
|
|
491
|
-
claude: { plan: '$100', enabled: true },
|
|
492
|
-
openai: { plan: '$100', enabled: true },
|
|
493
|
-
},
|
|
494
|
-
mode: 'dual',
|
|
495
|
-
bias: 'balanced',
|
|
496
|
-
};
|
|
497
|
-
|
|
498
|
-
describe('decideRoute', () => {
|
|
499
|
-
it('solo-claude $20 → haiku or sonnet, never opus', () => {
|
|
500
|
-
const detection = { intent: 'edit', risk: 'low', complexity: 'simple', effort: 'medium', tier: 'execute' };
|
|
501
|
-
const decision = decideRoute({ profile: soloClaude20, detection });
|
|
502
|
-
assert.equal(decision.provider, 'claude');
|
|
503
|
-
assert.ok(['haiku', 'sonnet'].includes(decision.model), `Got: ${decision.model}`);
|
|
504
|
-
assert.notEqual(decision.model, 'opus');
|
|
505
|
-
});
|
|
506
|
-
|
|
507
|
-
it('solo-claude $100 → can use opus for think-tier tasks', () => {
|
|
508
|
-
const detection = { intent: 'architecture', risk: 'high', complexity: 'complex', effort: 'xhigh', tier: 'think' };
|
|
509
|
-
const decision = decideRoute({ profile: soloClaude100, detection });
|
|
510
|
-
assert.equal(decision.provider, 'claude');
|
|
511
|
-
assert.equal(decision.model, 'opus');
|
|
512
|
-
});
|
|
513
|
-
|
|
514
|
-
it('dual profile, search task → picks a provider and a model', () => {
|
|
515
|
-
const detection = { intent: 'search', risk: 'low', complexity: 'trivial', effort: 'low', tier: 'search' };
|
|
516
|
-
const decision = decideRoute({ profile: dualProfile, detection });
|
|
517
|
-
assert.ok(['claude', 'openai'].includes(decision.provider));
|
|
518
|
-
assert.ok(typeof decision.model === 'string' && decision.model.length > 0);
|
|
519
|
-
});
|
|
520
|
-
|
|
521
|
-
it('dual profile, think-tier → provider is claude (session coupling)', () => {
|
|
522
|
-
const detection = { intent: 'architecture', risk: 'high', complexity: 'complex', effort: 'xhigh', tier: 'think' };
|
|
523
|
-
const decision = decideRoute({ profile: dualProfile, detection });
|
|
524
|
-
assert.equal(decision.provider, 'claude');
|
|
525
|
-
});
|
|
526
|
-
|
|
527
|
-
it('returns decision object with required fields', () => {
|
|
528
|
-
const detection = { intent: 'edit', risk: 'low', complexity: 'simple', effort: 'medium', tier: 'execute' };
|
|
529
|
-
const decision = decideRoute({ profile: soloClaude20, detection });
|
|
530
|
-
assert.ok('provider' in decision);
|
|
531
|
-
assert.ok('model' in decision);
|
|
532
|
-
assert.ok('tier' in decision);
|
|
533
|
-
assert.ok('dualBrain' in decision);
|
|
534
|
-
assert.ok('explanation' in decision);
|
|
535
|
-
assert.ok('modes' in decision);
|
|
536
|
-
assert.ok('sandbox' in decision);
|
|
537
|
-
});
|
|
538
|
-
});
|
|
539
|
-
|
|
540
|
-
describe('getAvailableModels', () => {
|
|
541
|
-
it('$20 claude plan excludes opus', () => {
|
|
542
|
-
const { claude } = getAvailableModels(soloClaude20);
|
|
543
|
-
assert.ok(!claude.includes('opus'), `opus found in $20 plan: ${claude.join(', ')}`);
|
|
544
|
-
assert.ok(claude.includes('sonnet'));
|
|
545
|
-
assert.ok(claude.includes('haiku'));
|
|
546
|
-
});
|
|
547
|
-
|
|
548
|
-
it('$100 claude plan includes opus', () => {
|
|
549
|
-
const { claude } = getAvailableModels(soloClaude100);
|
|
550
|
-
assert.ok(claude.includes('opus'), `opus missing from $100 plan: ${claude.join(', ')}`);
|
|
551
|
-
});
|
|
552
|
-
|
|
553
|
-
it('$20 openai plan excludes gpt-5.5', () => {
|
|
554
|
-
const profile = {
|
|
555
|
-
providers: {
|
|
556
|
-
claude: { plan: '$20', enabled: false },
|
|
557
|
-
openai: { plan: '$20', enabled: true },
|
|
558
|
-
},
|
|
559
|
-
};
|
|
560
|
-
const { openai } = getAvailableModels(profile);
|
|
561
|
-
assert.ok(!openai.includes('gpt-5.5'), `gpt-5.5 found in $20 plan`);
|
|
562
|
-
});
|
|
563
|
-
|
|
564
|
-
it('$100 openai plan includes gpt-5.5', () => {
|
|
565
|
-
const profile = {
|
|
566
|
-
providers: {
|
|
567
|
-
claude: { plan: '$20', enabled: false },
|
|
568
|
-
openai: { plan: '$100', enabled: true },
|
|
569
|
-
},
|
|
570
|
-
};
|
|
571
|
-
const { openai } = getAvailableModels(profile);
|
|
572
|
-
assert.ok(openai.includes('gpt-5.5'), `gpt-5.5 missing from $100 plan`);
|
|
573
|
-
});
|
|
574
|
-
});
|
|
575
|
-
|
|
576
|
-
describe('shouldDualBrain', () => {
|
|
577
|
-
it('returns false for solo profile regardless of risk', () => {
|
|
578
|
-
const detection = { intent: 'edit', risk: 'critical', complexity: 'complex' };
|
|
579
|
-
assert.equal(shouldDualBrain(detection, soloClaude100), false);
|
|
580
|
-
});
|
|
581
|
-
|
|
582
|
-
it('returns false for solo-openai profile', () => {
|
|
583
|
-
const soloOpenai = {
|
|
584
|
-
providers: {
|
|
585
|
-
claude: { plan: '$20', enabled: false },
|
|
586
|
-
openai: { plan: '$100', enabled: true },
|
|
587
|
-
},
|
|
588
|
-
};
|
|
589
|
-
const detection = { intent: 'security', risk: 'critical', complexity: 'complex' };
|
|
590
|
-
assert.equal(shouldDualBrain(detection, soloOpenai), false);
|
|
591
|
-
});
|
|
592
|
-
|
|
593
|
-
it('returns true for dual profile with critical risk', () => {
|
|
594
|
-
const detection = { intent: 'edit', risk: 'critical', complexity: 'simple' };
|
|
595
|
-
assert.equal(shouldDualBrain(detection, dualProfile), true);
|
|
596
|
-
});
|
|
597
|
-
|
|
598
|
-
it('returns true for dual profile with architecture intent', () => {
|
|
599
|
-
const detection = { intent: 'architecture', risk: 'low', complexity: 'complex' };
|
|
600
|
-
assert.equal(shouldDualBrain(detection, dualProfile), true);
|
|
601
|
-
});
|
|
602
|
-
|
|
603
|
-
it('returns true for dual profile with security intent', () => {
|
|
604
|
-
const detection = { intent: 'security', risk: 'high', complexity: 'moderate' };
|
|
605
|
-
assert.equal(shouldDualBrain(detection, dualProfile), true);
|
|
606
|
-
});
|
|
607
|
-
|
|
608
|
-
it('returns false for dual profile with low-risk edit', () => {
|
|
609
|
-
const detection = { intent: 'edit', risk: 'low', complexity: 'simple' };
|
|
610
|
-
assert.equal(shouldDualBrain(detection, dualProfile), false);
|
|
611
|
-
});
|
|
612
|
-
|
|
613
|
-
it('returns true for dual profile complex+high risk', () => {
|
|
614
|
-
const detection = { intent: 'refactor', risk: 'high', complexity: 'complex' };
|
|
615
|
-
assert.equal(shouldDualBrain(detection, dualProfile), true);
|
|
616
|
-
});
|
|
617
|
-
});
|
|
618
|
-
|
|
619
|
-
describe('explainDecision', () => {
|
|
620
|
-
it('returns a non-empty string', () => {
|
|
621
|
-
const detection = { intent: 'edit', risk: 'low', complexity: 'simple', tier: 'execute' };
|
|
622
|
-
const decision = decideRoute({ profile: soloClaude20, detection });
|
|
623
|
-
const explanation = explainDecision(decision, detection, soloClaude20);
|
|
624
|
-
assert.ok(typeof explanation === 'string');
|
|
625
|
-
assert.ok(explanation.length > 0);
|
|
626
|
-
});
|
|
627
|
-
|
|
628
|
-
it('mentions dual-brain when dualBrain is true', () => {
|
|
629
|
-
const detection = { intent: 'edit', risk: 'critical', complexity: 'complex', tier: 'think' };
|
|
630
|
-
const decisionWithDual = {
|
|
631
|
-
provider: 'claude',
|
|
632
|
-
model: 'opus',
|
|
633
|
-
effort: 'xhigh',
|
|
634
|
-
dualBrain: true,
|
|
635
|
-
_pressure: { claude: 0, openai: 0 },
|
|
636
|
-
};
|
|
637
|
-
const explanation = explainDecision(decisionWithDual, detection, dualProfile);
|
|
638
|
-
assert.ok(explanation.toLowerCase().includes('dual-brain'), `Expected dual-brain mention: ${explanation}`);
|
|
639
|
-
});
|
|
640
|
-
});
|
|
641
|
-
|
|
642
|
-
describe('budget pressure downgrade', () => {
|
|
643
|
-
it('high pressure > 0.7 results in a downgraded model (not opus when under pressure)', () => {
  // Pressure cannot be injected into decideRoute without real files, so this
  // case does not exercise the downgrade itself. It only checks the
  // precondition: on the $100 plan the lower models that a downgrade would
  // fall back to (sonnet, haiku) are available alongside opus.
  // TODO: assert the actual downgrade once pressure can be injected.
  const { claude } = getAvailableModels(soloClaude100);
  assert.ok(claude.includes('haiku'));
  assert.ok(claude.includes('sonnet'));
  assert.ok(claude.includes('opus'));
});
|
|
661
|
-
});
|
|
662
|
-
});
|
|
663
|
-
|
|
664
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
665
|
-
// PREFERENCE ROUTING TESTS
|
|
666
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
667
|
-
|
|
668
|
-
describe('preference routing', () => {
|
|
669
|
-
describe('parsePreferences — signal extraction', () => {
  // Parse a single enabled, project-scoped preference carrying `text`.
  const parseOne = (text) => parsePreferences([{ text, enabled: true, scope: 'project' }]);

  // Assert that every signal field is at its neutral value (null or false).
  const assertNeutral = (signals) => {
    assert.equal(signals.biasOverride, null);
    assert.equal(signals.preferProvider, null);
    assert.equal(signals.avoidProvider, null);
    assert.equal(signals.alwaysDualBrain, false);
    assert.equal(signals.neverDualBrain, false);
    assert.equal(signals.preferModel, null);
  };

  it('"prefer cheaper models" → biasOverride = cost-saver', () => {
    assert.equal(parseOne('prefer cheaper models').biasOverride, 'cost-saver');
  });

  it('"always use dual brain consensus" → alwaysDualBrain = true', () => {
    assert.equal(parseOne('always use dual brain consensus').alwaysDualBrain, true);
  });

  it('"prefer claude" → preferProvider = claude', () => {
    assert.equal(parseOne('prefer claude').preferProvider, 'claude');
  });

  it('"avoid openai" → avoidProvider = openai', () => {
    assert.equal(parseOne('avoid openai').avoidProvider, 'openai');
  });

  it('empty preferences array → all nulls/false', () => {
    assertNeutral(parsePreferences([]));
  });

  it('null preferences → all nulls/false', () => {
    assertNeutral(parsePreferences(null));
  });

  it('disabled preferences are ignored', () => {
    // Same texts that produce signals above, but switched off.
    const disabled = ['prefer cheaper models', 'avoid openai'].map(
      (text) => ({ text, enabled: false, scope: 'project' }),
    );
    const signals = parsePreferences(disabled);
    assert.equal(signals.biasOverride, null);
    assert.equal(signals.avoidProvider, null);
  });

  it('"use best quality" → biasOverride = quality-first', () => {
    assert.equal(parseOne('use best quality').biasOverride, 'quality-first');
  });

  it('"prefer gpt" → preferProvider = openai', () => {
    assert.equal(parseOne('prefer gpt').preferProvider, 'openai');
  });

  it('"prefer opus" → preferModel = opus', () => {
    assert.equal(parseOne('prefer opus').preferModel, 'opus');
  });

  it('"never dual" → neverDualBrain = true', () => {
    assert.equal(parseOne('never dual brain').neverDualBrain, true);
  });
});
|
|
739
|
-
|
|
740
|
-
describe('parsePreferences → decideRoute wiring', () => {
  // Baseline: both providers enabled on the $100 plan, balanced bias, no preferences.
  const dualProfile100 = {
    providers: {
      claude: { plan: '$100', enabled: true },
      openai: { plan: '$100', enabled: true },
    },
    mode: 'dual',
    bias: 'balanced',
  };

  // Baseline profile plus exactly one project-scoped preference.
  const profileWith = (text, enabled = true) => ({
    ...dualProfile100,
    preferences: [{ text, enabled, scope: 'project' }],
  });

  // Fresh detection object per call so tests never share one.
  const lowRiskEdit = () => ({
    intent: 'edit', risk: 'low', complexity: 'simple', effort: 'medium', tier: 'execute',
  });

  it('cost-saver preference overrides balanced bias → cheaper model selected', () => {
    const profile = profileWith('prefer cheaper models');
    const detection = lowRiskEdit();
    const routed = decideRoute({ profile, detection });
    const isCheap = ['haiku', 'gpt-4.1-mini'].includes(routed.model);
    assert.ok(isCheap, `Expected cheap model, got: ${routed.model}`);
  });

  it('alwaysDualBrain preference forces dualBrain = true even for low-risk edit', () => {
    const profile = profileWith('always use dual brain consensus');
    const detection = lowRiskEdit();
    const routed = decideRoute({ profile, detection });
    assert.equal(routed.dualBrain, true);
  });

  it('neverDualBrain preference forces dualBrain = false even for critical risk', () => {
    const profile = profileWith('never dual brain');
    const detection = {
      intent: 'architecture', risk: 'critical', complexity: 'complex', effort: 'xhigh', tier: 'think',
    };
    const routed = decideRoute({ profile, detection });
    assert.equal(routed.dualBrain, false);
  });

  it('disabled preferences do not affect routing', () => {
    const disabledProfile = profileWith('always use dual brain consensus', false);
    // One detection object is deliberately reused for both routing calls.
    const detection = lowRiskEdit();
    const withDisabled = decideRoute({ profile: disabledProfile, detection });
    const baseline = decideRoute({ profile: dualProfile100, detection });
    assert.equal(withDisabled.dualBrain, baseline.dualBrain);
  });
});
|
|
792
|
-
});
|
|
793
|
-
|
|
794
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
795
|
-
// DISPATCH TESTS
|
|
796
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
797
|
-
|
|
798
|
-
describe('dispatch', () => {
|
|
799
|
-
describe('buildCommand', () => {
  // Return the argument that follows `flag` in `cmd`.
  // A bare `cmd[cmd.indexOf(flag) + 1]` silently reads cmd[0] when the flag is
  // missing (indexOf → -1), which yields a misleading failure message; this
  // helper fails up front with the real reason instead.
  const valueAfter = (cmd, flag) => {
    const idx = cmd.indexOf(flag);
    assert.notEqual(idx, -1, `Expected ${flag} in command: ${JSON.stringify(cmd)}`);
    assert.ok(idx + 1 < cmd.length, `Expected a value after ${flag}: ${JSON.stringify(cmd)}`);
    return cmd[idx + 1];
  };

  it('claude provider returns claude CLI args with model ID', () => {
    const decision = { provider: 'claude', model: 'sonnet', effort: null, sandbox: 'workspace-write' };
    const cmd = buildCommand(decision, 'fix the bug');
    assert.equal(cmd[0], 'claude');
    assert.ok(cmd.includes('--model'));
    // The alias ("sonnet") must be expanded to a full claude-* model ID.
    const modelId = valueAfter(cmd, '--model');
    assert.ok(modelId.startsWith('claude-'), `Expected claude-* model ID, got: ${modelId}`);
    assert.ok(cmd.includes('-p'));
    assert.ok(cmd.includes('fix the bug'));
  });

  it('claude provider with opus model returns opus model ID', () => {
    const decision = { provider: 'claude', model: 'opus', effort: null, sandbox: 'workspace-write' };
    const cmd = buildCommand(decision, 'design the system');
    const modelId = valueAfter(cmd, '--model');
    assert.ok(modelId.includes('opus'), `Expected opus in model ID: ${modelId}`);
  });

  it('claude provider with haiku model returns haiku model ID', () => {
    const decision = { provider: 'claude', model: 'haiku', effort: null, sandbox: 'read-only' };
    const cmd = buildCommand(decision, 'find the logger');
    const modelId = valueAfter(cmd, '--model');
    assert.ok(modelId.includes('haiku'), `Expected haiku in model ID: ${modelId}`);
  });

  it('openai provider returns codex CLI args', () => {
    const decision = { provider: 'openai', model: 'gpt-5.4', effort: null, sandbox: 'danger-full-access' };
    const cmd = buildCommand(decision, 'fix the bug');
    assert.equal(cmd[0], 'codex');
    assert.ok(cmd.includes('gpt-5.4'));
    assert.ok(cmd.includes('fix the bug'));
  });

  it('buildCommand includes effort flag for claude when set', () => {
    const decision = { provider: 'claude', model: 'sonnet', effort: 'high', sandbox: 'workspace-write' };
    const cmd = buildCommand(decision, 'fix the bug');
    assert.ok(cmd.includes('--effort'));
    assert.equal(valueAfter(cmd, '--effort'), 'high');
  });

  it('buildCommand includes effort flag for openai when set', () => {
    const decision = { provider: 'openai', model: 'gpt-5.4', effort: 'high', sandbox: 'danger-full-access' };
    const cmd = buildCommand(decision, 'fix the bug');
    // Only the presence of a `-c` argument is asserted for the openai path.
    assert.ok(cmd.includes('-c'));
  });

  it('buildCommand omits effort flag when effort is null', () => {
    const decision = { provider: 'claude', model: 'sonnet', effort: null, sandbox: 'workspace-write' };
    const cmd = buildCommand(decision, 'fix the bug');
    assert.ok(!cmd.includes('--effort'));
  });
});
|
|
854
|
-
|
|
855
|
-
describe('compressResult', () => {
  const NO_OUTPUT = '(no output)';

  it('returns (no output) for empty string', () => {
    assert.equal(compressResult(''), NO_OUTPUT);
  });

  it('returns (no output) for null/undefined', () => {
    for (const missing of [null, undefined]) {
      assert.equal(compressResult(missing), NO_OUTPUT);
    }
  });

  it('strips code blocks (console.log not present in output)', () => {
    // The fenced block is expected to be replaced by a placeholder; this test
    // only asserts that the JavaScript inside the fence does not leak into
    // the compressed summary.
    const raw = 'Here is the fix:\n```js\nconsole.log("hello");\n```\nDone.';
    const summary = compressResult(raw);
    assert.ok(!summary.includes('console.log'), `Code block not stripped: ${summary}`);
  });

  it('truncates to maxLength', () => {
    const summary = compressResult('x'.repeat(1000), 100);
    assert.ok(summary.length <= 100, `Too long: ${summary.length}`);
  });

  it('parses JSON result field when available', () => {
    const payload = JSON.stringify({ result: 'Task completed successfully.' });
    assert.equal(compressResult(payload, 300), 'Task completed successfully.');
  });

  it('parses JSON content field as fallback', () => {
    const payload = JSON.stringify({ content: 'Changes applied.' });
    assert.equal(compressResult(payload, 300), 'Changes applied.');
  });
});
|
|
893
|
-
|
|
894
|
-
describe('detectRuntime', () => {
  it('returns an object with claudeAvailable and codexAvailable booleans', async () => {
    const runtimeInfo = await detectRuntime();
    assert.ok(typeof runtimeInfo === 'object' && runtimeInfo !== null);

    // Presence of every expected key first …
    for (const key of ['claudeAvailable', 'codexAvailable', 'runtime']) {
      assert.ok(key in runtimeInfo, `missing ${key}`);
    }

    // … then the type of each value.
    assert.ok(typeof runtimeInfo.claudeAvailable === 'boolean');
    assert.ok(typeof runtimeInfo.codexAvailable === 'boolean');
    assert.ok(typeof runtimeInfo.runtime === 'string');

    const knownRuntimes = ['claude-code', 'codex-cli', 'standalone', 'none'];
    assert.ok(
      knownRuntimes.includes(runtimeInfo.runtime),
      `Unexpected runtime: ${runtimeInfo.runtime}`,
    );
  });
});
|
|
908
|
-
});
|
|
909
|
-
|
|
910
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
911
|
-
// DISPATCH SAFETY FEATURES
|
|
912
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
913
|
-
|
|
914
|
-
describe('dispatch safety features', () => {
|
|
915
|
-
|
|
916
|
-
// ── Feature 1: validateDispatch ────────────────────────────────────────────
|
|
917
|
-
describe('validateDispatch', () => {
  // Runtime availability fixture: which CLIs are installed.
  const runtime = (claudeAvailable, codexAvailable) => ({ claudeAvailable, codexAvailable });

  // Minimal routing decision handed to validateDispatch.
  const request = (provider, model, tier) => ({ provider, model, tier });

  it('returns _error when no CLI is available', () => {
    const outcome = validateDispatch(request('claude', 'sonnet', 'execute'), runtime(false, false));
    assert.ok(outcome._error, `Expected _error, got: ${JSON.stringify(outcome)}`);
    assert.ok(outcome._error.includes('No AI CLI available'), `Unexpected error: ${outcome._error}`);
  });

  it('falls back to openai when claude is unavailable but codex is', () => {
    const outcome = validateDispatch(request('claude', 'sonnet', 'execute'), runtime(false, true));
    assert.ok(!outcome._error, `Unexpected error: ${outcome._error}`);
    assert.equal(outcome.provider, 'openai', `Expected openai fallback, got: ${outcome.provider}`);
  });

  it('falls back to claude when openai is unavailable but claude is', () => {
    const outcome = validateDispatch(request('openai', 'o4-mini', 'execute'), runtime(true, false));
    assert.ok(!outcome._error, `Unexpected error: ${outcome._error}`);
    assert.equal(outcome.provider, 'claude', `Expected claude fallback, got: ${outcome.provider}`);
  });

  it('keeps original decision when both CLIs available and model is valid', () => {
    const outcome = validateDispatch(request('claude', 'sonnet', 'execute'), runtime(true, true));
    assert.ok(!outcome._error);
    assert.equal(outcome.provider, 'claude');
    assert.equal(outcome.model, 'sonnet');
  });

  it('resets invalid claude model to sonnet for execute tier', () => {
    const outcome = validateDispatch(request('claude', 'gpt-5.5', 'execute'), runtime(true, false));
    assert.ok(!outcome._error);
    assert.equal(outcome.model, 'sonnet', `Expected sonnet fallback, got: ${outcome.model}`);
  });

  it('resets invalid claude model to haiku for search tier', () => {
    const outcome = validateDispatch(request('claude', 'gpt-4.1', 'search'), runtime(true, false));
    assert.ok(!outcome._error);
    assert.equal(outcome.model, 'haiku', `Expected haiku fallback for search tier, got: ${outcome.model}`);
  });

  it('resets invalid openai model to o4-mini', () => {
    const outcome = validateDispatch(request('openai', 'bogus-model', 'execute'), runtime(false, true));
    assert.ok(!outcome._error);
    assert.equal(outcome.model, 'o4-mini', `Expected o4-mini fallback, got: ${outcome.model}`);
  });

  it('valid openai models pass through unchanged', () => {
    // A single runtime object is shared across all iterations.
    const both = runtime(true, true);
    ['o4-mini', 'o3', 'gpt-4.1'].forEach((m) => {
      const outcome = validateDispatch(request('openai', m, 'execute'), both);
      assert.ok(!outcome._error, `Unexpected error for model ${m}`);
      assert.equal(outcome.model, m, `Model changed unexpectedly: ${outcome.model}`);
    });
  });

  it('valid claude models pass through unchanged', () => {
    const both = runtime(true, true);
    ['opus', 'sonnet', 'haiku'].forEach((m) => {
      const outcome = validateDispatch(request('claude', m, 'execute'), both);
      assert.ok(!outcome._error, `Unexpected error for model ${m}`);
      assert.equal(outcome.model, m, `Model changed unexpectedly: ${outcome.model}`);
    });
  });
});
|
|
986
|
-
|
|
987
|
-
// ── Feature 2: checkWorktreeClean ──────────────────────────────────────────
|
|
988
|
-
describe('checkWorktreeClean', () => {
  // Run `git <args>` inside `cwd`.
  // Rejects when git cannot be spawned (the ChildProcess 'error' event, which
  // would otherwise be an unhandled event that crashes the test runner) or
  // when it exits non-zero, so a broken fixture fails loudly at setup instead
  // of surfacing later as a confusing assertion failure.
  const git = (args, cwd) => new Promise((resolve, reject) => {
    const child = spawn('git', args, { cwd, stdio: 'ignore' });
    child.on('error', reject);
    child.on('close', (code) => {
      if (code === 0) {
        resolve();
      } else {
        reject(new Error(`git ${args.join(' ')} exited with code ${code}`));
      }
    });
  });

  // Create a throwaway git repository under the OS temp dir, pass its path to
  // `fn`, and always remove it afterwards. The pid is part of the name so
  // concurrently running test processes cannot collide on the same millisecond.
  const withTmpRepo = async (prefix, fn) => {
    const tmp = join(tmpdir(), `${prefix}-${process.pid}-${Date.now()}`);
    mkdirSync(tmp, { recursive: true });
    try {
      await git(['init'], tmp);
      await fn(tmp);
    } finally {
      rmSync(tmp, { recursive: true, force: true });
    }
  };

  it('returns safe:true when owns is empty', async () => {
    const result = await checkWorktreeClean([], process.cwd());
    assert.deepEqual(result, { safe: true });
  });

  it('returns safe:true when owns is undefined', async () => {
    const result = await checkWorktreeClean(undefined, process.cwd());
    assert.deepEqual(result, { safe: true });
  });

  // The former empty '_globMatch: dir/* prefix pattern' test had no body and
  // therefore always passed; the dir/* behaviour is exercised for real by
  // 'detects conflict via dir/* prefix pattern' below.

  it('returns safe:true for non-overlapping owns patterns (dir that does not exist dirty)', async () => {
    // No dirty file can match a directory that does not exist.
    const result = await checkWorktreeClean(['src/totally-fake-dir/*'], process.cwd());
    assert.equal(result.safe, true, `Expected safe:true for non-overlapping pattern`);
  });

  it('detects conflict when dirty file matches exact path', async () => {
    await withTmpRepo('wt-test', async (tmp) => {
      await git(['config', 'user.email', 'test@test.com'], tmp);
      await git(['config', 'user.name', 'Test'], tmp);
      // An untracked file counts as dirty.
      writeFileSync(join(tmp, 'dirty.mjs'), '// dirty');
      const result = await checkWorktreeClean(['dirty.mjs'], tmp);
      assert.equal(result.safe, false, `Expected safe:false, got: ${JSON.stringify(result)}`);
      assert.ok(result.conflicts.includes('dirty.mjs'), `Expected dirty.mjs in conflicts: ${result.conflicts}`);
    });
  });

  it('detects conflict via *.ext glob pattern', async () => {
    await withTmpRepo('wt-test-ext', async (tmp) => {
      writeFileSync(join(tmp, 'something.mjs'), '// dirty');
      const result = await checkWorktreeClean(['*.mjs'], tmp);
      assert.equal(result.safe, false, `Expected conflict from *.mjs pattern`);
      assert.ok(result.conflicts.some((f) => f.endsWith('.mjs')), `Expected .mjs conflict: ${result.conflicts}`);
    });
  });

  it('detects conflict via dir/* prefix pattern', async () => {
    await withTmpRepo('wt-test-dir', async (tmp) => {
      mkdirSync(join(tmp, 'src', 'auth'), { recursive: true });
      writeFileSync(join(tmp, 'src', 'auth', 'token.mjs'), '// dirty');
      const result = await checkWorktreeClean(['src/auth/*'], tmp);
      assert.equal(result.safe, false, `Expected conflict from src/auth/* pattern`);
      assert.ok(result.conflicts.some((f) => f.startsWith('src/auth/')), `Expected src/auth/ conflict: ${result.conflicts}`);
    });
  });
});
|
|
1078
|
-
|
|
1079
|
-
// ── Feature 3: getRetryBudget ──────────────────────────────────────────────
|
|
1080
|
-
describe('getRetryBudget', () => {
  it('returns expected shape', () => {
    const snapshot = getRetryBudget();
    assert.ok(typeof snapshot === 'object' && snapshot !== null);

    const expectedKeys = ['perTaskRetries', 'recentDispatches', 'windowMs', 'maxPerTask', 'maxPerWindow'];
    for (const key of expectedKeys) {
      assert.ok(key in snapshot, `missing ${key}`);
    }

    // Limits asserted by this suite: 2 retries per task, 5 dispatches per 5-minute window.
    assert.equal(snapshot.maxPerTask, 2);
    assert.equal(snapshot.maxPerWindow, 5);
    assert.equal(snapshot.windowMs, 5 * 60 * 1000);
  });

  it('recentDispatches is a non-negative integer', () => {
    const { recentDispatches } = getRetryBudget();
    assert.ok(Number.isInteger(recentDispatches));
    assert.ok(recentDispatches >= 0);
  });
});
|
|
1100
|
-
});
|
|
1101
|
-
|
|
1102
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
1103
|
-
// CLI DRY-RUN SMOKE TESTS
|
|
1104
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
1105
|
-
|
|
1106
|
-
describe('CLI', () => {
  it('init writes profile to disk', async () => {
    // Regression guard: cmdInit once never called saveProfile. The onboarding
    // prompts are answered over stdin: Claude only, $20 plan, balanced.
    const tmp = makeTmp();
    try {
      const { code, stdout, stderr } = await new Promise((resolve) => {
        const child = spawn(process.execPath, [BIN, 'init'], {
          stdio: ['pipe', 'pipe', 'pipe'],
          cwd: tmp,
        });
        let out = '';
        let err = '';
        child.stdout.on('data', (chunk) => { out += chunk; });
        child.stderr.on('data', (chunk) => { err += chunk; });
        child.on('close', (exitCode) => resolve({ code: exitCode, stdout: out, stderr: err }));

        // Answers are staggered so readline sees each line before stdin closes.
        const answers = [
          ['1\n', 50],
          ['1\n', 200],
          ['2\n', 350],
        ];
        for (const [line, delayMs] of answers) {
          setTimeout(() => child.stdin.write(line), delayMs);
        }
        setTimeout(() => child.stdin.end(), 500);
      });

      const profileFile = join(tmp, '.dualbrain', 'profile.json');
      assert.ok(
        existsSync(profileFile),
        `Profile file not created at ${profileFile} (exit ${code})\nstdout:${stdout}\nstderr:${stderr}`,
      );
      const saved = JSON.parse(readFileSync(profileFile, 'utf8'));
      assert.equal(saved.schemaVersion, 1);
      assert.equal(saved.providers.claude.enabled, true);
    } finally {
      removeTmp(tmp);
    }
  });

  it('--help exits 0', async () => {
    const { code, stdout } = await run([BIN, '--help']);
    assert.equal(code, 0, `Expected exit 0, got ${code}`);
    assert.ok(stdout.length > 0, 'Expected some help output');
    const mentionsTool = stdout.toLowerCase().includes('dual-brain') || stdout.includes('go');
    assert.ok(mentionsTool, `Help text missing: ${stdout.slice(0, 200)}`);
  });

  it('--version exits 0 and prints package.json version', async () => {
    const { code, stdout } = await run([BIN, '--version']);
    assert.equal(code, 0, `Expected exit 0, got ${code}`);
    const { version: expectedVersion } = JSON.parse(readFileSync(PKG, 'utf8'));
    const printed = stdout.trim();
    assert.ok(printed.includes(expectedVersion), `Expected version ${expectedVersion}, got: ${printed}`);
  });

  it('go --dry-run "fix a bug" exits 0 and prints routing info', async () => {
    const { code, stdout, stderr } = await run([BIN, 'go', '--dry-run', 'fix a bug'], {
      timeout: 15_000,
    });
    // Must not crash; exit codes 0 and 1 are both tolerated here.
    assert.ok([0, 1].includes(code), `Unexpected exit code ${code}\nstdout: ${stdout}\nstderr: ${stderr}`);
    if (code !== 0) {
      return;
    }
    // On success the combined output has to carry some routing information.
    const combined = stdout + stderr;
    const hasRoutingInfo = ['provider', 'dry-run', 'model'].some((word) => combined.includes(word));
    assert.ok(hasRoutingInfo, `Expected routing info in output:\n${combined.slice(0, 500)}`);
  });
});
|
|
1172
|
-
|
|
1173
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
1174
|
-
// INTEGRATION: FULL PIPELINE
|
|
1175
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
1176
|
-
|
|
1177
|
-
describe('integration: full pipeline', () => {
|
|
1178
|
-
|
|
1179
|
-
// Shared dual-provider profile used by several tests
|
|
1180
|
-
const dualProfile = {
  // Dual-provider fixture: both providers enabled on the $100 plan.
  schemaVersion: 1,
  providers: {
    claude: { plan: '$100', enabled: true },
    openai: { plan: '$100', enabled: true },
  },
  mode: 'dual',
  bias: 'balanced',
  preferences: [],
};

const soloProfile = {
  // Solo-Claude fixture: openai is present in the profile but disabled.
  schemaVersion: 1,
  providers: {
    claude: { plan: '$100', enabled: true },
    openai: { plan: '$20', enabled: false },
  },
  mode: 'auto',
  bias: 'balanced',
  preferences: [],
};
|
|
1202
|
-
|
|
1203
|
-
// ── Test 1: simple edit routes to sonnet and dispatches ────────────────────
|
|
1204
|
-
it('simple edit routes to sonnet and dispatches', () => {
  // The wording avoids keywords that would trigger higher-priority intents
  // (document, security, etc.).
  const prompt = 'fix the button label in the settings page';

  // Stage 1 — detection.
  const detection = detectTask({ prompt });
  assert.equal(detection.intent, 'edit', `Expected intent:edit, got: ${detection.intent}`);
  const riskIsModest = ['low', 'medium'].includes(detection.risk);
  assert.ok(riskIsModest, `Unexpected risk: ${detection.risk}`);
  assert.equal(detection.tier, 'execute', `Expected tier:execute, got: ${detection.tier}`);

  // Stage 2 — routing on the solo-Claude $100 profile: stays on claude and
  // picks sonnet or haiku (never opus) for a simple edit.
  const decision = decideRoute({ profile: soloProfile, detection });
  assert.equal(decision.provider, 'claude', `Expected claude, got: ${decision.provider}`);
  const modelIsLight = ['sonnet', 'haiku'].includes(decision.model);
  assert.ok(modelIsLight, `Expected sonnet or haiku for simple edit, got: ${decision.model}`);
  assert.equal(decision.tier, 'execute', `Expected tier:execute, got: ${decision.tier}`);
  assert.equal(decision.dualBrain, false, `Expected dualBrain:false, got: ${decision.dualBrain}`);

  // Stage 3 — command construction only; no subprocess is spawned.
  const cmd = buildCommand(decision, prompt);
  assert.equal(cmd[0], 'claude', `Expected claude CLI command, got: ${cmd[0]}`);
  assert.ok(cmd.includes('-p'), 'Expected -p flag in command');
  assert.ok(cmd.includes(prompt), 'Expected prompt in command');
});
|
|
1230
|
-
|
|
1231
|
-
// ── Test 2: security task routes to think tier with dual-brain ─────────────
|
|
1232
|
-
it('security task routes to think tier with dual-brain', () => {
  // Stage 1 — detection must classify the prompt as a security task on the think tier.
  const detection = detectTask({ prompt: 'audit authentication security' });
  assert.equal(
    detection.intent,
    'security',
    `Expected intent:security, got: ${detection.intent}`,
  );
  assert.equal(
    detection.tier,
    'think',
    `Expected tier:think for security, got: ${detection.tier}`,
  );

  // Stage 2 — with both providers available, a security task should stay on
  // the think tier and request dual-brain consensus.
  const decision = decideRoute({ profile: dualProfile, detection });
  assert.equal(decision.tier, 'think', `Expected tier:think in decision, got: ${decision.tier}`);
  assert.equal(
    decision.dualBrain,
    true,
    `Expected dualBrain:true for security task with dual profile, got: ${decision.dualBrain}`,
  );
});
|
|
1249
|
-
|
|
1250
|
-
// ── Test 3: cost-saver bias downgrades model ───────────────────────────────
|
|
1251
|
-
it('cost-saver bias downgrades model', () => {
  // Solo-Claude profile switched to cost-saver for both mode and bias.
  const costSaverProfile = { ...soloProfile, mode: 'cost-saver', bias: 'cost-saver' };

  const detection = detectTask({ prompt: 'refactor the utils module' });
  const { model } = decideRoute({ profile: costSaverProfile, detection });

  // Cost-saver must land on haiku or sonnet and never on opus.
  const isCheapTier = ['haiku', 'sonnet'].includes(model);
  assert.ok(isCheapTier, `Expected haiku or sonnet for cost-saver mode, got: ${model}`);
  assert.notEqual(model, 'opus', `cost-saver should not route to opus, got: ${model}`);
});
|
|
1268
|
-
|
|
1269
|
-
// ── Test 4: hot provider triggers fallback ─────────────────────────────────
|
|
1270
|
-
it('hot provider triggers fallback', () => {
  // Everything in this test is synchronous, so the callback is not async.
  const tmp = makeTmp();
  try {
    // Flag claude/sonnet as hot, using the temp dir as the working directory.
    markHot('claude', 'sonnet', tmp);

    const detection = detectTask({ prompt: 'update the settings component' });
    assert.equal(detection.tier, 'execute', `Pre-condition: expected execute tier`);

    // Route against the same temp dir so the hot marker above is visible.
    const decision = decideRoute({ profile: dualProfile, detection, cwd: tmp });

    // claude was just marked hot, so the route is expected to fall back to openai.
    assert.equal(decision.provider, 'openai',
      `Expected openai fallback when claude is hot, got: ${decision.provider}`);
  } finally {
    // Restore claude to healthy, and always remove the temp dir — even when
    // markHealthy itself throws — so a failing cleanup step cannot leak it.
    try {
      markHealthy('claude', 'sonnet', tmp);
    } finally {
      removeTmp(tmp);
    }
  }
});
|
|
1290
|
-
|
|
1291
|
-
// ── Test 5: redaction happens before dispatch args ──────────────────────────
|
|
1292
|
-
it('redaction happens before dispatch args', () => {
  const secretValue = 'sk-secret123';
  const redacted = redact('use API_KEY=sk-secret123 to authenticate');

  // The raw secret must be gone from the redacted text...
  const secretLeaked = redacted.includes(secretValue);
  assert.ok(!secretLeaked, `Secret value must be redacted, got: ${redacted}`);

  // ...and the placeholder must have taken its place.
  const hasPlaceholder = redacted.includes('[REDACTED]');
  assert.ok(hasPlaceholder, `Expected [REDACTED] in output, got: ${redacted}`);

  // Build the CLI command from the already-redacted prompt and make sure the
  // secret does not show up anywhere in the resulting argument list.
  const decision = {
    provider: 'claude',
    model: 'sonnet',
    tier: 'execute',
    effort: null,
    sandbox: 'workspace-write',
  };
  const cliLine = buildCommand(decision, redacted).join(' ');
  assert.ok(
    !cliLine.includes(secretValue),
    `Secret must not appear in CLI args: ${cliLine}`,
  );
});
|
|
1310
|
-
|
|
1311
|
-
// ── Test 6: decompose splits complex task ───────────────────────────────────
|
|
1312
|
-
it('decompose splits complex task', () => {
  const plan = decompose('refactor auth module and add tests for it');

  // A compound prompt must yield more than one task and more than one wave.
  assert.ok(
    plan.tasks.length > 1,
    `Expected multiple tasks from compound prompt, got: ${plan.tasks.length}`,
  );
  assert.ok(
    plan.waves.length > 1,
    `Expected multiple waves for compound task, got: ${plan.waves.length}`,
  );

  // Comma-separated role list, reused in both failure messages below.
  const roleList = plan.tasks.map((task) => task.role).join(', ');

  // Every task's role must be one of the four known roles.
  const knownRoles = new Set(['researcher', 'implementer', 'reviewer', 'verifier']);
  const everyRoleKnown = plan.tasks.every((task) => knownRoles.has(task.role));
  assert.ok(everyRoleKnown, `All tasks must have valid roles, got: ${roleList}`);

  // At least one task must be a researcher or an implementer.
  const hasResearcherOrImplementer = plan.tasks.some(
    (task) => task.role === 'researcher' || task.role === 'implementer',
  );
  assert.ok(
    hasResearcherOrImplementer,
    `Expected at least one task with role researcher or implementer, got: ${roleList}`,
  );
});
|
|
1334
|
-
|
|
1335
|
-
// ── Test 7: session card formats correctly ──────────────────────────────────
|
|
1336
|
-
it('session card formats correctly', () => {
  // Repo fixture: a clean node/npm project on main with only a test command.
  const repoInfo = {
    name: 'my-test-project',
    type: 'node',
    packageManager: 'npm',
    branch: 'main',
    dirty: false,
    commands: { test: 'jest --coverage', build: null, lint: null },
  };
  // Health fixture with no provider states and no session.
  const healthInfo = { states: {}, session: null };

  // First argument is passed as null, exactly as the fixture-free call requires.
  const card = formatSessionCard(null, repoInfo, healthInfo);

  const isNonEmptyString = typeof card === 'string' && card.length > 0;
  assert.ok(isNonEmptyString, 'Expected non-empty string');

  // The rendered card must carry the ready banner and the repo name.
  assert.ok(
    card.includes('dual-brain ready'),
    `Expected "dual-brain ready" in card, got:\n${card}`,
  );
  assert.ok(
    card.includes('my-test-project'),
    `Expected repo name in card, got:\n${card}`,
  );
});
|
|
1355
|
-
|
|
1356
|
-
// ── Test 8: playbook loads for matching intent ──────────────────────────────
|
|
1357
|
-
it('playbook loads for matching intent', () => {
  const playbook = loadPlaybook('security');

  // `!= null` rejects both null and undefined, so a missing playbook fails
  // here with this message rather than as a TypeError on `playbook.steps`.
  assert.ok(playbook != null, 'Expected non-null playbook for "security" intent');
  assert.ok(Array.isArray(playbook.steps),
    `Expected steps array, got: ${typeof playbook.steps}`);
  assert.ok(playbook.steps.length > 0,
    `Expected at least one step, got: ${playbook.steps.length}`);

  // Each step must carry a non-empty string id and one of the known tiers.
  const allowedTiers = ['search', 'execute', 'think'];
  for (const step of playbook.steps) {
    assert.ok(typeof step.id === 'string' && step.id.length > 0,
      `Each step must have a string id, got: ${JSON.stringify(step)}`);
    assert.ok(allowedTiers.includes(step.tier),
      `Step tier must be search/execute/think, got: ${step.tier}`);
  }
});
|
|
1374
|
-
});
|