dual-brain 4.2.0 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +130 -35
- package/README.md +171 -44
- package/hooks/agent-chains.mjs +369 -0
- package/hooks/agent-templates.mjs +441 -0
- package/hooks/atomic-write.mjs +5 -3
- package/hooks/config-validator.mjs +156 -0
- package/hooks/confirmation-policy.mjs +167 -0
- package/hooks/cost-logger.mjs +32 -12
- package/hooks/cost-report.mjs +60 -114
- package/hooks/decision-ledger.mjs +3 -2
- package/hooks/dual-brain-review.mjs +249 -2
- package/hooks/dual-brain-think.mjs +294 -25
- package/hooks/enforce-tier.mjs +246 -87
- package/hooks/error-channel.mjs +68 -0
- package/hooks/failure-detector.mjs +2 -1
- package/hooks/health-check.mjs +16 -17
- package/hooks/risk-classifier.mjs +135 -2
- package/hooks/session-report.mjs +41 -71
- package/hooks/ship-captain.mjs +1176 -0
- package/hooks/ship-gate.mjs +971 -0
- package/hooks/summary-checkpoint.mjs +31 -4
- package/hooks/test-orchestrator.mjs +1975 -11
- package/install.mjs +1064 -31
- package/orchestrator.json +73 -96
- package/package.json +7 -2
|
@@ -28,6 +28,10 @@ const DUAL_BRAIN = resolve(HOOKS, 'dual-brain-review.mjs');
|
|
|
28
28
|
const ORCHESTRATOR = resolve(HOOKS, '..', 'orchestrator.json');
|
|
29
29
|
const USAGE_JSONL = resolve(HOOKS, `usage-${new Date().toISOString().slice(0, 10)}.jsonl`);
|
|
30
30
|
const BURST_FILE = resolve(HOOKS, '.burst-state');
|
|
31
|
+
const COOLDOWN_FILE = resolve(HOOKS, '.recommendation-cooldowns');
|
|
32
|
+
|
|
33
|
+
// Clean up cooldown state before tests so cooldowns don't interfere
|
|
34
|
+
try { unlinkSync(COOLDOWN_FILE); } catch {}
|
|
31
35
|
|
|
32
36
|
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
33
37
|
|
|
@@ -121,6 +125,8 @@ test('enforce-tier: correct tier', () => {
|
|
|
121
125
|
|
|
122
126
|
// ─── Test 3: enforce-tier: think task on haiku ───────────────────────────────
|
|
123
127
|
test('enforce-tier: think on haiku', () => {
|
|
128
|
+
// Clear cooldown state so tier_warning isn't suppressed
|
|
129
|
+
try { unlinkSync(COOLDOWN_FILE); } catch {}
|
|
124
130
|
const payload = JSON.stringify({
|
|
125
131
|
tool_name: 'Agent',
|
|
126
132
|
tool_input: { prompt: 'review security', model: 'haiku' },
|
|
@@ -173,15 +179,20 @@ test('cost-logger: logs entry', () => {
|
|
|
173
179
|
}
|
|
174
180
|
|
|
175
181
|
const payload = JSON.stringify({
|
|
176
|
-
tool_name: '
|
|
177
|
-
tool_input: {
|
|
182
|
+
tool_name: 'Bash',
|
|
183
|
+
tool_input: { command: 'echo hello' },
|
|
178
184
|
});
|
|
179
185
|
// cost-logger uses for-await on process.stdin → use runStream (spawnSync input pipe)
|
|
180
186
|
const { parsed, status } = runStream(COST_LOGGER, payload);
|
|
181
187
|
|
|
182
188
|
if (status !== 0) return `non-zero exit: ${status}`;
|
|
183
|
-
|
|
184
|
-
|
|
189
|
+
// Accept {} or a budget/activity systemMessage (not a tier-mismatch error)
|
|
190
|
+
if (!parsed) return `expected {} or activity alert, got: ${JSON.stringify(parsed)}`;
|
|
191
|
+
if (Object.keys(parsed).length !== 0) {
|
|
192
|
+
const msg = parsed.systemMessage || '';
|
|
193
|
+
if (!msg.includes('Activity Alert') && !msg.includes('Budget'))
|
|
194
|
+
return `unexpected output, got: ${JSON.stringify(parsed)}`;
|
|
195
|
+
}
|
|
185
196
|
|
|
186
197
|
if (!existsSync(USAGE_JSONL)) return 'daily usage log was not created';
|
|
187
198
|
|
|
@@ -254,6 +265,8 @@ test('orchestrator.json: valid JSON', () => {
|
|
|
254
265
|
|
|
255
266
|
// ─── Test 9: enforce-tier: think on gpt-4.1-mini ─────────────────────────────
|
|
256
267
|
test('enforce-tier: think on gpt-4.1-mini', () => {
|
|
268
|
+
// Clear cooldown state so tier_warning isn't suppressed
|
|
269
|
+
try { unlinkSync(COOLDOWN_FILE); } catch {}
|
|
257
270
|
const input = JSON.stringify({ tool_name: 'Agent', tool_input: { description: 'review security architecture', prompt: 'audit auth', model: 'gpt-4.1-mini' } });
|
|
258
271
|
const { parsed } = run(ENFORCE_TIER, input);
|
|
259
272
|
if (!parsed) return 'no valid JSON output';
|
|
@@ -263,14 +276,19 @@ test('enforce-tier: think on gpt-4.1-mini', () => {
|
|
|
263
276
|
return true;
|
|
264
277
|
});
|
|
265
278
|
|
|
266
|
-
// ─── Test 10: orchestrator.json: model_intelligence
|
|
267
|
-
test('orchestrator.json: model_intelligence', () => {
|
|
279
|
+
// ─── Test 10: orchestrator.json: model_intelligence (inline in subscriptions) ─
|
|
280
|
+
test('orchestrator.json: model_intelligence (inline)', () => {
  // Model metadata is read from subscriptions.<provider>.models in orchestrator.json.
  const configPath = resolve(__dirname, '..', 'orchestrator.json');
  const parsed = JSON.parse(readFileSync(configPath, 'utf8'));
  const claudeModels = parsed.subscriptions?.claude?.models || {};
  const openaiModels = parsed.subscriptions?.openai?.models || {};

  // Each of the three Claude models must describe what it is best for.
  for (const family of ['opus', 'sonnet', 'haiku']) {
    if (!claudeModels[family]?.best_for) return `claude.models.${family} missing best_for`;
  }
  if (!claudeModels.opus?.model_id) return 'claude.models.opus missing model_id';

  // Every OpenAI model that is listed must carry best_for as well.
  const offender = Object.entries(openaiModels).find(([, meta]) => !meta.best_for);
  if (offender) return `openai.models.${offender[0]} missing best_for`;

  return true;
});
|
|
276
294
|
|
|
@@ -545,6 +563,8 @@ test('enforce-tier: cost-saver demotes think', () => {
|
|
|
545
563
|
|
|
546
564
|
// ─── Test 24: enforce-tier: quality-first promotes execute ──────────────────
|
|
547
565
|
test('enforce-tier: quality-first promotes execute', () => {
|
|
566
|
+
// Clear cooldown state so tier_warning isn't suppressed
|
|
567
|
+
try { unlinkSync(COOLDOWN_FILE); } catch {}
|
|
548
568
|
const profileFile = resolve(__dirname, '..', 'dual-brain.profile.json');
|
|
549
569
|
let originalProfile;
|
|
550
570
|
try { originalProfile = readFileSync(profileFile, 'utf8'); } catch { originalProfile = null; }
|
|
@@ -789,7 +809,14 @@ test('enforce-tier: burst mode suppresses duplicate warnings', () => {
|
|
|
789
809
|
|
|
790
810
|
// ─── Test 32: enforce-tier: non-burst mode still warns on duplicates ───────
|
|
791
811
|
test('enforce-tier: non-burst mode still warns on duplicates', () => {
|
|
812
|
+
// Clear cooldown and summary state to start clean
|
|
813
|
+
const summaryFile = resolve(HOOKS, `usage-summary-${new Date().toISOString().slice(0, 10)}.json`);
|
|
814
|
+
let savedSummary;
|
|
815
|
+
try { savedSummary = readFileSync(summaryFile, 'utf8'); } catch { savedSummary = null; }
|
|
792
816
|
try {
|
|
817
|
+
try { unlinkSync(COOLDOWN_FILE); } catch {}
|
|
818
|
+
// Temporarily clear summary to prevent stale hash matches on first call
|
|
819
|
+
try { writeFileSync(summaryFile, JSON.stringify({ version: 1, recent_hashes: [] })); } catch {}
|
|
793
820
|
// Expire burst state by setting window_start to 0 (well outside 90s window)
|
|
794
821
|
writeFileSync(BURST_FILE, JSON.stringify({ count: 0, window_start: 0 }));
|
|
795
822
|
const payload = JSON.stringify({
|
|
@@ -799,6 +826,8 @@ test('enforce-tier: non-burst mode still warns on duplicates', () => {
|
|
|
799
826
|
|
|
800
827
|
// First call — establishes the prompt hash
|
|
801
828
|
run(ENFORCE_TIER, payload);
|
|
829
|
+
// Clear cooldown between calls so the second call's duplicate warning isn't suppressed
|
|
830
|
+
try { unlinkSync(COOLDOWN_FILE); } catch {}
|
|
802
831
|
// Second identical call — should trigger duplicate warning
|
|
803
832
|
const { parsed, status } = run(ENFORCE_TIER, payload);
|
|
804
833
|
if (status !== 0) return `non-zero exit: ${status}`;
|
|
@@ -810,6 +839,11 @@ test('enforce-tier: non-burst mode still warns on duplicates', () => {
|
|
|
810
839
|
return true;
|
|
811
840
|
} finally {
|
|
812
841
|
try { unlinkSync(BURST_FILE); } catch {}
|
|
842
|
+
try { unlinkSync(COOLDOWN_FILE); } catch {}
|
|
843
|
+
// Restore summary file if it existed before
|
|
844
|
+
if (savedSummary) {
|
|
845
|
+
try { writeFileSync(summaryFile, savedSummary); } catch {}
|
|
846
|
+
}
|
|
813
847
|
}
|
|
814
848
|
});
|
|
815
849
|
|
|
@@ -1156,6 +1190,1936 @@ test('adaptive loop: end-to-end hash match', () => {
|
|
|
1156
1190
|
}
|
|
1157
1191
|
});
|
|
1158
1192
|
|
|
1193
|
+
// ─── Test 41: error-channel exports logHookError and getRecentErrors ────────
|
|
1194
|
+
test('error-channel: exports logHookError and getRecentErrors', () => {
  // Checks, in a child Node process, that error-channel.mjs exports both
  // functions and that an entry written by logHookError can be read back via
  // getRecentErrors. Returns true on success or a failure message string.
  //
  // errors.jsonl is backed up and restored around the run.
  // NOTE(review): presumably this is the file error-channel.mjs writes to — confirm.
  const ERROR_FILE = resolve(HOOKS, 'errors.jsonl');
  const backup = existsSync(ERROR_FILE) ? readFileSync(ERROR_FILE, 'utf8') : null;

  try {
    // Start clean (best effort: an unwritable file surfaces through the checks below).
    try { writeFileSync(ERROR_FILE, '', 'utf8'); } catch {}

    const script = `
      import { logHookError, getRecentErrors } from './error-channel.mjs';
      const results = { errors: [] };

      // 1. Both functions exist and are functions
      if (typeof logHookError !== 'function') results.errors.push('logHookError not a function');
      if (typeof getRecentErrors !== 'function') results.errors.push('getRecentErrors not a function');

      // 2. logHookError writes an entry
      logHookError('test-hook', 'test-op', new Error('test error'), { extra: 'ctx' });

      // 3. getRecentErrors reads it back
      const recent = getRecentErrors(1);
      if (!Array.isArray(recent)) results.errors.push('getRecentErrors did not return array');
      else if (recent.length < 1) results.errors.push('getRecentErrors returned empty after logHookError');
      else {
        const entry = recent[0];
        if (entry.hook !== 'test-hook') results.errors.push('entry.hook mismatch: ' + entry.hook);
        if (entry.operation !== 'test-op') results.errors.push('entry.operation mismatch: ' + entry.operation);
        if (!entry.error.includes('test error')) results.errors.push('entry.error mismatch: ' + entry.error);
        if (!entry.timestamp) results.errors.push('entry missing timestamp');
        if (entry.context?.extra !== 'ctx') results.errors.push('entry.context mismatch');
      }

      process.stdout.write(JSON.stringify(results));
    `;
    // cwd is the hooks directory so the script's relative import resolves there.
    const proc = spawnSync(process.execPath, [
      '--input-type=module',
      '-e', script,
    ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });

    // spawnSync sets `error` when the child could not be started or timed out;
    // stderr is then usually empty, so report the error itself.
    if (proc.error) return `error-channel script could not run: ${proc.error.message}`;
    if (proc.status !== 0) return `error-channel script failed: ${proc.stderr}`;
    let results;
    try { results = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
    if (results.errors.length > 0) return results.errors.join('; ');
    return true;
  } finally {
    // Best-effort restore: a failure here must not replace the test verdict.
    try {
      if (backup !== null) writeFileSync(ERROR_FILE, backup, 'utf8');
      else unlinkSync(ERROR_FILE);
    } catch {}
  }
});
|
|
1243
|
+
|
|
1244
|
+
// ─── Test 42: atomic-write: lockedReadModifyWrite rejects on lock contention ─
|
|
1245
|
+
test('atomic-write: lockedReadModifyWrite rejects on lock contention', () => {
  // Pre-creates "<file>.lock" next to a JSON file, then calls
  // lockedReadModifyWrite on that file from a child Node process. The call is
  // expected to throw with a message containing "timed out" and to leave the
  // file content untouched. Returns true on success or a failure message string.
  const made = spawnSync('mktemp', ['-d'], { encoding: 'utf8' });
  const tmpDir = (made.stdout || '').trim();
  if (made.status !== 0 || !tmpDir) {
    // Without this guard an empty tmpDir would place the fixture in the cwd.
    return `could not create temp dir: ${made.error?.message || made.stderr || 'mktemp produced no output'}`;
  }
  const testFile = resolve(tmpDir, 'test-locked.json');
  const lockFile = testFile + '.lock';

  // Paths are embedded as JSON string literals so that quotes or other special
  // characters in them cannot break the generated script source.
  const moduleLiteral = JSON.stringify(resolve(HOOKS, 'atomic-write.mjs').replace(/\\/g, '/'));
  const targetLiteral = JSON.stringify(testFile.replace(/\\/g, '/'));

  try {
    // Write initial data
    writeFileSync(testFile, JSON.stringify({ value: 1 }));
    // Manually create a lock file to simulate contention
    writeFileSync(lockFile, JSON.stringify({ pid: process.pid, ts: Date.now() }));

    const script = `
      import { lockedReadModifyWrite } from ${moduleLiteral};
      try {
        lockedReadModifyWrite(${targetLiteral}, (data) => ({ ...data, value: 2 }));
        process.stdout.write(JSON.stringify({ threw: false }));
      } catch (e) {
        process.stdout.write(JSON.stringify({ threw: true, message: e.message }));
      }
    `;
    const proc = spawnSync(process.execPath, [
      '--input-type=module',
      '-e', script,
    ], { encoding: 'utf8', timeout: 15000, cwd: HOOKS });

    // The exit status is deliberately not checked: the verdict is the JSON the
    // script prints. A spawn failure or timeout is only reported when no
    // verdict was produced.
    let result;
    try { result = JSON.parse((proc.stdout || '').trim()); } catch {
      const spawnNote = proc.error ? ` error: ${proc.error.message}` : '';
      return `output not JSON: ${proc.stdout || ''} stderr: ${proc.stderr || ''}${spawnNote}`;
    }

    if (!result.threw) return 'expected lockedReadModifyWrite to throw on lock contention, but it did not';
    // String() guards against a thrown value that carried no message.
    if (!String(result.message).includes('timed out')) return `expected timeout message, got: ${result.message}`;

    // Verify the file was NOT modified (write should not have proceeded)
    const data = JSON.parse(readFileSync(testFile, 'utf8'));
    if (data.value !== 1) return `expected file value=1 (unchanged), got: ${data.value}`;

    return true;
  } finally {
    spawnSync('rm', ['-rf', tmpDir], { stdio: 'pipe' });
  }
});
|
|
1291
|
+
|
|
1292
|
+
// ─── Test 43: config-validator: validates good config ────────────────────────
|
|
1293
|
+
test('config-validator: validates good config', () => {
  // Feeds validateConfig a config with all four top-level sections and expects
  // valid=true. The script runs in a child Node process with cwd=HOOKS so its
  // relative import resolves there.
  const childSource = `
    import { validateConfig } from './config-validator.mjs';
    const config = {
      subscriptions: { claude: { models: { opus: { tier: 'think' } } } },
      tiers: { search: {}, execute: {}, think: {} },
      routing: { strategy: 'test' },
      quality_gate: { enabled: true },
    };
    const result = validateConfig(config);
    process.stdout.write(JSON.stringify(result));
  `;
  const nodeArgs = ['--input-type=module', '-e', childSource];
  const { status, stdout, stderr } = spawnSync(process.execPath, nodeArgs, {
    encoding: 'utf8',
    timeout: 5000,
    cwd: HOOKS,
  });

  if (status !== 0) return `script failed: ${stderr}`;

  let verdict;
  try {
    verdict = JSON.parse(stdout.trim());
  } catch {
    return `output not JSON: ${stdout}`;
  }
  return verdict.valid ? true : `expected valid=true, got errors: ${verdict.errors.join('; ')}`;
});
|
|
1316
|
+
|
|
1317
|
+
// ─── Test 44: config-validator: detects missing keys ─────────────────────────
|
|
1318
|
+
test('config-validator: detects missing keys', () => {
  // Passes validateConfig a config containing only `subscriptions` and expects
  // valid=false with at least three reported errors.
  const childSource = `
    import { validateConfig } from './config-validator.mjs';
    const result = validateConfig({ subscriptions: { claude: { models: { opus: { tier: 'think' } } } } });
    process.stdout.write(JSON.stringify(result));
  `;
  const spawnOptions = { encoding: 'utf8', timeout: 5000, cwd: HOOKS };
  const child = spawnSync(process.execPath, ['--input-type=module', '-e', childSource], spawnOptions);

  if (child.status !== 0) return `script failed: ${child.stderr}`;

  let outcome;
  try {
    outcome = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }

  if (outcome.valid) {
    return 'expected valid=false for config missing tiers/routing/quality_gate';
  }
  const errorCount = outcome.errors.length;
  return errorCount < 3 ? `expected at least 3 errors, got: ${errorCount}` : true;
});
|
|
1336
|
+
|
|
1337
|
+
// ─── Test 45: config-validator: warns on unknown keys ────────────────────────
|
|
1338
|
+
test('config-validator: warns on unknown keys', () => {
  // Passes validateConfig an otherwise complete config with one extra key
  // (`typo_key`). The config must still be valid, and some warning must mention
  // the unknown key. Returns true on success or a failure message string.
  const script = `
    import { validateConfig } from './config-validator.mjs';
    const config = {
      subscriptions: { claude: { models: { opus: { tier: 'think' } } } },
      tiers: { search: {}, execute: {}, think: {} },
      routing: {},
      quality_gate: {},
      typo_key: true,
    };
    const result = validateConfig(config);
    process.stdout.write(JSON.stringify(result));
  `;
  const proc = spawnSync(process.execPath, [
    '--input-type=module',
    '-e', script,
  ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });

  if (proc.status !== 0) return `script failed: ${proc.stderr}`;
  let result;
  try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
  if (!result.valid) return `expected valid=true (unknown keys are warnings, not errors): ${result.errors.join('; ')}`;

  const warnings = Array.isArray(result.warnings) ? result.warnings : [];
  if (warnings.length === 0) return 'expected warning about unknown key "typo_key"';
  // Search all warnings rather than only the first: the validator may emit
  // unrelated warnings ahead of the unknown-key one.
  if (!warnings.some((w) => String(w).includes('typo_key'))) {
    return `expected warning about typo_key, got: ${warnings.join('; ')}`;
  }
  return true;
});
|
|
1364
|
+
|
|
1365
|
+
// ─── Test 46: config-validator: loadAndValidateConfig on real config ─────────
|
|
1366
|
+
test('config-validator: loadAndValidateConfig on real config', () => {
  // Runs loadAndValidateConfig against ../orchestrator.json (relative to the
  // child script's own location) and expects the validation to report valid=true.
  const childSource = `
    import { loadAndValidateConfig } from './config-validator.mjs';
    import { resolve, dirname } from 'path';
    import { fileURLToPath } from 'url';
    const __dirname = dirname(fileURLToPath(import.meta.url));
    const result = loadAndValidateConfig(resolve(__dirname, '..', 'orchestrator.json'));
    process.stdout.write(JSON.stringify({ valid: result.validation.valid, errors: result.validation.errors, warnings: result.validation.warnings }));
  `;
  const { status, stdout, stderr } = spawnSync(
    process.execPath,
    ['--input-type=module', '-e', childSource],
    { encoding: 'utf8', timeout: 5000, cwd: HOOKS },
  );

  if (status !== 0) return `script failed: ${stderr}`;

  let summary;
  try {
    summary = JSON.parse(stdout.trim());
  } catch {
    return `output not JSON: ${stdout}`;
  }
  return summary.valid
    ? true
    : `real orchestrator.json failed validation: ${summary.errors.join('; ')}`;
});
|
|
1386
|
+
|
|
1387
|
+
// ─── Test 47: risk-classifier: exports classifyRiskEnhanced ─────────────────
|
|
1388
|
+
test('risk-classifier: exports classifyRiskEnhanced', () => {
  // Smoke check: risk-classifier.mjs must export classifyRiskEnhanced as a function.
  const childSource = `
    import { classifyRiskEnhanced } from './risk-classifier.mjs';
    process.stdout.write(JSON.stringify({ exported: typeof classifyRiskEnhanced === 'function' }));
  `;
  const child = spawnSync(process.execPath, ['--input-type=module', '-e', childSource], {
    encoding: 'utf8',
    timeout: 5000,
    cwd: HOOKS,
  });

  if (child.status !== 0) return `script failed: ${child.stderr}`;

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }
  return report.exported ? true : 'classifyRiskEnhanced is not exported as a function';
});
|
|
1404
|
+
|
|
1405
|
+
// ─── Test 48: risk-classifier: classifyRiskEnhanced returns expected shape ───
|
|
1406
|
+
test('risk-classifier: classifyRiskEnhanced returns { risk, basis, details }', () => {
  // Calls classifyRiskEnhanced on an ordinary source path and checks the shape
  // of the returned object: risk and basis come from fixed sets, and details
  // carries static_risk, churn_commits and history_success_rate.
  const childSource = `
    import { classifyRiskEnhanced } from './risk-classifier.mjs';
    const result = classifyRiskEnhanced('src/utils/helper.js');
    const errors = [];

    if (typeof result !== 'object' || result === null) {
      errors.push('result is not an object');
    } else {
      const validRisks = ['low', 'medium', 'high', 'critical'];
      const validBases = ['static', 'churn', 'history', 'churn+history'];
      if (!validRisks.includes(result.risk)) errors.push('risk not in valid set: ' + result.risk);
      if (!validBases.includes(result.basis)) errors.push('basis not in valid set: ' + result.basis);
      if (typeof result.details !== 'object' || result.details === null) {
        errors.push('details is not an object');
      } else {
        if (!('static_risk' in result.details)) errors.push('details missing static_risk');
        if (!('churn_commits' in result.details)) errors.push('details missing churn_commits');
        if (!('history_success_rate' in result.details)) errors.push('details missing history_success_rate');
      }
    }

    process.stdout.write(JSON.stringify({ errors }));
  `;
  const spawnOptions = { encoding: 'utf8', timeout: 5000, cwd: HOOKS };
  const { status, stdout, stderr } = spawnSync(
    process.execPath,
    ['--input-type=module', '-e', childSource],
    spawnOptions,
  );

  if (status !== 0) return `script failed: ${stderr}`;

  let report;
  try {
    report = JSON.parse(stdout.trim());
  } catch {
    return `output not JSON: ${stdout}`;
  }
  return report.errors.length > 0 ? report.errors.join('; ') : true;
});
|
|
1441
|
+
|
|
1442
|
+
// ─── Test 49: risk-classifier: static classification still works (auth → critical) ─
|
|
1443
|
+
test('risk-classifier: static auth path → critical', () => {
  // A path under src/auth/ must be classified critical, both in the static
  // component (details.static_risk) and in the final risk.
  const childSource = `
    import { classifyRiskEnhanced } from './risk-classifier.mjs';
    const result = classifyRiskEnhanced('src/auth/credentials.mjs');
    process.stdout.write(JSON.stringify({ risk: result.risk, static_risk: result.details.static_risk }));
  `;
  const child = spawnSync(process.execPath, ['--input-type=module', '-e', childSource], {
    encoding: 'utf8',
    timeout: 5000,
    cwd: HOOKS,
  });

  if (child.status !== 0) return `script failed: ${child.stderr}`;

  let observed;
  try {
    observed = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }

  // The static component is checked first, then the combined verdict.
  if (observed.static_risk !== 'critical') {
    return `expected static_risk=critical for auth path, got: ${observed.static_risk}`;
  }
  return observed.risk === 'critical'
    ? true
    : `expected risk=critical for auth path, got: ${observed.risk}`;
});
|
|
1461
|
+
|
|
1462
|
+
// ─── Test 50: risk-classifier: handles missing git gracefully ────────────────
|
|
1463
|
+
test('risk-classifier: handles missing git gracefully (no crash)', () => {
  // Runs classifyRiskEnhanced with the child's cwd set to a fresh temp directory.
  // The function must not throw and must still return a valid
  // { risk, basis, details } shape. Returns true or a failure message string.
  // NOTE(review): assumes the temp directory is not inside a git work tree, so
  // that git lookups fail — confirm for the CI environment.
  const made = spawnSync('mktemp', ['-d'], { encoding: 'utf8' });
  const tmpDir = (made.stdout || '').trim();
  if (made.status !== 0 || !tmpDir) {
    // Without this guard an empty tmpDir would make the child inherit the
    // parent's cwd, so the "no git" condition would not be tested.
    return `could not create temp dir: ${made.error?.message || made.stderr || 'mktemp produced no output'}`;
  }

  // Embedded as a JSON string literal so special characters in the path cannot
  // break the generated script source.
  const moduleLiteral = JSON.stringify(resolve(HOOKS, 'risk-classifier.mjs').replace(/\\/g, '/'));

  try {
    const script = `
      import { classifyRiskEnhanced } from ${moduleLiteral};
      let result;
      try {
        result = classifyRiskEnhanced('some/random/file.js');
      } catch (e) {
        process.stdout.write(JSON.stringify({ threw: true, message: e.message }));
        process.exit(0);
      }
      const validBases = ['static', 'churn', 'history', 'churn+history'];
      const validRisks = ['low', 'medium', 'high', 'critical'];
      // typeof null is 'object', so null is excluded explicitly for both
      // the result and its details.
      const ok = typeof result === 'object' && result !== null
        && validRisks.includes(result.risk)
        && validBases.includes(result.basis)
        && typeof result.details === 'object' && result.details !== null;
      process.stdout.write(JSON.stringify({ ok, threw: false, result }));
    `;
    const proc = spawnSync(process.execPath, [
      '--input-type=module',
      '-e', script,
    ], { encoding: 'utf8', timeout: 8000, cwd: tmpDir });

    // spawnSync sets `error` when the child could not be started or timed out.
    if (proc.error) return `script could not run: ${proc.error.message}`;
    if (proc.status !== 0) return `script failed: ${proc.stderr}`;
    let result;
    try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
    if (result.threw) return `classifyRiskEnhanced threw when git unavailable: ${result.message}`;
    if (!result.ok) return `invalid shape returned when git unavailable: ${JSON.stringify(result.result)}`;
    return true;
  } finally {
    spawnSync('rm', ['-rf', tmpDir], { stdio: 'pipe' });
  }
});
|
|
1497
|
+
|
|
1498
|
+
// ─── Test 51: agent-chains: exports getChain and listChains ──────────────────
|
|
1499
|
+
test('agent-chains: exports getChain and listChains', () => {
  // Smoke check: agent-chains.mjs must export getChain and listChains as functions.
  const childSource = `
    import { getChain, listChains } from './agent-chains.mjs';
    const results = { errors: [] };
    if (typeof getChain !== 'function') results.errors.push('getChain is not a function');
    if (typeof listChains !== 'function') results.errors.push('listChains is not a function');
    process.stdout.write(JSON.stringify(results));
  `;
  const { status, stdout, stderr } = spawnSync(
    process.execPath,
    ['--input-type=module', '-e', childSource],
    { encoding: 'utf8', timeout: 5000, cwd: HOOKS },
  );

  if (status !== 0) return `agent-chains script failed: ${stderr}`;

  let report;
  try {
    report = JSON.parse(stdout.trim());
  } catch {
    return `output not JSON: ${stdout}`;
  }
  return report.errors.length > 0 ? report.errors.join('; ') : true;
});
|
|
1518
|
+
|
|
1519
|
+
// ─── Test 52: agent-chains: all 3 chains have required fields ────────────────
|
|
1520
|
+
test('agent-chains: all 3 chains have required fields', () => {
  // The three expected chains must be listed, and every listed chain must have
  // a name, a description and at least two steps, each with label, tier, model
  // and a `template` key.
  const childSource = `
    import { listChains } from './agent-chains.mjs';
    const results = { errors: [] };

    const chains = listChains();
    const EXPECTED = ['explore-then-fix', 'review-and-test', 'audit-and-plan'];

    for (const name of EXPECTED) {
      if (!chains.find(c => c.name === name))
        results.errors.push('missing chain: ' + name);
    }

    for (const chain of chains) {
      if (!chain.name) results.errors.push('chain missing name');
      if (!chain.description) results.errors.push((chain.name || '?') + ': missing description');
      if (!Array.isArray(chain.steps) || chain.steps.length < 2)
        results.errors.push((chain.name || '?') + ': must have at least 2 steps');
      for (const step of (chain.steps || [])) {
        if (!step.label) results.errors.push((chain.name || '?') + ' step missing label');
        if (!step.tier) results.errors.push((chain.name || '?') + ' step missing tier');
        if (!step.model) results.errors.push((chain.name || '?') + ' step missing model');
        if (!('template' in step)) results.errors.push((chain.name || '?') + ' step missing template key');
      }
    }

    process.stdout.write(JSON.stringify(results));
  `;
  const spawnOptions = { encoding: 'utf8', timeout: 5000, cwd: HOOKS };
  const child = spawnSync(process.execPath, ['--input-type=module', '-e', childSource], spawnOptions);

  if (child.status !== 0) return `agent-chains script failed: ${child.stderr}`;

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }

  const problems = report.errors;
  if (problems.length > 0) {
    return problems.join('; ');
  }
  return true;
});
|
|
1559
|
+
|
|
1560
|
+
// ─── Test 53: agent-chains: getChain returns null for unknown chains ──────────
|
|
1561
|
+
test('agent-chains: getChain returns null for unknown chains', () => {
  // getChain must return null for an unknown name and for the empty string,
  // and must return the matching chain for a known name.
  const childSource = `
    import { getChain } from './agent-chains.mjs';
    const results = { errors: [] };

    const unknown = getChain('no-such-chain');
    if (unknown !== null) results.errors.push('expected null for unknown chain, got: ' + JSON.stringify(unknown));

    const alsoUnknown = getChain('');
    if (alsoUnknown !== null) results.errors.push('expected null for empty string, got: ' + JSON.stringify(alsoUnknown));

    const known = getChain('explore-then-fix');
    if (!known || known.name !== 'explore-then-fix')
      results.errors.push('expected explore-then-fix chain, got: ' + JSON.stringify(known));

    process.stdout.write(JSON.stringify(results));
  `;
  const { status, stdout, stderr } = spawnSync(
    process.execPath,
    ['--input-type=module', '-e', childSource],
    { encoding: 'utf8', timeout: 5000, cwd: HOOKS },
  );

  if (status !== 0) return `agent-chains script failed: ${stderr}`;

  let report;
  try {
    report = JSON.parse(stdout.trim());
  } catch {
    return `output not JSON: ${stdout}`;
  }
  return report.errors.length > 0 ? report.errors.join('; ') : true;
});
|
|
1589
|
+
|
|
1590
|
+
// ─── Test 54: agent-templates: exports getTemplate, listTemplates, buildAgentPrompt ─
|
|
1591
|
+
test('agent-templates: exports getTemplate, listTemplates, buildAgentPrompt', () => {
  // Smoke check: agent-templates.mjs must export all three helpers as functions.
  const childSource = `
    import { getTemplate, listTemplates, buildAgentPrompt } from './agent-templates.mjs';
    const results = { errors: [] };

    if (typeof getTemplate !== 'function') results.errors.push('getTemplate not a function');
    if (typeof listTemplates !== 'function') results.errors.push('listTemplates not a function');
    if (typeof buildAgentPrompt !== 'function') results.errors.push('buildAgentPrompt not a function');

    process.stdout.write(JSON.stringify(results));
  `;
  const child = spawnSync(process.execPath, ['--input-type=module', '-e', childSource], {
    encoding: 'utf8',
    timeout: 5000,
    cwd: HOOKS,
  });

  if (child.status !== 0) return `agent-templates script failed: ${child.stderr}`;

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }

  if (report.errors.length > 0) {
    return report.errors.join('; ');
  }
  return true;
});
|
|
1613
|
+
|
|
1614
|
+
// ─── Test 55: agent-templates: all 4 templates have required fields ───────────
|
|
1615
|
+
test('agent-templates: all 4 templates have required fields', () => {
  // Each of the four expected TEMPLATES entries must exist, define every
  // required field, use a known tier and risk level, and have an object `flags`.
  const childSource = `
    import { TEMPLATES } from './agent-templates.mjs';
    const results = { errors: [] };

    const expected = ['explorer', 'security-review', 'test-writer', 'bug-hunter'];
    const requiredFields = ['tier', 'risk', 'quality_gate', 'prompt_template', 'flags'];

    for (const name of expected) {
      const tmpl = TEMPLATES[name];
      if (!tmpl) { results.errors.push('missing template: ' + name); continue; }
      for (const f of requiredFields) {
        if (tmpl[f] === undefined || tmpl[f] === null)
          results.errors.push(name + ' missing field: ' + f);
      }
      // tier must be one of the known tiers
      if (!['search', 'execute', 'think'].includes(tmpl.tier))
        results.errors.push(name + ' has unknown tier: ' + tmpl.tier);
      // risk must be a valid level
      if (!['low', 'medium', 'high', 'critical'].includes(tmpl.risk))
        results.errors.push(name + ' has unknown risk: ' + tmpl.risk);
      // flags must be an object
      if (typeof tmpl.flags !== 'object' || tmpl.flags === null)
        results.errors.push(name + ' flags is not an object');
    }

    process.stdout.write(JSON.stringify(results));
  `;
  const spawnOptions = { encoding: 'utf8', timeout: 5000, cwd: HOOKS };
  const { status, stdout, stderr } = spawnSync(
    process.execPath,
    ['--input-type=module', '-e', childSource],
    spawnOptions,
  );

  if (status !== 0) return `agent-templates script failed: ${stderr}`;

  let report;
  try {
    report = JSON.parse(stdout.trim());
  } catch {
    return `output not JSON: ${stdout}`;
  }
  return report.errors.length > 0 ? report.errors.join('; ') : true;
});
|
|
1654
|
+
|
|
1655
|
+
// ─── Test 56: agent-templates: buildAgentPrompt interpolates flags correctly ──
|
|
1656
|
+
test('agent-templates: buildAgentPrompt interpolates flags correctly', () => {
  // Child-process probe: builds prompts for each template with sample flags and
  // checks that the flag values (or expected metadata) show up in the result.
  const probeSource = `
    import { buildAgentPrompt } from './agent-templates.mjs';
    const results = { errors: [] };

    // explorer: question + scope
    const explorerFull = buildAgentPrompt('explorer', { question: 'find auth files', scope: 'src/auth' });
    if (!explorerFull) { results.errors.push('explorer returned null'); }
    else {
      if (!explorerFull.prompt.includes('find auth files'))
        results.errors.push('explorer prompt missing question: ' + explorerFull.prompt.slice(0, 100));
      if (!explorerFull.model) results.errors.push('explorer missing model');
      if (!explorerFull.tier) results.errors.push('explorer missing tier');
      if (!explorerFull.risk) results.errors.push('explorer missing risk');
      if (!explorerFull.quality_gate) results.errors.push('explorer missing quality_gate');
    }

    // explorer: without scope
    const explorerNoScope = buildAgentPrompt('explorer', { question: 'map the codebase' });
    if (!explorerNoScope) { results.errors.push('explorer (no scope) returned null'); }
    else {
      if (!explorerNoScope.prompt.includes('map the codebase'))
        results.errors.push('explorer prompt missing question (no scope): ' + explorerNoScope.prompt.slice(0, 100));
    }

    // test-writer: file flag
    const testWriter = buildAgentPrompt('test-writer', { file: 'src/api.ts', framework: 'jest' });
    if (!testWriter) { results.errors.push('test-writer returned null'); }
    else {
      if (!testWriter.prompt.includes('src/api.ts'))
        results.errors.push('test-writer prompt missing file: ' + testWriter.prompt.slice(0, 100));
    }

    // bug-hunter: deep depth
    const bugHunterDeep = buildAgentPrompt('bug-hunter', { area: 'payments', depth: 'deep' });
    if (!bugHunterDeep) { results.errors.push('bug-hunter returned null'); }
    else {
      if (!bugHunterDeep.prompt.toLowerCase().includes('payments'))
        results.errors.push('bug-hunter prompt missing area: ' + bugHunterDeep.prompt.slice(0, 100));
    }

    // security-review: severity filter
    const secReview = buildAgentPrompt('security-review', { scope: 'src/auth', severity: 'high' });
    if (!secReview) { results.errors.push('security-review returned null'); }
    else {
      if (secReview.risk !== 'high') results.errors.push('security-review risk should be high, got: ' + secReview.risk);
      if (secReview.quality_gate !== 'dual_brain_review')
        results.errors.push('security-review quality_gate should be dual_brain_review, got: ' + secReview.quality_gate);
    }

    process.stdout.write(JSON.stringify(results));
  `;
  const child = spawnSync(
    process.execPath,
    ['--input-type=module', '-e', probeSource],
    { encoding: 'utf8', timeout: 5000, cwd: HOOKS },
  );

  if (child.status !== 0) {
    return `agent-templates script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }
  return report.errors.length > 0 ? report.errors.join('; ') : true;
});
|
|
1719
|
+
|
|
1720
|
+
// ─── Test 57: agent-templates: getTemplate returns null for unknown ───────────
|
|
1721
|
+
test('agent-templates: getTemplate returns null for unknown template', () => {
  // Child-process probe: unknown and empty names must yield exactly null,
  // while a known name must yield a template whose `name` matches.
  const probeSource = `
    import { getTemplate } from './agent-templates.mjs';
    const results = { errors: [] };

    const unknown = getTemplate('no-such-template');
    if (unknown !== null) results.errors.push('expected null for unknown template, got: ' + JSON.stringify(unknown));

    const alsoUnknown = getTemplate('');
    if (alsoUnknown !== null) results.errors.push('expected null for empty string, got: ' + JSON.stringify(alsoUnknown));

    const known = getTemplate('explorer');
    if (!known || known.name !== 'explorer')
      results.errors.push('expected explorer template, got: ' + JSON.stringify(known));

    process.stdout.write(JSON.stringify(results));
  `;
  const nodeArgs = ['--input-type=module', '-e', probeSource];
  const spawnOptions = { encoding: 'utf8', timeout: 5000, cwd: HOOKS };
  const child = spawnSync(process.execPath, nodeArgs, spawnOptions);

  if (child.status !== 0) {
    return `agent-templates script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }

  if (report.errors.length > 0) {
    return report.errors.join('; ');
  }
  return true;
});
|
|
1749
|
+
|
|
1750
|
+
// ─── Test 58: ship-captain.mjs exports planExecution and executeShipCaptain ───
|
|
1751
|
+
test('ship-captain: exports planExecution and executeShipCaptain', () => {
  // Child-process probe: both named exports of ship-captain.mjs must be functions.
  const probeSource = `
    import { planExecution, executeShipCaptain } from './ship-captain.mjs';
    const results = { errors: [] };
    if (typeof planExecution !== 'function') results.errors.push('planExecution is not a function');
    if (typeof executeShipCaptain !== 'function') results.errors.push('executeShipCaptain is not a function');
    process.stdout.write(JSON.stringify(results));
  `;
  const child = spawnSync(
    process.execPath,
    ['--input-type=module', '-e', probeSource],
    { encoding: 'utf8', timeout: 8000, cwd: HOOKS },
  );

  if (child.status !== 0) {
    return `ship-captain script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }
  return report.errors.length > 0 ? report.errors.join('; ') : true;
});
|
|
1770
|
+
|
|
1771
|
+
// ─── Test 59: ship-gate.mjs exports discoverTests, runTests, generateDiffSummary, createPR ─
|
|
1772
|
+
test('ship-gate: exports discoverTests, runTests, generateDiffSummary, createPR', () => {
  // Child-process probe: each of the four named exports of ship-gate.mjs must be a function.
  const probeSource = `
    import { discoverTests, runTests, generateDiffSummary, createPR } from './ship-gate.mjs';
    const results = { errors: [] };
    if (typeof discoverTests !== 'function') results.errors.push('discoverTests is not a function');
    if (typeof runTests !== 'function') results.errors.push('runTests is not a function');
    if (typeof generateDiffSummary !== 'function') results.errors.push('generateDiffSummary is not a function');
    if (typeof createPR !== 'function') results.errors.push('createPR is not a function');
    process.stdout.write(JSON.stringify(results));
  `;
  const nodeArgs = ['--input-type=module', '-e', probeSource];
  const child = spawnSync(process.execPath, nodeArgs, { encoding: 'utf8', timeout: 8000, cwd: HOOKS });

  if (child.status !== 0) {
    return `ship-gate script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }

  if (report.errors.length > 0) {
    return report.errors.join('; ');
  }
  return true;
});
|
|
1793
|
+
|
|
1794
|
+
// ─── Test 60: ship-gate discoverTests finds test command in dual-brain itself ─
|
|
1795
|
+
test('ship-gate: discoverTests finds npm test script', () => {
  // The probe runs with cwd at the package root (one level above HOOKS) so that
  // package.json is in the working directory; the import path is adjusted to match.
  const packageRoot = resolve(HOOKS, '..');
  const probeSource = `
    import { discoverTests } from './hooks/ship-gate.mjs';
    const result = await discoverTests();
    const results = { errors: [] };
    if (!result) { results.errors.push('discoverTests returned null/undefined'); }
    else {
      // Must find the npm test script from package.json
      const resultStr = JSON.stringify(result);
      const hasNpmTest = result.command === 'npm test'
        || result.command === 'npm run test'
        || resultStr.includes('npm test')
        || resultStr.includes('test-orchestrator');
      if (!hasNpmTest) results.errors.push('discoverTests did not find npm test: ' + resultStr);
      // Confidence should be "high" when package.json test script is found
      if (result.confidence && result.confidence !== 'high')
        results.errors.push('expected confidence=high, got: ' + result.confidence);
    }
    process.stdout.write(JSON.stringify(results));
  `;
  const spawnOptions = { encoding: 'utf8', timeout: 10000, cwd: packageRoot };
  const child = spawnSync(process.execPath, ['--input-type=module', '-e', probeSource], spawnOptions);

  if (child.status !== 0) {
    return `ship-gate discoverTests script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }
  return report.errors.length > 0 ? report.errors.join('; ') : true;
});
|
|
1828
|
+
|
|
1829
|
+
// ─── Test 61: ship-captain planExecution returns valid plan ───────────────────
|
|
1830
|
+
test('ship-captain: planExecution returns valid plan', () => {
  // Child-process probe: the awaited plan must be an object with a goal, a
  // non-empty steps array, and at least one duration / step-count indicator.
  const probeSource = `
    import { planExecution } from './ship-captain.mjs';
    const result = await planExecution('fix a bug and write tests');
    const results = { errors: [] };
    if (!result || typeof result !== 'object') {
      results.errors.push('planExecution did not return an object');
    } else {
      if (!result.goal) results.errors.push('plan missing goal');
      if (!Array.isArray(result.steps)) results.errors.push('plan missing steps array');
      else if (result.steps.length === 0) results.errors.push('plan steps array is empty');
      // Accept any duration/time/steps indicator — implementation may vary
      const hasDuration = result.estimated_duration != null
        || result.estimated_duration_ms != null
        || result.duration != null
        || result.estimated_steps != null
        || result.step_count != null
        || result.total_steps != null
        || typeof result.steps?.length === 'number';
      if (!hasDuration) results.errors.push('plan missing any duration or steps count field');
    }
    process.stdout.write(JSON.stringify(results));
  `;
  const child = spawnSync(
    process.execPath,
    ['--input-type=module', '-e', probeSource],
    { encoding: 'utf8', timeout: 15000, cwd: HOOKS },
  );

  if (child.status !== 0) {
    return `ship-captain planExecution script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }

  if (report.errors.length > 0) {
    return report.errors.join('; ');
  }
  return true;
});
|
|
1864
|
+
|
|
1865
|
+
// ─── Test 62: install.mjs includes ship-captain commands in help ──────────────
|
|
1866
|
+
test('install.mjs: includes ship-captain commands in help', () => {
  // Plain substring scan of install.mjs source: first the command words,
  // then the two hook file names.
  const installText = readFileSync(resolve(__dirname, '..', 'install.mjs'), 'utf8');

  const absent = [];
  for (const needle of ['do', 'ship', 'runs', 'resume', 'test-run', 'diff', 'Ship Captain']) {
    if (!installText.includes(needle)) absent.push(needle);
  }
  if (absent.length > 0) {
    return `install.mjs missing: ${absent.join(', ')}`;
  }

  // Also check ship-captain.mjs and ship-gate.mjs are mentioned (HOOKS array).
  for (const hookFile of ['ship-captain.mjs', 'ship-gate.mjs']) {
    if (!installText.includes(hookFile)) {
      return `HOOKS array missing ${hookFile}`;
    }
  }
  return true;
});
|
|
1876
|
+
|
|
1877
|
+
// ─── Test 63: confirmation-policy: exports all required functions ─────────────
|
|
1878
|
+
test('confirmation-policy: exports all required functions', () => {
  // Child-process probe: the four named exports of confirmation-policy.mjs must be functions.
  const probeSource = `
    import { getConfirmationPolicy, resolveMode, aggregateRisk, formatConfirmation } from './confirmation-policy.mjs';
    const results = { errors: [] };
    if (typeof getConfirmationPolicy !== 'function') results.errors.push('getConfirmationPolicy is not a function');
    if (typeof resolveMode !== 'function') results.errors.push('resolveMode is not a function');
    if (typeof aggregateRisk !== 'function') results.errors.push('aggregateRisk is not a function');
    if (typeof formatConfirmation !== 'function') results.errors.push('formatConfirmation is not a function');
    process.stdout.write(JSON.stringify(results));
  `;
  const nodeArgs = ['--input-type=module', '-e', probeSource];
  const child = spawnSync(process.execPath, nodeArgs, { encoding: 'utf8', timeout: 5000, cwd: HOOKS });

  if (child.status !== 0) {
    return `confirmation-policy script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }
  return report.errors.length > 0 ? report.errors.join('; ') : true;
});
|
|
1899
|
+
|
|
1900
|
+
// ─── Test 64: confirmation-policy default mode low risk skips confirmations ───
|
|
1901
|
+
test('confirmation-policy: default mode low risk skips confirmations', () => {
  // Child-process probe: low risk in default mode must neither confirm nor block.
  const probeSource = `
    import { getConfirmationPolicy } from './confirmation-policy.mjs';
    const result = getConfirmationPolicy({ risk: 'low', mode: 'default', step: 'edit' });
    const results = { errors: [] };
    if (result.shouldConfirm !== false) results.errors.push('expected shouldConfirm=false, got: ' + result.shouldConfirm);
    if (result.shouldBlock !== false) results.errors.push('expected shouldBlock=false, got: ' + result.shouldBlock);
    process.stdout.write(JSON.stringify(results));
  `;
  const spawnOptions = { encoding: 'utf8', timeout: 5000, cwd: HOOKS };
  const child = spawnSync(process.execPath, ['--input-type=module', '-e', probeSource], spawnOptions);

  if (child.status !== 0) {
    return `confirmation-policy script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }

  if (report.errors.length > 0) {
    return report.errors.join('; ');
  }
  return true;
});
|
|
1921
|
+
|
|
1922
|
+
// ─── Test 65: confirmation-policy default mode critical risk blocks ────────────
|
|
1923
|
+
test('confirmation-policy: default mode critical risk blocks', () => {
  // Child-process probe: critical risk in default mode must set shouldBlock to true.
  const probeSource = `
    import { getConfirmationPolicy } from './confirmation-policy.mjs';
    const result = getConfirmationPolicy({ risk: 'critical', mode: 'default', step: 'edit' });
    const results = { errors: [] };
    if (result.shouldBlock !== true) results.errors.push('expected shouldBlock=true, got: ' + result.shouldBlock);
    process.stdout.write(JSON.stringify(results));
  `;
  const child = spawnSync(
    process.execPath,
    ['--input-type=module', '-e', probeSource],
    { encoding: 'utf8', timeout: 5000, cwd: HOOKS },
  );

  if (child.status !== 0) {
    return `confirmation-policy script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }
  return report.errors.length > 0 ? report.errors.join('; ') : true;
});
|
|
1942
|
+
|
|
1943
|
+
// ─── Test 66: confirmation-policy yolo mode allows critical ───────────────────
|
|
1944
|
+
test('confirmation-policy: yolo mode allows critical', () => {
  // Child-process probe: even critical risk must neither block nor confirm in yolo mode.
  const probeSource = `
    import { getConfirmationPolicy } from './confirmation-policy.mjs';
    const result = getConfirmationPolicy({ risk: 'critical', mode: 'yolo', step: 'edit' });
    const results = { errors: [] };
    if (result.shouldBlock !== false) results.errors.push('expected shouldBlock=false in yolo mode, got: ' + result.shouldBlock);
    if (result.shouldConfirm !== false) results.errors.push('expected shouldConfirm=false in yolo mode, got: ' + result.shouldConfirm);
    process.stdout.write(JSON.stringify(results));
  `;
  const nodeArgs = ['--input-type=module', '-e', probeSource];
  const spawnOptions = { encoding: 'utf8', timeout: 5000, cwd: HOOKS };
  const child = spawnSync(process.execPath, nodeArgs, spawnOptions);

  if (child.status !== 0) {
    return `confirmation-policy script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }

  if (report.errors.length > 0) {
    return report.errors.join('; ');
  }
  return true;
});
|
|
1964
|
+
|
|
1965
|
+
// ─── Test 67: confirmation-policy careful mode confirms everything ─────────────
|
|
1966
|
+
test('confirmation-policy: careful mode confirms everything', () => {
  // Child-process probe: low risk in careful mode must still request confirmation.
  const probeSource = `
    import { getConfirmationPolicy } from './confirmation-policy.mjs';
    const result = getConfirmationPolicy({ risk: 'low', mode: 'careful', step: 'edit' });
    const results = { errors: [] };
    if (result.shouldConfirm !== true) results.errors.push('expected shouldConfirm=true in careful mode, got: ' + result.shouldConfirm);
    process.stdout.write(JSON.stringify(results));
  `;
  const child = spawnSync(
    process.execPath,
    ['--input-type=module', '-e', probeSource],
    { encoding: 'utf8', timeout: 5000, cwd: HOOKS },
  );

  if (child.status !== 0) {
    return `confirmation-policy script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }
  return report.errors.length > 0 ? report.errors.join('; ') : true;
});
|
|
1985
|
+
|
|
1986
|
+
// ─── Test 68: confirmation-policy aggregateRisk returns highest ───────────────
|
|
1987
|
+
test('confirmation-policy: aggregateRisk returns highest', () => {
  // Child-process probe: a mixed list topping out at 'high' must aggregate to 'high'.
  const probeSource = `
    import { aggregateRisk } from './confirmation-policy.mjs';
    const result = aggregateRisk(['low', 'medium', 'high', 'low']);
    const results = { errors: [] };
    if (result !== 'high') results.errors.push('expected "high", got: ' + result);
    process.stdout.write(JSON.stringify(results));
  `;
  const nodeArgs = ['--input-type=module', '-e', probeSource];
  const child = spawnSync(process.execPath, nodeArgs, { encoding: 'utf8', timeout: 5000, cwd: HOOKS });

  if (child.status !== 0) {
    return `confirmation-policy script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }

  if (report.errors.length > 0) {
    return report.errors.join('; ');
  }
  return true;
});
|
|
2006
|
+
|
|
2007
|
+
// ─── Test 69: confirmation-policy resolveMode parses flags ────────────────────
|
|
2008
|
+
test('confirmation-policy: resolveMode parses flags', () => {
  // Child-process probe: '--yolo' and '--careful' map to their modes,
  // and an empty flag list maps to 'default'.
  const probeSource = `
    import { resolveMode } from './confirmation-policy.mjs';
    const results = { errors: [] };
    const yolo = resolveMode(['--yolo']);
    if (yolo !== 'yolo') results.errors.push('expected "yolo" for ["--yolo"], got: ' + yolo);
    const careful = resolveMode(['--careful']);
    if (careful !== 'careful') results.errors.push('expected "careful" for ["--careful"], got: ' + careful);
    const def = resolveMode([]);
    if (def !== 'default') results.errors.push('expected "default" for [], got: ' + def);
    process.stdout.write(JSON.stringify(results));
  `;
  const spawnOptions = { encoding: 'utf8', timeout: 5000, cwd: HOOKS };
  const child = spawnSync(process.execPath, ['--input-type=module', '-e', probeSource], spawnOptions);

  if (child.status !== 0) {
    return `confirmation-policy script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }
  return report.errors.length > 0 ? report.errors.join('; ') : true;
});
|
|
2031
|
+
|
|
2032
|
+
// ─── Test 70: ship-gate exports runShipGate ───────────────────────────────────
|
|
2033
|
+
test('ship-gate: exports runShipGate', () => {
  // Child-process probe: ship-gate.mjs must export runShipGate as a function.
  const probeSource = `
    import { runShipGate } from './ship-gate.mjs';
    const results = { errors: [] };
    if (typeof runShipGate !== 'function') results.errors.push('runShipGate is not a function');
    process.stdout.write(JSON.stringify(results));
  `;
  const child = spawnSync(
    process.execPath,
    ['--input-type=module', '-e', probeSource],
    { encoding: 'utf8', timeout: 8000, cwd: HOOKS },
  );

  if (child.status !== 0) {
    return `ship-gate script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }

  if (report.errors.length > 0) {
    return report.errors.join('; ');
  }
  return true;
});
|
|
2051
|
+
|
|
2052
|
+
// ─── Test 71: ship-captain planExecution steps have index, total, and task ────
|
|
2053
|
+
test('ship-captain: planExecution returns plan with steps array', () => {
  // Child-process probe: the plan must carry a goal and a non-empty steps array,
  // and every step must have a numeric index, a numeric total, and a task.
  //
  // planExecution is awaited, matching the sibling "returns valid plan" test:
  // awaiting a plain value is a no-op, while an async implementation would
  // otherwise be inspected as a Promise and fail every field check.
  const script = `
    import { planExecution } from './ship-captain.mjs';
    const result = await planExecution('write tests for the auth module');
    const errors = [];
    if (!result || typeof result !== 'object') {
      errors.push('planExecution did not return an object');
    } else {
      if (!result.goal) errors.push('plan missing goal');
      if (!Array.isArray(result.steps)) {
        errors.push('plan missing steps array');
      } else {
        if (result.steps.length === 0) errors.push('plan steps array is empty');
        // Verify each step has the expected shape from planExecution.
        // Only iterate a real array so a malformed plan is reported as an
        // error rather than crashing the probe with a TypeError.
        for (const step of result.steps) {
          if (typeof step?.index !== 'number') errors.push('step missing index');
          if (typeof step?.total !== 'number') errors.push('step missing total');
          if (!step?.task) errors.push('step missing task');
        }
      }
    }
    process.stdout.write(JSON.stringify({ errors }));
  `;
  const proc = spawnSync(process.execPath, [
    '--input-type=module',
    '-e', script,
  ], { encoding: 'utf8', timeout: 15000, cwd: HOOKS });

  // A non-zero (or null) exit status means the probe crashed or timed out.
  if (proc.status !== 0) return `ship-captain planExecution script failed: ${proc.stderr}`;

  let result;
  try {
    result = JSON.parse(proc.stdout.trim());
  } catch {
    return `output not JSON: ${proc.stdout}`;
  }
  if (result.errors.length > 0) return result.errors.join('; ');
  return true;
});
|
|
2084
|
+
|
|
2085
|
+
// ─── Test 72: resume handles missing runs directory gracefully ─────────────────
|
|
2086
|
+
test('resume: handles missing .claude/runs/ directory gracefully', () => {
  // Run the install.mjs resume subcommand from a temp dir that has no .claude/runs/.
  const mk = spawnSync('mktemp', ['-d'], { encoding: 'utf8' });
  const tmpDir = (mk.stdout || '').trim();
  // Bail out if no temp dir was created: an empty path would otherwise be used
  // as cwd and HOME for the child and handed to `rm -rf`, and a failed spawn
  // leaves stdout null.
  if (mk.error || mk.status !== 0 || !tmpDir) {
    const reason = (mk.error && mk.error.message) || mk.stderr || 'mktemp produced no path';
    return `could not create temp dir: ${reason}`;
  }

  try {
    const installScript = resolve(HOOKS, '..', 'install.mjs');
    const proc = spawnSync(process.execPath, [installScript, 'resume'], {
      encoding: 'utf8',
      stdio: ['pipe', 'pipe', 'pipe'],
      cwd: tmpDir,
      timeout: 8000,
      // HOME is pointed at the empty temp dir as well.
      env: { ...process.env, HOME: tmpDir },
    });

    // Should exit cleanly (0 or 1 is fine — just not crash with unhandled exception)
    if (proc.status === null) return 'process timed out';

    const combined = (proc.stdout || '') + (proc.stderr || '');
    // Should print a helpful message, not a stack trace
    if (combined.includes('TypeError') || combined.includes('ReferenceError') ||
        combined.includes('SyntaxError') || combined.includes('at Object.<anonymous>'))
      return `unexpected JS error in output: ${combined.slice(0, 200)}`;

    // Should mention "No runs found" or similar
    const hasHelpMsg = combined.includes('No runs') || combined.includes('no runs') ||
      combined.includes("Start with") || combined.includes('Nothing to resume');
    if (!hasHelpMsg) return `expected helpful message, got: ${combined.slice(0, 200)}`;

    return true;
  } finally {
    // Always remove the temp dir, whatever the outcome.
    spawnSync('rm', ['-rf', tmpDir], { stdio: 'pipe' });
  }
});
|
|
2118
|
+
|
|
2119
|
+
// ─── Test 73: mismatch severity — think task on haiku gets BLOCKED (major) ────
|
|
2120
|
+
test('enforce-tier v4.5: think on haiku → major mismatch, BLOCKED message', () => {
  // Clear cooldown and use balanced profile (strict tolerance → block on major)
  try { unlinkSync(COOLDOWN_FILE); } catch { /* no cooldown file to remove */ }

  const profilePath = resolve(__dirname, '..', 'dual-brain.profile.json');
  // Remember the existing profile (null when it cannot be read) for restoration.
  let savedProfile = null;
  try { savedProfile = readFileSync(profilePath, 'utf8'); } catch { /* keep null */ }

  try {
    writeFileSync(profilePath, JSON.stringify({ active: 'balanced' }));

    const toolInput = {
      prompt: 'review security architecture and design the auth system',
      model: 'haiku',
    };
    const payload = JSON.stringify({ tool_name: 'Agent', tool_input: toolInput });

    const { parsed } = run(ENFORCE_TIER, payload);
    if (!parsed) {
      return 'no valid JSON output';
    }
    if (!parsed.systemMessage) {
      return `expected systemMessage, got: ${JSON.stringify(parsed)}`;
    }

    const msg = parsed.systemMessage;
    const blocked = msg.includes('BLOCKED') || msg.includes('⛔');
    if (!blocked) {
      return `expected BLOCKED marker for think/haiku major mismatch, got: ${msg}`;
    }

    const lowered = msg.toLowerCase();
    const suggestsThinkTier = ['opus', 'gpt-5.5', 'think'].some((hint) => lowered.includes(hint));
    if (!suggestsThinkTier) {
      return `expected think-tier model suggestion in message, got: ${msg}`;
    }
    return true;
  } finally {
    // Put the profile file back the way it was found.
    if (savedProfile === null) {
      try { unlinkSync(profilePath); } catch { /* nothing to remove */ }
    } else {
      writeFileSync(profilePath, savedProfile);
    }
  }
});
|
|
2149
|
+
|
|
2150
|
+
// ─── Test 74: mismatch severity — execute on sonnet gets no mismatch ──────────
|
|
2151
|
+
test('enforce-tier v4.5: execute on sonnet → no mismatch', () => {
  // Start from a clean cooldown state.
  try { unlinkSync(COOLDOWN_FILE); } catch { /* no cooldown file to remove */ }

  const profilePath = resolve(__dirname, '..', 'dual-brain.profile.json');
  // Remember the existing profile (null when it cannot be read) for restoration.
  let savedProfile = null;
  try { savedProfile = readFileSync(profilePath, 'utf8'); } catch { /* keep null */ }

  try {
    writeFileSync(profilePath, JSON.stringify({ active: 'balanced' }));

    const toolInput = {
      // Timestamp suffix makes the prompt text different on every run.
      prompt: `implement the fix and write unit tests ${Date.now()}`,
      model: 'sonnet',
    };
    const payload = JSON.stringify({ tool_name: 'Agent', tool_input: toolInput });

    const { parsed } = run(ENFORCE_TIER, payload);
    if (!parsed) {
      return 'no valid JSON output';
    }

    // A missing systemMessage is acceptable here; treat it as empty text.
    const msg = parsed.systemMessage || '';
    const blocked = msg.includes('BLOCKED') || msg.includes('⛔');
    if (blocked) {
      return `unexpected BLOCKED for execute/sonnet (correct tier), got: ${msg}`;
    }
    if (msg.toLowerCase().includes('mismatch')) {
      return `unexpected mismatch warning for execute/sonnet, got: ${msg}`;
    }
    return true;
  } finally {
    // Put the profile file back the way it was found.
    if (savedProfile === null) {
      try { unlinkSync(profilePath); } catch { /* nothing to remove */ }
    } else {
      writeFileSync(profilePath, savedProfile);
    }
  }
});
|
|
2178
|
+
|
|
2179
|
+
// ─── Test 75: mismatch severity — search on opus gets BLOCKED (major, overkill) ─
|
|
2180
|
+
test('enforce-tier v4.5: search on opus → major mismatch, BLOCKED message', () => {
  // Drop any cooldown state left by earlier tests; a missing file is fine.
  try {
    unlinkSync(COOLDOWN_FILE);
  } catch {}

  // Snapshot the profile file so the finally block can restore (or remove) it.
  const profilePath = resolve(__dirname, '..', 'dual-brain.profile.json');
  let savedProfile = null;
  try {
    savedProfile = readFileSync(profilePath, 'utf8');
  } catch {
    // No readable profile: keep null so cleanup deletes the one written below.
  }

  try {
    writeFileSync(profilePath, JSON.stringify({ active: 'balanced' }));

    const hookInput = JSON.stringify({
      tool_name: 'Agent',
      tool_input: {
        prompt: 'find all auth files',
        model: 'opus',
        subagent_type: 'Explore',
      },
    });
    const { parsed } = run(ENFORCE_TIER, hookInput);
    if (!parsed) {
      return 'no valid JSON output';
    }
    if (!parsed.systemMessage) {
      return `expected systemMessage, got: ${JSON.stringify(parsed)}`;
    }

    const msg = parsed.systemMessage;
    const hasBlockMarker = msg.includes('BLOCKED') || msg.includes('⛔');
    if (!hasBlockMarker) {
      return `expected BLOCKED marker for search/opus major mismatch (overkill), got: ${msg}`;
    }

    // Any one of these keywords counts as pointing at the search tier.
    const lowered = msg.toLowerCase();
    const mentionsSearchTier = ['haiku', 'gpt-4.1-mini', 'search'].some((word) => lowered.includes(word));
    if (!mentionsSearchTier) {
      return `expected search-tier model suggestion in message, got: ${msg}`;
    }
    return true;
  } finally {
    if (savedProfile === null) {
      try {
        unlinkSync(profilePath);
      } catch {}
    } else {
      writeFileSync(profilePath, savedProfile);
    }
  }
});
|
|
2209
|
+
|
|
2210
|
+
// ─── Test 76: mismatch severity — think on sonnet gets minor WARNING (not block) ─
|
|
2211
|
+
test('enforce-tier v4.5: think on sonnet → minor mismatch, warning not block', () => {
  // Drop any cooldown state left by earlier tests; a missing file is fine.
  try {
    unlinkSync(COOLDOWN_FILE);
  } catch {}

  // Snapshot the profile file so the finally block can restore (or remove) it.
  const profilePath = resolve(__dirname, '..', 'dual-brain.profile.json');
  let savedProfile = null;
  try {
    savedProfile = readFileSync(profilePath, 'utf8');
  } catch {
    // No readable profile: keep null so cleanup deletes the one written below.
  }

  try {
    writeFileSync(profilePath, JSON.stringify({ active: 'balanced' }));

    const hookInput = JSON.stringify({
      tool_name: 'Agent',
      tool_input: {
        prompt: 'review architecture and plan the migration strategy',
        model: 'sonnet',
      },
    });
    const { parsed } = run(ENFORCE_TIER, hookInput);
    if (!parsed) {
      return 'no valid JSON output';
    }
    if (!parsed.systemMessage) {
      return `expected systemMessage warning, got: ${JSON.stringify(parsed)}`;
    }

    const msg = parsed.systemMessage;
    // Minor mismatch under the balanced profile must warn, never block.
    const looksBlocked = msg.includes('⛔') || msg.includes('BLOCKED');
    if (looksBlocked) {
      return `expected warning (not BLOCKED) for think/sonnet minor mismatch, got: ${msg}`;
    }

    // Any one of these keywords counts as mentioning the think tier.
    const lowered = msg.toLowerCase();
    const mentionsThinkTier = ['think', 'mismatch', 'opus'].some((word) => lowered.includes(word));
    if (!mentionsThinkTier) {
      return `expected think-tier mention in warning, got: ${msg}`;
    }
    return true;
  } finally {
    if (savedProfile === null) {
      try {
        unlinkSync(profilePath);
      } catch {}
    } else {
      writeFileSync(profilePath, savedProfile);
    }
  }
});
|
|
2240
|
+
|
|
2241
|
+
// ─── Test 76b: ship-gate exports selfHealGate as a function ──────────────────
|
|
2242
|
+
test('ship-gate: exports selfHealGate as a function', () => {
  // Probe run in a child node process (cwd = HOOKS): import the module and
  // report the type of the export as JSON on stdout.
  const script = `
    import { selfHealGate } from './ship-gate.mjs';
    const results = { errors: [] };
    if (typeof selfHealGate !== 'function') results.errors.push('selfHealGate is not a function');
    process.stdout.write(JSON.stringify(results));
  `;
  const nodeArgs = ['--input-type=module', '-e', script];
  const child = spawnSync(process.execPath, nodeArgs, { encoding: 'utf8', timeout: 8000, cwd: HOOKS });

  if (child.status !== 0) {
    return `ship-gate script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }

  const problems = report.errors;
  return problems.length > 0 ? problems.join('; ') : true;
});
|
|
2260
|
+
|
|
2261
|
+
// ─── Test 77: confirmation-policy handles 'heal' step correctly ───────────────
|
|
2262
|
+
test('confirmation-policy: heal step auto-proceeds in default mode', () => {
  // Child script (cwd = HOOKS) queries the policy for the 'heal' step under each
  // mode and collects every unexpected flag into results.errors.
  const script = `
    import { getConfirmationPolicy } from './confirmation-policy.mjs';
    const results = { errors: [] };

    // Default mode: heal should auto-proceed at all risk levels
    const healLow = getConfirmationPolicy({ risk: 'low', mode: 'default', step: 'heal' });
    if (healLow.shouldBlock !== false) results.errors.push('default/low heal: expected shouldBlock=false, got: ' + healLow.shouldBlock);
    if (healLow.shouldConfirm !== false) results.errors.push('default/low heal: expected shouldConfirm=false, got: ' + healLow.shouldConfirm);

    const healHigh = getConfirmationPolicy({ risk: 'high', mode: 'default', step: 'heal' });
    if (healHigh.shouldBlock !== false) results.errors.push('default/high heal: expected shouldBlock=false, got: ' + healHigh.shouldBlock);
    if (healHigh.shouldConfirm !== false) results.errors.push('default/high heal: expected shouldConfirm=false, got: ' + healHigh.shouldConfirm);

    const healCritical = getConfirmationPolicy({ risk: 'critical', mode: 'default', step: 'heal' });
    if (healCritical.shouldBlock !== false) results.errors.push('default/critical heal: expected shouldBlock=false, got: ' + healCritical.shouldBlock);

    // Careful mode: heal should require confirmation
    const healCareful = getConfirmationPolicy({ risk: 'low', mode: 'careful', step: 'heal' });
    if (healCareful.shouldConfirm !== true) results.errors.push('careful heal: expected shouldConfirm=true, got: ' + healCareful.shouldConfirm);

    // Yolo mode: heal should auto-proceed
    const healYolo = getConfirmationPolicy({ risk: 'critical', mode: 'yolo', step: 'heal' });
    if (healYolo.shouldBlock !== false) results.errors.push('yolo heal: expected shouldBlock=false, got: ' + healYolo.shouldBlock);
    if (healYolo.shouldConfirm !== false) results.errors.push('yolo heal: expected shouldConfirm=false, got: ' + healYolo.shouldConfirm);

    // Plan-only mode: heal should be blocked (no mutations)
    const healPlanOnly = getConfirmationPolicy({ risk: 'low', mode: 'plan-only', step: 'heal' });
    if (healPlanOnly.shouldBlock !== true) results.errors.push('plan-only heal: expected shouldBlock=true, got: ' + healPlanOnly.shouldBlock);

    process.stdout.write(JSON.stringify(results));
  `;
  const nodeArgs = ['--input-type=module', '-e', script];
  const child = spawnSync(process.execPath, nodeArgs, { encoding: 'utf8', timeout: 5000, cwd: HOOKS });

  if (child.status !== 0) {
    return `confirmation-policy script failed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }

  const problems = report.errors;
  return problems.length > 0 ? problems.join('; ') : true;
});
|
|
2305
|
+
|
|
2306
|
+
// ─── Test E2E-1: ship-captain plan-only mode returns without executing ────────
|
|
2307
|
+
test('E2E: ship-captain plan-only mode (no agent calls, no file changes)', () => {
  // executeShipCaptain prints its plan with console.log before returning, so the
  // child script reroutes console.log/console.error to stderr; stdout then holds
  // only the JSON verdict written at the end.
  const script = `
    import { executeShipCaptain } from './ship-captain.mjs';
    console.log = (...a) => process.stderr.write(a.join(' ') + '\\n');
    console.error = (...a) => process.stderr.write(a.join(' ') + '\\n');
    const result = await executeShipCaptain('fix a bug and write tests', { planOnly: true });
    const errors = [];
    if (!result || typeof result !== 'object') {
      errors.push('executeShipCaptain did not return an object');
    } else {
      if (result.status !== 'dry_run') errors.push('expected status=dry_run, got: ' + result.status);
      if (result.id !== null && result.id !== undefined) errors.push('expected id=null in plan-only, got: ' + result.id);
      if (!result.goal) errors.push('result missing goal field');
      if (!Array.isArray(result.steps)) errors.push('result.steps not array, got: ' + typeof result.steps);
    }
    process.stdout.write(JSON.stringify({ errors }));
  `;
  const nodeArgs = ['--input-type=module', '-e', script];
  const child = spawnSync(process.execPath, nodeArgs, { encoding: 'utf8', timeout: 20000, cwd: HOOKS });

  if (child.status !== 0) {
    const errText = child.stderr || '';
    return `script failed (exit ${child.status}): ${errText.slice(0, 300)}`;
  }

  const rawOut = child.stdout || '';
  let verdict;
  try {
    verdict = JSON.parse(rawOut.trim());
  } catch {
    return `output not JSON: ${rawOut.slice(0, 300)}`;
  }

  const problems = verdict.errors;
  return problems.length > 0 ? problems.join('; ') : true;
});
|
|
2337
|
+
|
|
2338
|
+
// ─── Test E2E-2: ship-captain planExecution produces valid plan ───────────────
|
|
2339
|
+
test('E2E: planExecution("review security and write tests for auth") returns valid plan', () => {
  // Child script (cwd = HOOKS) builds a plan and checks its shape: a goal, a
  // non-empty steps array, and per-step task/tier/risk plus a template or chain name.
  const script = `
    import { planExecution } from './ship-captain.mjs';
    const plan = planExecution('review security and write tests for auth');
    const errors = [];

    if (!plan || typeof plan !== 'object') {
      errors.push('planExecution did not return an object');
    } else {
      if (!plan.goal) errors.push('plan missing goal');
      if (!Array.isArray(plan.steps)) {
        errors.push('plan.steps is not an array');
      } else {
        if (plan.steps.length === 0) errors.push('plan.steps is empty');
        for (const step of plan.steps) {
          if (!step.task) { errors.push('step missing task field'); continue; }
          if (!step.task.title && !step.task.task) errors.push('step.task missing title/task');
          if (!step.task.tier) errors.push('step.task missing tier');
          if (!step.task.risk) errors.push('step.task missing risk');
          if (step.templateName === undefined && step.chainName === undefined)
            errors.push('step missing templateName/chainName');
        }
      }
    }

    process.stdout.write(JSON.stringify({ errors }));
  `;
  const nodeArgs = ['--input-type=module', '-e', script];
  const child = spawnSync(process.execPath, nodeArgs, { encoding: 'utf8', timeout: 15000, cwd: HOOKS });

  if (child.status !== 0) {
    const errText = child.stderr || '';
    return `script failed (exit ${child.status}): ${errText.slice(0, 400)}`;
  }

  const rawOut = child.stdout || '';
  let verdict;
  try {
    verdict = JSON.parse(rawOut.trim());
  } catch {
    return `output not JSON: ${rawOut.slice(0, 200)}`;
  }

  const problems = verdict.errors;
  return problems.length > 0 ? problems.join('; ') : true;
});
|
|
2377
|
+
|
|
2378
|
+
// ─── Test E2E-3: ship-gate discoverTests + generateDiffSummary (real repo) ────
|
|
2379
|
+
test('E2E: discoverTests finds npm test + generateDiffSummary returns valid shape', () => {
  // The child runs from the package root (parent of HOOKS), hence the
  // './hooks/...' import path inside the script.
  const pkgRoot = resolve(HOOKS, '..');
  const script = `
    import { discoverTests, generateDiffSummary } from './hooks/ship-gate.mjs';
    const errors = [];

    const discovery = discoverTests();
    if (!discovery || typeof discovery !== 'object') {
      errors.push('discoverTests did not return an object');
    } else {
      if (discovery.command !== 'npm test')
        errors.push('expected command=npm test, got: ' + discovery.command);
      if (discovery.confidence !== 'high')
        errors.push('expected confidence=high, got: ' + discovery.confidence);
    }

    const diff = generateDiffSummary();
    if (!diff || typeof diff !== 'object') {
      errors.push('generateDiffSummary did not return an object');
    } else {
      if (!Array.isArray(diff.files_added)) errors.push('diff missing files_added array');
      if (!Array.isArray(diff.files_modified)) errors.push('diff missing files_modified array');
      if (!Array.isArray(diff.files_deleted)) errors.push('diff missing files_deleted array');
      if (typeof diff.stats !== 'string') errors.push('diff.stats is not a string, got: ' + typeof diff.stats);
    }

    process.stdout.write(JSON.stringify({ errors }));
  `;
  const nodeArgs = ['--input-type=module', '-e', script];
  const child = spawnSync(process.execPath, nodeArgs, { encoding: 'utf8', timeout: 20000, cwd: pkgRoot });

  if (child.status !== 0) {
    const errText = child.stderr || '';
    return `script failed (exit ${child.status}): ${errText.slice(0, 400)}`;
  }

  const rawOut = child.stdout || '';
  let verdict;
  try {
    verdict = JSON.parse(rawOut.trim());
  } catch {
    return `output not JSON: ${rawOut.slice(0, 200)}`;
  }

  const problems = verdict.errors;
  return problems.length > 0 ? problems.join('; ') : true;
});
|
|
2418
|
+
|
|
2419
|
+
// ─── Test E2E-4: runShipGate with no-pr returns structured result ─────────────
|
|
2420
|
+
test('E2E: runShipGate({ noPr: true, yes: true }) returns status + tests + diff', () => {
  // The child runs from the package root (parent of HOOKS). If runShipGate throws,
  // the script reports that single error and exits 0 so the parent still gets JSON.
  const pkgRoot = resolve(HOOKS, '..');
  const script = `
    import { runShipGate } from './hooks/ship-gate.mjs';
    const errors = [];
    let result;
    try {
      result = await runShipGate({ goal: 'E2E test run', noPr: true, yes: true });
    } catch (e) {
      errors.push('runShipGate threw: ' + e.message);
      process.stdout.write(JSON.stringify({ errors }));
      process.exit(0);
    }

    if (!result || typeof result !== 'object') {
      errors.push('runShipGate did not return an object');
    } else {
      if (typeof result.status !== 'string')
        errors.push('result.status is not a string, got: ' + typeof result.status);
      if (!result.tests || typeof result.tests !== 'object')
        errors.push('result.tests is missing or not an object');
      else if (typeof result.tests.ran !== 'boolean')
        errors.push('result.tests.ran is not a boolean');
      if (!result.diff || typeof result.diff !== 'object')
        errors.push('result.diff is missing or not an object');
    }

    process.stdout.write(JSON.stringify({ errors }));
  `;
  const nodeArgs = ['--input-type=module', '-e', script];
  const child = spawnSync(process.execPath, nodeArgs, { encoding: 'utf8', timeout: 180000, cwd: pkgRoot });

  // A null exit status is reported as a timeout; other exit codes fall through
  // to the JSON parse below, whose failure message includes stderr.
  if (child.status === null) {
    return 'process timed out';
  }

  const rawOut = child.stdout || '';
  let verdict;
  try {
    verdict = JSON.parse(rawOut.trim());
  } catch {
    const errText = child.stderr || '';
    return `output not JSON: ${rawOut.slice(0, 200)} stderr: ${errText.slice(0, 200)}`;
  }

  const problems = verdict.errors;
  return problems.length > 0 ? problems.join('; ') : true;
});
|
|
2460
|
+
|
|
2461
|
+
// ─── Test E2E-5: intent classification coverage (10 goals, ≥ 8/10 correct) ───
|
|
2462
|
+
test('E2E: classifyGoalIntent — 10 goals, at least 8/10 match expected intent', () => {
  // Child script (cwd = HOOKS) classifies ten sample goals. Each result must carry
  // intent/confidence/reason, and at least eight intents must be in the accepted set.
  const script = `
    import { classifyGoalIntent } from './ship-captain.mjs';
    const errors = [];

    const cases = [
      { goal: 'should we use Redis or Postgres?', expected: ['think'] },
      { goal: 'review this PR for security issues', expected: ['review'] },
      { goal: 'how does the auth system work?', expected: ['explore', 'think'] },
      { goal: 'ship it', expected: ['ship'] },
      { goal: 'fix the login bug', expected: ['execute'] },
      { goal: "what's the best approach for caching?", expected: ['think'] },
      { goal: 'find where the API key is stored', expected: ['explore'] },
      { goal: 'refactor the payment module', expected: ['execute'] },
      { goal: 'audit the architecture', expected: ['think', 'review'] },
      { goal: 'write tests for the API', expected: ['execute'] },
    ];

    let correct = 0;
    const mismatches = [];

    for (const { goal, expected } of cases) {
      const result = classifyGoalIntent(goal);
      if (!result || typeof result !== 'object') {
        errors.push('returned non-object for: ' + goal);
        continue;
      }
      if (!result.intent) errors.push('result missing intent for: ' + goal);
      if (!result.confidence) errors.push('result missing confidence for: ' + goal);
      if (!result.reason) errors.push('result missing reason for: ' + goal);

      if (expected.includes(result.intent)) {
        correct++;
      } else {
        mismatches.push(goal + ' → got ' + result.intent + ' (expected: ' + expected.join('|') + ')');
      }
    }

    if (correct < 8) {
      errors.push('Only ' + correct + '/10 intents matched. Mismatches: ' + mismatches.join('; '));
    }

    process.stdout.write(JSON.stringify({ errors, correct }));
  `;
  const nodeArgs = ['--input-type=module', '-e', script];
  const child = spawnSync(process.execPath, nodeArgs, { encoding: 'utf8', timeout: 10000, cwd: HOOKS });

  if (child.status !== 0) {
    const errText = child.stderr || '';
    return `script failed (exit ${child.status}): ${errText.slice(0, 400)}`;
  }

  const rawOut = child.stdout || '';
  let verdict;
  try {
    verdict = JSON.parse(rawOut.trim());
  } catch {
    return `output not JSON: ${rawOut.slice(0, 200)}`;
  }

  const problems = verdict.errors;
  return problems.length > 0 ? problems.join('; ') : true;
});
|
|
2517
|
+
|
|
2518
|
+
// ─── Test E2E-6: confirmation-policy full matrix 4x4x5 = 80 combos ───────────
|
|
2519
|
+
test('E2E: confirmation-policy full matrix — 80 combos, all return valid shape + invariants', () => {
  // Child script (cwd = HOOKS) walks 4 modes x 4 risks x 5 steps. Each result must
  // have boolean shouldConfirm/shouldBlock and a string reason, and must satisfy
  // the per-mode invariants spelled out inside the loop.
  const script = `
    import { getConfirmationPolicy } from './confirmation-policy.mjs';
    const errors = [];

    const modes = ['default', 'yolo', 'careful', 'plan-only'];
    const risks = ['low', 'medium', 'high', 'critical'];
    const steps = ['edit', 'test', 'gate', 'pr', 'heal'];

    for (const mode of modes) {
      for (const risk of risks) {
        for (const step of steps) {
          let result;
          try {
            result = getConfirmationPolicy({ risk, mode, step });
          } catch (e) {
            errors.push('threw for ' + mode + '/' + risk + '/' + step + ': ' + e.message);
            continue;
          }

          if (!result || typeof result !== 'object') {
            errors.push('non-object for ' + mode + '/' + risk + '/' + step);
            continue;
          }
          if (typeof result.shouldConfirm !== 'boolean')
            errors.push('shouldConfirm not boolean for ' + mode + '/' + risk + '/' + step);
          if (typeof result.shouldBlock !== 'boolean')
            errors.push('shouldBlock not boolean for ' + mode + '/' + risk + '/' + step);
          if (typeof result.reason !== 'string')
            errors.push('reason not string for ' + mode + '/' + risk + '/' + step);

          // Invariant: yolo — shouldBlock never true
          if (mode === 'yolo' && result.shouldBlock === true)
            errors.push('yolo/shouldBlock must never be true, got true for ' + risk + '/' + step);

          // Invariant: careful — edit and pr steps always confirm
          if (mode === 'careful' && (step === 'edit' || step === 'pr') && result.shouldConfirm !== true)
            errors.push('careful/' + step + '/' + risk + ': expected shouldConfirm=true, got: ' + result.shouldConfirm);

          // Invariant: plan-only — edit/test/gate/pr all blocked
          if (mode === 'plan-only' && ['edit', 'test', 'gate', 'pr'].includes(step) && result.shouldBlock !== true)
            errors.push('plan-only/' + step + '/' + risk + ': expected shouldBlock=true, got: ' + result.shouldBlock);

          // Invariant: default + critical + edit → blocked
          if (mode === 'default' && risk === 'critical' && step === 'edit' && result.shouldBlock !== true)
            errors.push('default/critical/edit: expected shouldBlock=true, got: ' + result.shouldBlock);
        }
      }
    }

    process.stdout.write(JSON.stringify({ errors }));
  `;
  const nodeArgs = ['--input-type=module', '-e', script];
  const child = spawnSync(process.execPath, nodeArgs, { encoding: 'utf8', timeout: 10000, cwd: HOOKS });

  if (child.status !== 0) {
    const errText = child.stderr || '';
    return `script failed (exit ${child.status}): ${errText.slice(0, 400)}`;
  }

  const rawOut = child.stdout || '';
  let verdict;
  try {
    verdict = JSON.parse(rawOut.trim());
  } catch {
    return `output not JSON: ${rawOut.slice(0, 200)}`;
  }

  const problems = verdict.errors;
  return problems.length > 0 ? problems.join('; ') : true;
});
|
|
2582
|
+
|
|
2583
|
+
// ─── Test E2E-7: selfHealGate + selfHealTests API and noHeal short-circuit ────
|
|
2584
|
+
test('E2E: selfHealGate and selfHealTests — async, correct noHeal short-circuit', () => {
  // Child script (cwd = HOOKS) calls both healers with noHeal=true and checks that
  // each returns a thenable resolving to healed=false, attempts=0 and a final-result field.
  const script = `
    import { selfHealGate } from './ship-gate.mjs';
    import { selfHealTests } from './ship-captain.mjs';
    const errors = [];

    if (typeof selfHealGate !== 'function') errors.push('selfHealGate is not a function');
    if (typeof selfHealTests !== 'function') errors.push('selfHealTests is not a function');

    // Call with noHeal=true — should return immediately without spawning claude
    const gatePromise = selfHealGate({ gate: 'issues_found' }, { noHeal: true });
    const testsPromise = selfHealTests({ passed: false, output: 'test error', exit_code: 1 }, { noHeal: true });

    if (!gatePromise || typeof gatePromise.then !== 'function')
      errors.push('selfHealGate did not return a Promise');
    if (!testsPromise || typeof testsPromise.then !== 'function')
      errors.push('selfHealTests did not return a Promise');

    const gateRes = await gatePromise;
    const testsRes = await testsPromise;

    if (gateRes.healed !== false) errors.push('selfHealGate noHeal: expected healed=false, got: ' + gateRes.healed);
    if (gateRes.attempts !== 0) errors.push('selfHealGate noHeal: expected attempts=0, got: ' + gateRes.attempts);
    if (!('finalGateResult' in gateRes)) errors.push('selfHealGate missing finalGateResult field');

    if (testsRes.healed !== false) errors.push('selfHealTests noHeal: expected healed=false, got: ' + testsRes.healed);
    if (testsRes.attempts !== 0) errors.push('selfHealTests noHeal: expected attempts=0, got: ' + testsRes.attempts);
    if (!('finalTestResult' in testsRes)) errors.push('selfHealTests missing finalTestResult field');

    process.stdout.write(JSON.stringify({ errors }));
  `;
  const nodeArgs = ['--input-type=module', '-e', script];
  const child = spawnSync(process.execPath, nodeArgs, { encoding: 'utf8', timeout: 15000, cwd: HOOKS });

  if (child.status !== 0) {
    const errText = child.stderr || '';
    return `script failed (exit ${child.status}): ${errText.slice(0, 400)}`;
  }

  const rawOut = child.stdout || '';
  let verdict;
  try {
    verdict = JSON.parse(rawOut.trim());
  } catch {
    return `output not JSON: ${rawOut.slice(0, 200)}`;
  }

  const problems = verdict.errors;
  return problems.length > 0 ? problems.join('; ') : true;
});
|
|
2626
|
+
|
|
2627
|
+
// ─── Test E2E-8: resume handles various run record states ─────────────────────
|
|
2628
|
+
test('E2E: resume logic handles completed/failed/aborted run records', () => {
  // Scratch directory for the fixture records. Fail the test (by returning a
  // message, as every other failure here does) when mktemp gives us nothing:
  // a missing binary leaves stdout null, and an empty path would make the child
  // script create `.claude/runs` relative to its cwd (HOOKS), which the cleanup
  // below would never remove.
  const mk = spawnSync('mktemp', ['-d'], { encoding: 'utf8' });
  const tmpDir = (mk.stdout || '').trim();
  if (mk.status !== 0 || !tmpDir) {
    const why = mk.error ? mk.error.message : (mk.stderr || '').trim();
    return `mktemp -d failed: ${why}`;
  }

  try {
    // NOTE(review): the child script only writes three run records and reads them
    // back; it does not import or call any resume entry point. Confirm whether the
    // real resume path should be exercised here.
    const script = `
      import { mkdirSync, writeFileSync, readFileSync, readdirSync } from 'fs';
      import { resolve } from 'path';

      const tmpDir = ${JSON.stringify(tmpDir)};
      const runsDir = resolve(tmpDir, '.claude', 'runs');
      mkdirSync(runsDir, { recursive: true });

      const completedRecord = {
        id: 'run-2026-01-01T00-00-00',
        goal: 'fix the login bug',
        status: 'completed',
        steps: [
          { task: 'Explore codebase', status: 'done' },
          { task: 'Fix bug', status: 'done' },
        ],
        started_at: '2026-01-01T00:00:00.000Z',
        completed_at: '2026-01-01T00:10:00.000Z',
      };
      writeFileSync(resolve(runsDir, 'run-completed.json'), JSON.stringify(completedRecord, null, 2), 'utf8');

      const failedRecord = {
        id: 'run-2026-01-01T01-00-00',
        goal: 'refactor auth module',
        status: 'failed',
        steps: [
          { task: 'Explore auth module', status: 'done' },
          { task: 'Refactor code', status: 'failed', error: 'compilation error' },
          { task: 'Write tests', status: 'pending' },
        ],
        started_at: '2026-01-01T01:00:00.000Z',
        completed_at: null,
      };
      writeFileSync(resolve(runsDir, 'run-failed.json'), JSON.stringify(failedRecord, null, 2), 'utf8');

      const abortedRecord = {
        id: 'run-2026-01-01T02-00-00',
        goal: 'write tests for API',
        status: 'aborted',
        steps: [
          { task: 'Discover test files', status: 'done' },
        ],
        started_at: '2026-01-01T02:00:00.000Z',
        completed_at: null,
      };
      writeFileSync(resolve(runsDir, 'run-aborted.json'), JSON.stringify(abortedRecord, null, 2), 'utf8');

      const errors = [];

      const completedBack = JSON.parse(readFileSync(resolve(runsDir, 'run-completed.json'), 'utf8'));
      if (completedBack.status !== 'completed')
        errors.push('completed record: wrong status: ' + completedBack.status);
      const allDone = completedBack.steps.every(s => s.status === 'done');
      if (!allDone) errors.push('completed record: not all steps done');

      const failedBack = JSON.parse(readFileSync(resolve(runsDir, 'run-failed.json'), 'utf8'));
      if (failedBack.status !== 'failed')
        errors.push('failed record: wrong status: ' + failedBack.status);
      const failedStepIdx = failedBack.steps.findIndex(s => s.status === 'failed');
      if (failedStepIdx !== 1)
        errors.push('failed record: expected failed step at index 1, got: ' + failedStepIdx);

      const abortedBack = JSON.parse(readFileSync(resolve(runsDir, 'run-aborted.json'), 'utf8'));
      if (abortedBack.status !== 'aborted')
        errors.push('aborted record: wrong status: ' + abortedBack.status);

      const files = readdirSync(runsDir).filter(f => f.endsWith('.json'));
      if (files.length !== 3) errors.push('expected 3 run records, got: ' + files.length);

      process.stdout.write(JSON.stringify({ errors }));
    `;
    const proc = spawnSync(process.execPath, [
      '--input-type=module',
      '-e', script,
    ], { encoding: 'utf8', timeout: 10000, cwd: HOOKS });

    if (proc.status !== 0) return `script failed (exit ${proc.status}): ${(proc.stderr || '').slice(0, 400)}`;
    let result;
    try { result = JSON.parse((proc.stdout || '').trim()); } catch { return `output not JSON: ${(proc.stdout || '').slice(0, 200)}`; }
    if (result.errors.length > 0) return result.errors.join('; ');
    return true;
  } finally {
    // tmpDir is guaranteed non-empty by the guard above.
    spawnSync('rm', ['-rf', tmpDir], { stdio: 'pipe' });
  }
});
|
|
2716
|
+
|
|
2717
|
+
// ─── Test scale-1: large JSONL ledger performance ─────────────────────────────
|
|
2718
|
+
test('scale: large JSONL ledger (1000 entries) completes in < 2000ms', () => {
  // The real ledger file is overwritten with synthetic data for the duration of
  // the test; `backup` holds its prior contents, or null when it did not exist.
  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
  const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;

  try {
    // Build 1000 paired entries spanning 48 hours.
    // Each pair: one 'decision' + one 'outcome' linked by id/decision_id.
    // Pair i is (1000 - i) * 172.8s old, so pair 500 sits exactly on the 24h
    // boundary: roughly pairs 0-500 are older and pairs 501-999 are within 24h.
    // The 400-600 tolerance below absorbs that boundary.
    // getOutcomeStats merges decisions with outcomes, so both types are needed.
    const lines = [];
    const now = Date.now();
    for (let i = 0; i < 1000; i++) {
      const ageMs = (1000 - i) * (48 * 60 * 60 * 1000 / 1000);
      const ts = new Date(now - ageMs).toISOString();
      const tier = i % 3 === 0 ? 'search' : i % 3 === 1 ? 'execute' : 'think';
      const provider = i % 2 === 0 ? 'claude' : 'openai';
      lines.push(JSON.stringify({
        type: 'decision',
        timestamp: ts,
        id: `scale-test-${i}`,
        tier,
        provider,
        model: provider === 'claude' ? 'sonnet' : 'gpt-5.4',
      }));
      lines.push(JSON.stringify({
        type: 'outcome',
        timestamp: ts,
        decision_id: `scale-test-${i}`,
        success: i % 5 !== 0,
      }));
    }
    writeFileSync(LEDGER, lines.join('\n') + '\n', 'utf8');

    // Time only the getOutcomeStats call inside the child, not process startup.
    const script = `
      import { getOutcomeStats } from './decision-ledger.mjs';
      const start = Date.now();
      const stats = getOutcomeStats();
      const elapsed = Date.now() - start;
      process.stdout.write(JSON.stringify({ elapsed, total_outcomes: stats.total_outcomes }));
    `;
    const proc = spawnSync(process.execPath, [
      '--input-type=module', '-e', script,
    ], { encoding: 'utf8', timeout: 10000, cwd: HOOKS });

    if (proc.status !== 0) return `script failed: ${proc.stderr}`;
    let result;
    try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }

    if (result.elapsed >= 2000) return `took ${result.elapsed}ms — expected < 2000ms`;
    if (result.total_outcomes < 400 || result.total_outcomes > 600)
      return `expected ~500 outcomes in last 24h, got: ${result.total_outcomes}`;

    return true;
  } finally {
    // Put the ledger back exactly as found. When there was no ledger before the
    // test, remove the synthetic one rather than leaving an empty file behind.
    if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
    else try { unlinkSync(LEDGER); } catch {}
  }
});
|
|
2776
|
+
|
|
2777
|
+
// ─── Test scale-2: large usage log (500 entries) doesn't crash ────────────────
|
|
2778
|
+
/**
 * Scale test 2: rebuildSummary() with a 500-entry usage log.
 *
 * Temporarily replaces today's usage-YYYY-MM-DD.jsonl (under HOOKS) with 500
 * synthetic entries, calls rebuildSummary() in a child process, and restores
 * the usage file to its prior state (including "not present") afterwards.
 *
 * Returns true on success, or a failure-message string.
 */
test('scale: large usage JSONL (500 entries) loads without crash', () => {
  const today = new Date().toISOString().slice(0, 10);
  const usageFile = resolve(HOOKS, `usage-${today}.jsonl`);
  // null means "no usage file existed for today before this test ran".
  const backup = existsSync(usageFile) ? readFileSync(usageFile, 'utf8') : null;

  try {
    // One entry per minute going backwards from now; every 10th is an error.
    const lines = [];
    const now = Date.now();
    for (let i = 0; i < 500; i++) {
      const ts = new Date(now - i * 60 * 1000).toISOString();
      lines.push(JSON.stringify({
        timestamp: ts,
        tier: i % 3 === 0 ? 'search' : i % 3 === 1 ? 'execute' : 'think',
        tool: 'Agent',
        status: i % 10 === 0 ? 'error' : 'ok',
        duration_ms: 500 + i * 10,
      }));
    }
    writeFileSync(usageFile, lines.join('\n') + '\n', 'utf8');

    // The child reports a thrown error as JSON and still exits 0, so a thrown
    // rebuildSummary() is distinguishable from a crashed interpreter.
    // NOTE(review): rebuildSummary() may persist a summary derived from this
    // synthetic data; that output is not cleaned up here — confirm.
    const script = `
      import { rebuildSummary } from './summary-checkpoint.mjs';
      let ok = false;
      try {
        rebuildSummary();
        ok = true;
      } catch (e) {
        process.stdout.write(JSON.stringify({ ok: false, error: e.message }));
        process.exit(0);
      }
      process.stdout.write(JSON.stringify({ ok }));
    `;
    const proc = spawnSync(process.execPath, [
      '--input-type=module', '-e', script,
    ], { encoding: 'utf8', timeout: 10000, cwd: HOOKS });

    if (proc.status !== 0) return `script failed: ${proc.stderr}`;
    let result;
    try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
    if (!result.ok) return `rebuildSummary crashed: ${result.error}`;

    return true;
  } finally {
    if (backup !== null) {
      writeFileSync(usageFile, backup, 'utf8');
    } else {
      // No usage file existed before the test: delete the synthetic one instead
      // of leaving an empty file behind. Best-effort cleanup only.
      try { unlinkSync(usageFile); } catch {}
    }
  }
});
|
|
2825
|
+
|
|
2826
|
+
// ─── Test scale-3: 100 run records in .claude/runs/ ──────────────────────────
|
|
2827
|
+
/**
 * Scale test 3: `install.mjs runs` / `install.mjs resume` with 100 run records.
 *
 * Creates 100 JSON run records under <tmp>/.claude/runs, runs the `runs`
 * sub-command with cwd set to the temp dir and checks wall-clock time and
 * output, then runs `resume` and checks that it does not surface a JS error.
 *
 * Returns true on success, or a failure-message string.
 */
test('scale: 100 run records read and listed in < 1000ms', () => {
  // Guard the mktemp result: with an empty path, resolve() below would fall
  // back to the current working directory and the fixtures would land there.
  const mk = spawnSync('mktemp', ['-d'], { encoding: 'utf8' });
  const tmpDir = (mk.stdout || '').trim();
  if (mk.status !== 0 || !tmpDir) {
    return `mktemp failed: ${mk.stderr || (mk.error && mk.error.message) || 'no output'}`;
  }

  try {
    const runsDir = resolve(tmpDir, '.claude', 'runs');
    spawnSync('mkdir', ['-p', runsDir], { stdio: 'pipe' });

    // 90 completed, 5 failed, 3 aborted, 2 running (incomplete)
    const now = Date.now();
    for (let i = 0; i < 100; i++) {
      let status;
      if (i < 90) status = 'completed';
      else if (i < 95) status = 'failed';
      else if (i < 98) status = 'aborted';
      else status = 'running';

      // Records are spaced 10 minutes apart, newest first.
      const ts = new Date(now - i * 10 * 60 * 1000).toISOString();
      const rec = {
        id: `run-${String(i).padStart(3, '0')}`,
        goal: `test goal ${i}`,
        status,
        steps: status === 'running' ? [{ task: 'step1', done: false }] : [],
        started_at: ts,
        completed_at: status !== 'running' ? new Date(now - i * 10 * 60 * 1000 + 5000).toISOString() : null,
        duration_ms: status !== 'running' ? 5000 : null,
      };
      // Colons are replaced so the timestamp prefix is filename-safe.
      const fname = `${ts.slice(0, 19).replace(/:/g, '-')}-run-${String(i).padStart(3, '0')}.json`;
      writeFileSync(resolve(runsDir, fname), JSON.stringify(rec, null, 2), 'utf8');
    }

    const installScript = resolve(HOOKS, '..', 'install.mjs');
    // Timed from the parent, so the figure includes child-process startup.
    const startMs = Date.now();
    const proc = spawnSync(process.execPath, [installScript, 'runs'], {
      encoding: 'utf8',
      timeout: 10000,
      cwd: tmpDir,
      stdio: ['pipe', 'pipe', 'pipe'],
    });
    const elapsed = Date.now() - startMs;

    // status is null when the child was killed (e.g. by the timeout).
    if (proc.status === null) return 'process timed out';
    if (elapsed >= 1000) return `runs command took ${elapsed}ms — expected < 1000ms`;

    const output = proc.stdout || '';
    if (!output.includes('100') && !output.toLowerCase().includes('run'))
      return `expected output mentioning runs, got: ${output.slice(0, 200)}`;

    // Verify resume can identify failed/incomplete runs without crashing
    const resumeProc = spawnSync(process.execPath, [installScript, 'resume'], {
      encoding: 'utf8',
      timeout: 10000,
      cwd: tmpDir,
      stdio: ['pipe', 'pipe', 'pipe'],
    });
    if (resumeProc.status === null) return 'resume process timed out';
    const resumeOut = (resumeProc.stdout || '') + (resumeProc.stderr || '');
    if (resumeOut.includes('TypeError') || resumeOut.includes('ReferenceError'))
      return `unexpected JS error in resume: ${resumeOut.slice(0, 200)}`;

    return true;
  } finally {
    spawnSync('rm', ['-rf', tmpDir], { stdio: 'pipe' });
  }
});
|
|
2890
|
+
|
|
2891
|
+
// ─── Test scale-4: burst-state survives 10 rapid sequential writes ────────────
|
|
2892
|
+
/**
 * Scale test 4: overwrite .burst-state ten times in a row and confirm the
 * file that remains parses as JSON with numeric `count` and `window_start`
 * and reflects the last write (count === 10).
 *
 * The previous file contents are restored afterwards, or the file is removed
 * when none existed. Returns true on success, or a failure-message string.
 */
test('scale: burst-state valid JSON after 10 rapid sequential writes', () => {
  const burstFile = resolve(HOOKS, '.burst-state');
  const hadFile = existsSync(burstFile);
  const backup = hadFile ? readFileSync(burstFile, 'utf8') : null;

  try {
    let written = 0;
    while (written < 10) {
      written += 1;
      const snapshot = { count: written, window_start: Date.now() };
      writeFileSync(burstFile, JSON.stringify(snapshot), 'utf8');
    }

    if (!existsSync(burstFile)) {
      return '.burst-state file not found after writes';
    }

    let state;
    try {
      const raw = readFileSync(burstFile, 'utf8');
      state = JSON.parse(raw);
    } catch (e) {
      return `.burst-state not valid JSON after 10 writes: ${e.message}`;
    }

    // Both bookkeeping fields must be numbers before the value check.
    for (const field of ['count', 'window_start']) {
      if (typeof state[field] !== 'number') {
        return `expected numeric ${field}, got: ${JSON.stringify(state)}`;
      }
    }
    if (state.count !== 10) {
      return `expected count=10 after 10 writes, got: ${state.count}`;
    }

    return true;
  } finally {
    if (backup === null) {
      try { unlinkSync(burstFile); } catch {}
    } else {
      writeFileSync(burstFile, backup, 'utf8');
    }
  }
});
|
|
2919
|
+
|
|
2920
|
+
// ─── Test scale-5: atomic-write lock-contention then success (round-trip) ─────
|
|
2921
|
+
/**
 * Scale test 5: lockedReadModifyWrite() under lock contention, then success.
 *
 * Phase 1 pre-creates `<file>.lock` and expects the call to throw with a
 * message containing "timed out" while leaving the file untouched.
 * Phase 2 removes the lock and expects the same call to increment `count`
 * from 0 to 1, both in the returned value and on disk.
 *
 * Returns true on success, or a failure-message string.
 */
test('scale: atomic-write lock-contention then success (round-trip)', () => {
  // Guard the mktemp result: with an empty path, resolve() below would fall
  // back to the current working directory and the fixtures would land there.
  const mk = spawnSync('mktemp', ['-d'], { encoding: 'utf8' });
  const tmpDir = (mk.stdout || '').trim();
  if (mk.status !== 0 || !tmpDir) {
    return `mktemp failed: ${mk.stderr || (mk.error && mk.error.message) || 'no output'}`;
  }
  const testFile = resolve(tmpDir, 'counter.json');
  const lockFile = testFile + '.lock';

  try {
    writeFileSync(testFile, JSON.stringify({ count: 0 }), 'utf8');

    // Phase 1: pre-create lock → expect timeout throw
    writeFileSync(lockFile, JSON.stringify({ pid: 99999999, ts: Date.now() }), 'utf8');

    const atomicPath = resolve(HOOKS, 'atomic-write.mjs').replace(/\\/g, '/');
    const filePath = testFile.replace(/\\/g, '/');
    // JSON.stringify yields correctly escaped string literals, so a path that
    // contains a quote cannot break the generated script source.
    const atomicLiteral = JSON.stringify(atomicPath);
    const fileLiteral = JSON.stringify(filePath);

    const script1 = `
      import { lockedReadModifyWrite } from ${atomicLiteral};
      try {
        lockedReadModifyWrite(${fileLiteral}, d => ({ ...d, count: d.count + 1 }));
        process.stdout.write(JSON.stringify({ threw: false }));
      } catch (e) {
        process.stdout.write(JSON.stringify({ threw: true, msg: e.message }));
      }
    `;
    const proc1 = spawnSync(process.execPath, [
      '--input-type=module', '-e', script1,
    ], { encoding: 'utf8', timeout: 15000, cwd: HOOKS });

    let r1;
    try { r1 = JSON.parse((proc1.stdout || '').trim()); } catch {
      return `phase1 output not JSON: ${proc1.stdout} stderr: ${proc1.stderr}`;
    }
    if (!r1.threw) return 'expected throw on lock contention, but did not throw';
    if (!r1.msg.includes('timed out')) return `expected timeout message, got: ${r1.msg}`;

    // The failed attempt must not have modified the file.
    const afterContention = JSON.parse(readFileSync(testFile, 'utf8'));
    if (afterContention.count !== 0)
      return `file modified during contention — expected count=0, got: ${afterContention.count}`;

    // Phase 2: remove lock → expect success
    try { unlinkSync(lockFile); } catch {}

    const script2 = `
      import { lockedReadModifyWrite } from ${atomicLiteral};
      try {
        const result = lockedReadModifyWrite(${fileLiteral}, d => ({ ...d, count: d.count + 1 }));
        process.stdout.write(JSON.stringify({ threw: false, count: result.count }));
      } catch (e) {
        process.stdout.write(JSON.stringify({ threw: true, msg: e.message }));
      }
    `;
    const proc2 = spawnSync(process.execPath, [
      '--input-type=module', '-e', script2,
    ], { encoding: 'utf8', timeout: 15000, cwd: HOOKS });

    let r2;
    try { r2 = JSON.parse((proc2.stdout || '').trim()); } catch {
      return `phase2 output not JSON: ${proc2.stdout} stderr: ${proc2.stderr}`;
    }
    if (r2.threw) return `unexpected throw after lock removed: ${r2.msg}`;
    if (r2.count !== 1) return `expected count=1 after successful write, got: ${r2.count}`;

    // Confirm the returned value matches what was persisted.
    const final = JSON.parse(readFileSync(testFile, 'utf8'));
    if (final.count !== 1) return `file on disk: expected count=1, got: ${final.count}`;

    return true;
  } finally {
    spawnSync('rm', ['-rf', tmpDir], { stdio: 'pipe' });
  }
});
|
|
2990
|
+
|
|
2991
|
+
// ─── Test scale-6: config validation with 50 unknown keys ─────────────────────
|
|
2992
|
+
/**
 * Scale test 6: validateConfig() on a config carrying 50 unrecognised
 * top-level keys. The child process reports validity plus error and warning
 * counts; the test requires valid === true and at least 50 warnings.
 *
 * Returns true on success, or a failure-message string.
 */
test('scale: config-validator handles 50 unknown keys without crash', () => {
  const script = `
    import { validateConfig } from './config-validator.mjs';

    const config = {
      subscriptions: { claude: { models: { opus: { tier: 'think' } } } },
      tiers: { search: {}, execute: {}, think: {} },
      routing: { strategy: 'test' },
      quality_gate: { enabled: true },
    };

    for (let i = 0; i < 50; i++) {
      config['unknown_key_' + i] = { value: i, nested: { deep: true } };
    }

    const result = validateConfig(config);
    process.stdout.write(JSON.stringify({
      valid: result.valid,
      errorCount: result.errors.length,
      warnCount: result.warnings.length,
    }));
  `;

  const nodeArgs = ['--input-type=module', '-e', script];
  const child = spawnSync(process.execPath, nodeArgs, {
    encoding: 'utf8',
    timeout: 5000,
    cwd: HOOKS,
  });

  if (child.status !== 0) {
    return `script failed: ${child.stderr}`;
  }

  let summary;
  try {
    summary = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }

  if (!summary.valid) {
    return `expected valid=true (unknown keys are warnings), errors present`;
  }
  if (summary.warnCount < 50) {
    return `expected >= 50 warnings for 50 unknown keys, got: ${summary.warnCount}`;
  }
  return true;
});
|
|
3025
|
+
|
|
3026
|
+
// ─── Test scale-7: risk-classifier edge case paths don't crash ────────────────
|
|
3027
|
+
/**
 * Scale test 7: feed classifyRiskEnhanced() and classifyRisk() unusual inputs
 * (empty string, ~500-char path, special characters, null, undefined, empty
 * array) inside a child process. The child collects human-readable problems
 * into an `errors` array; the test passes only when that array is empty and
 * the child exits with status 0.
 *
 * Returns true on success, or the joined error messages.
 */
test('scale: risk-classifier edge case paths (empty, long, special chars, null)', () => {
  const script = `
    import { classifyRisk, classifyRiskEnhanced } from './risk-classifier.mjs';
    const errors = [];

    // 1. Empty string
    try {
      const r = classifyRiskEnhanced('');
      if (!r || typeof r.risk !== 'string') errors.push('empty string: invalid result: ' + JSON.stringify(r));
    } catch (e) {
      errors.push('empty string threw: ' + e.message);
    }

    // 2. Very long path (500 chars)
    const longPath = 'src/' + 'a'.repeat(490) + '.js';
    try {
      const r = classifyRiskEnhanced(longPath);
      if (!r || typeof r.risk !== 'string') errors.push('long path: invalid result: ' + JSON.stringify(r));
    } catch (e) {
      errors.push('long path threw: ' + e.message);
    }

    // 3. Path with special characters
    try {
      const r = classifyRiskEnhanced('src/auth/my-file!@#$%^&*().mjs');
      if (!r || typeof r.risk !== 'string') errors.push('special chars: invalid result: ' + JSON.stringify(r));
    } catch (e) {
      errors.push('special chars threw: ' + e.message);
    }

    // 4. null — acceptable to throw, must not crash process
    try { classifyRiskEnhanced(null); } catch { /* acceptable */ }

    // 5. undefined — acceptable to throw
    try { classifyRiskEnhanced(undefined); } catch { /* acceptable */ }

    // 6. classifyRisk (batch) with empty array — returns object { level, reason }
    try {
      const r = classifyRisk([]);
      if (!r || typeof r !== 'object') errors.push('classifyRisk([]): expected object result, got: ' + typeof r);
      else if (typeof r.level !== 'string') errors.push('classifyRisk([]): expected result.level string, got: ' + JSON.stringify(r));
    } catch (e) {
      errors.push('classifyRisk([]) threw: ' + e.message);
    }

    process.stdout.write(JSON.stringify({ errors }));
  `;

  const nodeArgs = ['--input-type=module', '-e', script];
  const child = spawnSync(process.execPath, nodeArgs, {
    encoding: 'utf8',
    timeout: 8000,
    cwd: HOOKS,
  });

  // A non-zero (or null) status means the interpreter itself failed.
  if (child.status !== 0) {
    return `script crashed: ${child.stderr}`;
  }

  let report;
  try {
    report = JSON.parse(child.stdout.trim());
  } catch {
    return `output not JSON: ${child.stdout}`;
  }

  return report.errors.length > 0 ? report.errors.join('; ') : true;
});
|
|
3084
|
+
|
|
3085
|
+
// ─── Test scale-8: enforce-tier 10 rapid sequential Agent calls ───────────────
|
|
3086
|
+
/**
 * Scale test 8: invoke the enforce-tier hook ten times back-to-back with Agent
 * payloads and require every call to exit 0 with parseable JSON output, then
 * confirm BURST_FILE still holds valid JSON with a numeric `count`.
 *
 * Any burst state present before the test is restored afterwards; when none
 * existed the file is removed. Returns true on success, or a failure-message
 * string.
 */
test('scale: enforce-tier 10 rapid sequential Agent calls all produce valid JSON', () => {
  // Snapshot existing burst state so this test does not destroy it.
  // null means "no burst-state file existed before this test ran".
  const backup = existsSync(BURST_FILE) ? readFileSync(BURST_FILE, 'utf8') : null;

  try {
    // Pre-seed burst state so we start in burst mode (suppresses duplicate warnings)
    writeFileSync(BURST_FILE, JSON.stringify({ count: 5, window_start: Date.now() }));

    const errors = [];
    for (let i = 0; i < 10; i++) {
      const payload = JSON.stringify({
        tool_name: 'Agent',
        tool_input: {
          // Date.now() keeps each prompt distinct across calls.
          prompt: `rapid sequential task ${i} - implement feature ${Date.now()}`,
          model: 'sonnet',
        },
      });
      const { parsed, status } = run(ENFORCE_TIER, payload);
      if (status !== 0) errors.push(`call ${i}: non-zero exit ${status}`);
      if (!parsed) errors.push(`call ${i}: no valid JSON output`);
    }

    if (errors.length > 0) return errors.join('; ');

    // Verify burst-state is still valid JSON after 10 rapid calls
    if (!existsSync(BURST_FILE)) return '.burst-state missing after 10 rapid calls';
    let state;
    try {
      state = JSON.parse(readFileSync(BURST_FILE, 'utf8'));
    } catch (e) {
      return `.burst-state corrupted after 10 rapid calls: ${e.message}`;
    }
    if (typeof state.count !== 'number') return `burst-state.count not a number: ${JSON.stringify(state)}`;

    return true;
  } finally {
    if (backup !== null) {
      writeFileSync(BURST_FILE, backup, 'utf8');
    } else {
      // Nothing to restore: remove the state this test created. Best-effort.
      try { unlinkSync(BURST_FILE); } catch {}
    }
  }
});
|
|
3122
|
+
|
|
1159
3123
|
// ─── Summary ─────────────────────────────────────────────────────────────────
|
|
1160
3124
|
const total = passed + failed;
|
|
1161
3125
|
console.log(`\n${passed}/${total} tests passed`);
|