opencastle 0.31.6 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210)
  1. package/LICENSE +93 -21
  2. package/README.md +9 -3
  3. package/bin/cli.mjs +15 -0
  4. package/dist/cli/agents.d.ts.map +1 -1
  5. package/dist/cli/agents.js +19 -5
  6. package/dist/cli/agents.js.map +1 -1
  7. package/dist/cli/artifacts-cli.d.ts +3 -0
  8. package/dist/cli/artifacts-cli.d.ts.map +1 -0
  9. package/dist/cli/artifacts-cli.js +36 -0
  10. package/dist/cli/artifacts-cli.js.map +1 -0
  11. package/dist/cli/baselines.d.ts.map +1 -1
  12. package/dist/cli/baselines.js +11 -0
  13. package/dist/cli/baselines.js.map +1 -1
  14. package/dist/cli/convoy/artifacts.d.ts +25 -0
  15. package/dist/cli/convoy/artifacts.d.ts.map +1 -0
  16. package/dist/cli/convoy/artifacts.js +129 -0
  17. package/dist/cli/convoy/artifacts.js.map +1 -0
  18. package/dist/cli/convoy/artifacts.test.d.ts +2 -0
  19. package/dist/cli/convoy/artifacts.test.d.ts.map +1 -0
  20. package/dist/cli/convoy/artifacts.test.js +169 -0
  21. package/dist/cli/convoy/artifacts.test.js.map +1 -0
  22. package/dist/cli/convoy/compaction.d.ts +23 -0
  23. package/dist/cli/convoy/compaction.d.ts.map +1 -0
  24. package/dist/cli/convoy/compaction.js +117 -0
  25. package/dist/cli/convoy/compaction.js.map +1 -0
  26. package/dist/cli/convoy/compaction.test.d.ts +2 -0
  27. package/dist/cli/convoy/compaction.test.d.ts.map +1 -0
  28. package/dist/cli/convoy/compaction.test.js +205 -0
  29. package/dist/cli/convoy/compaction.test.js.map +1 -0
  30. package/dist/cli/convoy/contracts.d.ts +22 -0
  31. package/dist/cli/convoy/contracts.d.ts.map +1 -0
  32. package/dist/cli/convoy/contracts.js +254 -0
  33. package/dist/cli/convoy/contracts.js.map +1 -0
  34. package/dist/cli/convoy/contracts.test.d.ts +2 -0
  35. package/dist/cli/convoy/contracts.test.d.ts.map +1 -0
  36. package/dist/cli/convoy/contracts.test.js +239 -0
  37. package/dist/cli/convoy/contracts.test.js.map +1 -0
  38. package/dist/cli/convoy/dag-analysis.d.ts +40 -0
  39. package/dist/cli/convoy/dag-analysis.d.ts.map +1 -0
  40. package/dist/cli/convoy/dag-analysis.js +282 -0
  41. package/dist/cli/convoy/dag-analysis.js.map +1 -0
  42. package/dist/cli/convoy/dag-analysis.test.d.ts +2 -0
  43. package/dist/cli/convoy/dag-analysis.test.d.ts.map +1 -0
  44. package/dist/cli/convoy/dag-analysis.test.js +289 -0
  45. package/dist/cli/convoy/dag-analysis.test.js.map +1 -0
  46. package/dist/cli/convoy/effort-scaling.d.ts +20 -0
  47. package/dist/cli/convoy/effort-scaling.d.ts.map +1 -0
  48. package/dist/cli/convoy/effort-scaling.js +82 -0
  49. package/dist/cli/convoy/effort-scaling.js.map +1 -0
  50. package/dist/cli/convoy/effort-scaling.test.d.ts +2 -0
  51. package/dist/cli/convoy/effort-scaling.test.d.ts.map +1 -0
  52. package/dist/cli/convoy/effort-scaling.test.js +120 -0
  53. package/dist/cli/convoy/effort-scaling.test.js.map +1 -0
  54. package/dist/cli/convoy/engine.d.ts.map +1 -1
  55. package/dist/cli/convoy/engine.js +298 -11
  56. package/dist/cli/convoy/engine.js.map +1 -1
  57. package/dist/cli/convoy/engine.test.js +155 -18
  58. package/dist/cli/convoy/engine.test.js.map +1 -1
  59. package/dist/cli/convoy/event-schemas.d.ts.map +1 -1
  60. package/dist/cli/convoy/event-schemas.js +55 -0
  61. package/dist/cli/convoy/event-schemas.js.map +1 -1
  62. package/dist/cli/convoy/isolation.d.ts +27 -0
  63. package/dist/cli/convoy/isolation.d.ts.map +1 -0
  64. package/dist/cli/convoy/isolation.js +120 -0
  65. package/dist/cli/convoy/isolation.js.map +1 -0
  66. package/dist/cli/convoy/isolation.test.d.ts +2 -0
  67. package/dist/cli/convoy/isolation.test.d.ts.map +1 -0
  68. package/dist/cli/convoy/isolation.test.js +105 -0
  69. package/dist/cli/convoy/isolation.test.js.map +1 -0
  70. package/dist/cli/convoy/review-stages.d.ts +9 -0
  71. package/dist/cli/convoy/review-stages.d.ts.map +1 -0
  72. package/dist/cli/convoy/review-stages.js +134 -0
  73. package/dist/cli/convoy/review-stages.js.map +1 -0
  74. package/dist/cli/convoy/review-stages.test.d.ts +2 -0
  75. package/dist/cli/convoy/review-stages.test.d.ts.map +1 -0
  76. package/dist/cli/convoy/review-stages.test.js +197 -0
  77. package/dist/cli/convoy/review-stages.test.js.map +1 -0
  78. package/dist/cli/convoy/skill-refinement.d.ts +39 -0
  79. package/dist/cli/convoy/skill-refinement.d.ts.map +1 -0
  80. package/dist/cli/convoy/skill-refinement.js +239 -0
  81. package/dist/cli/convoy/skill-refinement.js.map +1 -0
  82. package/dist/cli/convoy/skill-refinement.test.d.ts +2 -0
  83. package/dist/cli/convoy/skill-refinement.test.d.ts.map +1 -0
  84. package/dist/cli/convoy/skill-refinement.test.js +230 -0
  85. package/dist/cli/convoy/skill-refinement.test.js.map +1 -0
  86. package/dist/cli/convoy/spec-builder.d.ts +1 -0
  87. package/dist/cli/convoy/spec-builder.d.ts.map +1 -1
  88. package/dist/cli/convoy/spec-builder.js +11 -0
  89. package/dist/cli/convoy/spec-builder.js.map +1 -1
  90. package/dist/cli/convoy/spec-builder.test.js +54 -0
  91. package/dist/cli/convoy/spec-builder.test.js.map +1 -1
  92. package/dist/cli/convoy/store.d.ts +3 -2
  93. package/dist/cli/convoy/store.d.ts.map +1 -1
  94. package/dist/cli/convoy/store.js +20 -2
  95. package/dist/cli/convoy/store.js.map +1 -1
  96. package/dist/cli/convoy/store.test.js +15 -15
  97. package/dist/cli/convoy/store.test.js.map +1 -1
  98. package/dist/cli/convoy/tdd-gate.d.ts +15 -0
  99. package/dist/cli/convoy/tdd-gate.d.ts.map +1 -0
  100. package/dist/cli/convoy/tdd-gate.js +119 -0
  101. package/dist/cli/convoy/tdd-gate.js.map +1 -0
  102. package/dist/cli/convoy/tdd-gate.test.d.ts +2 -0
  103. package/dist/cli/convoy/tdd-gate.test.d.ts.map +1 -0
  104. package/dist/cli/convoy/tdd-gate.test.js +227 -0
  105. package/dist/cli/convoy/tdd-gate.test.js.map +1 -0
  106. package/dist/cli/convoy/types.d.ts +91 -0
  107. package/dist/cli/convoy/types.d.ts.map +1 -1
  108. package/dist/cli/convoy/types.js +8 -0
  109. package/dist/cli/convoy/types.js.map +1 -1
  110. package/dist/cli/dashboard.d.ts.map +1 -1
  111. package/dist/cli/dashboard.js +54 -0
  112. package/dist/cli/dashboard.js.map +1 -1
  113. package/dist/cli/insights.d.ts +3 -0
  114. package/dist/cli/insights.d.ts.map +1 -0
  115. package/dist/cli/insights.js +94 -0
  116. package/dist/cli/insights.js.map +1 -0
  117. package/dist/cli/lesson.d.ts.map +1 -1
  118. package/dist/cli/lesson.js +7 -0
  119. package/dist/cli/lesson.js.map +1 -1
  120. package/dist/cli/log.d.ts.map +1 -1
  121. package/dist/cli/log.js +7 -0
  122. package/dist/cli/log.js.map +1 -1
  123. package/dist/cli/package-config.d.ts +12 -0
  124. package/dist/cli/package-config.d.ts.map +1 -0
  125. package/dist/cli/package-config.js +37 -0
  126. package/dist/cli/package-config.js.map +1 -0
  127. package/dist/cli/package.d.ts +23 -0
  128. package/dist/cli/package.d.ts.map +1 -0
  129. package/dist/cli/package.js +285 -0
  130. package/dist/cli/package.js.map +1 -0
  131. package/dist/cli/package.test.d.ts +2 -0
  132. package/dist/cli/package.test.d.ts.map +1 -0
  133. package/dist/cli/package.test.js +236 -0
  134. package/dist/cli/package.test.js.map +1 -0
  135. package/dist/cli/pipeline.d.ts +6 -0
  136. package/dist/cli/pipeline.d.ts.map +1 -1
  137. package/dist/cli/pipeline.js +15 -2
  138. package/dist/cli/pipeline.js.map +1 -1
  139. package/dist/cli/run/schema.d.ts.map +1 -1
  140. package/dist/cli/run/schema.js +32 -0
  141. package/dist/cli/run/schema.js.map +1 -1
  142. package/dist/cli/run/schema.test.js +51 -0
  143. package/dist/cli/run/schema.test.js.map +1 -1
  144. package/dist/cli/run.d.ts.map +1 -1
  145. package/dist/cli/run.js +10 -1
  146. package/dist/cli/run.js.map +1 -1
  147. package/dist/cli/skills.d.ts +3 -0
  148. package/dist/cli/skills.d.ts.map +1 -0
  149. package/dist/cli/skills.js +107 -0
  150. package/dist/cli/skills.js.map +1 -0
  151. package/dist/cli/types.d.ts +4 -1
  152. package/dist/cli/types.d.ts.map +1 -1
  153. package/dist/cli/update.js +2 -2
  154. package/package.json +3 -2
  155. package/src/cli/agents.ts +20 -5
  156. package/src/cli/artifacts-cli.ts +41 -0
  157. package/src/cli/baselines.ts +12 -0
  158. package/src/cli/convoy/artifacts.test.ts +201 -0
  159. package/src/cli/convoy/artifacts.ts +186 -0
  160. package/src/cli/convoy/compaction.test.ts +245 -0
  161. package/src/cli/convoy/compaction.ts +164 -0
  162. package/src/cli/convoy/contracts.test.ts +279 -0
  163. package/src/cli/convoy/contracts.ts +280 -0
  164. package/src/cli/convoy/dag-analysis.test.ts +349 -0
  165. package/src/cli/convoy/dag-analysis.ts +371 -0
  166. package/src/cli/convoy/effort-scaling.test.ts +140 -0
  167. package/src/cli/convoy/effort-scaling.ts +90 -0
  168. package/src/cli/convoy/engine.test.ts +175 -18
  169. package/src/cli/convoy/engine.ts +315 -12
  170. package/src/cli/convoy/event-schemas.ts +55 -0
  171. package/src/cli/convoy/isolation.test.ts +137 -0
  172. package/src/cli/convoy/isolation.ts +165 -0
  173. package/src/cli/convoy/review-stages.test.ts +235 -0
  174. package/src/cli/convoy/review-stages.ts +166 -0
  175. package/src/cli/convoy/skill-refinement.test.ts +277 -0
  176. package/src/cli/convoy/skill-refinement.ts +306 -0
  177. package/src/cli/convoy/spec-builder.test.ts +61 -0
  178. package/src/cli/convoy/spec-builder.ts +9 -0
  179. package/src/cli/convoy/store.test.ts +15 -15
  180. package/src/cli/convoy/store.ts +26 -4
  181. package/src/cli/convoy/tdd-gate.test.ts +281 -0
  182. package/src/cli/convoy/tdd-gate.ts +154 -0
  183. package/src/cli/convoy/types.ts +51 -0
  184. package/src/cli/dashboard.ts +55 -0
  185. package/src/cli/insights.ts +99 -0
  186. package/src/cli/lesson.ts +8 -0
  187. package/src/cli/log.ts +8 -0
  188. package/src/cli/package-config.ts +48 -0
  189. package/src/cli/package.test.ts +276 -0
  190. package/src/cli/package.ts +329 -0
  191. package/src/cli/pipeline.ts +21 -2
  192. package/src/cli/run/schema.test.ts +58 -0
  193. package/src/cli/run/schema.ts +33 -0
  194. package/src/cli/run.ts +14 -1
  195. package/src/cli/skills.ts +121 -0
  196. package/src/cli/types.ts +4 -1
  197. package/src/cli/update.ts +2 -2
  198. package/src/dashboard/dist/_astro/{index.Je1YjU_y.css → index.BRDFmNzR.css} +1 -1
  199. package/src/dashboard/dist/index.html +163 -2
  200. package/src/dashboard/node_modules/.vite/deps/_metadata.json +6 -6
  201. package/src/dashboard/src/pages/index.astro +162 -1
  202. package/src/dashboard/src/styles/dashboard.css +85 -0
  203. package/src/orchestrator/agents/developer.agent.md +8 -0
  204. package/src/orchestrator/agents/ui-ux-expert.agent.md +7 -0
  205. package/src/orchestrator/prompts/assess-complexity.prompt.md +13 -0
  206. package/src/orchestrator/prompts/brainstorm.prompt.md +18 -0
  207. package/src/orchestrator/prompts/generate-convoy.prompt.md +61 -0
  208. package/src/orchestrator/skills/decomposition/SKILL.md +35 -0
  209. package/src/orchestrator/skills/frontend-design/SKILL.md +27 -1
  210. package/src/orchestrator/skills/project-consistency/SKILL.md +350 -0
@@ -20,6 +20,13 @@ import { readLessons, captureLessons, consolidateLessons } from './lessons.js';
20
20
  import { updateExpertise, feedCircuitBreaker } from './expertise.js';
21
21
  import { buildKnowledgeGraph } from './knowledge.js';
22
22
  import { injectDiscoveredIssuesInstruction, checkDiscoveredIssues, consolidateIssues } from './issues.js';
23
+ import { validateOutput, buildContractInstruction, buildContractRetryPrompt } from './contracts.js';
24
+ import { runTwoStageReview } from './review-stages.js';
25
+ import { buildIsolationPreamble, resolveDependencyResults, detectPartitionViolations } from './isolation.js';
26
+ import { checkTDD, formatTDDFailure, DEFAULT_TDD_CONFIG } from './tdd-gate.js';
27
+ import { runSkillRefinementCheck } from './skill-refinement.js';
28
+ import { getArtifactDir, extractArtifactRefs } from './artifacts.js';
29
+ import { shouldCompact, parseCompactionSummary, saveCompaction, canCompact, getMaxCompactions, generateCompactionPrompt, buildContinuationPrompt } from './compaction.js';
23
30
  const execFile = promisify(execFileCb);
24
31
  export class CircuitBreakerManager {
25
32
  states = new Map();
@@ -620,6 +627,7 @@ function pollInjectFile(convoyId, store, events, basePath) {
620
627
  dispute_id: null,
621
628
  drift_score: null,
622
629
  drift_retried: 0,
630
+ compaction_count: 0,
623
631
  outputs: null,
624
632
  inputs: null,
625
633
  discovered_issues: null,
@@ -995,6 +1003,33 @@ async function runConvoy(convoyId, spec, adapter, store, events, wtManager, merg
995
1003
  const specTask = (spec.tasks ?? []).find(t => t.id === taskRecord.id);
996
1004
  const steps = specTask?.steps;
997
1005
  const taskHooks = specTask?.hooks ?? [];
1006
+ // ── Context isolation preamble (Phase 41) ────────────────────────────
1007
+ try {
1008
+ const taskFiles = taskRecord.files ? JSON.parse(taskRecord.files) : [];
1009
+ const depIds = taskRecord.depends_on ? JSON.parse(taskRecord.depends_on) : [];
1010
+ const depResults = resolveDependencyResults(store, convoyId, depIds);
1011
+ const preamble = buildIsolationPreamble({ id: taskRecord.id, description: taskRecord.prompt.slice(0, 200), prompt: taskRecord.prompt, files: taskFiles, agent: taskRecord.agent }, depResults);
1012
+ task.prompt = preamble + '\n\n' + task.prompt;
1013
+ }
1014
+ catch { /* non-critical — isolation preamble is best-effort */ }
1015
+ // ── Artifact output instructions (Phase 43) ────────────────────────────
1016
+ try {
1017
+ const artifactDir = getArtifactDir(convoyId, taskRecord.id);
1018
+ const artifactInstructions = [
1019
+ '',
1020
+ '## Artifact Output (for large results)',
1021
+ 'If your output includes large content (>100 lines of code, full reports, data dumps),',
1022
+ 'write it to an artifact file instead of including it inline:',
1023
+ '',
1024
+ '1. Write the content to: ' + artifactDir + '{filename}',
1025
+ '2. In your response, reference it: `[ARTIFACT: {filename}] {1-line summary}`',
1026
+ '3. Keep your inline response focused on the summary and key decisions.',
1027
+ '',
1028
+ 'Small outputs (< 100 lines) can remain inline.',
1029
+ ].join('\n');
1030
+ task.prompt = task.prompt + '\n' + artifactInstructions;
1031
+ }
1032
+ catch { /* non-critical */ }
998
1033
  // ── Intelligence: inject lessons (Phase 18.1) ─────────────────────────
999
1034
  if (spec.defaults?.inject_lessons !== false) {
1000
1035
  try {
@@ -1027,6 +1062,11 @@ async function runConvoy(convoyId, spec, adapter, store, events, wtManager, merg
1027
1062
  if (spec.defaults?.track_discovered_issues) {
1028
1063
  task.prompt = injectDiscoveredIssuesInstruction(task.prompt);
1029
1064
  }
1065
+ // ── Output contract injection ─────────────────────────────────────────
1066
+ const contractInstruction = buildContractInstruction(taskRecord.agent);
1067
+ if (contractInstruction) {
1068
+ task.prompt = task.prompt + '\n\n' + contractInstruction;
1069
+ }
1030
1070
  // ── pre_task hooks ────────────────────────────────────────────────────────
1031
1071
  if (taskHooks.length > 0) {
1032
1072
  const preResult = await runHooks(taskHooks, 'pre_task', {
@@ -1171,12 +1211,12 @@ async function runConvoy(convoyId, spec, adapter, store, events, wtManager, merg
1171
1211
  try {
1172
1212
  // SECURITY: Gate/hook commands come from the .convoy.yml spec file, which is operator-controlled.
1173
1213
  // They are NOT user-supplied and are part of the trusted build configuration.
1174
- await execFile('sh', ['-c', command], { cwd: worktreePath ?? basePath });
1214
+ await execFile('sh', ['-c', command], { cwd: worktreePath ?? basePath, maxBuffer: 10 * 1024 * 1024 });
1175
1215
  }
1176
1216
  catch (err) {
1177
1217
  const execErr = err;
1178
1218
  const code = typeof execErr.code === 'number' ? execErr.code : 1;
1179
- const output = execErr.stderr || execErr.stdout || execErr.message || '';
1219
+ const output = [execErr.stderr, execErr.stdout].filter(Boolean).join('\n').trim() || execErr.message || '';
1180
1220
  gateFailure = { command, exitCode: code, output };
1181
1221
  break;
1182
1222
  }
@@ -1369,6 +1409,91 @@ async function runConvoy(convoyId, spec, adapter, store, events, wtManager, merg
1369
1409
  return;
1370
1410
  }
1371
1411
  }
1412
+ // ── Partition violation check (Phase 41) ────────────────────────────
1413
+ if (changedFiles.length > 0) {
1414
+ try {
1415
+ const taskFiles = taskRecord.files ? JSON.parse(taskRecord.files) : [];
1416
+ if (taskFiles.length > 0) {
1417
+ const violation = detectPartitionViolations(taskRecord.id, taskFiles, changedFiles);
1418
+ if (violation) {
1419
+ events.emit('partition_violation', {
1420
+ task_id: taskRecord.id,
1421
+ allowed: violation.allowedFiles,
1422
+ actual: violation.actualFiles,
1423
+ violations: violation.violations,
1424
+ }, { convoy_id: convoyId, task_id: taskRecord.id });
1425
+ process.stdout.write(` ${c.yellow('⚠')} ${c.bold(`[${taskRecord.id}]`)} partition violation: ${violation.violations.join(', ')}\n`);
1426
+ }
1427
+ }
1428
+ }
1429
+ catch { /* non-critical */ }
1430
+ }
1431
+ // ── TDD gate ──────────────────────────────────────────────────────────
1432
+ if (builtInGates.tdd_check && changedFiles.length > 0) {
1433
+ const tddConfig = typeof builtInGates.tdd_check === 'object'
1434
+ ? { ...DEFAULT_TDD_CONFIG, ...builtInGates.tdd_check }
1435
+ : DEFAULT_TDD_CONFIG;
1436
+ const specTaskForTDD = (spec.tasks ?? []).find(t => t.id === taskRecord.id);
1437
+ const tddResult = checkTDD(changedFiles, changedFiles, tddConfig, specTaskForTDD?.agent ?? taskRecord.agent);
1438
+ if (tddResult.skipped) {
1439
+ events.emit('tdd_check_skipped', {
1440
+ task_id: taskRecord.id,
1441
+ reason: tddResult.skip_reason,
1442
+ agent: specTaskForTDD?.agent ?? taskRecord.agent,
1443
+ }, { convoy_id: convoyId, task_id: taskRecord.id });
1444
+ }
1445
+ else if (tddResult.passed) {
1446
+ events.emit('tdd_check_passed', {
1447
+ task_id: taskRecord.id,
1448
+ new_source_files: tddResult.new_source_files.length,
1449
+ existing_test_files: tddResult.existing_test_files.length,
1450
+ }, { convoy_id: convoyId, task_id: taskRecord.id });
1451
+ }
1452
+ else {
1453
+ const failureMsg = formatTDDFailure(tddResult);
1454
+ events.emit('tdd_check_failed', {
1455
+ task_id: taskRecord.id,
1456
+ missing_test_files: tddResult.missing_test_files,
1457
+ new_source_files: tddResult.new_source_files.length,
1458
+ }, { convoy_id: convoyId, task_id: taskRecord.id });
1459
+ if (tddConfig.mode === 'block') {
1460
+ await removeWorktree();
1461
+ const freshRecord = store.getTask(taskRecord.id, convoyId);
1462
+ if (freshRecord.retries < freshRecord.max_retries && spec.on_failure !== 'stop') {
1463
+ store.updateTaskStatus(taskRecord.id, convoyId, 'pending', {
1464
+ retries: freshRecord.retries + 1,
1465
+ worker_id: null,
1466
+ worktree: null,
1467
+ started_at: null,
1468
+ finished_at: null,
1469
+ prompt: `TDD gate failed.\n${failureMsg}\n\nCreate the missing test files and try again.\n\n${taskRecord.prompt}`,
1470
+ });
1471
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt });
1472
+ process.stdout.write(` ${c.yellow('⟳')} ${c.bold(`[${taskRecord.id}]`)} TDD gate failed, retry ${freshRecord.retries + 1}/${freshRecord.max_retries}\n`);
1473
+ }
1474
+ else {
1475
+ store.withTransaction(() => {
1476
+ store.updateTaskStatus(taskRecord.id, convoyId, 'gate-failed', {
1477
+ finished_at: finishedAt,
1478
+ output: `Built-in gate (tdd_check) failed:\n${failureMsg}`,
1479
+ exit_code: 1,
1480
+ });
1481
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt });
1482
+ });
1483
+ completedCount++;
1484
+ process.stdout.write(` ${c.red('✗')} ${c.bold(`[${taskRecord.id}]`)} TDD gate failed ${elapsed} ${c.dim(`[${completedCount}/${totalTasks}]`)}\n`);
1485
+ events.emit('task_failed', { reason: 'gate-failed', gate: 'tdd_check', worker_id: workerId }, { convoy_id: convoyId, task_id: taskRecord.id, worker_id: workerId });
1486
+ handleExhaustion(freshRecord, 'tdd-check', failureMsg);
1487
+ }
1488
+ taskAdapterMap.delete(taskRecord.id);
1489
+ return;
1490
+ }
1491
+ else {
1492
+ // warn mode — log but continue
1493
+ process.stdout.write(` ${c.yellow('⚠')} ${c.bold(`[${taskRecord.id}]`)} TDD gate warning: ${tddResult.missing_test_files.length} source file(s) without tests\n`);
1494
+ }
1495
+ }
1496
+ }
1372
1497
  }
1373
1498
  // ── Drift detection ──────────────────────────────────────────────────
1374
1499
  const specTaskForDrift = (spec.tasks ?? []).find(t => t.id === taskRecord.id);
@@ -1478,7 +1603,20 @@ async function runConvoy(convoyId, spec, adapter, store, events, wtManager, merg
1478
1603
  await reviewSemaphore.acquire();
1479
1604
  let reviewResult;
1480
1605
  try {
1481
- if (reviewRunner) {
1606
+ if (reviewRunner && spec.defaults?.review_stages !== false) {
1607
+ // Two-stage review: spec compliance first, then code quality
1608
+ const twoStageResult = await runTwoStageReview(taskRecord, reviewRunner, reviewerModel);
1609
+ for (const stage of twoStageResult.stages) {
1610
+ events.emit('review_stage_completed', { stage: stage.stage, verdict: stage.verdict, tokens: stage.tokens_used, task_id: taskRecord.id, model: reviewerModel }, { convoy_id: convoyId, task_id: taskRecord.id });
1611
+ }
1612
+ reviewResult = {
1613
+ verdict: twoStageResult.overall_verdict,
1614
+ feedback: twoStageResult.stages.flatMap(s => s.issues).join('\n'),
1615
+ tokens: twoStageResult.total_tokens,
1616
+ model: reviewerModel,
1617
+ };
1618
+ }
1619
+ else if (reviewRunner) {
1482
1620
  reviewResult = await reviewRunner(taskRecord, 'fast', reviewerModel);
1483
1621
  }
1484
1622
  else {
@@ -1541,11 +1679,33 @@ async function runConvoy(convoyId, spec, adapter, store, events, wtManager, merg
1541
1679
  try {
1542
1680
  const noopRunner = (_t, _l, m) => Promise.resolve({ verdict: 'pass', feedback: '', tokens: 0, model: m });
1543
1681
  const runner = reviewRunner ?? noopRunner;
1544
- panelResults = await Promise.all([
1545
- runner(taskRecord, 'panel', reviewerModel),
1546
- runner(taskRecord, 'panel', reviewerModel),
1547
- runner(taskRecord, 'panel', reviewerModel),
1548
- ]);
1682
+ const twoStageEnabled = spec.defaults?.review_stages !== false;
1683
+ if (twoStageEnabled && reviewRunner) {
1684
+ // Each panel reviewer runs both stages; majority vote on overall_verdict
1685
+ const twoStageResults = await Promise.all([
1686
+ runTwoStageReview(taskRecord, runner, reviewerModel),
1687
+ runTwoStageReview(taskRecord, runner, reviewerModel),
1688
+ runTwoStageReview(taskRecord, runner, reviewerModel),
1689
+ ]);
1690
+ for (const tsr of twoStageResults) {
1691
+ for (const stage of tsr.stages) {
1692
+ events.emit('review_stage_completed', { stage: stage.stage, verdict: stage.verdict, tokens: stage.tokens_used, task_id: taskRecord.id, model: reviewerModel }, { convoy_id: convoyId, task_id: taskRecord.id });
1693
+ }
1694
+ }
1695
+ panelResults = twoStageResults.map(tsr => ({
1696
+ verdict: tsr.overall_verdict,
1697
+ feedback: tsr.stages.flatMap(s => s.issues).join('\n'),
1698
+ tokens: tsr.total_tokens,
1699
+ model: reviewerModel,
1700
+ }));
1701
+ }
1702
+ else {
1703
+ panelResults = await Promise.all([
1704
+ runner(taskRecord, 'panel', reviewerModel),
1705
+ runner(taskRecord, 'panel', reviewerModel),
1706
+ runner(taskRecord, 'panel', reviewerModel),
1707
+ ]);
1708
+ }
1549
1709
  }
1550
1710
  finally {
1551
1711
  reviewSemaphore.release();
@@ -1770,6 +1930,7 @@ async function runConvoy(convoyId, spec, adapter, store, events, wtManager, merg
1770
1930
  dispute_id: null,
1771
1931
  drift_score: null,
1772
1932
  drift_retried: 0,
1933
+ compaction_count: 0,
1773
1934
  outputs: null,
1774
1935
  inputs: null,
1775
1936
  discovered_issues: null,
@@ -1815,6 +1976,64 @@ async function runConvoy(convoyId, spec, adapter, store, events, wtManager, merg
1815
1976
  if (result.usage.total_tokens != null)
1816
1977
  usageExtra.total_tokens = result.usage.total_tokens;
1817
1978
  }
1979
+ // ── Context compaction check (Phase 44) ─────────────────────────────
1980
+ const compactionConfig = spec.defaults?.compaction;
1981
+ if (compactionConfig?.enabled && usageExtra.total_tokens != null && taskRecord.model) {
1982
+ if (shouldCompact(usageExtra.total_tokens, taskRecord.model, compactionConfig)) {
1983
+ if (canCompact(taskRecord.compaction_count)) {
1984
+ const newCount = taskRecord.compaction_count + 1;
1985
+ store.updateTaskCompaction(taskRecord.id, convoyId, newCount);
1986
+ const summaryFromOutput = parseCompactionSummary(result.output, taskRecord.id, convoyId);
1987
+ let summaryPath;
1988
+ if (summaryFromOutput) {
1989
+ try {
1990
+ summaryPath = saveCompaction(convoyId, taskRecord.id, summaryFromOutput, newCount);
1991
+ }
1992
+ catch { /* non-critical */ }
1993
+ }
1994
+ const compactionTaskFiles = taskRecord.files ? JSON.parse(taskRecord.files) : [];
1995
+ const compactionDepIds = taskRecord.depends_on ? JSON.parse(taskRecord.depends_on) : [];
1996
+ const compactionDepResults = resolveDependencyResults(store, convoyId, compactionDepIds);
1997
+ const compactionPreamble = buildIsolationPreamble({ id: taskRecord.id, description: taskRecord.prompt.slice(0, 200), prompt: taskRecord.prompt, files: compactionTaskFiles, agent: taskRecord.agent }, compactionDepResults);
1998
+ const continuationPrompt = summaryPath
1999
+ ? buildContinuationPrompt(taskRecord.prompt, summaryPath, compactionPreamble)
2000
+ : compactionPreamble + '\n\n' + generateCompactionPrompt(taskRecord.id) + '\n\n' + taskRecord.prompt;
2001
+ store.updateTaskStatus(taskRecord.id, convoyId, 'pending', {
2002
+ worker_id: null,
2003
+ worktree: null,
2004
+ started_at: null,
2005
+ finished_at: null,
2006
+ prompt: continuationPrompt,
2007
+ });
2008
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt });
2009
+ events.emit('context_compacted', {
2010
+ task_id: taskRecord.id,
2011
+ compaction_count: newCount,
2012
+ summary_path: summaryPath ?? '',
2013
+ model: taskRecord.model,
2014
+ tokens_used: usageExtra.total_tokens,
2015
+ }, { convoy_id: convoyId, task_id: taskRecord.id });
2016
+ taskAdapterMap.delete(taskRecord.id);
2017
+ return;
2018
+ }
2019
+ else {
2020
+ // Max compactions exceeded — fail the task
2021
+ const exhaustedAt = new Date().toISOString();
2022
+ store.updateTaskStatus(taskRecord.id, convoyId, 'failed', {
2023
+ finished_at: exhaustedAt,
2024
+ output: `Context exhausted: reached maximum ${getMaxCompactions()} compactions`,
2025
+ exit_code: 1,
2026
+ });
2027
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: exhaustedAt });
2028
+ events.emit('task_failed', {
2029
+ reason: 'context_exhausted',
2030
+ worker_id: workerId,
2031
+ }, { convoy_id: convoyId, task_id: taskRecord.id });
2032
+ taskAdapterMap.delete(taskRecord.id);
2033
+ return;
2034
+ }
2035
+ }
2036
+ }
1818
2037
  // ── Capture outputs as artifacts ────────────────────────────────────────
1819
2038
  if (taskRecord.outputs) {
1820
2039
  const outputs = JSON.parse(taskRecord.outputs);
@@ -1854,6 +2073,47 @@ async function runConvoy(convoyId, spec, adapter, store, events, wtManager, merg
1854
2073
  }
1855
2074
  }
1856
2075
  }
2076
+ // ── Extract filesystem artifacts (Phase 43) ────────────────────────
2077
+ try {
2078
+ const fsArtifactRefs = extractArtifactRefs(taskRecord.id, convoyId, result.output);
2079
+ if (fsArtifactRefs.length > 0) {
2080
+ events.emit('artifacts_extracted', {
2081
+ task_id: taskRecord.id,
2082
+ count: fsArtifactRefs.length,
2083
+ artifacts: fsArtifactRefs.map(r => ({ filename: r.filename, summary: r.summary })),
2084
+ }, { convoy_id: convoyId, task_id: taskRecord.id });
2085
+ }
2086
+ }
2087
+ catch (err) {
2088
+ process.stderr.write(`[artifacts] Warning: extraction failed for task ${taskRecord.id}: ${err.message}\n`);
2089
+ }
2090
+ // ── Output contract validation ────────────────────────────────────────
2091
+ const contractResult = validateOutput(taskRecord.agent, result.output);
2092
+ if (!contractResult.valid) {
2093
+ const freshRecordForContract = store.getTask(taskRecord.id, convoyId);
2094
+ if (freshRecordForContract.retries < freshRecordForContract.max_retries) {
2095
+ const retryPrefix = buildContractRetryPrompt(contractResult) + '\n\n';
2096
+ store.updateTaskStatus(taskRecord.id, convoyId, 'pending', {
2097
+ retries: freshRecordForContract.retries + 1,
2098
+ worker_id: null,
2099
+ worktree: null,
2100
+ started_at: null,
2101
+ finished_at: null,
2102
+ prompt: retryPrefix + taskRecord.prompt,
2103
+ });
2104
+ store.updateWorkerStatus(workerId, 'failed', { finished_at: finishedAt });
2105
+ process.stdout.write(` ${c.yellow('⟳')} ${c.bold(`[${taskRecord.id}]`)} contract retry ${freshRecordForContract.retries + 1}/${freshRecordForContract.max_retries}\n`);
2106
+ taskAdapterMap.delete(taskRecord.id);
2107
+ return;
2108
+ }
2109
+ events.emit('contract_violation', {
2110
+ task_id: taskRecord.id,
2111
+ agent: taskRecord.agent,
2112
+ missing: contractResult.missing,
2113
+ warnings: contractResult.warnings,
2114
+ }, { convoy_id: convoyId, task_id: taskRecord.id });
2115
+ process.stdout.write(` ${c.yellow('⚠')} ${c.bold(`[${taskRecord.id}]`)} contract violation: missing ${contractResult.missing.join(', ')}\n`);
2116
+ }
1857
2117
  // ── Intelligence: capture persistent agent identity (Phase 17.2) ─────
1858
2118
  const specTaskForCapture = (spec.tasks ?? []).find(t => t.id === taskRecord.id);
1859
2119
  if (specTaskForCapture?.persistent && result.output) {
@@ -1895,6 +2155,7 @@ async function runConvoy(convoyId, spec, adapter, store, events, wtManager, merg
1895
2155
  output: result.output,
1896
2156
  exit_code: result.exitCode,
1897
2157
  ...usageExtra,
2158
+ contract_result: JSON.stringify(contractResult),
1898
2159
  });
1899
2160
  store.updateWorkerStatus(workerId, 'done', { finished_at: finishedAt });
1900
2161
  });
@@ -2088,14 +2349,14 @@ async function runConvoy(convoyId, spec, adapter, store, events, wtManager, merg
2088
2349
  try {
2089
2350
  // SECURITY: Gate/hook commands come from the .convoy.yml spec file, which is operator-controlled.
2090
2351
  // They are NOT user-supplied and are part of the trusted build configuration.
2091
- await execFile('sh', ['-c', command], { cwd: basePath });
2352
+ await execFile('sh', ['-c', command], { cwd: basePath, maxBuffer: 10 * 1024 * 1024 });
2092
2353
  gateResults.push({ command, exitCode: 0, passed: true });
2093
2354
  process.stdout.write(` ${c.green('✓')} ${c.dim(command)}\n`);
2094
2355
  }
2095
2356
  catch (err) {
2096
2357
  const execErr = err;
2097
2358
  const code = typeof execErr.code === 'number' ? execErr.code : 1;
2098
- const output = execErr.stderr || execErr.stdout || execErr.message || '';
2359
+ const output = [execErr.stderr, execErr.stdout].filter(Boolean).join('\n').trim() || execErr.message || '';
2099
2360
  gateResults.push({ command, exitCode: code, passed: false, output });
2100
2361
  process.stdout.write(` ${c.red('✗')} ${c.dim(command)}\n`);
2101
2362
  }
@@ -2111,7 +2372,20 @@ async function runConvoy(convoyId, spec, adapter, store, events, wtManager, merg
2111
2372
  const failureSummary = failedGates
2112
2373
  .map(g => `Command: ${g.command}\nExit code: ${g.exitCode}\nOutput:\n${g.output ?? '(no output)'}`)
2113
2374
  .join('\n\n---\n\n');
2114
- const fixPrompt = `The following validation gates failed after all convoy tasks completed. Fix the issues so these commands pass.\n\n${failureSummary}`;
2375
+ // Gather files touched by convoy tasks to give the fix agent context
2376
+ const allTasks = store.getTasksByConvoy(convoyId);
2377
+ const touchedFiles = allTasks
2378
+ .filter(t => t.files)
2379
+ .flatMap(t => { try {
2380
+ return JSON.parse(t.files);
2381
+ }
2382
+ catch {
2383
+ return [];
2384
+ } });
2385
+ const filesContext = touchedFiles.length > 0
2386
+ ? `\n\nFiles modified by the convoy tasks:\n${touchedFiles.map(f => `- ${f}`).join('\n')}\n`
2387
+ : '';
2388
+ const fixPrompt = `The following validation gates failed after all convoy tasks completed. Fix the issues so these commands pass.${filesContext}\n\n${failureSummary}`;
2115
2389
  const fixTaskId = `gate-fix-${gateAttempt}`;
2116
2390
  process.stdout.write(`\n ${c.yellow('⟳')} ${c.bold(`[${fixTaskId}]`)} fixing gate failures (attempt ${gateAttempt}/${maxGateRetries})\n`);
2117
2391
  const fixTask = {
@@ -2162,6 +2436,18 @@ async function runConvoy(convoyId, spec, adapter, store, events, wtManager, merg
2162
2436
  }
2163
2437
  catch { /* non-critical */ }
2164
2438
  }
2439
+ // ── Intelligence: skill refinement check ───────────────────────────────
2440
+ try {
2441
+ const proposals = runSkillRefinementCheck(convoyId, basePath);
2442
+ for (const p of proposals) {
2443
+ events.emit('skill_refinement_proposed', {
2444
+ skill_name: p.skill,
2445
+ proposal_path: p.proposalPath,
2446
+ }, { convoy_id: convoyId });
2447
+ process.stdout.write(` ${c.yellow('◆')} Skill refinement proposed for "${p.skill}". Review at ${p.proposalPath}\n`);
2448
+ }
2449
+ }
2450
+ catch { /* non-critical */ }
2165
2451
  // ── Final status & summary ────────────────────────────────────────────────
2166
2452
  const allTasksFinal = store.getTasksByConvoy(convoyId);
2167
2453
  const summary = {
@@ -2577,6 +2863,7 @@ export function createConvoyEngine(options) {
2577
2863
  dispute_id: null,
2578
2864
  drift_score: null,
2579
2865
  drift_retried: 0,
2866
+ compaction_count: 0,
2580
2867
  outputs: null,
2581
2868
  inputs: null,
2582
2869
  discovered_issues: null,