@kognai/orchestrator-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -0
- package/dist/index.d.ts +63 -0
- package/dist/index.js +175 -0
- package/dist/lib/aar-middleware.d.ts +6 -0
- package/dist/lib/aar-middleware.js +70 -0
- package/dist/lib/aar-types.d.ts +34 -0
- package/dist/lib/aar-types.js +4 -0
- package/dist/lib/acp-engine.d.ts +68 -0
- package/dist/lib/acp-engine.js +123 -0
- package/dist/lib/acp.d.ts +61 -0
- package/dist/lib/acp.js +425 -0
- package/dist/lib/agent-registry.d.ts +50 -0
- package/dist/lib/agent-registry.js +137 -0
- package/dist/lib/anthropic-direct.d.ts +27 -0
- package/dist/lib/anthropic-direct.js +109 -0
- package/dist/lib/asmr-extractor.d.ts +40 -0
- package/dist/lib/asmr-extractor.js +151 -0
- package/dist/lib/asmr-retrieval.d.ts +76 -0
- package/dist/lib/asmr-retrieval.js +311 -0
- package/dist/lib/asmr.d.ts +8 -0
- package/dist/lib/asmr.js +24 -0
- package/dist/lib/brainx-client.d.ts +72 -0
- package/dist/lib/brainx-client.js +200 -0
- package/dist/lib/brainx-embed.d.ts +14 -0
- package/dist/lib/brainx-embed.js +139 -0
- package/dist/lib/brainx-swarm-bridge.d.ts +93 -0
- package/dist/lib/brainx-swarm-bridge.js +242 -0
- package/dist/lib/byterover-client.d.ts +19 -0
- package/dist/lib/byterover-client.js +59 -0
- package/dist/lib/ceo-wallet.d.ts +37 -0
- package/dist/lib/ceo-wallet.js +176 -0
- package/dist/lib/chomsky-gate.d.ts +24 -0
- package/dist/lib/chomsky-gate.js +178 -0
- package/dist/lib/chomsky-runner.d.ts +29 -0
- package/dist/lib/chomsky-runner.js +157 -0
- package/dist/lib/citizen-score-contract.d.ts +72 -0
- package/dist/lib/citizen-score-contract.js +16 -0
- package/dist/lib/citizen-score-registry.d.ts +25 -0
- package/dist/lib/citizen-score-registry.js +65 -0
- package/dist/lib/citizenship.d.ts +103 -0
- package/dist/lib/citizenship.js +272 -0
- package/dist/lib/clawrouter-client.d.ts +37 -0
- package/dist/lib/clawrouter-client.js +148 -0
- package/dist/lib/code-asset-crystalliser.d.ts +41 -0
- package/dist/lib/code-asset-crystalliser.js +181 -0
- package/dist/lib/code-failure-logger.d.ts +27 -0
- package/dist/lib/code-failure-logger.js +42 -0
- package/dist/lib/cto-approval-gate.d.ts +45 -0
- package/dist/lib/cto-approval-gate.js +478 -0
- package/dist/lib/cto-gate-types.d.ts +28 -0
- package/dist/lib/cto-gate-types.js +8 -0
- package/dist/lib/decomposer-feedback.d.ts +54 -0
- package/dist/lib/decomposer-feedback.js +115 -0
- package/dist/lib/emotional-safety-gate.d.ts +48 -0
- package/dist/lib/emotional-safety-gate.js +97 -0
- package/dist/lib/engine-paths.d.ts +13 -0
- package/dist/lib/engine-paths.js +32 -0
- package/dist/lib/event-bus-listener.d.ts +8 -0
- package/dist/lib/event-bus-listener.js +144 -0
- package/dist/lib/event-bus-publisher.d.ts +25 -0
- package/dist/lib/event-bus-publisher.js +188 -0
- package/dist/lib/event-bus-types.d.ts +73 -0
- package/dist/lib/event-bus-types.js +23 -0
- package/dist/lib/failure-library.d.ts +178 -0
- package/dist/lib/failure-library.js +349 -0
- package/dist/lib/ksl/error-log.d.ts +28 -0
- package/dist/lib/ksl/error-log.js +43 -0
- package/dist/lib/ksl/index.d.ts +9 -0
- package/dist/lib/ksl/index.js +25 -0
- package/dist/lib/ksl/orchestrator-tap.d.ts +16 -0
- package/dist/lib/ksl/orchestrator-tap.js +85 -0
- package/dist/lib/ksl/record-writer.d.ts +46 -0
- package/dist/lib/ksl/record-writer.js +45 -0
- package/dist/lib/llm-cost-table.d.ts +36 -0
- package/dist/lib/llm-cost-table.js +90 -0
- package/dist/lib/local-model-router.d.ts +27 -0
- package/dist/lib/local-model-router.js +61 -0
- package/dist/lib/mc-client.d.ts +51 -0
- package/dist/lib/mc-client.js +249 -0
- package/dist/lib/model-router-contract.d.ts +91 -0
- package/dist/lib/model-router-contract.js +19 -0
- package/dist/lib/model-router-registry.d.ts +24 -0
- package/dist/lib/model-router-registry.js +52 -0
- package/dist/lib/model-router.d.ts +20 -0
- package/dist/lib/model-router.js +79 -0
- package/dist/lib/monotask-state-machine.d.ts +19 -0
- package/dist/lib/monotask-state-machine.js +131 -0
- package/dist/lib/neutral-prompt-checker.d.ts +22 -0
- package/dist/lib/neutral-prompt-checker.js +130 -0
- package/dist/lib/notion-direct.d.ts +92 -0
- package/dist/lib/notion-direct.js +381 -0
- package/dist/lib/ollama-client.d.ts +37 -0
- package/dist/lib/ollama-client.js +158 -0
- package/dist/lib/omel/credential-vault.d.ts +57 -0
- package/dist/lib/omel/credential-vault.js +324 -0
- package/dist/lib/omel/human-brake.d.ts +32 -0
- package/dist/lib/omel/human-brake.js +289 -0
- package/dist/lib/omel/index.d.ts +10 -0
- package/dist/lib/omel/index.js +26 -0
- package/dist/lib/omel/phantom-workspace.d.ts +31 -0
- package/dist/lib/omel/phantom-workspace.js +256 -0
- package/dist/lib/omel/wipe-witness.d.ts +75 -0
- package/dist/lib/omel/wipe-witness.js +398 -0
- package/dist/lib/orchestrate-engine.d.ts +25 -0
- package/dist/lib/orchestrate-engine.js +4436 -0
- package/dist/lib/perm-judge.d.ts +46 -0
- package/dist/lib/perm-judge.js +173 -0
- package/dist/lib/plumber/conformance.d.ts +54 -0
- package/dist/lib/plumber/conformance.js +121 -0
- package/dist/lib/plumber/index.d.ts +9 -0
- package/dist/lib/plumber/index.js +25 -0
- package/dist/lib/plumber/observer.d.ts +52 -0
- package/dist/lib/plumber/observer.js +180 -0
- package/dist/lib/plumber/types.d.ts +78 -0
- package/dist/lib/plumber/types.js +29 -0
- package/dist/lib/research-impl-gate.d.ts +16 -0
- package/dist/lib/research-impl-gate.js +105 -0
- package/dist/lib/sherlock-memory.d.ts +29 -0
- package/dist/lib/sherlock-memory.js +105 -0
- package/dist/lib/skill-crystalliser.d.ts +44 -0
- package/dist/lib/skill-crystalliser.js +60 -0
- package/dist/lib/sprint-runner-engine.d.ts +27 -0
- package/dist/lib/sprint-runner-engine.js +1042 -0
- package/dist/lib/sprint-state.d.ts +71 -0
- package/dist/lib/sprint-state.js +202 -0
- package/dist/lib/stuck-handler.d.ts +17 -0
- package/dist/lib/stuck-handler.js +249 -0
- package/dist/lib/task-contract-checker.d.ts +17 -0
- package/dist/lib/task-contract-checker.js +29 -0
- package/dist/lib/task-router/index.d.ts +17 -0
- package/dist/lib/task-router/index.js +52 -0
- package/dist/lib/task-router/router/generate-execution-id.d.ts +10 -0
- package/dist/lib/task-router/router/generate-execution-id.js +24 -0
- package/dist/lib/task-router/router/resolve-route.d.ts +2 -0
- package/dist/lib/task-router/router/resolve-route.js +49 -0
- package/dist/lib/task-router/types.d.ts +79 -0
- package/dist/lib/task-router/types.js +39 -0
- package/dist/lib/token-budget-validator.d.ts +44 -0
- package/dist/lib/token-budget-validator.js +84 -0
- package/dist/lib/trust-score-updater.d.ts +30 -0
- package/dist/lib/trust-score-updater.js +107 -0
- package/dist/lib/wallet-state.d.ts +26 -0
- package/dist/lib/wallet-state.js +85 -0
- package/package.json +27 -0
|
@@ -0,0 +1,1042 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* sprint-runner.ts — Auto-executes pending sprint tasks via dual-supervisor orchestrator
|
|
4
|
+
*
|
|
5
|
+
* PM2 cron: every 30 minutes — checks for pending work and runs it
|
|
6
|
+
*
|
|
7
|
+
* Flow:
|
|
8
|
+
* 1. Check lock file (prevents parallel sprints)
|
|
9
|
+
* 2. Scan sprints/ for JSON files with pending tasks
|
|
10
|
+
*      Priority: sprint-<N>.json by descending sprint number (newest sprint first); other file names sort last
|
|
11
|
+
* 3. Read sprint JSON, inject sprint_id into each task
|
|
12
|
+
* 4. Write modified sprint to logs/sprint-runner-active.json
|
|
13
|
+
* 5. Spawn orchestrate-agents-v2.ts on the temp ACTIVE file
|
|
14
|
+
* (Dual supervisor: Claude Sonnet + OpenAI Codex, CEO conflict resolution)
|
|
15
|
+
* 6. Telegram alert on start + finish
|
|
16
|
+
* 7. Release lock when done
|
|
17
|
+
*
|
|
18
|
+
* GitHub Issues → Sprint Tasks:
|
|
19
|
+
* CEO creates GitHub issues as directives. To execute them, they must be
|
|
20
|
+
* converted into a sprint JSON file (sprints/week-N.json) first.
|
|
21
|
+
* The CEO bot or a human writes the sprint JSON; this runner executes it.
|
|
22
|
+
*/
|
|
23
|
+
// TypeScript emit helper: re-exports property `key` of `source` on `target`
// (optionally under `alias`). Reuses an already-installed helper when present.
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy engines: plain value copy.
        return function (target, source, key, alias) {
            if (alias === undefined) alias = key;
            target[alias] = source[key];
        };
    }
    return function (target, source, key, alias) {
        if (alias === undefined) alias = key;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsLiveGetter;
        if (!descriptor) {
            needsLiveGetter = true;
        }
        else if ("get" in descriptor) {
            needsLiveGetter = !source.__esModule;
        }
        else {
            needsLiveGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsLiveGetter) {
            // Live binding: always read the current value from the source module.
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    };
})();
|
|
34
|
+
// TypeScript emit helper: installs `value` as the `default` export of `target`.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (target, value) {
            Object.defineProperty(target, "default", { enumerable: true, value: value });
        };
    }
    // Legacy engines: plain assignment.
    return function (target, value) {
        target["default"] = value;
    };
})();
|
|
39
|
+
// TypeScript emit helper for `import * as ns`: returns ES modules untouched and
// wraps anything else in a namespace object whose `default` is the module itself.
var __importStar = (this && this.__importStar) || (function () {
    var listOwnKeys = function (obj) {
        // Resolve the implementation on first use, then reuse it.
        listOwnKeys = Object.getOwnPropertyNames || function (target) {
            var names = [];
            for (var name in target) {
                if (Object.prototype.hasOwnProperty.call(target, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(obj);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var names = listOwnKeys(mod);
            for (var idx = 0; idx < names.length; idx++) {
                if (names[idx] === "default") {
                    continue;
                }
                __createBinding(namespace, mod, names[idx]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
56
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
57
|
+
exports.runSprintCycle = runSprintCycle;
|
|
58
|
+
const fs_1 = require("fs");
|
|
59
|
+
// TICKET-098: source/state split — runtime status lives in .swarm-state/, not committed source
|
|
60
|
+
const sprint_state_1 = require("./sprint-state");
|
|
61
|
+
const engine_paths_1 = require("./engine-paths");
|
|
62
|
+
const path_1 = require("path");
|
|
63
|
+
const child_process_1 = require("child_process");
|
|
64
|
+
const https = __importStar(require("https"));
|
|
65
|
+
// sprint-1566 F2: agent-registry validation (catches non-existent agent assignments at sprint load)
|
|
66
|
+
const agent_registry_1 = require("./agent-registry");
|
|
67
|
+
// ── Config ─────────────────────────────────────────────────────────────────
|
|
68
|
+
// TICKET-215 Phase 3b: base all engine paths on the parameterized seam. Defaults to
|
|
69
|
+
// KOGNAI_ROOT || process.cwd() — identical to the prior process.cwd() when unset, but
|
|
70
|
+
// now overridable so the same engine serves any product from its own root.
|
|
71
|
+
/**
 * Read an integer setting from the environment.
 * Falls back to `fallback` when the variable is unset, empty or not numeric, so a
 * typo can never turn a limit into NaN (every `>= NaN` / `> NaN` check is false,
 * which would silently switch the guard off).
 * @param {string} name - environment variable name
 * @param {number} fallback - value used when the variable is missing or invalid
 * @returns {number}
 */
function readEnvInt(name, fallback) {
    const raw = process.env[name];
    if (raw == null) {
        return fallback;
    }
    const parsed = parseInt(raw, 10);
    return Number.isFinite(parsed) ? parsed : fallback;
}
/**
 * Floating-point counterpart of readEnvInt (same fallback rules).
 * @param {string} name - environment variable name
 * @param {number} fallback - value used when the variable is missing or invalid
 * @returns {number}
 */
function readEnvFloat(name, fallback) {
    const raw = process.env[name];
    if (raw == null) {
        return fallback;
    }
    const parsed = parseFloat(raw);
    return Number.isFinite(parsed) ? parsed : fallback;
}
const ENGINE_PATHS = (0, engine_paths_1.resolveEnginePaths)();
const ROOT = ENGINE_PATHS.root;
// Prefer <root>/workspace/sprints when it exists, otherwise <root>/sprints.
const SPRINTS = (0, fs_1.existsSync)((0, path_1.join)(ROOT, 'workspace', 'sprints')) ? (0, path_1.join)(ROOT, 'workspace', 'sprints') : (0, path_1.join)(ROOT, 'sprints');
const LOCK = (0, path_1.join)(ROOT, 'logs', 'sprint-runner.lock');
const LOG = (0, path_1.join)(ROOT, 'logs', 'sprint-runner.log');
const ACTIVE = (0, path_1.join)(ROOT, 'logs', 'sprint-runner-active.json');
const COOLDOWN = (0, path_1.join)(ROOT, 'logs', 'sprint-runner-cooldown.json');
const MAX_HOURS = 6; // kill orchestrator if it runs longer than this
// Rate limiter: minimum gap between sprint executions (prevents burning Claude 5h limit)
// Default: 30 min. Override via SPRINT_COOLDOWN_MINUTES env var.
const COOLDOWN_MINUTES = readEnvInt('SPRINT_COOLDOWN_MINUTES', 30);
// Daily cap: max sprints per calendar day. Default: 100.
const DAILY_SPRINT_CAP = readEnvInt('DAILY_SPRINT_CAP', 100);
// Rolling window cap: max sprints within the last N hours. Default: 20 per 5h.
const ROLLING_CAP = readEnvInt('ROLLING_SPRINT_CAP', 20);
const ROLLING_WINDOW_HRS = readEnvInt('ROLLING_WINDOW_HOURS', 5);
// 2026-05-19 anti-burn hardening (after overnight ~$60-80 incident):
//   - PER_TASK_LIFETIME_MAX_ATTEMPTS: skip any task that has been attempted
//     more than this many times across ALL runs (tracked via task.attempt_count
//     in the source sprint file). Default 5 = if 5 prior runs all rejected, stop.
//   - DAILY_TOKEN_BUDGET_USD: refuse to spawn the orchestrator if today's
//     cumulative LLM spend (parsed from logs) already exceeds this. Default $30.
//   - PER_RUN_HARD_TIMEOUT_MIN: kill the orchestrator if it runs longer than
//     this. Tighter than MAX_HOURS=6. Default 35 (under the 60-min cron gap).
//     Bumped 25→35 on 2026-05-28 after 4 consecutive sprints died mid-CEO-
//     governance-phase: task-exec ate ~20 min, Phase-5b/5c/6 (post-sprint
//     CTO analysis + CEO proposal review + daily report) needed ~10 more
//     and got SIGKILLed. 35 gives governance room without blowing past the
//     hourly cron cadence.
const PER_TASK_LIFETIME_MAX_ATTEMPTS = readEnvInt('PER_TASK_LIFETIME_MAX_ATTEMPTS', 5);
const DAILY_TOKEN_BUDGET_USD = readEnvFloat('DAILY_TOKEN_BUDGET_USD', 30);
const PER_RUN_HARD_TIMEOUT_MIN = readEnvInt('PER_RUN_HARD_TIMEOUT_MIN', 35);
|
|
103
|
+
// ── Logging ─────────────────────────────────────────────────────────────────
|
|
104
|
+
/**
 * Current UTC time formatted as "YYYY-MM-DD HH:MM:SS +00:00" (log-line prefix).
 * @returns {string}
 */
function ts() {
    const [datePart, timePart] = new Date().toISOString().split('T');
    const clock = timePart.slice(0, 8); // HH:MM:SS, milliseconds and "Z" dropped
    return `${datePart} ${clock} +00:00`;
}
|
|
107
|
+
/**
 * Print a timestamped "[SprintRunner]" line to stdout and append it to LOG.
 * Appends instead of re-reading and rewriting the whole file, so the cost per
 * line stays constant and lines written by another process in between are not lost.
 * File errors are deliberately ignored: logging must never abort a sprint run.
 * @param {string} msg - message text
 */
function log(msg) {
    const line = `${ts()}: [SprintRunner] ${msg}`;
    console.log(line);
    try {
        (0, fs_1.appendFileSync)(LOG, line + '\n');
    }
    catch { /* non-fatal */ }
}
|
|
116
|
+
// ── Telegram ────────────────────────────────────────────────────────────────
|
|
117
|
+
/**
 * Fire-and-forget Telegram notification to the owner chat.
 * Does nothing unless CEO_TELEGRAM_BOT_TOKEN and OWNER_TELEGRAM_CHAT_ID are set.
 * Network errors are ignored; a socket timeout destroys the request so a stalled
 * connection cannot keep the runner process alive.
 * @param {string} text - message body, sent with parse_mode "Markdown"
 */
function sendTelegram(text) {
    const token = process.env.CEO_TELEGRAM_BOT_TOKEN;
    const chatId = process.env.OWNER_TELEGRAM_CHAT_ID;
    if (!token || !chatId)
        return;
    const body = JSON.stringify({ chat_id: chatId, text, parse_mode: 'Markdown' });
    const req = https.request({
        hostname: 'api.telegram.org',
        path: `/bot${token}/sendMessage`,
        method: 'POST',
        headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(body) },
        timeout: 10000, // socket idle timeout in ms
    });
    // Node only emits 'timeout'; the request has to be torn down explicitly.
    req.on('timeout', () => { req.destroy(); });
    req.on('error', () => { });
    req.write(body);
    req.end();
}
|
|
133
|
+
// ── Lock ────────────────────────────────────────────────────────────────────
|
|
134
|
+
/**
 * Report whether another sprint runner currently holds the lock file.
 * The lock file contains the owner's PID. The lock is live when that PID exists,
 * including when the existence probe fails with EPERM (the process is there, we
 * just may not signal it). An unreadable file, a non-positive or non-numeric PID,
 * or a dead PID is treated as a stale lock, which is deleted.
 * @returns {boolean} true when a live process owns the lock
 */
function isLocked() {
    if (!(0, fs_1.existsSync)(LOCK))
        return false;
    let pid = NaN;
    try {
        pid = parseInt((0, fs_1.readFileSync)(LOCK, 'utf8').trim(), 10);
    }
    catch { /* unreadable lock file — handled as stale below */ }
    // PID 0 and negative PIDs address process groups, never a single owner.
    if (Number.isInteger(pid) && pid > 0) {
        try {
            process.kill(pid, 0); // signal 0: existence check only, throws if PID not running
            return true;
        }
        catch (err) {
            if (err?.code === 'EPERM')
                return true;
        }
    }
    // stale lock
    try {
        (0, fs_1.unlinkSync)(LOCK);
    }
    catch { /* ignore */ }
    return false;
}
|
|
151
|
+
/**
 * Claim the sprint lock by writing this process's PID to LOCK.
 * Creates the lock's parent directory first so a fresh checkout without a logs
 * directory does not fail with ENOENT.
 * NOTE(review): the write is not exclusive, so two runners starting at the same
 * instant can both "acquire" — confirm whether callers need an atomic claim.
 */
function acquireLock() {
    (0, fs_1.mkdirSync)((0, path_1.dirname)(LOCK), { recursive: true });
    (0, fs_1.writeFileSync)(LOCK, String(process.pid));
}
|
|
154
|
+
/**
 * Remove the lock file. Best-effort: a missing file or a failed unlink is ignored.
 */
function releaseLock() {
    const removeLockFile = () => (0, fs_1.unlinkSync)(LOCK);
    try {
        removeLockFile();
    }
    catch (_ignored) {
        /* ignore */
    }
}
|
|
160
|
+
// ── Rate Limiter (Claude 5h token budget protection) ────────────────────────
|
|
161
|
+
/**
 * Check the cooldown file written by setCooldown().
 * @returns {boolean} true while the stored `until` timestamp (epoch ms) is still
 *   in the future; false when the file is missing, corrupt, or expired.
 */
function isInCooldown() {
    const hasCooldownFile = (0, fs_1.existsSync)(COOLDOWN);
    if (hasCooldownFile) {
        try {
            const state = JSON.parse((0, fs_1.readFileSync)(COOLDOWN, 'utf8'));
            const deadline = state.until;
            if (Date.now() < deadline) {
                const minutesLeft = Math.ceil((deadline - Date.now()) / 60000);
                log(`Rate limit cooldown active — ${minutesLeft} min remaining. Exiting.`);
                return true;
            }
        }
        catch (_corrupt) {
            /* stale/corrupt cooldown file — ignore */
        }
    }
    return false;
}
|
|
175
|
+
/**
 * Start a cooldown of COOLDOWN_MINUTES from now by writing
 * { until, set_at, cooldown_minutes } to the cooldown file. Failures are non-fatal.
 */
function setCooldown() {
    const cooldownMs = COOLDOWN_MINUTES * 60 * 1000;
    const until = Date.now() + cooldownMs;
    try {
        const record = {
            until,
            set_at: new Date().toISOString(),
            cooldown_minutes: COOLDOWN_MINUTES,
        };
        (0, fs_1.writeFileSync)(COOLDOWN, JSON.stringify(record));
        const resumeIso = new Date(until).toISOString();
        log(`Cooldown set: next sprint no earlier than ${resumeIso} (+${COOLDOWN_MINUTES} min)`);
    }
    catch (_err) {
        /* non-fatal */
    }
}
|
|
183
|
+
// Founder directive 2026-05-26: incident records routed to CTO + CEO for
|
|
184
|
+
// autonomous investigation. Each preflight or orchestrator failure writes one
|
|
185
|
+
// JSON file at logs/swarm-incidents/<ts>_<sprintId>.json. CTO/CEO agents tail this dir.
|
|
186
|
+
/**
 * Write an incident record to logs/swarm-incidents/<timestamp>_<sprintId>.json.
 * The record carries the sprint id and path, the exit code, the elapsed minutes,
 * a fixed triage directive, and the last 200 lines of logs/sprint-runner-out.log.
 * @param {string} sprintPath - path of the sprint JSON file that was being run
 * @param {number} exitCode - exit code to record
 * @param {number} elapsedMin - run duration in minutes to record
 * @throws if the incident directory or file cannot be written
 */
function writeIncident(sprintPath, exitCode, elapsedMin) {
    const dir = (0, path_1.join)(ROOT, 'logs', 'swarm-incidents');
    (0, fs_1.mkdirSync)(dir, { recursive: true });
    // One clock reading feeds both the file name and the record itself.
    const occurredAt = new Date().toISOString();
    const fileStamp = occurredAt.replace(/[:.]/g, '-');
    // basename() handles the platform's separators; the previous regex only knew '/'.
    const sprintId = (0, path_1.basename)(sprintPath, '.json');
    // Attach the last 200 lines of the orchestrator output log for context.
    let tail = '';
    try {
        const outLog = (0, path_1.join)(ROOT, 'logs', 'sprint-runner-out.log');
        if ((0, fs_1.existsSync)(outLog)) {
            const lines = (0, fs_1.readFileSync)(outLog, 'utf8').trim().split('\n');
            tail = lines.slice(-200).join('\n');
        }
    }
    catch { /* non-fatal */ }
    const incident = {
        ts: occurredAt,
        sprint_id: sprintId,
        sprint_path: sprintPath,
        exit_code: exitCode,
        elapsed_min: elapsedMin,
        triage: {
            assigned_to: ['cto', 'ceo'],
            directive: 'Diagnose this orchestrator failure and propose a fix. Autonomous resolution preferred — escalate to founder only if blocked by external dependency (API credits, missing env var, infra access).',
            common_patterns: [
                'PREFLIGHT_FAILED — sprint task targets a file that does not exist yet; check depends_on_sprint chain',
                'Anthropic credit exhausted — supervisor + chat blocked; mono-fallback should engage; if not, fix in scripts/orchestrate-agents-v2.ts',
                'Integrity check stub overwrites — orchestrator preserved original; reset task to pending + investigate generation prompt',
                'Per-task token budget exceeded — task too large; CTO should decompose via decomposer agent',
            ],
        },
        runner_log_tail: tail,
    };
    const outPath = (0, path_1.join)(dir, `${fileStamp}_${sprintId}.json`);
    (0, fs_1.writeFileSync)(outPath, JSON.stringify(incident, null, 2));
    log(`Incident written: ${outPath}`);
    log(`CTO + CEO will investigate on next orchestrator run.`);
}
|
|
226
|
+
// ── Rate Guards (Claude token budget protection) ─────────────────────────────
|
|
227
|
+
/**
 * Parse unique sprint IDs from a JSONL AAR file.
 * Each non-blank line is parsed as JSON; the sprint id is `sprintId`, or the raw
 * line when that field is absent. Lines that fail to parse are skipped.
 * A missing OR unparseable `timestamp` counts as "now", so such an entry is still
 * seen by the rolling-window guard instead of vanishing as NaN.
 * @param {string} filePath - path to the .jsonl file (may not exist)
 * @returns {Map<string, number>} sprint id → latest timestamp in epoch ms
 */
function readUniqueSprintIds(filePath) {
    const out = new Map();
    if (!(0, fs_1.existsSync)(filePath))
        return out;
    try {
        const lines = (0, fs_1.readFileSync)(filePath, 'utf8').trim().split('\n').filter(l => l.trim());
        for (const l of lines) {
            try {
                const obj = JSON.parse(l);
                const id = obj.sprintId ?? l;
                const parsed = obj.timestamp ? new Date(obj.timestamp).getTime() : NaN;
                const ts = Number.isFinite(parsed) ? parsed : Date.now();
                if (!out.has(id) || ts > out.get(id))
                    out.set(id, ts);
            }
            catch { /* skip corrupt lines */ }
        }
    }
    catch { /* non-fatal */ }
    return out;
}
|
|
249
|
+
/**
 * Hard daily cap: max DAILY_SPRINT_CAP unique sprints per calendar day (UTC).
 * Counts the unique sprint ids in logs/aar/<YYYY-MM-DD>.jsonl. When the cap is
 * hit it logs, sends a Telegram alert and returns true. Any error is treated as
 * "cap not reached".
 * @returns {boolean}
 */
function isDailyCapReached() {
    try {
        const today = new Date().toISOString().slice(0, 10);
        const aarFile = (0, path_1.join)(ROOT, 'logs', 'aar', `${today}.jsonl`);
        const count = readUniqueSprintIds(aarFile).size;
        if (count >= DAILY_SPRINT_CAP) {
            log(`Daily cap reached: ${count}/${DAILY_SPRINT_CAP} sprints today. Pausing until midnight.`);
            // Real line breaks: "\\n" in a template literal sends a literal backslash-n.
            sendTelegram(`🚫 *Sprint Runner* — daily cap reached\n\n${count}/${DAILY_SPRINT_CAP} sprints today.\nResuming tomorrow.`);
            return true;
        }
        log(`Daily sprint count: ${count}/${DAILY_SPRINT_CAP}`);
    }
    catch { /* non-fatal */ }
    return false;
}
|
|
266
|
+
/**
 * Rolling window cap: max ROLLING_CAP unique sprints in the last ROLLING_WINDOW_HRS hours.
 * Reads every daily AAR file the window can touch (today plus as many earlier days
 * as the window spans), keeps the latest timestamp per sprint id, and counts the
 * ids whose timestamp falls inside the window. When the cap is hit it logs, sends
 * a Telegram alert with the estimated resume time and returns true. Any error is
 * treated as "cap not reached".
 * @returns {boolean}
 */
function isRollingCapReached() {
    try {
        const DAY_MS = 86_400_000;
        const now = Date.now();
        const windowMs = ROLLING_WINDOW_HRS * 60 * 60 * 1000;
        const cutoff = now - windowMs;
        // A window of up to 24h can reach into yesterday, up to 48h into the day before, etc.
        const daysBack = Number.isFinite(windowMs) && windowMs > 0 ? Math.ceil(windowMs / DAY_MS) : 1;
        const aarDir = (0, path_1.join)(ROOT, 'logs', 'aar');
        const allIds = new Map();
        for (let offset = daysBack; offset >= 0; offset--) {
            const day = new Date(now - offset * DAY_MS).toISOString().slice(0, 10);
            const ids = readUniqueSprintIds((0, path_1.join)(aarDir, `${day}.jsonl`));
            ids.forEach((ts, id) => {
                if (!allIds.has(id) || ts > allIds.get(id))
                    allIds.set(id, ts);
            });
        }
        const recentTimestamps = [...allIds.values()].filter((ts) => ts >= cutoff);
        const recentCount = recentTimestamps.length;
        if (recentCount >= ROLLING_CAP) {
            // The oldest sprint in the window is the first to age out of it.
            const oldest = recentTimestamps.reduce((min, ts) => ts < min ? ts : min, recentTimestamps[0]);
            const resumeAt = new Date(oldest + windowMs);
            log(`Rolling window cap: ${recentCount}/${ROLLING_CAP} sprints in last ${ROLLING_WINDOW_HRS}h. Resume ~${resumeAt.toISOString()}`);
            // Real line breaks: "\\n" in a template literal sends a literal backslash-n.
            sendTelegram(`⏳ *Sprint Runner* — rolling cap reached\n\n${recentCount}/${ROLLING_CAP} sprints in the last ${ROLLING_WINDOW_HRS}h.\nResume: ~${resumeAt.toISOString().slice(11, 16)} UTC`);
            return true;
        }
        log(`Rolling window: ${recentCount}/${ROLLING_CAP} sprints in last ${ROLLING_WINDOW_HRS}h`);
    }
    catch { /* non-fatal */ }
    return false;
}
|
|
300
|
+
// ── Sprint ID Injection ────────────────────────────────────────────────────
|
|
301
|
+
/**
 * Derive a sprint id from a sprint file path: the file name without directory
 * and without a trailing ".json" (e.g. "sprints/sprint-064.json" → "sprint-064").
 * The previous "sprint-NNN" pattern test returned the same value on both
 * branches, so it has been removed.
 * @param {string} filename - sprint file path or bare name
 * @returns {string}
 */
function deriveSprintIdFromFilename(filename) {
    return (0, path_1.basename)(filename, '.json');
}
|
|
311
|
+
/**
 * Stamp every task of a sprint with the sprint's id (mutates the tasks in place).
 * The id is `sprint.sprint_id` when truthy, otherwise derived from the file name.
 * @param {object} sprint - parsed sprint JSON
 * @param {string} sprintFilePath - path the sprint was loaded from
 * @returns {object} the same sprint object
 */
function injectSprintIdIntoTasks(sprint, sprintFilePath) {
    const resolvedId = sprint.sprint_id || deriveSprintIdFromFilename(sprintFilePath);
    const taskList = sprint.tasks;
    if (!Array.isArray(taskList)) {
        return sprint;
    }
    for (let idx = 0; idx < taskList.length; idx++) {
        taskList[idx].sprint_id = resolvedId;
    }
    return sprint;
}
|
|
322
|
+
// ── Schema Normalizer (S66-002) ──────────────────────────────────────────────
|
|
323
|
+
// Maps CEO-authored sprint schema → orchestrator schema
|
|
324
|
+
/**
 * Map CEO-authored task objects onto the orchestrator's task schema.
 * Every original field is kept; the fields below are filled in only when absent
 * (null/undefined):
 *   context      ← description, else ''
 *   dependencies ← depends_on, else []
 *   deliverables ← { code: [file] }, else { code: [task_target] }, else { code: [] }
 *   agent 'coder', type 'feature', priority 'medium', status 'pending'
 * @param {object[]} tasks
 * @returns {object[]} new task objects (inputs are not mutated)
 */
function normalizeTasks(tasks) {
    const inferDeliverables = (task) => {
        if (task.file) {
            return { code: [task.file] };
        }
        if (task.task_target) {
            return { code: [task.task_target] };
        }
        return { code: [] };
    };
    return tasks.map((task) => {
        const normalized = { ...task };
        normalized.context = task.context ?? task.description ?? '';
        normalized.dependencies = task.dependencies ?? task.depends_on ?? [];
        normalized.deliverables = task.deliverables ?? inferDeliverables(task);
        normalized.agent = task.agent ?? 'coder';
        normalized.type = task.type ?? 'feature';
        normalized.priority = task.priority ?? 'medium';
        normalized.status = task.status ?? 'pending';
        return normalized;
    });
}
|
|
336
|
+
/**
 * Persist the prepared sprint as pretty-printed JSON at ACTIVE, creating
 * <ROOT>/logs first when it does not exist.
 * @param {object} sprint - sprint object to serialize
 * @returns {string} the ACTIVE file path
 */
function writeActiveSprint(sprint) {
    const logsDir = (0, path_1.join)(ROOT, 'logs');
    const logsDirMissing = !(0, fs_1.existsSync)(logsDir);
    if (logsDirMissing) {
        (0, fs_1.mkdirSync)(logsDir, { recursive: true });
    }
    const serialized = JSON.stringify(sprint, null, 2);
    (0, fs_1.writeFileSync)(ACTIVE, serialized, 'utf8');
    return ACTIVE;
}
|
|
346
|
+
// ── Main ────────────────────────────────────────────────────────────────────
|
|
347
|
+
/**
 * Numeric sprint id from a file name of the exact form "sprint-<digits>.json"
 * (sprint-1501.json → 1501). Any other name yields -1
 * (sprint-TICKET-008-PROMO-05.json → -1).
 * @param {string} filename
 * @returns {number}
 */
function extractSprintNumber(filename) {
    const stem = (0, path_1.basename)(filename, '.json');
    const found = /^sprint-(\d+)$/.exec(stem);
    if (found === null) {
        return -1;
    }
    return parseInt(found[1], 10);
}
|
|
352
|
+
// Statuses that override a "pending" local state — if Notion says any of these,
|
|
353
|
+
// runner skips the sprint even if the local file claims pending tasks.
|
|
354
|
+
// (Multi-session safety: prevents another Claude session reverting a local file
|
|
355
|
+
// from causing the runner to re-execute paused/done work.)
|
|
356
|
+
const NOTION_OVERRIDE_STATUSES = new Set([
    'skipped',
    'blocked',
    'done',
    'done-manual',
    'loop-stuck',
    'rejected',
]);
/**
 * Choose the next sprint file to execute.
 *
 * Candidates are the *.json files in SPRINTS (ACTIVE_SPRINT.json excluded),
 * ordered by descending sprint number; names without a number sort last.
 * The first candidate that passes all three checks wins:
 *   1. it has at least one task with status 'pending';
 *   2. Notion (queried once per call, skipped when unavailable) does not report
 *      one of NOTION_OVERRIDE_STATUSES for it;
 *   3. its `depends_on_sprint`, when it names a loaded sprint, has no task that
 *      is 'pending', 'in_progress' or 'blocked'.
 * A sprint that cannot be loaded or inspected is skipped.
 *
 * @returns {Promise<string|null>} path of the selected sprint file, or null
 */
async function findPendingSprint() {
    if (!(0, fs_1.existsSync)(SPRINTS)) {
        return null;
    }
    const toSprintId = (name) => name.replace(/\.json$/, '');
    const UNFINISHED = new Set(['pending', 'in_progress', 'blocked']);
    // Numeric sort: highest sprint number first; non-numeric names get -1 and sort last.
    const files = (0, fs_1.readdirSync)(SPRINTS)
        .filter((name) => name.endsWith('.json') && name !== 'ACTIVE_SPRINT.json')
        .sort((left, right) => extractSprintNumber(right) - extractSprintNumber(left));
    // Source of truth: query Notion once; on any failure fall back to local-only filtering.
    let notionStatuses = null;
    try {
        const { getAllNotionSprintStatuses } = await Promise.resolve().then(() => __importStar(require('./notion-direct')));
        const result = await getAllNotionSprintStatuses();
        if (!result.ok || !result.data) {
            log(`Notion query failed (${result.reason}) — falling back to local-only filtering`);
        }
        else {
            notionStatuses = result.data;
            log(`Notion source-of-truth: ${notionStatuses.size} sprint statuses loaded`);
        }
    }
    catch (err) {
        log(`Notion module load failed (${err.message}) — falling back to local-only filtering`);
    }
    // Load every sprint up front so dependency checks can look at upstream sprints.
    // TICKET-098: loadSprintMerged is used so status comes from .swarm-state.
    const allSprints = new Map();
    for (const name of files) {
        try {
            allSprints.set(toSprintId(name), (0, sprint_state_1.loadSprintMerged)((0, path_1.join)(SPRINTS, name)));
        }
        catch (_unparseable) {
            /* skip unparseable */
        }
    }
    for (const file of files) {
        const filePath = (0, path_1.join)(SPRINTS, file);
        const sprintId = toSprintId(file);
        try {
            const sprint = allSprints.get(sprintId) || (0, sprint_state_1.loadSprintMerged)(filePath);
            const hasPending = sprint.tasks?.some((task) => task.status === 'pending');
            if (!hasPending) {
                continue;
            }
            // Notion overrides a locally "pending" sprint.
            const notionStatus = notionStatuses?.get(sprintId);
            if (notionStatus && NOTION_OVERRIDE_STATUSES.has(notionStatus)) {
                log(`Skipped ${file}: Notion source-of-truth says '${notionStatus}' (overrides local pending)`);
                continue;
            }
            // Respect depends_on_sprint: wait while the upstream sprint has unfinished tasks.
            const depId = sprint.depends_on_sprint;
            const upstream = depId && typeof depId === 'string' ? allSprints.get(depId) : undefined;
            if (upstream && upstream.tasks && upstream.tasks.some((task) => UNFINISHED.has(task.status))) {
                log(`Skipped ${file}: depends_on_sprint '${depId}' still has unfinished tasks`);
                continue;
            }
            const notionNote = notionStatus ? `, notion=${notionStatus}` : '';
            log(`Selected sprint: ${file} (sprint #${extractSprintNumber(file)}${notionNote})`);
            return filePath;
        }
        catch (_err) {
            continue;
        }
    }
    return null;
}
|
|
430
|
+
// 2026-05-19 anti-burn: kill any orphan orchestrator from a previous run
|
|
431
|
+
// before spawning a new one. PM2 cron_restart used to SIGKILL the parent but
|
|
432
|
+
// detach the orchestrator child, which kept making API calls indefinitely.
|
|
433
|
+
// Even with cron disabled now, defensive cleanup ensures one-shot manual runs
|
|
434
|
+
// can't accidentally double-spawn.
|
|
435
|
+
/**
 * SIGKILL every process whose command line matches 'orchestrate-agents-v2.ts'
 * (as listed by `pgrep -f`), except this process. Best-effort: if pgrep finds
 * nothing, is not installed, or a kill fails, the function returns quietly.
 * pgrep is run directly rather than through `sh -c "... || true"`, so the helper
 * shell's own command line can no longer match the pattern.
 */
function killOrphanOrchestrators() {
    let listing = '';
    try {
        listing = (0, child_process_1.execFileSync)('pgrep', ['-f', 'orchestrate-agents-v2.ts'], { encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] });
    }
    catch {
        // pgrep exits non-zero when nothing matches; a missing binary also lands here.
        return;
    }
    const pids = listing.trim().split('\n').filter(Boolean).map((p) => parseInt(p, 10)).filter((p) => p > 0);
    for (const pid of pids) {
        if (pid === process.pid)
            continue;
        try {
            process.kill(pid, 'SIGKILL');
            log(`Killed orphan orchestrator PID ${pid}`);
        }
        catch { /* may already be dead */ }
    }
}
|
|
452
|
+
// 2026-05-19 anti-burn: estimate today's LLM spend by scanning the orchestrator
|
|
453
|
+
// log for "Response: …, N tokens" lines and pricing per model. If we're over
|
|
454
|
+
// budget, refuse to spawn. Heuristic; mostly counts coder calls (supervisors
|
|
455
|
+
// are also there but interleaved).
|
|
456
|
+
/**
 * Estimate today's LLM spend and report whether it exceeds DAILY_TOKEN_BUDGET_USD.
 * Sums the token counts of every "Response: <secs>s, <N> tokens" line in
 * logs/sprint-runner-out.log whose line starts with today's UTC date, and prices
 * them at a flat blended rate. When over budget it logs, sends a Telegram alert
 * and returns true. A missing log or any error counts as "not over budget".
 * @returns {boolean}
 */
function isOverDailyBudget() {
    // TICKET-099 (c): cost model recalibrated 2026-05-27 from $5/M → $2/M
    // blended. Original $5/M assumed Sonnet-heavy traffic. Reality (per recent
    // sprints): ~60% DeepSeek ($0.30/M), ~30% Haiku ($3/M), ~10% Sonnet ($10/M)
    // = 0.18 + 0.90 + 1.00 ≈ $2.08/M. Round down to $2/M for headroom margin.
    // Old rate caused false-positive budget freezes (6M tokens × $5/M = $30
    // tripped the cap when actual cost was ~$12).
    const BLENDED_USD_PER_MILLION_TOKENS = 2;
    try {
        const outLog = (0, path_1.join)(ROOT, 'logs', 'sprint-runner-out.log');
        if (!(0, fs_1.existsSync)(outLog))
            return false;
        const today = new Date().toISOString().slice(0, 10);
        const content = (0, fs_1.readFileSync)(outLog, 'utf8');
        let totalTokens = 0;
        for (const line of content.split('\n')) {
            if (!line.startsWith(today))
                continue;
            const m = line.match(/Response: [\d.]+s, (\d+) tokens/);
            if (m)
                totalTokens += parseInt(m[1], 10);
        }
        const usd = (totalTokens / 1_000_000) * BLENDED_USD_PER_MILLION_TOKENS;
        if (usd > DAILY_TOKEN_BUDGET_USD) {
            log(`Daily token budget exceeded: ~$${usd.toFixed(2)} of $${DAILY_TOKEN_BUDGET_USD} (${totalTokens.toLocaleString()} tokens). Refusing to spawn.`);
            // Real line breaks: "\\n" in a template literal sends a literal backslash-n.
            sendTelegram(`🚨 *Sprint Runner* — daily budget exceeded\n\n~$${usd.toFixed(2)} / $${DAILY_TOKEN_BUDGET_USD}.\nResuming tomorrow.`);
            return true;
        }
        log(`Daily token estimate: ~$${usd.toFixed(2)} of $${DAILY_TOKEN_BUDGET_USD}`);
    }
    catch { /* non-fatal */ }
    return false;
}
|
|
488
|
+
// 2026-05-19 anti-burn: skip tasks that have already burned through
|
|
489
|
+
// PER_TASK_LIFETIME_MAX_ATTEMPTS across all runs. The orchestrator's per-run
|
|
490
|
+
// 10-attempt budget doesn't help if every run starts fresh and re-tries the
|
|
491
|
+
// same failing task. We bump attempt_count in the source sprint file each
|
|
492
|
+
// time a task gets executed; once a task hits the lifetime ceiling, the
|
|
493
|
+
// runner marks it 'rejected-stuck' and moves on.
|
|
494
|
+
/**
 * Lifetime-attempt gate. Every pending task in the merged (source + state) view
 * whose attempt_count has reached PER_TASK_LIFETIME_MAX_ATTEMPTS is marked
 * 'rejected-stuck', written through to .swarm-state via updateTaskStatus, and
 * reported in the log and on Telegram.
 *
 * @param {string} sprintPath path of the source sprint file
 * @returns {boolean} true when no task is left in 'pending' after the pass;
 *   false when the sprint has no task array or on any error.
 */
function gateLifetimeAttempts(sprintPath) {
    try {
        // TICKET-098: attempt_count comes from the merged source+state view
        const merged = (0, sprint_state_1.loadSprintMerged)(sprintPath);
        if (!Array.isArray(merged?.tasks)) {
            return false;
        }
        const { updateTaskStatus: writeStatus } = require('./sprint-state');
        const cappedIds = [];
        for (const task of merged.tasks) {
            if (task.status !== 'pending') {
                continue;
            }
            const attempts = task.attempt_count || 0;
            if (!(attempts >= PER_TASK_LIFETIME_MAX_ATTEMPTS)) {
                continue;
            }
            task.status = 'rejected-stuck';
            task.rejected_reason = `Exceeded ${PER_TASK_LIFETIME_MAX_ATTEMPTS} lifetime attempts; needs founder intervention (likely hand-port).`;
            cappedIds.push(task.id);
            // Persist to .swarm-state rather than the source sprint file
            writeStatus(sprintPath, task.id, 'rejected-stuck', attempts, task.rejected_reason);
        }
        if (cappedIds.length > 0) {
            log(`Marked ${cappedIds.length} task(s) as 'rejected-stuck' (lifetime cap): ${cappedIds.join(', ')}`);
            sendTelegram(`⚠️ *Sprint Runner* — tasks hit lifetime cap\\n\\n${cappedIds.length} task(s) marked rejected-stuck:\\n${cappedIds.map(i => '• ' + i).join('\\n')}`);
        }
        // No pending task left means there is no real work for this run
        return !merged.tasks.some((task) => task.status === 'pending');
    }
    catch {
        return false;
    }
}
|
|
528
|
+
// sprint-1566 F1: pre-flight work gate. Returns true iff the sprint has at
|
|
529
|
+
// least one EXECUTABLE pending task — i.e. status='pending' AND the task's
|
|
530
|
+
// agent is known (assigned + the agent dir exists). Composes with F2
|
|
531
|
+
// agent_registry_validation which marks unknown-agent tasks as 'blocked'
|
|
532
|
+
// before this check runs.
|
|
533
|
+
//
|
|
534
|
+
// Saves the bulk of waste on sprint-1556-style no-op runs where the
|
|
535
|
+
// orchestrator's CEO/CTO meta-cycle burned 193K tokens analysing a sprint
|
|
536
|
+
// that had no real work to do.
|
|
537
|
+
/**
 * Pre-flight work gate (sprint-1566 F1).
 *
 * A task is executable when its merged status is 'pending' and its agent
 * (defaulting to 'coder') is in the set returned by listKnownAgents(). When no
 * task qualifies, a per-status count is logged so the founder can see why.
 *
 * @param {string} sprintPath path of the source sprint file
 * @returns {boolean} true when at least one executable task exists; false
 *   otherwise, including on a missing task array or any error.
 */
function hasExecutableWork(sprintPath) {
    try {
        // TICKET-098: read merged source+state
        const merged = (0, sprint_state_1.loadSprintMerged)(sprintPath);
        if (!Array.isArray(merged?.tasks)) {
            return false;
        }
        const knownAgents = (0, agent_registry_1.listKnownAgents)();
        const isExecutable = (task) => Boolean(task) &&
            task.status === 'pending' &&
            knownAgents.has(task.agent || 'coder'); // default per orchestrate-agents-v2.ts:2479
        if (merged.tasks.some(isExecutable)) {
            return true;
        }
        // Nothing runnable: tally statuses for the diagnostic log line
        const tally = merged.tasks.reduce((acc, task) => {
            const key = task?.status || 'unknown';
            acc[key] = (acc[key] || 0) + 1;
            return acc;
        }, {});
        const breakdown = Object.entries(tally)
            .map(([status, n]) => `${status}=${n}`)
            .join(', ');
        log(` task status breakdown: ${breakdown}`);
        return false;
    }
    catch {
        return false;
    }
}
|
|
566
|
+
// ─── TICKET-204: Stuck-Task Escalation gate ──────────────────────────────────
|
|
567
|
+
// Atomic-merge helper: patch a task's DEFINITION fields (task_target, context)
|
|
568
|
+
// in the SOURCE sprint file so the next orchestrator spawn picks up the change.
|
|
569
|
+
/**
 * Merge `patch` into the task with id `taskId` inside the SOURCE sprint file,
 * then rewrite the file via a temporary file followed by a rename.
 *
 * Does nothing when the file has no task array or the task is absent.
 * Failures are logged, never thrown.
 *
 * @param {string} sprintPath path of the source sprint JSON file
 * @param {string} taskId id of the task to patch
 * @param {object} patch fields to shallow-merge into the task definition
 */
function patchSourceTask(sprintPath, taskId, patch) {
    try {
        const doc = JSON.parse((0, fs_1.readFileSync)(sprintPath, 'utf-8'));
        const target = Array.isArray(doc?.tasks)
            ? doc.tasks.find((candidate) => candidate?.id === taskId)
            : undefined;
        if (!target) {
            return;
        }
        Object.assign(target, patch);
        // Write beside the original, then swap it in with a rename
        const scratch = `${sprintPath}.tmp`;
        (0, fs_1.writeFileSync)(scratch, JSON.stringify(doc, null, 2));
        require('fs').renameSync(scratch, sprintPath);
    }
    catch (err) {
        log(`patchSourceTask(${taskId}) failed: ${(err?.message || '').slice(0, 120)}`);
    }
}
|
|
586
|
+
// Atomic-merge helper: patch a task's RUNTIME entry (score_history) in
|
|
587
|
+
// .swarm-state/<sprint>.json. Preserves the file's flat-vs-{tasks} shape.
|
|
588
|
+
/**
 * Merge `patch` into a task's runtime entry in the state file
 * (ENGINE_PATHS.swarmState/<basename of sprintPath>).
 *
 * The state file may be either a flat `{ [taskId]: entry }` map or an object
 * with a `tasks` map; whichever shape is found is patched in place and
 * written back via a temporary file followed by a rename.
 *
 * Does nothing when the state file or the task entry is missing.
 * Failures are logged, never thrown.
 *
 * @param {string} sprintPath path of the source sprint file (only its basename is used)
 * @param {string} taskId id of the task entry to patch
 * @param {object} patch fields to shallow-merge into the entry
 */
function patchStateTask(sprintPath, taskId, patch) {
    try {
        const statePath = (0, path_1.join)(ENGINE_PATHS.swarmState, (0, path_1.basename)(sprintPath));
        if (!(0, fs_1.existsSync)(statePath)) {
            return;
        }
        const doc = JSON.parse((0, fs_1.readFileSync)(statePath, 'utf-8'));
        // Prefer the nested `tasks` map when present, else treat the document as the map
        const hasNestedMap = Boolean(doc) &&
            typeof doc === 'object' &&
            Boolean(doc.tasks) &&
            typeof doc.tasks === 'object';
        const entries = hasNestedMap ? doc.tasks : doc;
        const current = entries[taskId];
        if (!current || typeof current !== 'object') {
            return;
        }
        Object.assign(current, patch);
        const scratch = `${statePath}.tmp`;
        (0, fs_1.writeFileSync)(scratch, JSON.stringify(doc, null, 2));
        require('fs').renameSync(scratch, statePath);
    }
    catch (err) {
        log(`patchStateTask(${taskId}) failed: ${(err?.message || '').slice(0, 120)}`);
    }
}
|
|
606
|
+
// The orchestrator records a single `score` per task per run; the detector needs
|
|
607
|
+
// a `score_history` series for its variance test. Append the latest score once
|
|
608
|
+
// per attempt (idempotent: only when attempt_count exceeds the recorded length).
|
|
609
|
+
/**
 * Append the entry's latest `score` to its `score_history`, at most once per
 * attempt: nothing is appended unless attempt_count exceeds the number of
 * scores already recorded.
 *
 * The new history is written to the state file through patchStateTask and
 * mirrored onto the in-memory `entry`.
 *
 * @param {string} sprintPath path of the source sprint file
 * @param {string} taskId id of the task whose history is extended
 * @param {object} entry runtime state entry; ignored unless `score` is a number
 */
function accumulateScoreHistory(sprintPath, taskId, entry) {
    if (typeof entry?.score !== 'number') {
        return;
    }
    const recorded = Array.isArray(entry.score_history) ? entry.score_history : [];
    const attemptsSoFar = entry.attempt_count ?? 0;
    if (attemptsSoFar <= recorded.length) {
        return; // this attempt's score is already in the series
    }
    const extended = [...recorded, entry.score];
    patchStateTask(sprintPath, taskId, { score_history: extended });
    // keep the caller's in-memory entry in step with what was persisted
    entry.score_history = extended;
}
|
|
620
|
+
// Pre-dispatch escalation gate. For each pending task: accumulate score history,
|
|
621
|
+
// run detectStuckPattern, and on a stuck verdict (kill switch absent) run a
|
|
622
|
+
// synchronous CEO/CTO consult and apply the decision. See
|
|
623
|
+
// docs/specs/stuck-task-escalation.md §5. Non-fatal: any failure logs and the
|
|
624
|
+
// runner proceeds to normal dispatch.
|
|
625
|
+
/**
 * Pre-dispatch stuck-task escalation gate (TICKET-204).
 *
 * Skipped entirely when escalationPaused() reports the kill switch. Otherwise,
 * for each pending task that has a runtime state entry: record its latest
 * score, run detectStuckPattern, and on a stuck verdict await
 * escalateStuckTask and apply the decision:
 *   - bump_tier        → patch `task_target` in the source sprint file
 *   - rephrase_context → patch `context` in the source sprint file
 *   - escalate_human   → set status 'awaiting_human' and notify via Telegram
 *   - anything else    → logged, task left pending
 *
 * Non-fatal: any failure is logged and swallowed.
 *
 * @param {string} sprintPath path of the source sprint file
 * @returns {Promise<void>}
 */
async function gateStuckTasks(sprintPath) {
    try {
        const { detectStuckPattern: detect, escalateStuckTask: escalate, escalationPaused: isPaused, } = require('./stuck-handler');
        if (isPaused()) {
            log('Stuck-task escalation: paused (kill switch present) — skipping detector.');
            return;
        }
        const sprintId = (0, path_1.basename)(sprintPath).replace(/\.json$/, '');
        const merged = (0, sprint_state_1.loadSprintMerged)(sprintPath);
        if (!Array.isArray(merged?.tasks)) {
            return;
        }
        const { readState: loadState, updateTaskStatus: writeStatus } = require('./sprint-state');
        const runtime = loadState(sprintPath) || {};
        for (const task of merged.tasks) {
            const isPending = Boolean(task) && task.status === 'pending';
            const entry = isPending ? runtime[task.id] : undefined;
            if (!entry) {
                continue;
            }
            // score_history must be on disk before the detector reads it
            accumulateScoreHistory(sprintPath, task.id, entry);
            const verdict = detect(sprintId, task.id);
            if (!verdict.stuck) {
                continue;
            }
            log(`🛑 Stuck task: ${task.id} (${verdict.reason}) scores=[${verdict.scores.join(', ')}]. Escalating CEO/CTO consult...`);
            const decision = await escalate(sprintId, task.id, task.context || '', task.title || task.id, verdict);
            log(` decision: ${decision.action} — ${(decision.reasoning || '').slice(0, 160)}`);
            let applied = false;
            switch (decision.action) {
                case 'bump_tier':
                    if (decision.new_tier) {
                        patchSourceTask(sprintPath, task.id, { task_target: decision.new_tier });
                        log(` → bump_tier: ${task.id}.task_target → ${decision.new_tier}`);
                        applied = true;
                    }
                    break;
                case 'rephrase_context':
                    if (decision.rephrased_context) {
                        patchSourceTask(sprintPath, task.id, { context: decision.rephrased_context });
                        log(` → rephrase_context: ${task.id} context rewritten (${decision.rephrased_context.length} chars)`);
                        applied = true;
                    }
                    break;
                case 'escalate_human':
                    writeStatus(sprintPath, task.id, 'awaiting_human', entry.attempt_count ?? 0, decision.reasoning);
                    sendTelegram(decision.telegram_message || `⚠️ *Sprint Runner* — task \`${task.id}\` escalated to human (stuck loop).`);
                    log(` → escalate_human: ${task.id} status=awaiting_human, founder notified.`);
                    applied = true;
                    break;
                default:
                    break;
            }
            if (!applied) {
                log(` → non-actionable decision for ${task.id} (action=${decision.action}); leaving pending.`);
            }
        }
    }
    catch (err) {
        log(`gateStuckTasks failed (non-fatal): ${(err?.message || String(err)).slice(0, 200)}`);
    }
}
|
|
673
|
+
// TICKET-202: read the active PACT mandate (pointer → mandate file). Returns null
|
|
674
|
+
// when no mandate governance is in effect or the store is unreadable. The engine
|
|
675
|
+
// reads the JSON store directly — no kognai-build dependency.
|
|
676
|
+
/**
 * Resolve the active PACT mandate (TICKET-202).
 *
 * Reads `<ROOT>/.swarm-state/mandates/active.json`, takes its `mandate_id`,
 * and loads `<ROOT>/.swarm-state/mandates/<mandate_id>.json`.
 *
 * @returns {{id: string, file: string, m: any} | null} the mandate id, the
 *   path of its file and the parsed mandate; null when the pointer or mandate
 *   file is missing, the pointer has no id, or anything fails to read/parse
 *   (fail-open so a corrupt store cannot wedge the swarm).
 */
function readActiveMandate() {
    const pointerFile = (0, path_1.join)(ROOT, '.swarm-state', 'mandates', 'active.json');
    if (!(0, fs_1.existsSync)(pointerFile)) {
        return null;
    }
    try {
        const { mandate_id: mandateId } = JSON.parse((0, fs_1.readFileSync)(pointerFile, 'utf8'));
        if (!mandateId) {
            // pointer cleared (e.g. after a revoke) → nothing active
            return null;
        }
        const mandateFile = (0, path_1.join)(ROOT, '.swarm-state', 'mandates', `${mandateId}.json`);
        if (!(0, fs_1.existsSync)(mandateFile)) {
            return null;
        }
        const mandate = JSON.parse((0, fs_1.readFileSync)(mandateFile, 'utf8'));
        return { id: String(mandateId), file: mandateFile, m: mandate };
    }
    catch {
        // fail-open: a corrupt store must not wedge the swarm
        return null;
    }
}
|
|
693
|
+
// A signed mandate is the autonomy envelope. If the active mandate is revoked or
|
|
694
|
+
// breached (budget/deadline), halt dispatch — the mandate-driven analogue of the
|
|
695
|
+
// .swarm-state/dispatcher-paused kill switch. Fail-closed on a clearly
|
|
696
|
+
// revoked/breached/over-budget/expired mandate; fail-open otherwise.
|
|
697
|
+
/**
 * Dispatch gate for the active PACT mandate (TICKET-202).
 *
 * Reports a breach — and logs and sends a Telegram notice — when the active
 * mandate is:
 *   - status 'revoked' or 'breached', or
 *   - over budget (burn.usdc_consumed > cost_envelope.max_usdc, when the cap
 *     is a number), or
 *   - past a parseable `deadline`.
 *
 * Fail-open in every other case: no active mandate, or a mandate file whose
 * content is not an object.
 *
 * @returns {boolean} true when autonomous dispatch must halt.
 */
function mandateEnvelopeBreached() {
    const active = readActiveMandate();
    if (!active)
        return false;
    const { id, m } = active;
    const short = id.slice(0, 8);
    // A mandate file holding `null` or a scalar is a corrupt store. Treat it like
    // any other unreadable mandate (fail-open) instead of throwing on `m.status`.
    if (m === null || typeof m !== 'object') {
        log(`Mandate ${short} is not a JSON object — ignoring (fail-open).`);
        return false;
    }
    let breach = null;
    if (m.status === 'revoked')
        breach = 'revoked by founder';
    else if (m.status === 'breached')
        breach = 'already breached';
    else if (typeof m.cost_envelope?.max_usdc === 'number' &&
        Number(m.burn?.usdc_consumed) > m.cost_envelope.max_usdc) {
        breach = `budget: burn $${Number(m.burn.usdc_consumed).toFixed(4)} > cap $${m.cost_envelope.max_usdc.toFixed(4)}`;
    }
    else if (m.deadline) {
        // An unparseable deadline is ignored rather than treated as expired.
        const d = Date.parse(m.deadline);
        if (!Number.isNaN(d) && Date.now() > d)
            breach = `deadline ${m.deadline} passed`;
    }
    if (breach) {
        log(`Mandate ${short} envelope breach (${breach}) — halting autonomous dispatch.`);
        sendTelegram(`⛔ *Mandate ${short}* envelope breach: ${breach}\\nAutonomous dispatch halted. Sign or extend a mandate to resume.`);
        return true;
    }
    return false;
}
|
|
724
|
+
// TICKET-202: accrue a finished sprint's cost to the active mandate's burn. Mirrors
|
|
725
|
+
// kognai-build's updateBurn but kept self-contained in the engine. Flips status to
|
|
726
|
+
// 'breached' when cumulative burn crosses the max_usdc ceiling, so the NEXT dispatch
|
|
727
|
+
// is gated by mandateEnvelopeBreached(). Non-fatal.
|
|
728
|
+
function accrueActiveMandateBurn(costUsd, tasksCompleted) {
|
|
729
|
+
if (!(costUsd > 0))
|
|
730
|
+
return;
|
|
731
|
+
const active = readActiveMandate();
|
|
732
|
+
if (!active || active.m.status !== 'active')
|
|
733
|
+
return; // only accrue to a live mandate
|
|
734
|
+
const { id, file, m } = active;
|
|
735
|
+
try {
|
|
736
|
+
m.burn = m.burn || { usdc_consumed: 0, tasks_completed: 0, last_updated: '' };
|
|
737
|
+
m.burn.usdc_consumed = Number((Number(m.burn.usdc_consumed || 0) + costUsd).toFixed(6));
|
|
738
|
+
m.burn.tasks_completed = Number(m.burn.tasks_completed || 0) + Math.max(0, tasksCompleted);
|
|
739
|
+
m.burn.last_updated = new Date().toISOString();
|
|
740
|
+
const cap = m?.cost_envelope?.max_usdc;
|
|
741
|
+
if (typeof cap === 'number' && m.burn.usdc_consumed > cap) {
|
|
742
|
+
m.status = 'breached';
|
|
743
|
+
log(`Mandate ${id.slice(0, 8)} BREACHED: burn $${m.burn.usdc_consumed.toFixed(4)} > cap $${cap.toFixed(4)}.`);
|
|
744
|
+
sendTelegram(`⛔ *Mandate ${id.slice(0, 8)}* budget breached ($${m.burn.usdc_consumed.toFixed(4)} > $${cap.toFixed(4)}). Autonomous dispatch halts.`);
|
|
745
|
+
}
|
|
746
|
+
(0, fs_1.writeFileSync)(file, JSON.stringify(m, null, 2));
|
|
747
|
+
log(`Mandate ${id.slice(0, 8)} burn: $${m.burn.usdc_consumed.toFixed(4)}${typeof cap === 'number' ? ` / $${cap.toFixed(4)}` : ''} (${m.burn.tasks_completed} task(s)).`);
|
|
748
|
+
}
|
|
749
|
+
catch (e) {
|
|
750
|
+
log(`Mandate burn accrual failed (non-fatal): ${(e?.message || String(e)).slice(0, 120)}`);
|
|
751
|
+
}
|
|
752
|
+
}
|
|
753
|
+
// TICKET-202: cost + completed-task count of the run that just finished. Guards a
|
|
754
|
+
// stale latest-run.json by requiring finished_at >= sinceMs (the spawn time).
|
|
755
|
+
/**
 * Read cost and completed-task count from
 * `<ROOT>/reports/swarm-runs/latest-run.json` (TICKET-202).
 *
 * A report whose `finished_at` parses to a time earlier than `sinceMs` is
 * treated as stale and yields zeros. A report whose `finished_at` does not
 * parse is accepted as-is.
 *
 * @param {number} sinceMs epoch milliseconds of the spawn time
 * @returns {{cost: number, tasksCompleted: number}} values from
 *   `total_cost_usd` and `summary.done`; zeros when the file is missing,
 *   stale, unparseable, or the fields are not numeric.
 */
function readLastRunReport(sinceMs) {
    const nothing = () => ({ cost: 0, tasksCompleted: 0 });
    const reportFile = (0, path_1.join)(ROOT, 'reports', 'swarm-runs', 'latest-run.json');
    if (!(0, fs_1.existsSync)(reportFile)) {
        return nothing();
    }
    try {
        const report = JSON.parse((0, fs_1.readFileSync)(reportFile, 'utf8'));
        const finishedMs = Date.parse(report.finished_at);
        const isStale = !Number.isNaN(finishedMs) && finishedMs < sinceMs;
        if (isStale) {
            return nothing();
        }
        const cost = Number(report.total_cost_usd) || 0;
        const tasksCompleted = Number(report?.summary?.done) || 0;
        return { cost, tasksCompleted };
    }
    catch {
        return nothing();
    }
}
|
|
770
|
+
/**
 * Run one sprint-runner cycle.
 *
 * Order of operations:
 *   1. Gates that need no lock: existing lock, cooldown, rolling/daily caps,
 *      orphan cleanup, daily token budget, mandate envelope.
 *   2. Under the lock: pick a pending sprint, run the stuck-task, lifetime-cap,
 *      agent-validation and pre-flight work gates, build the ACTIVE sprint from
 *      the merged view, and spawn the orchestrator synchronously with a hard
 *      timeout.
 *   3. Post-run: incident record on failure, mandate burn accrual, cooldown,
 *      ACTIVE → .swarm-state sync, split-task persistence, git-log forensic
 *      recovery, ACTIVE cleanup, and the optional post-sprint hook.
 *
 * The lock is released in a `finally`, so an exception anywhere after
 * `acquireLock()` can no longer leave a stale lock behind.
 *
 * @param {{orchestratorScript: string, postSprintHook?: () => void}} opts
 *   `orchestratorScript` is the script passed to `npx ts-node`;
 *   `postSprintHook`, when present, is invoked after cleanup.
 * @returns {Promise<void>}
 */
async function runSprintCycle(opts) {
    if (isLocked()) {
        log('Lock present — another runner active. Exiting.');
        return;
    }
    if (isInCooldown())
        return;
    if (isRollingCapReached())
        return;
    if (isDailyCapReached())
        return;
    // Anti-burn hardening 2026-05-19
    killOrphanOrchestrators();
    if (isOverDailyBudget())
        return;
    // TICKET-202: respect the active PACT mandate envelope (revoked / budget / deadline).
    if (mandateEnvelopeBreached())
        return;
    acquireLock();
    try {
        log('Starting sprint runner...');
        const sprintPath = await findPendingSprint();
        if (!sprintPath) {
            log('No pending sprints found.');
            return;
        }
        // TICKET-204: escalate stuck tasks BEFORE the lifetime-cap kill, so a low-variance
        // sub-ceiling loop gets a CEO/CTO consult (bump_tier / rephrase_context / escalate_human)
        // instead of silently burning attempts until the hard cap. Kill switch: .swarm-state/escalation-paused.
        await gateStuckTasks(sprintPath);
        if (gateLifetimeAttempts(sprintPath)) {
            log(`All pending tasks in ${(0, path_1.basename)(sprintPath)} have hit lifetime cap. Releasing lock + exiting.`);
            return;
        }
        // sprint-1566 F2: validate agent assignments BEFORE spawn. Any task whose
        // agent doesn't exist in agents/ + kognai-agents/ + workspace/agents/ (or
        // the 'coder' pseudo) gets marked status='blocked' with a clear
        // blocked_reason. Catches the 2026-05-19 "miner-or-founder" pattern that
        // burned 193K tokens on CEO/CTO meta-analysis after the orchestrator
        // silently soft-rejected the task.
        const agentCheck = (0, agent_registry_1.validateSprintAgents)(sprintPath);
        if (!agentCheck.ok) {
            log(`⚠️ Agent validation: ${agentCheck.unknown_agents.length} unknown agent(s) in ${(0, path_1.basename)(sprintPath)}: ${agentCheck.unknown_agents.join(', ')}`);
            log(` ${agentCheck.affected_tasks.length} task(s) marked blocked: ${agentCheck.affected_tasks.join(', ')}`);
            log(` Founder: edit the task agent assignments to one of ${(0, agent_registry_1.listKnownAgents)().size} known agents + reset status to pending.`);
            // Don't exit here — the sprint may still have other valid pending tasks.
            // The preflight work gate (F1) below will check if there's any real work left.
        }
        // sprint-1566 F1: pre-flight work gate. If after lifetime-gate + agent-registry
        // pruning there are NO executable pending tasks, skip the orchestrator spawn
        // entirely. This is the single biggest waste-cutter: sprint-1556 on 2026-05-19
        // burned 193K tokens running the full CEO/CTO meta-cycle on a sprint with
        // 4/5 done + 1 task assigned to a non-existent agent (0 real work).
        if (!hasExecutableWork(sprintPath)) {
            log(`✗ Preflight work gate: no executable pending tasks in ${(0, path_1.basename)(sprintPath)}. Skipping orchestrator spawn.`);
            log(` (all tasks are done / blocked / rejected / assigned to unknown agents)`);
            return;
        }
        log(`Found pending sprint: ${sprintPath}`);
        // TICKET-210: build the ACTIVE sprint from the MERGED view (source definition
        // + .swarm-state status), NOT raw source. The source file holds every task at
        // its authored status (usually 'pending'); reading it directly meant a sprint
        // with ANY pending task re-ran ALL its tasks — including ones already 'done' in
        // .swarm-state — re-generating shipped files and risking reversion of later
        // fixes (observed live 2026-05-30: sprint-1618 re-ran completed libs and nearly
        // reverted the TICKET-207 workspace.ts fix). The gates above already use
        // loadSprintMerged; the ACTIVE write was the one path that bypassed state.
        // The orchestrator skips status !== 'pending' (orchestrate-agents-v2:4045), so
        // overlaying terminal statuses here makes it run only genuinely-pending work.
        let sprint;
        try {
            sprint = (0, sprint_state_1.loadSprintMerged)(sprintPath);
        }
        catch (err) {
            log(`Failed to read/merge sprint JSON: ${err}`);
            return;
        }
        // TICKET-210: the merged view carries terminal statuses from .swarm-state. The
        // orchestrator only runs status === 'pending', so DONE/skip-terminal tasks are
        // now correctly excluded (the bug). But 'rejected' / 'in_progress' tasks must
        // stay RUNNABLE — the bounded retry loop (capped by gateLifetimeAttempts, which
        // already promoted over-cap tasks to 'rejected-stuck') depends on them being
        // re-dispatched. Normalize only those retryable states back to 'pending';
        // leave done / rejected-stuck / awaiting_human / blocked / skipped as-is so they
        // are skipped this run.
        const RUN_AS_PENDING = new Set(['rejected', 'in_progress']);
        if (Array.isArray(sprint.tasks)) {
            for (const t of sprint.tasks) {
                if (RUN_AS_PENDING.has(t.status))
                    t.status = 'pending';
            }
        }
        // Inject sprint_id into each task
        const sprintWithIds = injectSprintIdIntoTasks(sprint, sprintPath);
        log(`Injected sprint_id into ${sprintWithIds.tasks?.length || 0} tasks`);
        // Normalize CEO schema → orchestrator schema (S66-002)
        sprintWithIds.tasks = normalizeTasks(sprintWithIds.tasks);
        log(`Normalized ${sprintWithIds.tasks.length} tasks (CEO schema → orchestrator schema)`);
        // TICKET-202: if a PACT mandate governs this session, stamp the sprint with its
        // id (traceability) so its cost can be accrued to that envelope after the run.
        const governingMandate = readActiveMandate();
        if (governingMandate) {
            sprintWithIds.mandate_id = governingMandate.id;
            log(`Sprint governed by mandate ${governingMandate.id.slice(0, 8)} (envelope active).`);
        }
        // Write modified sprint to ACTIVE file
        const activePath = writeActiveSprint(sprintWithIds);
        log(`Written modified sprint to: ${activePath}`);
        // Spawn the product orchestrator script (path supplied by the thin entry).
        const orchestratorPath = opts.orchestratorScript;
        const start = Date.now();
        sendTelegram(`🚀 *Sprint Runner* started\\n\\nSprint: \`${(0, path_1.basename)(sprintPath)}\`\\nTasks: ${sprintWithIds.tasks?.length || 0}`);
        // 2026-05-19 anti-burn: hard timeout shorter than the disabled cron gap so
        // overlapping orphans can't accumulate even if cron ever gets re-armed
        const orchestratorTimeoutMs = PER_RUN_HARD_TIMEOUT_MIN * 60 * 1000;
        log(`Spawning orchestrator with ${PER_RUN_HARD_TIMEOUT_MIN}-min hard timeout`);
        const result = (0, child_process_1.spawnSync)('npx', ['ts-node', orchestratorPath, activePath], {
            stdio: 'inherit',
            cwd: ROOT,
            env: { ...process.env },
            timeout: orchestratorTimeoutMs,
            killSignal: 'SIGKILL', // SIGTERM ignored when blocked on subprocess I/O
        });
        const elapsed = Math.round((Date.now() - start) / 60000);
        // spawnSync reports status === null when the child was killed by a signal
        // (e.g. the hard timeout) or never started; name the signal / error code
        // instead of printing "exit null".
        const exitDesc = result.status ??
            (result.signal ? `signal ${result.signal}` : (result.error?.code ?? 'unknown'));
        const status = result.status === 0 ? '✅ Completed' : `❌ Failed (exit ${exitDesc})`;
        log(`Orchestrator finished: ${status} (${elapsed} min)`);
        // Founder directive 2026-05-26: on non-zero exit, write an incident record
        // and emit an event for CTO + CEO to investigate autonomously. The swarm
        // should never wait for human triage to recover.
        if (result.status !== 0) {
            try {
                writeIncident(sprintPath, result.status ?? -1, elapsed);
            }
            catch (e) {
                log(`incident write failed: ${e.message}`);
            }
        }
        // TICKET-202: accrue this sprint's cost to the active mandate's burn envelope.
        // Flips the mandate to 'breached' if cumulative burn crosses max_usdc, which the
        // next run's mandateEnvelopeBreached() gate then halts on.
        const runReport = readLastRunReport(start);
        accrueActiveMandateBurn(runReport.cost, runReport.tasksCompleted);
        setCooldown(); // Rate limiter: enforce gap before next sprint
        sendTelegram(`🏁 *Sprint Runner* finished\\n\\n${status}\\nDuration: ${elapsed} min\\nNext sprint in: ${COOLDOWN_MINUTES} min`);
        // TICKET-098: mirror task statuses from ACTIVE → .swarm-state/<sprint-id>.json
        // (gitignored). Previously wrote to committed source sprint file, which was
        // vulnerable to git operations clobbering state (TICKET-095 push-with-rebase
        // catastrophically demonstrated this). State is now isolated from git.
        try {
            if ((0, fs_1.existsSync)(ACTIVE) && (0, fs_1.existsSync)(sprintPath)) {
                const activeRaw = JSON.parse((0, fs_1.readFileSync)(ACTIVE, 'utf8'));
                if (Array.isArray(activeRaw.tasks)) {
                    // Bump attempt_count for tasks whose status changed during this run.
                    // Read prior state to compute the diff (orchestrator's persistTaskStatus
                    // wrote ACTIVE; we increment attempt_count here for tasks that moved
                    // off 'pending').
                    const { readState } = require('./sprint-state');
                    // Same guard as gateStuckTasks: a missing state object must not
                    // abort the whole sync.
                    const priorState = readState(sprintPath) || {};
                    const tasksWithAttempts = activeRaw.tasks.map((t) => {
                        if (!t || !t.id || !t.status)
                            return t;
                        const prior = priorState[t.id];
                        const priorStatus = prior?.status ?? 'pending';
                        const priorAttempts = prior?.attempt_count ?? 0;
                        // Bump iff status changed AND new status isn't 'pending'
                        const bump = priorStatus !== t.status && t.status !== 'pending';
                        return {
                            ...t,
                            attempt_count: bump ? priorAttempts + 1 : (t.attempt_count ?? priorAttempts),
                        };
                    });
                    const { updates, attempted } = (0, sprint_state_1.applyActiveTasksToState)(sprintPath, tasksWithAttempts);
                    if (updates > 0 || attempted > 0) {
                        log(`Synced ${updates} status update(s) + ${attempted} attempt_count bump(s) from ACTIVE → .swarm-state/${(0, path_1.basename)(sprintPath)}`);
                    }
                    // Founder fix 2026-05-27: persist decomposer-injected split sub-tasks
                    // back to the source sprint definition so they survive ACTIVE cleanup.
                    // The atomicity pre-flight gate (CTO-20260528-002) auto-splits
                    // multi-file tasks via routeToDecomposer; those splits were being
                    // injected only into ACTIVE, then wiped at sprint-end → 0 ship.
                    // Fix: detect injected tasks (have `parent_task_id` field set) that
                    // aren't yet in the source sprint file, and append them. Tasks are
                    // definition-level, not state — they belong in the committed source.
                    try {
                        const sourceRaw = JSON.parse((0, fs_1.readFileSync)(sprintPath, 'utf8'));
                        if (Array.isArray(sourceRaw.tasks)) {
                            const sourceIds = new Set(sourceRaw.tasks.map((t) => t?.id));
                            const newSplitTasks = activeRaw.tasks.filter((t) => t && t.id && !sourceIds.has(t.id) && t.parent_task_id);
                            if (newSplitTasks.length > 0) {
                                // Strip per-run runtime fields (status / attempt_count) — the
                                // source file is definition only; status lives in .swarm-state.
                                const cleaned = newSplitTasks.map((t) => {
                                    const { status: _s, attempt_count: _a, rejected_reason: _r, ...defn } = t;
                                    return { ...defn, status: 'pending', attempt_count: 0 };
                                });
                                sourceRaw.tasks.push(...cleaned);
                                // Atomic write (tmp + rename) — matches the sprint-state pattern.
                                const tmp = `${sprintPath}.tmp.${process.pid}`;
                                require('fs').writeFileSync(tmp, JSON.stringify(sourceRaw, null, 2));
                                require('fs').renameSync(tmp, sprintPath);
                                log(`Persisted ${newSplitTasks.length} decomposer-split sub-task(s) back to ${(0, path_1.basename)(sprintPath)} (survive ACTIVE cleanup)`);
                            }
                        }
                    }
                    catch (e) {
                        log(`WARN: failed to persist split sub-tasks to source: ${(e?.message || '').substring(0, 200)}`);
                    }
                }
            }
        }
        catch (e) {
            log(`WARN: failed to sync ACTIVE → .swarm-state: ${(e?.message || '').substring(0, 200)}`);
        }
        // TICKET-206: forensic git-log recovery for tasks that shipped past the kill
        // signal. When the orchestrator hits the hard timeout, in-flight tasks may
        // have completed their `git commit` (OS-atomic) before persistTaskStatus could
        // fire — so .swarm-state shows them as pending even though commits landed on
        // origin/main. This pass scans git log since the cron start for
        // `feat(coder): <task_id>` matches and flips matched pending tasks to done
        // with a recovered_from_git_log marker. Live incident 2026-05-29:
        // knowledge_lib (0454c341b at 15:39) + memory_lib (c60b32b0b at 15:42) shipped
        // but state showed pending after the 15:35 kill.
        try {
            const cronStartIso = new Date(start).toISOString();
            const { execSync } = require('child_process');
            const commitMsgs = execSync(`git log --since='${cronStartIso}' --pretty=format:'%s' origin/main`, { cwd: ROOT, encoding: 'utf-8', timeout: 10000 }).split('\n').filter(Boolean);
            if (commitMsgs.length > 0) {
                const { readState, updateTaskStatus } = require('./sprint-state');
                const state = readState(sprintPath) || {};
                const pendingIds = Object.entries(state)
                    .filter(([_, v]) => v?.status === 'pending')
                    .map(([id]) => id);
                const recovered = [];
                for (const id of pendingIds) {
                    // Match `feat(coder): <id>` or `feat(coder): <id>-A` (auto-decomposer split)
                    const escaped = id.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
                    const re = new RegExp(`feat\\(coder\\):\\s*${escaped}(\\b|-)`);
                    if (commitMsgs.some((m) => re.test(m))) {
                        updateTaskStatus(sprintPath, id, 'done', state[id]?.attempt_count ?? 1);
                        recovered.push(id);
                    }
                }
                if (recovered.length > 0) {
                    log(`Forensic recovery: ${recovered.length} task(s) marked done from git log: ${recovered.join(', ')}`);
                }
            }
        }
        catch (e) {
            log(`forensic recovery failed (non-fatal): ${(e?.message || '').substring(0, 200)}`);
        }
        // Clean up ACTIVE file after run
        try {
            if ((0, fs_1.existsSync)(ACTIVE)) {
                (0, fs_1.unlinkSync)(ACTIVE);
                log('Cleaned up ACTIVE sprint file');
            }
        }
        catch {
            // non-fatal
        }
        // TICKET-201: post-sprint hook (e.g. dispatch-approved-proposals).
        // Supplied by the product entry so core stays product-agnostic.
        try {
            opts.postSprintHook?.();
        }
        catch (e) {
            log(`WARN: post-sprint hook failed (non-fatal): ${(e?.message || String(e)).substring(0, 200)}`);
        }
    }
    finally {
        // Always release: every exit path after acquireLock(), including a thrown
        // error or rejected await, must leave the lock free for the next run.
        releaseLock();
    }
}
|