@kognai/orchestrator-core 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -886,15 +886,52 @@ async function runSprintCycle(opts) {
|
|
|
886
886
|
// overlapping orphans can't accumulate even if cron ever gets re-armed
|
|
887
887
|
const orchestratorTimeoutMs = PER_RUN_HARD_TIMEOUT_MIN * 60 * 1000;
|
|
888
888
|
log(`Spawning orchestrator with ${PER_RUN_HARD_TIMEOUT_MIN}-min hard timeout`);
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
889
|
+
// TICKET-347: spawnSync's `timeout` only SIGKILLs the DIRECT child (`npx`).
|
|
890
|
+
// Its `ts-node` child + the orchestrator grandchildren (and everything THEY
|
|
891
|
+
// spawn) get orphaned and keep running — reparented to init. A live incident
|
|
892
|
+
// (2026-06-13) had a "timed-out" run keep executing for ~2h after the runner
|
|
893
|
+
// declared it failed, burning ~1.3M tokens + attempting wallet settlements.
|
|
894
|
+
// Fix: spawn DETACHED so the child leads its own process group, then SIGKILL
|
|
895
|
+
// the WHOLE group (negative pid) on timeout — taking the entire subtree down.
|
|
896
|
+
const result = await new Promise((resolveRun) => {
|
|
897
|
+
const child = (0, child_process_1.spawn)('npx', ['ts-node', orchestratorPath, activePath], {
|
|
898
|
+
stdio: 'inherit',
|
|
899
|
+
cwd: ROOT,
|
|
900
|
+
env: { ...process.env },
|
|
901
|
+
detached: true, // new process group (setsid) → killable as a unit
|
|
902
|
+
});
|
|
903
|
+
let timedOut = false;
|
|
904
|
+
let settled = false;
|
|
905
|
+
const finish = (status) => {
|
|
906
|
+
if (settled)
|
|
907
|
+
return;
|
|
908
|
+
settled = true;
|
|
909
|
+
clearTimeout(timer);
|
|
910
|
+
resolveRun({ status, timedOut });
|
|
911
|
+
};
|
|
912
|
+
const timer = setTimeout(() => {
|
|
913
|
+
timedOut = true;
|
|
914
|
+
// Negative pid = signal the entire process group (the detached subtree).
|
|
915
|
+
try {
|
|
916
|
+
if (child.pid)
|
|
917
|
+
process.kill(-child.pid, 'SIGKILL');
|
|
918
|
+
}
|
|
919
|
+
catch { /* group already gone */ }
|
|
920
|
+
// Belt-and-braces: also target the direct child in case the group call missed.
|
|
921
|
+
try {
|
|
922
|
+
child.kill('SIGKILL');
|
|
923
|
+
}
|
|
924
|
+
catch { /* already dead */ }
|
|
925
|
+
}, orchestratorTimeoutMs);
|
|
926
|
+
child.on('error', (err) => { log(`Orchestrator spawn error: ${err.message}`); finish(1); });
|
|
927
|
+
child.on('exit', (code) => finish(code));
|
|
895
928
|
});
|
|
896
929
|
const elapsed = Math.round((Date.now() - start) / 60000);
|
|
897
|
-
const status = result.status === 0
|
|
930
|
+
const status = result.status === 0
|
|
931
|
+
? '✅ Completed'
|
|
932
|
+
: result.timedOut
|
|
933
|
+
? `⏱️ Killed — ${PER_RUN_HARD_TIMEOUT_MIN}-min hard timeout (process group SIGKILLed)`
|
|
934
|
+
: `❌ Failed (exit ${result.status})`;
|
|
898
935
|
log(`Orchestrator finished: ${status} (${elapsed} min)`);
|
|
899
936
|
// Founder directive 2026-05-26: on non-zero exit, write an incident record
|
|
900
937
|
// and emit an event for CTO + CEO to investigate autonomously. The swarm
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kognai/orchestrator-core",
|
|
3
|
-
"version": "0.2.4",
|
|
3
|
+
"version": "0.2.5",
|
|
4
4
|
"description": "Kognai sovereign orchestrator — core engine (template-agnostic). Shared by all products (Kognai/coding, Voxight/market-intel, Invoica/fin-compliance); each supplies only its template. Replaces per-repo forks of orchestrate-agents-v2 / sprint-runner / lib.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "SkinGem",
|