@kognai/orchestrator-core 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -79,6 +79,13 @@ const MAX_HOURS = 6; // kill orchestrator if it runs longer than this
|
|
|
79
79
|
// Rate limiter: minimum gap between sprint executions (prevents burning Claude 5h limit)
|
|
80
80
|
// Default: 30 min. Override via SPRINT_COOLDOWN_MINUTES env var.
|
|
81
81
|
const COOLDOWN_MINUTES = parseInt(process.env.SPRINT_COOLDOWN_MINUTES ?? '30', 10);
|
|
82
|
+
// TICKET-348 sprint-level backoff: a sprint whose run makes NO forward progress
|
|
83
|
+
// (no pending task reached a terminal/done state) this many times IN A ROW is
|
|
84
|
+
// auto-skipped as 'loop-stuck', so a permanently-failing sprint (e.g. one whose
|
|
85
|
+
// files keep truncating) stops monopolising the runner and the selector advances
|
|
86
|
+
// to other queued work. State lives in .swarm-state/sprint-backoff.json.
|
|
87
|
+
const SPRINT_BACKOFF = (0, path_1.join)(ROOT, '.swarm-state', 'sprint-backoff.json');
|
|
88
|
+
/**
 * Parse the SPRINT_BACKOFF_THRESHOLD override into a usable positive integer.
 *
 * parseInt returns NaN for non-numeric input, and every `>= NaN` comparison is
 * false — an unvalidated value would silently disable the backoff entirely.
 *
 * @param {string | undefined} raw - raw env value (undefined when unset)
 * @returns {number} 2 when unset or non-numeric; otherwise the parsed integer, floored at 1
 */
function parseSprintBackoffThreshold(raw) {
    const parsed = Number.parseInt(raw ?? '2', 10);
    if (Number.isNaN(parsed)) {
        return 2;
    }
    // A threshold below 1 already behaves like 1 (the first no-progress run trips it);
    // clamping keeps that behaviour while keeping the "n/threshold" log line sensible.
    return Math.max(1, parsed);
}
const SPRINT_BACKOFF_THRESHOLD = parseSprintBackoffThreshold(process.env.SPRINT_BACKOFF_THRESHOLD);
|
|
82
89
|
// Daily cap: max sprints per calendar day. Default: 100.
|
|
83
90
|
const DAILY_SPRINT_CAP = parseInt(process.env.DAILY_SPRINT_CAP ?? '100', 10);
|
|
84
91
|
// Rolling window cap: max sprints within the last N hours. Default: 20 per 5h.
|
|
@@ -354,6 +361,63 @@ function extractSprintNumber(filename) {
|
|
|
354
361
|
// (Multi-session safety: prevents another Claude session reverting a local file
|
|
355
362
|
// from causing the runner to re-execute paused/done work.)
|
|
356
363
|
const NOTION_OVERRIDE_STATUSES = new Set(['skipped', 'blocked', 'done', 'done-manual', 'loop-stuck', 'rejected']);
|
|
364
|
+
/**
 * Load the sprint backoff state from SPRINT_BACKOFF.
 *
 * @returns {object} the parsed state object keyed by sprint id, or an empty
 *   object when the file is missing, unreadable, not valid JSON, or holds a
 *   JSON value that is not a plain object.
 */
function readBackoff() {
    try {
        const parsed = JSON.parse((0, fs_1.readFileSync)(SPRINT_BACKOFF, 'utf8'));
        // JSON.parse happily returns null, arrays and primitives. Callers index the
        // result by sprint id, which throws on null — so only hand back real objects.
        const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
        return isPlainObject ? parsed : {};
    }
    catch {
        // Missing or corrupt state is treated as "no backoff recorded".
        return {};
    }
}
|
|
372
|
+
/**
 * Persist the sprint backoff state to SPRINT_BACKOFF.
 *
 * Writes to a per-process temp file and renames it over the target so a
 * reader never sees a half-written file. Failures are swallowed on purpose:
 * backoff bookkeeping is best-effort and must never block a run.
 *
 * @param {object} data - full backoff state to serialise as JSON
 * @returns {void}
 */
function writeBackoff(data) {
    const tmp = `${SPRINT_BACKOFF}.tmp.${process.pid}`;
    try {
        // recursive mkdir is a no-op when the directory already exists
        (0, fs_1.mkdirSync)((0, path_1.join)(ROOT, '.swarm-state'), { recursive: true });
        (0, fs_1.writeFileSync)(tmp, JSON.stringify(data, null, 2));
        (0, fs_1.renameSync)(tmp, SPRINT_BACKOFF);
    }
    catch {
        // Best-effort: never block a run. Still, don't leave the temp file behind
        // when the write or rename failed part-way.
        try {
            (0, fs_1.unlinkSync)(tmp);
        }
        catch { /* temp file was never created or is already gone */ }
    }
}
|
|
383
|
+
/**
 * Number of tasks whose status is 'pending' in the sprint's merged view, as
 * returned by sprint_state_1.loadSprintMerged.
 *
 * @param {string} sprintPath - path of the sprint file to load
 * @returns {number} count of pending tasks; 0 when the sprint has no task list
 *   or loading/counting throws
 */
function countPendingTasks(sprintPath) {
    try {
        const merged = (0, sprint_state_1.loadSprintMerged)(sprintPath);
        const taskList = merged.tasks ?? [];
        return taskList.reduce((pending, task) => (task.status === 'pending' ? pending + 1 : pending), 0);
    }
    catch {
        // An unreadable sprint is reported as having nothing pending.
        return 0;
    }
}
|
|
392
|
+
/**
 * Whether the selector should skip a sprint because of recorded backoff.
 *
 * @param {string} sprintId - key of the sprint in the backoff state
 * @returns {boolean} true when the sprint's entry is flagged loop_stuck or its
 *   no_progress count has reached SPRINT_BACKOFF_THRESHOLD; false when there
 *   is no entry for the sprint
 */
function isBackedOff(sprintId) {
    const entry = readBackoff()[sprintId];
    if (!entry) {
        return false;
    }
    if (entry.loop_stuck === true) {
        return true;
    }
    return entry.no_progress >= SPRINT_BACKOFF_THRESHOLD;
}
|
|
397
|
+
/**
 * Post-run bookkeeping for the sprint backoff.
 *
 * A run counts as progress when the pending-task count dropped. Progress clears
 * the sprint's backoff entry (if it has one). Otherwise the consecutive
 * no-progress counter is bumped, and once it reaches SPRINT_BACKOFF_THRESHOLD
 * the entry is flagged loop_stuck. The outcome of a no-progress run is logged.
 *
 * @param {string} sprintId - key of the sprint in the backoff state
 * @param {number} pendingBefore - pending-task count snapshotted before the run
 * @param {number} pendingAfter - pending-task count after the run
 * @returns {void}
 */
function recordSprintProgress(sprintId, pendingBefore, pendingAfter) {
    const state = readBackoff();
    const madeProgress = pendingAfter < pendingBefore;
    if (madeProgress) {
        // Only touch the file when there is actually a streak to clear.
        if (state[sprintId]) {
            delete state[sprintId];
            writeBackoff(state);
        }
        return;
    }
    const streak = (state[sprintId]?.no_progress ?? 0) + 1;
    const stuck = streak >= SPRINT_BACKOFF_THRESHOLD;
    state[sprintId] = {
        no_progress: streak,
        last: new Date().toISOString(),
        loop_stuck: stuck,
    };
    writeBackoff(state);
    const message = stuck
        ? `⛔ Backoff: ${sprintId} made no progress ${streak}× in a row — marked loop-stuck (auto-skipped). Clear .swarm-state/sprint-backoff.json or fix the sprint to re-enable.`
        : `Backoff: ${sprintId} no progress this run (${streak}/${SPRINT_BACKOFF_THRESHOLD} before auto-skip).`;
    log(message);
}
|
|
357
421
|
async function findPendingSprint() {
|
|
358
422
|
if (!(0, fs_1.existsSync)(SPRINTS))
|
|
359
423
|
return null;
|
|
@@ -404,6 +468,12 @@ async function findPendingSprint() {
|
|
|
404
468
|
log(`Skipped ${file}: Notion source-of-truth says '${notionStatus}' (overrides local pending)`);
|
|
405
469
|
continue;
|
|
406
470
|
}
|
|
471
|
+
// TICKET-348: local backoff — skip a sprint that has made no forward
|
|
472
|
+
// progress N runs in a row (loop-stuck), so it can't monopolise the runner.
|
|
473
|
+
if (isBackedOff(sprintId)) {
|
|
474
|
+
log(`Skipped ${file}: backoff — no forward progress ${SPRINT_BACKOFF_THRESHOLD}× in a row (loop-stuck, auto-skipped)`);
|
|
475
|
+
continue;
|
|
476
|
+
}
|
|
407
477
|
// Dependency check: respect depends_on_sprint — if the upstream sprint
|
|
408
478
|
// has any non-terminal task, skip this one. Founder directive 2026-05-26:
|
|
409
479
|
// swarm must never deadlock on a sprint whose prereqs haven't shipped.
|
|
@@ -828,6 +898,10 @@ async function runSprintCycle(opts) {
|
|
|
828
898
|
return;
|
|
829
899
|
}
|
|
830
900
|
log(`Found pending sprint: ${sprintPath}`);
|
|
901
|
+
// TICKET-348: snapshot pending count before the run so we can detect whether
|
|
902
|
+
// this run made any forward progress (and apply backoff if it didn't).
|
|
903
|
+
const backoffSprintId = (0, path_1.basename)(sprintPath).replace(/\.json$/, '');
|
|
904
|
+
const pendingBefore = countPendingTasks(sprintPath);
|
|
831
905
|
// TICKET-210: build the ACTIVE sprint from the MERGED view (source definition
|
|
832
906
|
// + .swarm-state status), NOT raw source. The source file holds every task at
|
|
833
907
|
// its authored status (usually 'pending'); reading it directly meant a sprint
|
|
@@ -1067,6 +1141,10 @@ async function runSprintCycle(opts) {
|
|
|
1067
1141
|
catch {
|
|
1068
1142
|
// non-fatal
|
|
1069
1143
|
}
|
|
1144
|
+
// TICKET-348: update sprint backoff. State is fully synced by now (ACTIVE→
|
|
1145
|
+
// .swarm-state + forensic git-log recovery), so countPendingTasks reflects the
|
|
1146
|
+
// post-run truth. No drop in pending = no progress = step toward loop-stuck.
|
|
1147
|
+
recordSprintProgress(backoffSprintId, pendingBefore, countPendingTasks(sprintPath));
|
|
1070
1148
|
// TICKET-201: post-sprint hook (e.g. dispatch-approved-proposals).
|
|
1071
1149
|
// Supplied by the product entry so core stays product-agnostic.
|
|
1072
1150
|
try {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kognai/orchestrator-core",
|
|
3
|
-
"version": "0.2.5",
|
|
3
|
+
"version": "0.2.6",
|
|
4
4
|
"description": "Kognai sovereign orchestrator — core engine (template-agnostic). Shared by all products (Kognai/coding, Voxight/market-intel, Invoica/fin-compliance); each supplies only its template. Replaces per-repo forks of orchestrate-agents-v2 / sprint-runner / lib.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "SkinGem",
|