@yemi33/minions 0.1.1953 → 0.1.1954

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/engine/cli.js CHANGED
@@ -772,6 +772,22 @@ const commands = {
772
772
  }
773
773
  })();
774
774
 
775
// W-mp7gox8w000n8936 — KB-sweep boot reconcile.
// A crashed runner (or a legacy runner that never recorded a pid) can leave
// an `in-flight`/`starting` record behind. Left alone, that record keeps
// clogging /api/knowledge/sweep/status until someone POSTs a new sweep, so
// it is cleared once here at startup. Idempotent. Any failure is logged as
// a warning and never aborts startup.
(function startupReconcileKbSweep() {
  try {
    const { reconcileSweepStateOnBoot: reconcile } = require('./kb-sweep');
    const outcome = reconcile();
    // Nothing was released: stay quiet.
    if (!(outcome.released > 0)) return;
    const message = ` KB-sweep boot reconcile: released stale ${outcome.prevStatus} record (pid=${outcome.prevPid}, reason=${outcome.reason})`;
    console.log(message);
  } catch (err) {
    e.log('warn', `KB-sweep boot reconcile failed: ${err.message}`);
  }
})();
790
+
775
791
  // Initial tick
776
792
  e.tick();
777
793
 
@@ -339,6 +339,91 @@ function readSweepLiveness(opts = {}) {
339
339
  };
340
340
  }
341
341
 
342
/**
 * One-shot boot-time reconciliation for `engine/kb-sweep-state.json`.
 *
 * Mirrors the worktree-pool / keep-process boot reconcilers in `engine/cli.js`:
 * after an engine restart we may inherit a stale `in-flight`/`starting` record
 * whose runner has long since died (or — for legacy state files — never
 * recorded a pid). Without proactive cleanup the record sits there clogging
 * `/api/knowledge/sweep/status` until someone POSTs a new sweep (which the
 * dashboard's stale-guard would then auto-release).
 *
 * Behavior:
 *   - Absent state file → no-op.
 *   - Terminal status (completed/failed) → no-op.
 *   - `starting` within 15s boot-grace → no-op (matches readSweepLiveness).
 *   - `in-flight` / stale `starting` with a live pid → no-op (running sweep).
 *     With the default probe, a pid that answers `EPERM` counts as live: the
 *     process exists, it just belongs to another user.
 *   - Otherwise rewrite to `status: 'failed'` preserving the original pid for
 *     forensics. The original record fields are kept; `reconciliationReason`
 *     records why we released it.
 *   - If that rewrite fails, nothing was released: the result reports
 *     `released: 0` with `reason: 'write-failed'` instead of claiming success.
 *     The function never throws because of a failed write.
 *
 * CAS guard: re-reads state immediately before the write and aborts if any of
 * status/startedAt/pid/sweepToken changed since the snapshot — protects against
 * a concurrent dashboard POST or runner that wrote a fresh record between our
 * decision and our write.
 *
 * @param {object} [opts]
 * @param {number} [opts.now=Date.now()] injectable clock (tests)
 * @param {(pid:number)=>boolean} [opts.isPidAlive] injectable (tests)
 * @returns {{ scanned:number, released:number, reason?:string,
 *             prevStatus?:string, prevPid?:number, error?:string }}
 */
function reconcileSweepStateOnBoot(opts = {}) {
  const now = Number(opts.now) || Date.now();
  const isPidAlive = typeof opts.isPidAlive === 'function'
    ? opts.isPidAlive
    : (pid) => {
      try {
        // Signal 0 performs the existence/permission check without
        // delivering a signal.
        process.kill(pid, 0);
        return true;
      } catch (err) {
        // EPERM: the process exists but we may not signal it — still alive.
        // Anything else (typically ESRCH) means there is no such process.
        return Boolean(err) && err.code === 'EPERM';
      }
    };

  const state = safeJson(KB_SWEEP_STATE_PATH);
  if (!state) return { scanned: 0, released: 0 };
  if (state.status !== 'in-flight' && state.status !== 'starting') {
    return { scanned: 1, released: 0, reason: `terminal-status-${state.status}` };
  }
  if (state.status === 'starting') {
    const STARTING_GRACE_MS = 15000;
    // A missing startedAt is treated as infinitely old, i.e. past the grace.
    const age = state.startedAt ? now - Number(state.startedAt) : Infinity;
    if (age <= STARTING_GRACE_MS) {
      return { scanned: 1, released: 0, reason: 'within-boot-grace' };
    }
  }

  const pid = Number(state.pid) || 0;
  const alive = pid > 0 ? !!isPidAlive(pid) : false;
  if (alive) {
    return { scanned: 1, released: 0, reason: 'pid-still-alive', prevPid: pid };
  }

  const reason = pid > 0 ? 'pid-dead-on-startup-check' : 'no-pid-recorded-on-startup-check';

  // CAS guard: re-read right before the write so we don't clobber a fresh
  // `starting`/`in-flight` record that a concurrent dashboard POST or runner
  // wrote between our decision and our write.
  const current = safeJson(KB_SWEEP_STATE_PATH);
  if (!current
    || current.status !== state.status
    || Number(current.startedAt || 0) !== Number(state.startedAt || 0)
    || Number(current.pid || 0) !== pid
    || (current.sweepToken || null) !== (state.sweepToken || null)) {
    return { scanned: 1, released: 0, reason: 'state-changed-before-reconcile' };
  }

  const failedState = {
    ...state,
    status: 'failed',
    completedAt: now,
    completedAtIso: new Date(now).toISOString(),
    error: `sweep abandoned: ${reason}`,
    reconciliationReason: reason,
  };
  // Direct safeWrite (NOT _writeSweepState) so the original pid is preserved
  // for forensics — _writeSweepState would overwrite it with this process's
  // pid, destroying the diagnostic value of the record.
  try {
    safeWrite(KB_SWEEP_STATE_PATH, JSON.stringify(failedState));
  } catch (err) {
    // Still best-effort (no throw), but do not report a release that never
    // reached disk.
    return {
      scanned: 1,
      released: 0,
      reason: 'write-failed',
      prevStatus: state.status,
      prevPid: pid,
      error: err && err.message ? err.message : String(err),
    };
  }
  return {
    scanned: 1, released: 1, reason,
    prevStatus: state.status, prevPid: pid,
  };
}
426
+
342
427
  /**
343
428
  * Run the full sweep. Returns a rich summary.
344
429
  *
@@ -480,6 +565,7 @@ module.exports = {
480
565
  runKbSweep,
481
566
  staleGuardMs,
482
567
  readSweepLiveness,
568
+ reconcileSweepStateOnBoot,
483
569
  KB_SWEEP_STATE_PATH,
484
570
  KB_SWEEP_LOG_PATH,
485
571
  KB_SWEEP_RUNNER_PATH,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yemi33/minions",
3
- "version": "0.1.1953",
3
+ "version": "0.1.1954",
4
4
  "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
5
5
  "bin": {
6
6
  "minions": "bin/minions.js"