@yemi33/minions 0.1.2004 → 0.1.2006
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard.js +94 -1
- package/docs/README.md +2 -0
- package/docs/auto-discovery.md +1 -0
- package/docs/managed-spawn.md +1 -1
- package/package.json +1 -1
package/dashboard.js
CHANGED
|
@@ -1937,6 +1937,21 @@ const CC_LIVE_STREAM_MAX_AGE_MS = shared.ENGINE_DEFAULTS.ccLiveStreamMaxAgeMs;
|
|
|
1937
1937
|
// edits aren't killed mid-stream and the backend timeout never beats the user's reading
|
|
1938
1938
|
// time. The doc-chat handlers still abort on client disconnect.
|
|
1939
1939
|
const DOC_CHAT_TIMEOUT_MS = 60 * 60 * 1000;
|
|
1940
|
+
// W-mpetru71000re5de — bound the SSE per-tab queue and force-close streams
|
|
1941
|
+
// whose consumer has been backpressured >30s. writeCcEvent used to log
|
|
1942
|
+
// [cc-sse-backpressure] and silently return true on res.write()===false;
|
|
1943
|
+
// Node's WritableState.buffered[] has no upper bound, so a backgrounded tab
|
|
1944
|
+
// whose socket is half-open (Windows default TCP keepalive 7200s) can
|
|
1945
|
+
// accumulate bytes forever — one tab can push the dashboard past V8's 4 GB
|
|
1946
|
+
// heap and silently OOM-kill. Smoking gun: a single CC stream sat open
|
|
1947
|
+
// 8.5 min with 45 tool events queued and 0 bytes flushed
|
|
1948
|
+
// ([cc-stream] reason=heartbeat-write-failed duration=511442ms chunks=0
|
|
1949
|
+
// tools=45 bytes=0). Shedding is safe because liveState.text/.tools are
|
|
1950
|
+
// populated BEFORE the writer({...}) call (dashboard.js:6753, 6760, 6854,
|
|
1951
|
+
// 6864), so dropped wire frames are fully recoverable via the
|
|
1952
|
+
// reconnect-replay protocol (dashboard.js:7048-7083).
|
|
1953
|
+
const SSE_MAX_QUEUE_BYTES = 4 * 1024 * 1024; // 4 MB per-tab — conservative, tunable
|
|
1954
|
+
const SSE_STUCK_KILL_MS = 30 * 1000; // 30s of continuous backpressure → res.destroy()
|
|
1940
1955
|
/**
 * Release the in-flight bookkeeping held for one tab.
 *
 * Removes `tabId` from `ccInFlightTabs` first and then from
 * `ccInFlightAborts`. Calling it for a tab that is not tracked is a no-op.
 *
 * @param {*} tabId - Key under which the tab was registered in both collections.
 * @returns {void}
 */
function _releaseCCTab(tabId) {
  ccInFlightTabs.delete(tabId);
  ccInFlightAborts.delete(tabId);
}
|
|
1941
1956
|
function _getCcLiveStream(tabId) {
|
|
1942
1957
|
return ccLiveStreams.get(tabId) || null;
|
|
@@ -4600,6 +4615,10 @@ const server = http.createServer(async (req, res) => {
|
|
|
4600
4615
|
d.meta?.item?.id === id
|
|
4601
4616
|
);
|
|
4602
4617
|
|
|
4618
|
+
// (W-mpfsl2rw000m9469) Archiving removes a row from work-items.json, which
|
|
4619
|
+
// is part of /api/status fast-state. Match every other mutating handler
|
|
4620
|
+
// and invalidate so the dashboard reflects the archive immediately.
|
|
4621
|
+
invalidateStatusCache();
|
|
4603
4622
|
return jsonReply(res, 200, { ok: true, id });
|
|
4604
4623
|
} catch (e) { return jsonReply(res, 400, { error: e.message }); }
|
|
4605
4624
|
}
|
|
@@ -4803,6 +4822,10 @@ const server = http.createServer(async (req, res) => {
|
|
|
4803
4822
|
});
|
|
4804
4823
|
}
|
|
4805
4824
|
recordCcTurnIfPresent(req, { kind: 'work-item', id, title: item.title, project: item.project || null });
|
|
4825
|
+
// (W-mpfsl2rw000m9469) Invalidate so the next ≤4s SPA poll sees the
|
|
4826
|
+
// followup WI instead of serving a stale 304. Sibling mutating handlers
|
|
4827
|
+
// (delete/cancel/retry/reopen/notes/plan) all invalidate before reply.
|
|
4828
|
+
invalidateStatusCache();
|
|
4806
4829
|
return jsonReply(res, 200, { ok: true, id });
|
|
4807
4830
|
}
|
|
4808
4831
|
const createResult = createWorkItemWithDedup(wiPath, item);
|
|
@@ -4811,6 +4834,10 @@ const server = http.createServer(async (req, res) => {
|
|
|
4811
4834
|
return jsonReply(res, 200, { ok: true, id: duplicateId, duplicate: true, duplicateOf: duplicateId });
|
|
4812
4835
|
}
|
|
4813
4836
|
recordCcTurnIfPresent(req, { kind: 'work-item', id, title: item.title, project: item.project || null });
|
|
4837
|
+
// (W-mpfsl2rw000m9469) Mirror the followup-branch invalidation above so
|
|
4838
|
+
// the new WI shows up on the next dashboard poll instead of waiting for
|
|
4839
|
+
// the 10s periodic push.
|
|
4840
|
+
invalidateStatusCache();
|
|
4814
4841
|
return jsonReply(res, 200, { ok: true, id });
|
|
4815
4842
|
} catch (e) { return jsonReply(res, 400, { error: e.message }); }
|
|
4816
4843
|
}
|
|
@@ -6372,6 +6399,13 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
6372
6399
|
let _docStreamEnded = false;
|
|
6373
6400
|
let _docHeartbeatTimer = null;
|
|
6374
6401
|
const writeDocEvent = (payload) => {
|
|
6402
|
+
// TODO(W-mpetru71000re5de): doc-chat SSE has the same unbounded-queue
|
|
6403
|
+
// failure mode as CC's writeCcEvent — res.write() returning false from
|
|
6404
|
+
// backpressure silently queues bytes in Node's WritableState.buffered[].
|
|
6405
|
+
// Out of scope for this fix (task is scoped to CC only). When this is
|
|
6406
|
+
// addressed, mirror the SSE_MAX_QUEUE_BYTES shed + SSE_STUCK_KILL_MS
|
|
6407
|
+
// heartbeat force-close pattern from the writeCcEvent closure
|
|
6408
|
+
// (dashboard.js, search for SSE_MAX_QUEUE_BYTES).
|
|
6375
6409
|
try {
|
|
6376
6410
|
res.write('data: ' + JSON.stringify(payload) + '\n\n');
|
|
6377
6411
|
return true;
|
|
@@ -7315,6 +7349,17 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
7315
7349
|
let _ccStreamEnded = false;
|
|
7316
7350
|
let _ccHeartbeatTimer = null;
|
|
7317
7351
|
let _ccLastHeartbeatAt = Date.now();
|
|
7352
|
+
// W-mpetru71000re5de — per-stream backpressure clock + approximate queued
|
|
7353
|
+
// bytes. _bpStartedAt timestamps the first write that returned false (and
|
|
7354
|
+
// is reset on 'drain'); _queuedBytes accumulates write sizes pushed past
|
|
7355
|
+
// res.writable's highWaterMark (best-effort — Node's internal buffer
|
|
7356
|
+
// length is private). Used by writeCcEvent (shed) + the heartbeat tick
|
|
7357
|
+
// (force-close stuck streams).
|
|
7358
|
+
let _bpStartedAt = null;
|
|
7359
|
+
let _queuedBytes = 0;
|
|
7360
|
+
try {
|
|
7361
|
+
res.on('drain', () => { _bpStartedAt = null; _queuedBytes = 0; });
|
|
7362
|
+
} catch { /* listener registration is best-effort */ }
|
|
7318
7363
|
// W-mpdavudb000v8446 — SSE delivery telemetry. Previously writeCcEvent
|
|
7319
7364
|
// swallowed all write failures (res.destroyed / res.write returning false
|
|
7320
7365
|
// for backpressure / sync throw), and the [cc-timing] log only proved
|
|
@@ -7351,6 +7396,30 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
7351
7396
|
_logFail('json-serialize-failed', { error: String((err && err.message) || err).slice(0, 200) });
|
|
7352
7397
|
return false;
|
|
7353
7398
|
}
|
|
7399
|
+
// W-mpetru71000re5de — shed wire frames once the per-tab queue exceeds
|
|
7400
|
+
// the cap. Safe ONLY because liveState.text / liveState.tools are
|
|
7401
|
+
// populated BEFORE writer({...}) is called at every call site:
|
|
7402
|
+
// - dashboard.js:6753-6754 (legacy direct path: text → writer)
|
|
7403
|
+
// - dashboard.js:6760-6761 (legacy direct path: tools → writer)
|
|
7404
|
+
// - dashboard.js:6854-6855 (pool path: text → writer)
|
|
7405
|
+
// - dashboard.js:6864-6865 (pool path: tools → writer)
|
|
7406
|
+
// The reconnect path (dashboard.js:7048-7083) replays from liveState.*,
|
|
7407
|
+
// so dropped wire frames are fully recoverable on reattach. We still
|
|
7408
|
+
// bump _ccTelemetry counters so the [cc-stream] outcome log line stays
|
|
7409
|
+
// truthful about what the orchestrator produced — only the wire was
|
|
7410
|
+
// shed, the work happened.
|
|
7411
|
+
if (_queuedBytes > SSE_MAX_QUEUE_BYTES) {
|
|
7412
|
+
try {
|
|
7413
|
+
shared.log('warn', `[cc-sse-shed] tab=${tabId || _ccTelemetry.tabId || 'unknown'} type=${type} queuedBytes=${_queuedBytes} wireBytes=${wire.length}`);
|
|
7414
|
+
} catch { /* telemetry is best-effort */ }
|
|
7415
|
+
if (type === 'chunk') {
|
|
7416
|
+
_ccTelemetry.chunks++;
|
|
7417
|
+
_ccTelemetry.bytes += Buffer.byteLength(String((payload && payload.text) || ''), 'utf8');
|
|
7418
|
+
} else if (type === 'tool') {
|
|
7419
|
+
_ccTelemetry.tools++;
|
|
7420
|
+
}
|
|
7421
|
+
return true;
|
|
7422
|
+
}
|
|
7354
7423
|
let writeOk;
|
|
7355
7424
|
try { writeOk = res.write(wire); }
|
|
7356
7425
|
catch (err) {
|
|
@@ -7362,8 +7431,16 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
7362
7431
|
// The write IS still queued, so don't treat this as a failure, but
|
|
7363
7432
|
// surface it so a slow consumer is visible in telemetry. Most CC
|
|
7364
7433
|
// chunks are small enough that we never hit this in practice.
|
|
7434
|
+
// W-mpetru71000re5de — also start (or extend) the per-stream
|
|
7435
|
+
// backpressure clock and accumulate approximate queued bytes. The
|
|
7436
|
+
// heartbeat tick force-closes the stream once the clock exceeds
|
|
7437
|
+
// SSE_STUCK_KILL_MS; writeCcEvent above sheds further frames once
|
|
7438
|
+
// _queuedBytes exceeds SSE_MAX_QUEUE_BYTES. _bpStartedAt + _queuedBytes
|
|
7439
|
+
// are reset by the res.on('drain') listener registered above.
|
|
7440
|
+
if (_bpStartedAt == null) _bpStartedAt = Date.now();
|
|
7441
|
+
_queuedBytes += wire.length;
|
|
7365
7442
|
try {
|
|
7366
|
-
shared.log('warn', `[cc-sse-backpressure] tab=${tabId || _ccTelemetry.tabId || 'unknown'} type=${type} bytes=${wire.length}`);
|
|
7443
|
+
shared.log('warn', `[cc-sse-backpressure] tab=${tabId || _ccTelemetry.tabId || 'unknown'} type=${type} bytes=${wire.length} queuedBytes=${_queuedBytes} bpMs=${Date.now() - _bpStartedAt}`);
|
|
7367
7444
|
} catch { /* telemetry is best-effort */ }
|
|
7368
7445
|
}
|
|
7369
7446
|
if (payload && payload.type === 'chunk') {
|
|
@@ -7405,6 +7482,22 @@ What would you like to discuss or change? When you're happy, say "approve" and I
|
|
|
7405
7482
|
stopCcHeartbeat();
|
|
7406
7483
|
return;
|
|
7407
7484
|
}
|
|
7485
|
+
// W-mpetru71000re5de — force-close streams stuck on backpressure.
|
|
7486
|
+
// res.destroy() fires req.on('close'), which triggers the existing
|
|
7487
|
+
// teardown path (detach writer → schedule abort → cleanup), so the
|
|
7488
|
+
// queued bytes are sheddable via GC and the LLM is aborted. Catch-22
|
|
7489
|
+
// workaround: _scheduleCcLiveAbort bails while state.writer is
|
|
7490
|
+
// non-null and writer is non-null until req.close fires; that's
|
|
7491
|
+
// exactly what res.destroy() triggers.
|
|
7492
|
+
if (_bpStartedAt && Date.now() - _bpStartedAt > SSE_STUCK_KILL_MS) {
|
|
7493
|
+
const stuckMs = Date.now() - _bpStartedAt;
|
|
7494
|
+
try {
|
|
7495
|
+
shared.log('warn', `[cc-sse-stuck-close] tab=${tabId || _ccTelemetry.tabId || 'unknown'} stuckMs=${stuckMs} queuedBytes=${_queuedBytes}`);
|
|
7496
|
+
} catch { /* telemetry is best-effort */ }
|
|
7497
|
+
stopCcHeartbeat();
|
|
7498
|
+
try { res.destroy(); } catch { /* swallow — req.on('close') will still fire */ }
|
|
7499
|
+
return;
|
|
7500
|
+
}
|
|
7408
7501
|
_checkStall();
|
|
7409
7502
|
if (!writeCcEvent({ type: 'heartbeat' })) {
|
|
7410
7503
|
stopCcHeartbeat();
|
package/docs/README.md
CHANGED
|
@@ -21,7 +21,9 @@ Architecture, design proposals, and lifecycle references for people working on t
|
|
|
21
21
|
- [kb-sweep.md](kb-sweep.md) — Knowledge-base consolidation sweep (hash dedup → LLM batch dedup/reclassify → per-entry compress) and the detached runner that keeps it alive across `minions restart`.
|
|
22
22
|
- [managed-spawn.md](managed-spawn.md) — Engine-owned long-running services (managed-spawn primitive): sidecar schema, healthcheck examples, lifecycle, dashboard API, and the WI 1 (build) → WI 2 (test) chained-validation pattern.
|
|
23
23
|
- [plan-lifecycle.md](plan-lifecycle.md) — Full plan pipeline from `/plan` through PRD materialization, dispatch with dependency gating, verify task, and human archive.
|
|
24
|
+
- [pr-comment-followup.md](pr-comment-followup.md) — PR-comment follow-up dispatch contract: fix/review agents may spin off a new WI via `POST /api/work-items` with `meta.pr_followup` instead of broadening the current PR or rebutting the comment.
|
|
24
25
|
- [pr-review-fix-loop.md](pr-review-fix-loop.md) — How the engine moves a PR from creation through review, fix dispatch, and re-review, including stale-status guards.
|
|
26
|
+
- [qa-runbooks.md](qa-runbooks.md) — Per-project QA runbook schema, storage layout (`projects/<name>/runbooks/<id>.json`), CRUD endpoints, run-record lifecycle, and the `qa-validate` agent sidecar contract.
|
|
25
27
|
- [rfc-completion-json.md](rfc-completion-json.md) — RFC for replacing stdout regex-scraping with a structured `completion.json` control-plane protocol.
|
|
26
28
|
- [runtime-adapters.md](runtime-adapters.md) — Runtime adapter contract (`engine/runtimes/*`): how the engine talks to Claude Code, Copilot CLI, and future CLIs through a single capability-flagged interface.
|
|
27
29
|
- [self-improvement.md](self-improvement.md) — The six self-improvement mechanisms (learnings inbox, per-agent history, review feedback, quality metrics, etc.) that form Minions' continuous feedback loop.
|
package/docs/auto-discovery.md
CHANGED
|
@@ -17,6 +17,7 @@ tick()
|
|
|
17
17
|
2. consolidateInbox() Merge learnings into notes.md (Haiku-powered)
|
|
18
18
|
2.5 runCleanup() Periodic cleanup (every 10 ticks ≈ 10min)
|
|
19
19
|
2.52 sweepKeepProcesses() keep_processes TTL/dead-PID sweep (every 30 ticks)
|
|
20
|
+
2.53 sweepManagedSpawn() managed_spawn TTL/dead-PID/log-rotate sweep (every 30 ticks)
|
|
20
21
|
2.55 checkWatches() Persistent watch jobs (every 3 tick-equivalents)
|
|
21
22
|
2.6 pollPrStatus() Poll ADO + GitHub for build, review, merge status (wall-clock cadence from prPollStatusEvery × tickInterval, default ≈ 12min)
|
|
22
23
|
processPendingRebases() Run any rebase work queued from the previous tick
|
package/docs/managed-spawn.md
CHANGED
|
@@ -38,7 +38,7 @@ The sidecar lives at `<MINIONS_DIR>/agents/<agentId>/managed-spawn.json` and is
|
|
|
38
38
|
"cwd": "D:/repos/constellation", // must be inside a real git worktree (requireGitWorkdir: true) — monorepo subdirs ok, ancestor walked up to gitWorktreeMaxParentDepth parents
|
|
39
39
|
"env": { "CONSTELLATION_SERVER": "http://localhost:3000" }, // ≤32 keys; POSIX-shape + denylist enforced
|
|
40
40
|
"ports": [3001], // 1024-65535; ≤20 per spec; advisory only (engine doesn't bind)
|
|
41
|
-
"ttl_minutes": 240, // ≤1440 (24h hard cap); defaults to
|
|
41
|
+
"ttl_minutes": 240, // ≤1440 (24h hard cap); defaults to 720 (12h)
|
|
42
42
|
"attrs": { // opaque per-spec metadata, ≤2048 bytes serialized
|
|
43
43
|
"base_url": "http://localhost:3001",
|
|
44
44
|
"framework": "vite"
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yemi33/minions",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.2006",
|
|
4
4
|
"description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
|
|
5
5
|
"bin": {
|
|
6
6
|
"minions": "bin/minions.js"
|