@spinabot/brigade 1.16.0 → 1.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1524,6 +1524,43 @@ async function continueBoot(args) {
1524
1524
  */
1525
1525
  const recentSystemEvents = new Map();
1526
1526
  const RECENT_SYSTEM_EVENTS_MAX = 30;
1527
+ // Cap how many transcript messages `resume` ships. A thread can grow to
1528
+ // thousands of messages; replaying ALL of them on every connect/reconnect/
1529
+ // resync would re-read + re-parse the whole JSONL synchronously and ship a
1530
+ // huge frame (risking the 32 MiB payload cap). Bound it to the recent tail —
1531
+ // the operator lands back in context without the cost scaling with thread
1532
+ // length. (Lazy-loading older history on scroll is a later enhancement.)
1533
+ const RESUME_TRANSCRIPT_MAX = 200;
1534
+ // Cap how many DISTINCT sessions we retain recovery state for. `seqCounters`
1535
+ // and `recentSystemEvents` would otherwise grow unbounded over a multi-day
1536
+ // daemon (every cron run, channel thread, sub-agent child key, and `/new`
1537
+ // mints a fresh key that never gets evicted). Evict the coldest sessions
1538
+ // (LRU for `recentSystemEvents`, creation order for `seqCounters`) past this bound — safe because the durable transcript is the source of
1539
+ // truth: an evicted session simply rebuilds from disk on its next `resume`,
1540
+ // and a re-touched seq counter restarting at 0 only triggers a harmless
1541
+ // resync on any client still watching it.
1542
+ const RECOVERY_SESSION_MAX = 512;
1543
+ const evictColdRecoverySessions = () => {
1544
+ // Trims both recovery maps back to RECOVERY_SESSION_MAX entries by deleting
1545
+ // from the front of each Map (JS Maps iterate in insertion order). The
1546
+ // recentSystemEvents write below moves a touched key to the end (delete+set),
1547
+ // so its FIRST keys are the least-recently-used; seqCounters evicts in creation
1548
+ // order. NOTE(review): that can drop an old-but-still-active session's counter first — confirm the resulting resync is acceptable.
1549
+ // Either way the durable transcript is the source of truth, so eviction is safe — an evicted session rebuilds from disk on its next `resume`, and a re-touched
1550
+ // seq counter restarting at 0 only makes a still-connected client issue one harmless self-healing resync.
1551
+ while (recentSystemEvents.size > RECOVERY_SESSION_MAX) {
1552
+ const oldest = recentSystemEvents.keys().next().value;
1553
+ if (oldest === undefined) // defensive: stop instead of spinning if no key is yielded
1554
+ break;
1555
+ recentSystemEvents.delete(oldest);
1556
+ }
1557
+ while (seqCounters.size > RECOVERY_SESSION_MAX) {
1558
+ const oldest = seqCounters.keys().next().value;
1559
+ if (oldest === undefined) // same defensive guard as the loop above
1560
+ break;
1561
+ seqCounters.delete(oldest);
1562
+ }
1563
+ };
1527
1564
  /**
1528
1565
  * Process boot id (session generation / "epoch"). Constant for this gateway
1529
1566
  * process; a restart yields a new value. Advertised in `HelloOk` so a client
@@ -1586,13 +1623,21 @@ async function continueBoot(args) {
1586
1623
  event === "system-event";
1587
1624
  const seq = isOrderedFrame ? nextSeq(seqCounters, frameSessionId) : undefined;
1588
1625
  // Retain a bounded per-session tail of system-events for `resume` recovery.
1626
+ // delete+set moves this session to the end of the Map (LRU touch) so the
1627
+ // eviction sweep below drops the least-recently-active sessions first.
1589
1628
  if (event === "system-event" && frameSessionId) {
1590
1629
  const ring = recentSystemEvents.get(frameSessionId) ?? [];
1591
1630
  ring.push(payload);
1592
1631
  while (ring.length > RECENT_SYSTEM_EVENTS_MAX)
1593
1632
  ring.shift();
1633
+ recentSystemEvents.delete(frameSessionId);
1594
1634
  recentSystemEvents.set(frameSessionId, ring);
1595
1635
  }
1636
+ // Bound the recovery maps so a long-lived daemon that touches many
1637
+ // distinct session keys (cron runs, channel threads, sub-agent children,
1638
+ // `/new`) doesn't grow them without limit.
1639
+ if (isOrderedFrame && frameSessionId)
1640
+ evictColdRecoverySessions();
1596
1641
  const frame = seq !== undefined
1597
1642
  ? { type: "event", event, payload, seq }
1598
1643
  : { type: "event", event, payload };
@@ -2988,7 +3033,10 @@ async function continueBoot(args) {
2988
3033
  const p = (params ?? {});
2989
3034
  const targetAgentId = p.agentId?.trim() || agentId;
2990
3035
  const targetSessionKey = p.sessionKey?.trim() || defaultSessionKey(targetAgentId);
2991
- const messages = await readSessionTranscriptMessages({ sessionKey: targetSessionKey });
3036
+ const messages = await readSessionTranscriptMessages({
3037
+ sessionKey: targetSessionKey,
3038
+ limit: RESUME_TRANSCRIPT_MAX,
3039
+ });
2992
3040
  const headSeq = seqCounters.get(targetSessionKey) ?? 0;
2993
3041
  // Recovery for the two non-transcript event types so a (re)connecting
2994
3042
  // client loses NOTHING: tool-approval prompts still pending on this
@@ -3537,6 +3585,21 @@ async function continueBoot(args) {
3537
3585
  let configReadWarningSurfaced = false;
3538
3586
  const buildSessionsAccessCheck = () => {
3539
3587
  return ({ action, targetSessionKey }) => {
3588
+ // SAME-AGENT operator pass. The WS requester is the LOCAL OPERATOR
3589
+ // (localhost-bind + admin scope), anchored to the boot agent. The
3590
+ // operator owns EVERY session of their own agent, so any target under
3591
+ // that same agent passes — this guard's job is solely to refuse
3592
+ // CROSS-AGENT reach (gated below by visibility="all" + A2A policy).
3593
+ // Without this, the operator prompting a fresh same-agent thread
3594
+ // (`/new` → `agent:main:t-…`) or switching to any non-boot session of
3595
+ // their own agent was wrongly refused by the `visibility:"self"` rule
3596
+ // in `checkSessionToolAccess`, even though they plainly own it. The
3597
+ // agent's own `sessions_send` tool is unaffected — it calls
3598
+ // `checkSessionToolAccess` directly with the AGENT's session as the
3599
+ // requester, so its self/tree visibility still applies.
3600
+ if ((parseAgentSessionKey(targetSessionKey)?.agentId ?? agentId) === agentId) {
3601
+ return { allowed: true };
3602
+ }
3540
3603
  // Read the live config snapshot so `system.reload` that
3541
3604
  // tightens visibility/A2A takes effect on the very next RPC.
3542
3605
  // Sync `loadConfig()` would be ideal but the project's