instar 1.3.563 → 1.3.565

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/dist/commands/server.d.ts.map +1 -1
  2. package/dist/commands/server.js +144 -29
  3. package/dist/commands/server.js.map +1 -1
  4. package/dist/core/PostUpdateMigrator.d.ts.map +1 -1
  5. package/dist/core/PostUpdateMigrator.js +18 -0
  6. package/dist/core/PostUpdateMigrator.js.map +1 -1
  7. package/dist/core/SlackForwardBridge.d.ts +44 -0
  8. package/dist/core/SlackForwardBridge.d.ts.map +1 -0
  9. package/dist/core/SlackForwardBridge.js +57 -0
  10. package/dist/core/SlackForwardBridge.js.map +1 -0
  11. package/dist/core/WorkEvidence.d.ts +18 -0
  12. package/dist/core/WorkEvidence.d.ts.map +1 -1
  13. package/dist/core/WorkEvidence.js +22 -0
  14. package/dist/core/WorkEvidence.js.map +1 -1
  15. package/dist/core/types.d.ts +14 -0
  16. package/dist/core/types.d.ts.map +1 -1
  17. package/dist/core/types.js.map +1 -1
  18. package/dist/monitoring/ResumeQueue.d.ts +12 -1
  19. package/dist/monitoring/ResumeQueue.d.ts.map +1 -1
  20. package/dist/monitoring/ResumeQueue.js +26 -3
  21. package/dist/monitoring/ResumeQueue.js.map +1 -1
  22. package/dist/monitoring/ResumeQueueDrainer.d.ts +49 -0
  23. package/dist/monitoring/ResumeQueueDrainer.d.ts.map +1 -1
  24. package/dist/monitoring/ResumeQueueDrainer.js +112 -3
  25. package/dist/monitoring/ResumeQueueDrainer.js.map +1 -1
  26. package/dist/scaffold/templates.d.ts.map +1 -1
  27. package/dist/scaffold/templates.js +1 -0
  28. package/dist/scaffold/templates.js.map +1 -1
  29. package/package.json +1 -1
  30. package/src/data/builtin-manifest.json +19 -19
  31. package/src/scaffold/templates.ts +1 -0
  32. package/upgrades/1.3.564.md +40 -0
  33. package/upgrades/1.3.565.md +29 -0
  34. package/upgrades/side-effects/resume-queue-stale-emergency-pause.md +212 -0
  35. package/upgrades/side-effects/slack-pool-dispatch-to-owner.md +127 -0
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "$schema": "./builtin-manifest.schema.json",
3
3
  "schemaVersion": 1,
4
- "generatedAt": "2026-06-14T11:52:44.425Z",
5
- "instarVersion": "1.3.563",
4
+ "generatedAt": "2026-06-14T18:12:03.052Z",
5
+ "instarVersion": "1.3.565",
6
6
  "entryCount": 201,
7
7
  "entries": {
8
8
  "hook:session-start": {
@@ -11,7 +11,7 @@
11
11
  "domain": "identity",
12
12
  "sourcePath": "src/core/PostUpdateMigrator.ts",
13
13
  "installedPath": ".instar/hooks/instar/session-start.sh",
14
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
14
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
15
15
  "since": "2025-01-01"
16
16
  },
17
17
  "hook:dangerous-command-guard": {
@@ -20,7 +20,7 @@
20
20
  "domain": "safety",
21
21
  "sourcePath": "src/core/PostUpdateMigrator.ts",
22
22
  "installedPath": ".instar/hooks/instar/dangerous-command-guard.sh",
23
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
23
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
24
24
  "since": "2025-01-01"
25
25
  },
26
26
  "hook:grounding-before-messaging": {
@@ -29,7 +29,7 @@
29
29
  "domain": "safety",
30
30
  "sourcePath": "src/core/PostUpdateMigrator.ts",
31
31
  "installedPath": ".instar/hooks/instar/grounding-before-messaging.sh",
32
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
32
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
33
33
  "since": "2025-01-01"
34
34
  },
35
35
  "hook:compaction-recovery": {
@@ -38,7 +38,7 @@
38
38
  "domain": "identity",
39
39
  "sourcePath": "src/core/PostUpdateMigrator.ts",
40
40
  "installedPath": ".instar/hooks/instar/compaction-recovery.sh",
41
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
41
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
42
42
  "since": "2025-01-01"
43
43
  },
44
44
  "hook:external-operation-gate": {
@@ -47,7 +47,7 @@
47
47
  "domain": "safety",
48
48
  "sourcePath": "src/core/PostUpdateMigrator.ts",
49
49
  "installedPath": ".instar/hooks/instar/external-operation-gate.js",
50
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
50
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
51
51
  "since": "2025-01-01"
52
52
  },
53
53
  "hook:deferral-detector": {
@@ -56,7 +56,7 @@
56
56
  "domain": "safety",
57
57
  "sourcePath": "src/core/PostUpdateMigrator.ts",
58
58
  "installedPath": ".instar/hooks/instar/deferral-detector.js",
59
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
59
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
60
60
  "since": "2025-01-01"
61
61
  },
62
62
  "hook:self-stop-guard": {
@@ -65,7 +65,7 @@
65
65
  "domain": "coherence",
66
66
  "sourcePath": "src/core/PostUpdateMigrator.ts",
67
67
  "installedPath": ".instar/hooks/instar/self-stop-guard.js",
68
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
68
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
69
69
  "since": "2025-01-01"
70
70
  },
71
71
  "hook:post-action-reflection": {
@@ -74,7 +74,7 @@
74
74
  "domain": "evolution",
75
75
  "sourcePath": "src/core/PostUpdateMigrator.ts",
76
76
  "installedPath": ".instar/hooks/instar/post-action-reflection.js",
77
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
77
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
78
78
  "since": "2025-01-01"
79
79
  },
80
80
  "hook:external-communication-guard": {
@@ -83,7 +83,7 @@
83
83
  "domain": "safety",
84
84
  "sourcePath": "src/core/PostUpdateMigrator.ts",
85
85
  "installedPath": ".instar/hooks/instar/external-communication-guard.js",
86
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
86
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
87
87
  "since": "2025-01-01"
88
88
  },
89
89
  "hook:scope-coherence-collector": {
@@ -92,7 +92,7 @@
92
92
  "domain": "coherence",
93
93
  "sourcePath": "src/core/PostUpdateMigrator.ts",
94
94
  "installedPath": ".instar/hooks/instar/scope-coherence-collector.js",
95
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
95
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
96
96
  "since": "2025-01-01"
97
97
  },
98
98
  "hook:scope-coherence-checkpoint": {
@@ -101,7 +101,7 @@
101
101
  "domain": "coherence",
102
102
  "sourcePath": "src/core/PostUpdateMigrator.ts",
103
103
  "installedPath": ".instar/hooks/instar/scope-coherence-checkpoint.js",
104
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
104
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
105
105
  "since": "2025-01-01"
106
106
  },
107
107
  "hook:free-text-guard": {
@@ -110,7 +110,7 @@
110
110
  "domain": "safety",
111
111
  "sourcePath": "src/core/PostUpdateMigrator.ts",
112
112
  "installedPath": ".instar/hooks/instar/free-text-guard.sh",
113
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
113
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
114
114
  "since": "2025-01-01"
115
115
  },
116
116
  "hook:claim-intercept": {
@@ -119,7 +119,7 @@
119
119
  "domain": "coherence",
120
120
  "sourcePath": "src/core/PostUpdateMigrator.ts",
121
121
  "installedPath": ".instar/hooks/instar/claim-intercept.js",
122
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
122
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
123
123
  "since": "2025-01-01"
124
124
  },
125
125
  "hook:claim-intercept-response": {
@@ -128,7 +128,7 @@
128
128
  "domain": "coherence",
129
129
  "sourcePath": "src/core/PostUpdateMigrator.ts",
130
130
  "installedPath": ".instar/hooks/instar/claim-intercept-response.js",
131
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
131
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
132
132
  "since": "2025-01-01"
133
133
  },
134
134
  "hook:stop-gate-router": {
@@ -137,7 +137,7 @@
137
137
  "domain": "safety",
138
138
  "sourcePath": "src/core/PostUpdateMigrator.ts",
139
139
  "installedPath": ".instar/hooks/instar/stop-gate-router.js",
140
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
140
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
141
141
  "since": "2025-01-01"
142
142
  },
143
143
  "hook:auto-approve-permissions": {
@@ -146,7 +146,7 @@
146
146
  "domain": "safety",
147
147
  "sourcePath": "src/core/PostUpdateMigrator.ts",
148
148
  "installedPath": ".instar/hooks/instar/auto-approve-permissions.js",
149
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
149
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
150
150
  "since": "2025-01-01"
151
151
  },
152
152
  "job:health-check": {
@@ -1554,7 +1554,7 @@
1554
1554
  "type": "subsystem",
1555
1555
  "domain": "updates",
1556
1556
  "sourcePath": "src/core/PostUpdateMigrator.ts",
1557
- "contentHash": "e0ddbc39521447ae1bfecddfc98b2bd579400889d5fac37cf7df7d380e6a2fb7",
1557
+ "contentHash": "67ee569178f3390f9f87fc675416d9d85b1eaf58a1291a8b0ee90b37c67af7b8",
1558
1558
  "since": "2025-01-01"
1559
1559
  },
1560
1560
  "subsystem:scheduler": {
@@ -420,6 +420,7 @@ This routes feedback to the Instar maintainers automatically. Valid types: \`bug
420
420
  - Read it: \`curl -H "Authorization: Bearer $AUTH" "http://localhost:${port}/sessions/reap-log?limit=50"\` → \`{ entries: [{ ts, type:'reaped'|'skipped'|'notify', session, reason, disposition, origin, skipped?, machine?, midWork?, workEvidence?, noticeId?, topicId?, outcome? }] }\`. Read-only. \`type:'notify'\` rows are reap-NOTICE delivery outcomes (append-only pairs: \`enqueued\` → \`sent\`/\`send-failed-escalated\`/\`no-topic\`/\`enqueue-failed\`; latest record per noticeId wins) — "did the user get told?" is auditable. \`midWork:true\` on a reaped row means the kill interrupted evidenced work.
421
421
  - Notices are PER-TOPIC and durable: every topic that lost a session gets one plain-English notice in THAT topic (bursts coalesce per topic; the lifeline gets unbound sessions + a cross-topic index), delivered through a durable store + always-on drain that retries with backoff. Rollback levers: \`{"monitoring": {"reapNotify": {"perTopic": false}}}\` (legacy single-buffer), \`{"drainEnabled": false}\` (legacy direct send), \`{"enabled": false}\` (no notices). Recovery-bounces and your own operator kills stay silent.
422
422
  - **Mid-work resume queue** (ships observe-only/dry-run by default): a session reaped MID-WORK (strong work evidence at kill time) is queued for ordered automatic revival once the machine recovers — at most one resume per minute, only after sustained calm + quota headroom. \`GET /sessions/resume-queue\` shows entries, paused/breaker state, and lastTickAt; \`POST /sessions/resume-queue/:id/cancel\` · \`/:id/requeue\` (gave-up entries only) · \`/resume\` (unpause) · \`/drain\` (manual single step). Emergency stops pause the queue; an explicit per-topic stop cancels that topic's entries. Jobs only auto-resume when their definition sets \`resumeOnReap: true\`.
423
+ - **A stale emergency-stop pause self-heals**: an emergency-stop pauses the WHOLE revival queue, and that pause used to never lift — silently stranding later, unrelated active-run revivals (the 2026-06-14 4-hour-silent-strand). Now: while the queue is paused with sessions waiting, you get ONE plain-English heads-up that revival is paused (Layer 1, always on); and if the pause is a stale emergency/sentinel stop AND an active autonomous run has since been recycled and queued well after the stop, the queue auto-resumes itself (Layer 2, on by default — \`monitoring.resumeQueue.autoResumeStalePause: false\` to disable; \`staleEmergencyPauseAutoResumeMin\` tunes the window, default 60). Any topic you actually stopped stays blocked by its per-topic operator-stop record even after the queue resumes, and a deliberate \`autonomous stop-all\` halt is NEVER auto-cleared. Proactive: user asks "why did my session restart by itself after a stop?" / "why is revival paused?" → GET /sessions/resume-queue (paused state) and the resume-queue audit log, then explain in plain words.
423
424
  - Proactive: user asks "where did my session go?" / "why did X disappear?" / "did something get killed?" → GET /sessions/reap-log and explain the most recent reaped/skipped entries for that session. User asks "did my interrupted work come back?" / "is a restart queued?" → GET /sessions/resume-queue and report the entry's status in plain words.
424
425
  - **Build-Session Yield Safety** (ACT-839; ships dev-enabled, dark on the fleet, per the Maturation Path standard): a session reaped while its WORKTREE holds uncommitted work (a build that died "standing by for tests") is resume-eligible on that alone — the killer collects a bounded, fail-open dirty-check pre-kill and tags \`uncommitted-worktree-work\`. On revival the continuation prompt leads with a commit-first directive, and a durable beacon-enabled commitment (\`GET /commitments\`) re-surfaces the obligation if the revived session stalls. An explicit operator/user kill is NEVER auto-revived on a dirty worktree alone. The die-again case is caught by the OrphanedWorkSentinel (\`GET /orphaned-work\`). Proactive: user asks "why did my build come back / why am I being told to commit?" → it was revived because its worktree had unsaved work; commit it or deliberately discard it.
425
426
 
@@ -0,0 +1,40 @@
1
+ # Upgrade Guide — vNEXT
2
+
3
+ <!-- assembled-by: assemble-next-md -->
4
+ <!-- bump: patch -->
5
+
6
+ ## What Changed
7
+
8
+ Fixed a latent bug where a **stale emergency-stop could silently strand the resume queue forever**. A MessageSentinel emergency-stop is topic-scoped in intent (it kills/clears/cancels only the topic the "stop" message arrived in — verified against the `routes.ts` handler), but it ALSO sets a **global** `resumeQueue.pause()` with no expiry and no re-arm. Nothing ever lifted that pause, so a stale emergency stop on one topic permanently disabled the revival net for *every other* topic. On 2026-06-14 an emergency stop from the previous day left the net off for ~18h; when an unrelated autonomous run was later recycled at its age cap, the resume-idle fix correctly queued it for revival — but a paused queue admits and never drains, so the run sat dead for ~4h until the operator messaged. This is the concrete cause behind the recurring "why do my sessions keep dying?" feeling.
9
+
10
+ `ResumeQueueDrainer.tick()` now runs two layers where the silent strand happened (both inert on a dry-run/observe-only queue, so the fleet default is unchanged):
11
+
12
+ - **Layer 1 (signal-only):** when the queue is paused with waiting work, raise ONE deduped `paused-waiting` aggregated attention notice — keyed on `(pausedAt | waitingCount)` so a new pause OR a growing backlog re-alerts, but a steady pause doesn't drip every tick. This alone makes the strand never silent again.
13
+ - **Layer 2 (bounded behavior change, on by default when the queue is live):** auto-resume a **stale** emergency/sentinel pause when an active-autonomous-run entry (`AGE_LIMIT_ACTIVE_RUN_REASON`) was queued **strictly more than** `staleEmergencyPauseAutoResumeMin` (default 60) minutes after the pause began, then fall through to normal draining. Every revived candidate still passes all deterministic reality gates AND the per-topic `operatorStopSince` validation — so a genuinely-stopped topic stays blocked. A FRESH "kill everything" is never auto-undone (the staleness window protects it), and a deliberate `autonomous stop-all` halt is NEVER auto-cleared.
14
+
15
+ `ResumeQueue.pause()` gains a deliberate-halt **upgrade**: a later non-auto-resumable reason (`autonomous stop-all`) overrides an existing auto-resumable emergency pause (the reverse never downgrades), so an operator's explicit halt issued during a stale pause is honored. The auto-resume predicate lives in ONE centralized, mechanically-enforced helper `isAutoResumableEmergencyPauseReason()` (a callsite-scan unit test pins every `ResumeQueue.pause()` reason's verdict, so a future rewording can't silently change behavior).
16
+
17
+ audience: agent-only
18
+ maturity: stable
19
+
20
+ Spec converged through `/spec-converge` (6 rounds; real cross-model review via codex-cli:gpt-5.5 + gemini-cli:gemini-2.5-pro). Future hardening (a structured `pauseKind` enum; broadening the trigger; original-context age) is tracked as evolution-action ACT-904, not deferred-and-forgotten.
21
+
22
+ ## What to Tell Your User
23
+
24
+ Nothing to announce proactively — the revival safety net ships in watch-only mode for most agents, so unless it was deliberately turned on, behavior is unchanged. If asked "why did my session restart by itself after a stop?" or "why is revival paused?": an emergency stop pauses the whole revival queue, and that pause used to never turn back off — silently stranding later, unrelated work. Now the agent tells you when revival is paused with work still waiting, and a stale emergency pause heals itself once a fresh active run has been recycled and queued well after the stop. Anything you actually stopped stays stopped — the per-topic stop record keeps blocking its revival even after the queue turns back on. If you'd rather the auto-heal stay off, just ask me to turn it off.
25
+
26
+ ## Summary of New Capabilities
27
+
28
+ A behavior-correctness fix (a permanent silent strand becomes a visible, self-healing one), plus two code-defaulted config knobs.
29
+
30
+ | Change | Effect |
31
+ |--------|--------|
32
+ | Layer 1 paused-waiting notice | A paused queue with waiting sessions raises ONE deduped attention notice instead of staying silent (re-alerts on a growing backlog) |
33
+ | Layer 2 stale-pause auto-resume | A stale emergency/sentinel pause auto-resumes when an active-run revival was queued > `staleEmergencyPauseAutoResumeMin` (60) min after the pause; per-topic `operatorStopSince` still guards genuinely-stopped topics |
34
+ | `pause()` deliberate-halt upgrade | A later `autonomous stop-all` overrides an in-flight auto-resumable emergency pause (never the reverse) |
35
+ | `isAutoResumableEmergencyPauseReason()` | One centralized, callsite-scan-tested predicate for which pause reasons are auto-resumable |
36
+ | `monitoring.resumeQueue.staleEmergencyPauseAutoResumeMin` (60) / `autoResumeStalePause` (true) | Code-defaulted knobs; `autoResumeStalePause:false` disables Layer 2 (Layer 1 always on) |
37
+
38
+ ## Evidence
39
+
40
+ Behavior-correctness fix proven from a real 2026-06-14 forensic chain (`logs/reap-log.jsonl`, `logs/resume-queue.jsonl`). All three test tiers green: `tests/unit/resume-queue-drainer.test.ts` (+20: Layer-1 once-per-episode + growing-backlog re-alert, dry-run silence, stale auto-resume happy path, exactly-at/just-over/malformed-timestamp boundaries, fresh-pause/stop-all/plain-mid-work negatives, `autoResumeStalePause:false`, post-resume `operatorStopSince` guard intact, both pause-overlap orderings, the closed-world predicate + mechanical `ResumeQueue.pause(` callsite scan; the suite fails for the right reason before the fix); `tests/integration/resume-queue-routes.test.ts` (+1: `GET /sessions/resume-queue` shows `paused:false` after a drainer auto-resume); `tests/e2e/reap-notify-resume-queue-lifecycle.test.ts` (+1: full lifecycle — emergency-stop pauses → active-run admitted later → drainer auto-resumes → entry `respawned`, the feature-is-alive assertion). `npx tsc --noEmit` clean; full lint + dark-gate green. Config keys are code-defaulted (not in ConfigDefaults — preserves the fleet flip), so no `migrateConfig` change is needed; CLAUDE.md awareness ships via the template + a dedicated idempotent `PostUpdateMigrator` block (so existing agents learn the self-heal too). Side-effects review: `upgrades/side-effects/resume-queue-stale-emergency-pause.md`.
@@ -0,0 +1,29 @@
1
+ # Upgrade Guide — vNEXT
2
+
3
+ <!-- assembled-by: assemble-next-md -->
4
+ <!-- bump: patch -->
5
+
6
+ ## What Changed
7
+
8
+ Extended the **WS1.1 dispatch-to-owner** machinery (Multi-Machine Session Pool) from Telegram to **Slack**, so a Slack conversation follows the user across machines. Previously the Slack adapter's inbound channel→session dispatch was LOCAL-ONLY: a Slack message bound a channel to whatever local session was already running and reused it, IGNORING pool ownership. A live multi-machine test surfaced the bug — a Slack channel's topic was transferred to a peer machine (`POST /pool/transfer` 200, ownership converged `reason:pinned`), but the NEXT Slack message in that channel was still injected into the already-running LOCAL session instead of being routed to the owner machine. Telegram's inbound path already followed a transfer; Slack's never did.
9
+
10
+ The fix routes Slack inbound through the SAME §L4 `SessionRouter` authority Telegram uses. The Slack `onMessage` handler now consults `_sessionRouter.route()` on the Slack routing key BEFORE local dispatch and short-circuits when `isRemotelyHandled` says the owner is a remote peer (it also honors the custody-ACK short-circuit so a durably-queued message isn't double-handled). The existing Slack dispatch body was extracted into a shared `slackInboundDispatch(message)` function so the live inbound path AND the owner-side mesh bridge replay through one code path (Structure > Willpower — they can't drift). A new pure module `src/core/SlackForwardBridge.ts` (`isSlackSessionKey` / `parseSlackRoutingKey` / `reconstructSlackMessage`) lets the owner-side `onAccepted` bridge distinguish a Slack routing key (non-numeric string `C…`/`C…:thread_ts`) from a Telegram topic key (pure number) and reconstruct the forwarded inbound Message. The whole feature is gated on the existing `_sessionPoolStage() !== 'dark'` — no new config key, route, or authority.
11
+
12
+ audience: agent-only
13
+ maturity: stable
14
+
15
+ ## What to Tell Your User
16
+
17
+ Nothing to announce proactively — the multi-machine session pool ships dark by default, so for any single-machine agent (and any agent that hasn't enabled the pool) nothing changes; the Slack inbound path is byte-identical to before. If asked: when you run the agent on more than one machine with the session pool enabled and you move a Slack conversation to another machine, the next Slack message in that channel now correctly goes to the machine that owns the conversation, instead of being answered by the stale session on the old machine. This is the same "follow the user across machines" behavior Telegram already had, now working for Slack.
18
+
19
+ ## Summary of New Capabilities
20
+
21
+ No standalone new capability and no new config key — this completes the existing dispatch-to-owner capability for a second platform (Slack), behind the already-shipped `multiMachine.sessionPool` dark gate.
22
+
23
+ ## Evidence
24
+
25
+ Observed before/after for the live multi-machine repro that surfaced the bug:
26
+
27
+ - **Before:** A Slack channel's topic was transferred/pinned to a peer machine (Mac Mini): `POST /pool/transfer` returned 200 and ownership converged (`reason:pinned`, `pendingReplacement:false`). The NEXT Slack message in that channel was STILL injected into the already-running LOCAL (Laptop) session — never routed to the owner machine. Laptop `logs/server.log` showed the message dispatched locally with no `[session-pool] slack route` line, because the Slack `onMessage` handler never consulted the SessionRouter at all. Telegram's inbound path under the identical scenario correctly forwarded to the owner.
28
+ - **After:** The Slack `onMessage` handler logs `[session-pool] slack route key=<C…> → action=forwarded owner=<peer> … acked=true` and short-circuits local dispatch (`… handled by owner … — not dispatching locally`); the owner machine's `onAccepted` bridge logs `[session-pool] owner-side Slack dispatch for forwarded key <C…>` and resumes/spawns the conversation there. The forwarded message is deduped on the owner's ledger (a redelivery ACKs `duplicate` and is not re-dispatched).
29
+ - **Reproduced in test, not just unit-passing:** `tests/integration/session-router-dispatch.test.ts` drives a Slack-shaped routing key (`C0123ABCD:1716200000.001500`) through the real MeshRpc transport and asserts `action: 'forwarded', owner: 'OWNER'` with the owner's ledger recording it exactly once. `tests/e2e/session-pool-delivermessage-e2e.test.ts` posts a signed forwarded Slack-keyed `deliverMessage` to a real `/mesh/rpc` route and asserts the owner-side bridge dispatches it to Slack with the right channel + thread + sender, while a numeric Telegram key routes to the Telegram path and a redelivery is deduped (`slackDispatched` length stays 1).
@@ -0,0 +1,212 @@
1
+ # Side-Effects Review — Resume queue: stale emergency-stop pause auto-recovery
2
+
3
+ **Version / slug:** `resume-queue-stale-emergency-pause`
4
+ **Date:** `2026-06-14`
5
+ **Author:** `Echo (instar-dev agent)`
6
+ **Second-pass reviewer:** `cross-model (codex-cli:gpt-5.5, gemini-cli:gemini-2.5-pro) via /spec-converge — 6 rounds`
7
+
8
+ ## Summary of the change
9
+
10
+ A paused resume queue used to early-return `{blocked:'paused'}` at the top of
11
+ `ResumeQueueDrainer.tick()`, silently stranding every waiting revival for the life of
12
+ the pause. A MessageSentinel emergency-stop (topic-scoped in its real intent) sets a
13
+ GLOBAL pause that never lifts, so a stale emergency stop on one topic permanently
14
+ disabled the revival net for all topics (the 2026-06-14 4-hour-silent-strand
15
+ incident). This change replaces that early-return with two layers, both at the same
16
+ chokepoint and both inert on a dry-run queue: **Layer 1** (signal-only) raises ONE
17
+ deduped `paused-waiting` aggregated attention notice when the queue is paused with
18
+ waiting work; **Layer 2** (bounded behavior change, on by default) auto-resumes a
19
+ STALE emergency/sentinel pause when an `AGE_LIMIT_ACTIVE_RUN_REASON` entry was queued
20
+ strictly more than `staleEmergencyPauseAutoResumeMin` (default 60) minutes after the
21
+ pause began, then falls through to normal draining. Files touched:
22
+ `src/monitoring/ResumeQueueDrainer.ts` (the two layers + 2 config keys),
23
+ `src/core/WorkEvidence.ts` (new `isAutoResumableEmergencyPauseReason` predicate),
24
+ `src/monitoring/ResumeQueue.ts` (`pause()` now upgrades an auto-resumable pause when a
25
+ deliberate halt arrives), `src/commands/server.ts` (thread the 2 code-defaulted keys),
26
+ `src/core/types.ts` (2 optional config keys), `src/scaffold/templates.ts` +
27
+ `src/core/PostUpdateMigrator.ts` (Agent Awareness), and the 3 test tiers.
28
+
29
+ ## Decision-point inventory
30
+
31
+ - `ResumeQueueDrainer.tick()` paused-queue branch — **modify** — was an
32
+ unconditional early-return; now runs Layer 1 (signal) + Layer 2 (a bounded
33
+ authority change that REMOVES a stale over-broad block).
34
+ - `ResumeQueue.pause()` overlap semantics — **modify** — first-writer-wins gains a
35
+ deliberate-halt UPGRADE (a non-auto-resumable reason overrides an existing
36
+ auto-resumable pause; never the reverse).
37
+ - `isAutoResumableEmergencyPauseReason()` — **add** — the centralized closed-world
38
+ predicate that decides which pause reasons are auto-resumable.
39
+
40
+ ## 1. Over-block
41
+
42
+ No new block/allow surface is ADDED. Layer 2 strictly REMOVES a block (a stale global
43
+ pause). The only way this could "over-block" is by FAILING to auto-resume when it
44
+ should — which is the safe direction (the queue stays paused, exactly today's
45
+ behavior, and Layer 1 keeps alerting). Concrete safe-side cases that intentionally do
46
+ NOT auto-resume: a fresh emergency pause (active-run queued ≤ threshold after
47
+ pausedAt), a deliberate `autonomous stop-all` pause, a plain mid-work entry (reason ≠
48
+ `AGE_LIMIT_ACTIVE_RUN_REASON`), a malformed `pausedAt`/`queuedAt`, `autoResumeStalePause:false`,
49
+ or a dry-run queue. All keep the pause — none wrongly clear it.
50
+
51
+ ## 2. Under-block
52
+
53
+ Layer 2 could in principle clear a pause the operator wanted kept. Mitigations: (a)
54
+ the staleness window (default 60 min) means a fresh "kill everything" is never
55
+ auto-undone; (b) only the strongest re-engagement signal
56
+ (`AGE_LIMIT_ACTIVE_RUN_REASON`, queued AFTER the stop) triggers it; (c) **every topic
57
+ the operator actually stopped stays blocked by the per-topic `operatorStopSince`
58
+ validation in `validateReality` even after the queue auto-resumes** — verified by a
59
+ unit test (`operatorStopSince:() => true` still yields `invalidated:operator-stop`
60
+ post auto-resume); (d) a deliberate `autonomous stop-all` is never auto-cleared and
61
+ now UPGRADES an in-flight emergency pause so a later deliberate halt wins. The known
62
+ accepted residual (gemini r2/r5): an autonomous run that was contextually old but only
63
+ age-reaped after the stop satisfies the predicate — accepted because the per-topic
64
+ guard still blocks a genuinely-stopped topic, and the entry queued-after-stop
65
+ genuinely reflects a run left running through the stop window.
66
+
67
+ ## 3. Level-of-abstraction fit
68
+
69
+ Correct layer. Layer 1 is a low-cost SIGNAL feeding the EXISTING aggregated attention
70
+ surface (`raiseAggregated` → the single `resume-queue:aggregate` P17 item) — it does
71
+ not run parallel to a smarter gate, it reuses the one that exists. Layer 2 is a
72
+ deterministic predicate over durable queue state at the drainer chokepoint where the
73
+ strand happens; it does not re-implement any existing primitive, and it explicitly
74
+ DEFERS the real per-topic safety decision to the already-existing finer-grained
75
+ `operatorStopSince` gate rather than duplicating it. The pause-reason classification
76
+ lives in one centralized, tested predicate rather than being re-derived per callsite.
77
+
78
+ ## 4. Signal vs authority compliance
79
+
80
+ **Required reference:** [docs/signal-vs-authority.md](../../docs/signal-vs-authority.md)
81
+
82
+ - [x] No — Layer 1 produces a signal consumed by the existing aggregated attention gate.
83
+ - [x] Yes — but the logic NARROWS authority toward a finer-grained gate that already
84
+ exists. Layer 2 REMOVES an over-broad, stale, blunt block (the global pause) only
85
+ when a precise deterministic staleness predicate holds; it does not ADD a new
86
+ brittle blocker, and every revived candidate still passes ALL deterministic reality
87
+ gates AND the per-topic `operatorStopSince` validation. The substring reason match
88
+ is anchored to a closed, internally-generated set of pause reasons, centralized in
89
+ one tested predicate, with a mechanical `src/`-callsite-scan test pinning every
90
+ current reason's verdict so a future reason can't silently change behavior; a
91
+ non-match resolves to the SAFE side (pause stays). Per signal-vs-authority, the
92
+ change is strictly additive to safety on the stopped topic and removes a
93
+ false-negative-on-revival for unrelated topics.
94
+
95
+ ## 5. Interactions
96
+
97
+ - **Shadowing:** the new branch runs in place of the old `if (queue.isPaused()) return`
98
+ early-return — it can either keep the pause (same as before) or fall through to the
99
+ rest of `tick()` (which runs all the existing calm/quota/reality gates unchanged). No
100
+ existing check is shadowed; the fall-through ADDS the gates rather than skipping them.
101
+ - **Double-fire:** Layer 1 dedupes on `(pausedAt | waitingCount)` so it cannot drip
102
+ every tick; the existing `raiseResumeAggregated` in server.ts collapses all kinds
103
+ into one rolling item. No double-notice.
104
+ - **Races:** `pause()`/`unpause()` are synchronous mutations on the single-writer,
105
+ lockfile-guarded, in-memory-authoritative queue; the drainer's `ticking` re-entrancy
106
+ guard prevents overlapping ticks. `unpause()` correctly accumulates `frozenMs` into
107
+ each waiting entry's TTL clock (uses the existing lever, not a raw flag).
108
+ - **Feedback loops:** the auto-resume → spawn → (if it re-reaps) re-enqueue path is
109
+ already bounded by the resurrection cap in `ResumeQueue.considerEnqueue`. A topic
110
+ that keeps getting reaped-and-revived still hits `maxResurrections` and gives up
111
+ loudly — Layer 2 does not bypass that cap.
112
+
113
+ ## 6. External surfaces
114
+
115
+ - **Other agents on the same machine:** none — per-machine queue.
116
+ - **Install base:** the two config keys are code-defaulted (absent from
117
+ ConfigDefaults). Existing agents pick up the new drainer behavior on update+restart.
118
+ - **External systems:** none. No Telegram/Slack/GitHub/Cloudflare call added beyond
119
+ the existing attention surface (Layer 1 reuses it).
120
+ - **Persistent state:** `unpause()` mutates the existing `state/resume-queue.json`
121
+ pause fields and appends a new `pause-upgraded`/`auto-resumed-stale-pause`/`paused-waiting`
122
+ audit event in `logs/resume-queue.jsonl`. No schema field added to entries or
123
+ persisted state. The Layer-1 dedupe marker is in-memory only.
124
+ - **Timing:** the staleness comparison uses the queue's single injected clock; strict
125
+ `>`; malformed timestamps fail safe.
126
+ - **Operator surface (Mobile-Complete):** no NEW operator-facing action. The existing
127
+ `POST /sessions/resume-queue/resume` lever (dashboard/phone-reachable) is unchanged;
128
+ Layer 1 points the operator at "ask me to resume it, or resume it from the dashboard"
129
+ (no raw API in the user-facing body).
130
+
131
+ ## 6b. Operator-surface quality (Operator-Surface Quality standard)
132
+
133
+ No operator surface — not applicable. This change touches no dashboard renderer/markup
134
+ file, no approval page, and no grant/revoke/secret-drop form. (The CLAUDE.md template
135
+ + migrator edits are agent-awareness text, not an operator UI.)
136
+
137
+ ## 7. Multi-machine posture (Cross-Machine Coherence)
138
+
139
+ **Machine-local BY DESIGN.** The resume queue is one-queue-per-machine: a
140
+ single-writer lockfile (`state/resume-queue.lock`, pid + hostname + heartbeat), a
141
+ host-local state dir, and a hard invariant that a foreign-host lock is refused (never
142
+ probed/reclaimed). Pause/unpause state lives in that per-machine
143
+ `state/resume-queue.json`. Both layers are pure additions to the per-machine drainer
144
+ tick: Layer 1 routes through the existing per-machine `resume-queue:aggregate`
145
+ attention item (no new cross-machine surface, no generated URL); Layer 2's `unpause()`
146
+ mutates only the local queue and acts only on locally-queued entries. No timestamp
147
+ crosses a machine boundary (`pausedAt`/`queuedAt` are always same-process-stamped). A
148
+ topic transfer already closes the source session and the queue does not follow a moved
149
+ topic, so there is no strand-on-transfer concern. The change emits a user-facing notice (Layer 1)
150
+ — it routes through the per-machine attention surface and is deduped per pause episode,
151
+ so no one-voice violation; holds durable state (the per-machine pause record, which
152
+ does not strand on transfer because the queue is machine-local); generates no URLs.
153
+
154
+ ## 8. Rollback cost
155
+
156
+ - **Hot-fix release:** pure code change — revert the PR and ship as the next patch.
157
+ - **Data migration:** none. No persisted entry/state schema field added. Existing
158
+ `state/resume-queue.json` files are read/written exactly as before (the new audit
159
+ events are append-only log rows).
160
+ - **Agent state repair:** none. `monitoring.resumeQueue.autoResumeStalePause: false`
161
+ disables Layer 2 once the config is reloaded (the flag is read live at tick time, but
162
+ a config change itself requires a session/server restart to load). Reverting restores
163
+ the prior permanent-pause behavior exactly.
164
+ - **User visibility:** no regression during a rollback window. The worst case after a
165
+ rollback is the return of the original bug (a stale pause can strand again) — but
166
+ Layer 1's alert and Layer 2's behavior are independent, and Layer 1 carries no
167
+ behavioral risk.
168
+
169
+ ## Migration parity
170
+
171
+ - **Config defaults:** `staleEmergencyPauseAutoResumeMin` (60) and
172
+ `autoResumeStalePause` (true) are CODE-defaulted in `ResumeQueueDrainerConfig` /
173
+ `DEFAULT_RESUME_DRAINER_CONFIG` and threaded from `monitoring.resumeQueue.*` in
174
+ server.ts via `?? default` — deliberately NOT frozen into ConfigDefaults (preserving
175
+ the fleet flip, consistent with the other resumeQueue.* keys). **No `migrateConfig()`
176
+ change needed** — existing agents pick up the new behavior on update+restart with the
177
+ code defaults.
178
+ - **CLAUDE.md template:** added a bullet to `generateClaudeMd()` (new agents via init)
179
+ AND a dedicated idempotent `PostUpdateMigrator` block sniffed on the unique phrase
180
+ `autoResumeStalePause` (existing agents, even those that already have the resume-queue
181
+ section — the parent block's `/sessions/resume-queue` sniff would otherwise skip
182
+ them). Both are content-sniffed and safe to run repeatedly.
183
+ - **No hook/skill changes.**
184
+
185
+ ## Conclusion
186
+
187
+ The review (6 spec-converge rounds with two real cross-model reviewers) hardened the
188
+ change substantially: the topic-scoped emergency-stop premise went from asserted to
189
+ code-cited; `pause()` gained a deliberate-halt upgrade so a later `stop-all` is honored
190
+ over an in-flight emergency pause; the substring match was centralized and mechanically
191
+ enforced by a callsite-scan test; Layer-1 dedupe became count-aware; clock/boundary
192
+ discipline and the per-topic-guard dependency were made explicit and test-backed. The
193
+ change is strictly additive to safety on a genuinely-stopped topic and removes a
194
+ false-negative-on-revival for unrelated topics. All three test tiers are green (unit
195
+ 51, integration 9, e2e 8). Clear to ship.
196
+
197
+ ## Second-pass review (if required)
198
+
199
+ **Reviewer:** cross-model external pass (codex-cli:gpt-5.5 + gemini-cli:gemini-2.5-pro), 6 rounds
200
+ **Independent read of the artifact: concur**
201
+
202
+ The external reviewers converged to MINOR ISSUES with no architectural objection; the
203
+ final round (codex r6) produced zero new material findings. Every prior-round finding
204
+ is resolved in the spec/code or recorded as an accepted tradeoff / Future item
205
+ (`pauseKind` enum), with the lone fresh round-5 finding (pause-upgrade) implemented.
206
+
207
+ ## Evidence pointers
208
+
209
+ - Spec: `docs/specs/resume-queue-stale-emergency-pause.md` (frontmatter
210
+ `review-convergence`, `cross-model-review: codex-cli:gpt-5.5`, `approved: true`).
211
+ - Convergence report: `docs/specs/reports/resume-queue-stale-emergency-pause-convergence.md`.
212
+ - Tests: `tests/unit/resume-queue-drainer.test.ts` (51), `tests/integration/resume-queue-routes.test.ts` (9), `tests/e2e/reap-notify-resume-queue-lifecycle.test.ts` (8).
@@ -0,0 +1,127 @@
1
+ # Side-Effects Review — Slack inbound dispatch consults pool placement (WS1.1 Slack arm)
2
+
3
+ **Version / slug:** `slack-pool-dispatch-to-owner`
4
+ **Date:** `2026-06-14`
5
+ **Author:** `Instar Agent (echo)`
6
+ **Second-pass reviewer:** `dispatch-reviewer subagent (Phase 5 — touches inbound dispatch)`
7
+
8
+ ## Summary of the change
9
+
10
+ The Slack adapter's inbound channel→session dispatch was LOCAL-ONLY: a Slack message bound a channel to whatever local session was already running and reused it, IGNORING pool ownership. So when a Slack channel's topic was transferred/pinned to a peer machine (ownership converged, `reason:pinned`), the NEXT Slack message in that channel was still injected into the already-running LOCAL session instead of being routed to the owner machine. Telegram's inbound path already followed a transfer (WS1.1 dispatch-to-owner: SessionRouter consultation + the owner-side `deliverMessage` bridge). This change extends that SAME machinery to Slack: (1) the Slack `onMessage` handler now consults `_sessionRouter.route()` on the Slack routing key BEFORE local dispatch and short-circuits when `isRemotelyHandled` says the owner is a remote peer; (2) the existing Slack dispatch body was extracted into a shared `slackInboundDispatch(message)` function so the live inbound path AND the owner-side bridge replay through one code path; (3) a new pure module `src/core/SlackForwardBridge.ts` (`isSlackSessionKey` / `parseSlackRoutingKey` / `reconstructSlackMessage`) lets the owner-side `onAccepted` bridge distinguish a Slack key (non-numeric string `C…`/`C…:ts`) from a Telegram topic key (pure number) and reconstruct the inbound Message. Files: `src/commands/server.ts` (Slack `onMessage` + owner-side `onAccepted` branch), `src/core/SlackForwardBridge.ts` (new). The whole feature is gated on the existing `_sessionPoolStage() !== 'dark'` — when dark (the fleet default and any single-machine install) the Slack path is byte-identical to today.
11
+
12
+ ## Decision-point inventory
13
+
14
+ - `Slack onMessage → SessionRouter.route()` (src/commands/server.ts) — **modify** — Slack inbound now consults the §L4 SessionRouter (the existing dispatch authority) before local dispatch, mirroring Telegram. New consultation, not a new authority.
15
+ - `Owner-side onAccepted bridge` (src/commands/server.ts) — **modify** — the forwarded-deliverMessage handler gained a Slack arm: a non-numeric session key reconstructs a Slack Message and replays it through `slackInboundDispatch`; a numeric key keeps the unchanged Telegram path.
16
+ - `isSlackSessionKey` (src/core/SlackForwardBridge.ts) — **add** — a structural validator (numeric vs non-numeric key) selecting WHICH dispatch arm to use. Holds no block/allow authority.
17
+ - `Slack emergency-stop / pause sentinel intercept` — **pass-through** — moved verbatim from the old `onMessage` closure into the new `onMessage` handler; runs on the receiving machine (local-process actions), never forwarded. Unchanged behavior.
18
+
19
+ ---
20
+
21
+ ## 1. Over-block
22
+
23
+ **What legitimate inputs does this change reject that it shouldn't?**
24
+
25
+ No block/allow surface — over-block not applicable. The change only ROUTES an inbound message (local dispatch vs forward-to-owner vs durable-queue custody). No Slack message is ever rejected by this change. When the SessionRouter consultation throws, the code falls through to today's local dispatch (fail-safe); when the pool is dark the consultation is skipped entirely.
26
+
27
+ ---
28
+
29
+ ## 2. Under-block
30
+
31
+ **What failure modes does this still miss?**
32
+
33
+ No block/allow surface — under-block not applicable. As a routing concern, the residual gaps are: (a) if the owner machine advertises but cannot durably receive (`ownerSupportsForward` false / version skew), the SessionRouter's existing conservative path keeps the message in OUR durable queue rather than forwarding — same as Telegram, by design. (b) The Telegram path also has an inbound-queue ORDERING gate (`_inboundQueue.hasQueued`) that enqueues a live message behind existing queued entries; I mirrored the custody-ACK short-circuit but NOT that ordering pre-gate. Impact is bounded: the inbound queue ships dark, and without the pre-gate a live Slack message for an in-custody session would fall through to the router (which itself custody-checks) rather than strictly ordering behind the queue — a parity refinement, not a correctness break for the primary follow-the-transfer fix. Tracked as a follow-up below, not deferred silently.
34
+
35
+ ---
36
+
37
+ ## 3. Level-of-abstraction fit
38
+
39
+ **Is this at the right layer?**
40
+
41
+ Yes. The dispatch authority (SessionRouter, §L4) already exists and is platform-agnostic — it keys on a `string` `sessionKey` and makes the place/forward/queue decision. The right fix is to FEED the Slack inbound path INTO that existing authority, exactly as Telegram does — not to build a parallel Slack-specific ownership resolver. The new `SlackForwardBridge` helpers are low-level structural primitives (key discrimination + Message reconstruction) with no decision authority — the correct layer for them. `parseSlackRoutingKey` deliberately mirrors `SlackAdapter.parseRoutingKey` so the owner-side reconstruction matches the live path's key derivation.
42
+
43
+ ---
44
+
45
+ ## 4. Signal vs authority compliance
46
+
47
+ **Required reference:** [docs/signal-vs-authority.md](../../docs/signal-vs-authority.md)
48
+
49
+ **Does this change hold blocking authority with brittle logic?**
50
+
51
+ - [x] No — this change produces a signal consumed by an existing smart gate.
52
+ - [ ] No — this change has no block/allow surface.
53
+ - [ ] Yes — but the logic is a smart gate with full conversational context.
54
+ - [ ] ⚠️ Yes, with brittle logic — STOP.
55
+
56
+ The change consults the existing SessionRouter authority (the single owner of the §L4 dispatch decision) and acts on its `RouteOutcome`. `isSlackSessionKey` is a structural validator (numeric vs non-numeric) used only to select the dispatch arm in the owner-side bridge — it never blocks a message; both arms dispatch. No new brittle blocker with authority was introduced.
57
+
58
+ ---
59
+
60
+ ## 5. Interactions
61
+
62
+ **Does this interact with existing checks, recovery paths, or infrastructure?**
63
+
64
+ - **Shadowing:** The SessionRouter consultation runs AFTER the sentinel emergency-stop/pause intercept (preserved at the top of `onMessage`) and BEFORE local dispatch. Emergency-stop/pause still short-circuit first (correct — they're local actions). When the pool is dark the consultation block is skipped so the sentinel + local path run exactly as before.
65
+ - **Double-fire:** The exact bug this fixes is double-dispatch (spawn-on-owner AND inject-locally). `isRemotelyHandled(outcome, _meshSelfId)` short-circuits local dispatch whenever the session ended up on another machine, and the custody-ACK short-circuit prevents local fall-through when the durable queue took custody. The owner-side bridge dedupes via the existing `recordReceipt`/ledger (a redelivered messageId ACKs `duplicate` and is NOT re-dispatched — proven in the e2e test).
66
+ - **Races:** The SessionRouter serializes per `sessionKey` (its `chains` map), so two Slack messages for the same routing key dispatch in order, one in-flight — same guarantee Telegram gets. The shared `slackInboundDispatch` reads `getSessionForChannel(routingKey)` the same way the old closure did; no new shared mutable state was introduced.
67
+ - **Feedback loops:** None. The owner-side bridge calls `markRemoteInjected`/`reportPeerInjectError` on the inbound queue (best-effort, gated on `_inboundQueue` which is dark by default) exactly as the Telegram bridge does.
68
+
69
+ ---
70
+
71
+ ## 6. External surfaces
72
+
73
+ **Does this change anything visible outside the immediate code path?**
74
+
75
+ - **Other agents on the same machine:** none.
76
+ - **Other users of the install base:** none while dark (fleet default). When the session pool is enabled on a multi-machine Slack-using agent, a Slack conversation now correctly follows a topic transfer between machines — the intended, user-positive behavior.
77
+ - **External systems (Slack):** the owner-side bridge fetches channel history via the Slack API on the OWNER machine (Slack history is server-side, reachable from any machine), and replies via the same `slack-reply.sh` relay path. No new Slack API surface; reuses existing adapter methods.
78
+ - **Persistent state:** none new. Reuses the existing MessageProcessingLedger (receipt dedupe) and the dark-by-default inbound queue. No new config keys, so no migration parity work needed.
79
+ - **Timing/runtime:** the forward is async/bounded by the SessionRouter's existing deliver retry/timeout config — unchanged.
80
+ - **Operator surface (Mobile-Complete):** No operator-facing actions added — this is internal dispatch routing.
81
+
82
+ ---
83
+
84
+ ## 6b. Operator-surface quality
85
+
86
+ No operator surface — not applicable. The change touches no `dashboard/*` file, approval page, or grant/revoke/secret-drop form.
87
+
88
+ ---
89
+
90
+ ## 7. Multi-machine posture (Cross-Machine Coherence)
91
+
92
+ **Posture: replicated (dispatch-to-owner).** This feature IS a multi-machine coherence feature — it makes a Slack conversation follow the user across machines. The replication path is the §L4 SessionRouter + the `deliverMessage` mesh verb + the journal-backed `SessionOwnershipRegistry` (the exact path Telegram already uses, WS1.1). Ownership is a LOCAL read of the placement view; a remote-owned conversation forwards over the mesh to the owner, which spawns/injects with CONTINUATION context.
93
+
94
+ - **User-facing notices / one-voice:** The dispatch itself produces no user-facing notice; the conversation's replies come from exactly one session (the owner's), which is the one-voice property this fix RESTORES (before it, both the stale local session and the new owner could answer).
95
+ - **Durable state on topic transfer:** No new durable state strands — the owner-side bridge reconstructs the Message from the forwarded payload and the session registry is per-machine, resolved fresh on each side.
96
+ - **URLs across machine boundaries:** none generated.
97
+ - **Single-machine / dark:** strict no-op — gated on `_sessionPoolStage() !== 'dark'`; a single-machine or dark-pool agent runs the byte-identical local dispatch.
98
+
99
+ ---
100
+
101
+ ## 8. Rollback cost
102
+
103
+ Pure code change — revert the commit and ship as the next patch. No persistent state is created (reuses existing ledger + dark inbound queue), no new config key, no migration. While dark (fleet default) the change is inert, so a rollback has zero user-visible effect on the install base. On a multi-machine Slack agent with the pool enabled, rollback simply restores the prior local-only Slack dispatch (the pre-fix behavior).
104
+
105
+ ---
106
+
107
+ ## Conclusion
108
+
109
+ The review produced no design changes — the implementation already feeds the existing SessionRouter authority rather than adding a parallel brittle blocker, and is gated dark/additive. One parity refinement (the inbound-queue ORDERING pre-gate that the Telegram path has) is surfaced as a tracked follow-up rather than silently deferred; it does not affect the correctness of the primary follow-the-transfer fix because the SessionRouter custody-checks regardless. The change is clear to ship behind the existing dark pool gate. (Separately surfaced as a tracked follow-up, NOT fixed here to avoid scope-creep: ~33 `[mesh-rpc] rejected session-status: stale-timestamp` rejections observed in a live multi-machine log despite `/pool` reporting `clockSkew:ok` — needs live cross-machine timestamp-vs-receipt diagnosis; widening the 30s tolerance blindly would weaken the replay-window guard.)
110
+
111
+ ---
112
+
113
+ ## Second-pass review (if required)
114
+
115
+ **Reviewer:** dispatch-reviewer subagent
116
+ **Independent read of the artifact: concur**
117
+
118
+ Concur with the review. The change feeds the existing §L4 SessionRouter authority rather than adding a new blocker — `isSlackSessionKey` is a pure numeric-vs-string validator selecting a dispatch arm (both arms dispatch; no block authority), so it is signal-vs-authority compliant. Double-dispatch is closed on both ends: the inbound path short-circuits via `isRemotelyHandled` + the custody-ACK check (identical arms to Telegram's), and the owner-side Slack branch sits inside `DeliverMessageHandler`'s `recordReceipt`-gated `onAccepted`, so a redelivered forward ACKs `duplicate` and never re-dispatches (proven by the e2e test). On a `route()` throw the path falls through to local dispatch (fail-safe, never drops), and the whole block is dark-gated so single-machine/dark agents are byte-identical. The admitted Telegram-parity divergence (the inbound-queue ordering pre-gate) is correctness-neutral for the primary fix because the inbound queue ships dark and the SessionRouter custody-checks regardless — acceptable as a tracked follow-up.
119
+
120
+ ---
121
+
122
+ ## Evidence pointers
123
+
124
+ - Unit: `tests/unit/SlackForwardBridge.test.ts` (8 tests — both sides of the Slack-vs-Telegram key boundary + reconstruction), `tests/unit/slack-thread-session-wiring.test.ts` (21 tests — re-anchored on `slackInboundDispatch` + new WS1.1 pool-routing wiring assertions).
125
+ - Integration: `tests/integration/session-router-dispatch.test.ts` (Slack-shaped routing key forwards to the remote owner over real MeshRpc).
126
+ - E2E "feature alive": `tests/e2e/session-pool-delivermessage-e2e.test.ts` (owner-side `onAccepted` dispatches a forwarded Slack key to Slack with channel+thread+sender; a numeric Telegram key routes to the Telegram path; redelivery deduped).
127
+ - Wiring ratchet preserved: `tests/unit/session-pool-activation-wiring.test.ts` updated for the split dark-gate / `!telegram` gate.