agent-tempo 1.7.0-beta.11 → 1.7.0-beta.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/package.json +1 -1
- package/dist/client/subscribe.d.ts +10 -0
- package/dist/client/subscribe.js +2 -0
- package/dist/pi/mission-control/board.d.ts +9 -8
- package/dist/pi/mission-control/extension.d.ts +64 -0
- package/dist/pi/mission-control/extension.js +191 -22
- package/dist/pi/mission-control/render.d.ts +12 -0
- package/dist/pi/mission-control/render.js +40 -15
- package/package.json +3 -2
package/dashboard/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-tempo-dashboard",
|
|
3
3
|
"private": true,
|
|
4
|
-
"version": "1.7.0-beta.11",
|
|
4
|
+
"version": "1.7.0-beta.12",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"description": "Web dashboard for agent-tempo. Bundled into the npm package; served by the daemon at /dashboard/*.",
|
|
7
7
|
"scripts": {
|
|
package/dist/client/subscribe.d.ts
CHANGED
|
@@ -81,6 +81,16 @@ export interface SubscribeDeps {
|
|
|
81
81
|
* present (Node 20), the wrapper falls back to fetch.
|
|
82
82
|
*/
|
|
83
83
|
EventSourceImpl?: typeof EventSource;
|
|
84
|
+
/**
|
|
85
|
+
* #826 — force the fetch transport even when a native `EventSource` is
|
|
86
|
+
* available and no token is set. The fetch path is the only one that
|
|
87
|
+
* surfaces a permanent **401/404** as a thrown {@link SubscribeHttpError};
|
|
88
|
+
* native `EventSource` swallows those into its own silent reconnect cycle.
|
|
89
|
+
* The mission-control board needs that hard-error visibility (404 → `gone`,
|
|
90
|
+
* 401 → auth hint), so it sets this. TUI / dashboard leave it unset and keep
|
|
91
|
+
* the auto-selection (native `EventSource` on a tokenless loopback board).
|
|
92
|
+
*/
|
|
93
|
+
forceFetch?: boolean;
|
|
84
94
|
/**
|
|
85
95
|
* Override sleep — used by tests to fast-forward backoff. Accepts an
|
|
86
96
|
* `AbortSignal` so the wrapper can wake early on abort.
|
package/dist/client/subscribe.js
CHANGED
|
@@ -216,6 +216,8 @@ function makeIterator(args) {
|
|
|
216
216
|
* for `Authorization: Bearer …` and is the only option in Node 20.
|
|
217
217
|
*/
|
|
218
218
|
function canUseEventSource(deps) {
|
|
219
|
+
if (deps.forceFetch)
|
|
220
|
+
return false; // #826 — caller needs throw-on-permanent
|
|
219
221
|
if (deps.token)
|
|
220
222
|
return false;
|
|
221
223
|
return resolveEventSource(deps) !== undefined;
|
|
package/dist/pi/mission-control/board.d.ts
CHANGED
|
@@ -41,14 +41,15 @@ export declare const DEFAULT_TAIL_LIMIT = 200;
|
|
|
41
41
|
*
|
|
42
42
|
* - `'connecting'` — initial / post-rebind, before the first coarse event lands.
|
|
43
43
|
* - `'live'` — at least one coarse event has arrived on the current connection.
|
|
44
|
-
* - `'reconnecting'` — the coarse stream
|
|
45
|
-
*
|
|
46
|
-
* not cleared).
|
|
47
|
-
*
|
|
48
|
-
*
|
|
49
|
-
*
|
|
50
|
-
*
|
|
51
|
-
*
|
|
44
|
+
* - `'reconnecting'` — the coarse stream ended OR went silent past the watchdog
|
|
45
|
+
* threshold (#826), and the board is RE-ARMING. Rows are KEPT (rendered stale,
|
|
46
|
+
* not cleared). #828: the extension now auto-re-subscribes with bounded
|
|
47
|
+
* equal-jitter backoff (genuine transient blips are still swallowed INSIDE
|
|
48
|
+
* `createSubscribe`, so the board only reaches here on a real stream-death).
|
|
49
|
+
* The variant is carried on `connectionDetail` (no new enum value): an arming
|
|
50
|
+
* detail → `[RECONNECTING]`, a settled detail (re-arm capped at 30s) →
|
|
51
|
+
* `[STREAM DOWN]`, and the 401-auth path (which does NOT auto-re-arm — a
|
|
52
|
+
* re-sub would just 401 again) keeps the `[STREAM ENDED]` + set-token hint.
|
|
52
53
|
* - `'gone'` — a hard 404 on the per-ensemble stream: the ensemble's maestro is
|
|
53
54
|
* gone. {@link setConnection} CLEARS the player list on this transition and the
|
|
54
55
|
* extension STOPS the stream; the renderer shows "ENSEMBLE DESTROYED".
|
|
package/dist/pi/mission-control/extension.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { type PiRole } from '../../config';
|
|
2
|
+
import { createSubscribe } from '../../client/subscribe';
|
|
2
3
|
import { type BoardModel, type CommandLevel } from './board';
|
|
3
4
|
import { MissionControlActions, type ActionResult } from './actions';
|
|
4
5
|
import { type InfraProgress } from '../../cli/ensure-infra';
|
|
@@ -32,6 +33,14 @@ export interface MissionControlDeps {
|
|
|
32
33
|
* `'player'` and `'none'` both keep it dormant.
|
|
33
34
|
*/
|
|
34
35
|
role?: PiRole;
|
|
36
|
+
/**
|
|
37
|
+
* #826/#828 — override the coarse-stream subscribe factory (test seam).
|
|
38
|
+
* Defaults to {@link createSubscribe}. Lets a fake-timer test inject a mock
|
|
39
|
+
* `subscribe` generator to drive the watchdog + re-arm loop deterministically
|
|
40
|
+
* and assert the single-loop invariant (subscribe called exactly N times, not
|
|
41
|
+
* N+1). Production never sets it.
|
|
42
|
+
*/
|
|
43
|
+
createSubscribeImpl?: typeof createSubscribe;
|
|
35
44
|
}
|
|
36
45
|
/**
|
|
37
46
|
* Infra-bootstrap seam (#700 P1). Defaults to the real {@link ensureInfra}; the
|
|
@@ -93,6 +102,61 @@ export declare function classifyCoarseStreamEnd(err: unknown, aborted: boolean):
|
|
|
93
102
|
connection: 'gone' | 'reconnecting';
|
|
94
103
|
detail?: string;
|
|
95
104
|
} | null;
|
|
105
|
+
/** #826 — watchdog poll cadence (how often we compare now − lastCoarseEventAt). */
|
|
106
|
+
export declare const WATCHDOG_TICK_MS = 5000;
|
|
107
|
+
/**
|
|
108
|
+
* #826 — board-level staleness threshold. The daemon emits a `heartbeat` SSE
|
|
109
|
+
* event every ≤10s on a live `/v1/events` stream, so >35s of TOTAL silence
|
|
110
|
+
* (3.5× heartbeat) means the stream is wedged/dead — a half-open socket from a
|
|
111
|
+
* hard `agent-tempo down` (ECONNREFUSED / dead TCP), which neither a 404 nor
|
|
112
|
+
* force-fetch's INTERNAL retry surfaces (that loop reconnects forever, never
|
|
113
|
+
* throws). Sits ABOVE the fetch loop's 30s internal backoff cap, so a healthy
|
|
114
|
+
* cycling loop still receiving heartbeats never trips it — this gap IS the
|
|
115
|
+
* no-double-retry boundary (watchdog = safety net ABOVE the transport).
|
|
116
|
+
*/
|
|
117
|
+
export declare const COARSE_STALE_MS = 35000;
|
|
118
|
+
/**
|
|
119
|
+
* #828 — after this many consecutive failed re-arms the board stops claiming
|
|
120
|
+
* it's actively "reconnecting" and settles to the honest "[STREAM DOWN] —
|
|
121
|
+
* retrying every 30s" wording. Re-arm itself NEVER stops (a permanently silent
|
|
122
|
+
* wedge is the #752 silent-wedge class); only the label changes. ~5 steps takes
|
|
123
|
+
* the backoff ramp to its 30s cap.
|
|
124
|
+
*/
|
|
125
|
+
export declare const REARM_SETTLE_THRESHOLD = 5;
|
|
126
|
+
/**
|
|
127
|
+
* #828 — equal-jitter backoff for the Nth re-arm attempt: `b/2 + rand(0, b/2)`
|
|
128
|
+
* where `b = min(1s·2^attempt, 30s)`. `Math.random()` is fine here — this is
|
|
129
|
+
* client code, not workflow code (the determinism rule does not apply). Jitter
|
|
130
|
+
* spreads re-arms so a fleet of boards doesn't thundering-herd a recovering
|
|
131
|
+
* daemon. `randomFn` is injectable for deterministic tests.
|
|
132
|
+
*/
|
|
133
|
+
export declare function rearmDelayMs(attempt: number, randomFn?: () => number): number;
|
|
134
|
+
/**
|
|
135
|
+
* #828 — the reconnecting sub-variant wording for the Nth re-arm attempt: still
|
|
136
|
+
* ramping (< {@link REARM_SETTLE_THRESHOLD}) → "attempting to reconnect…";
|
|
137
|
+
* settled (≥) → "retrying every 30s". Carried on the model's `connectionDetail`
|
|
138
|
+
* (NO new BoardConnection enum value) and read by the renderer to pick the
|
|
139
|
+
* marker. Pure + exported for unit testing.
|
|
140
|
+
*/
|
|
141
|
+
export declare function reconnectDetailForAttempt(attempt: number): string;
|
|
142
|
+
/**
|
|
143
|
+
* #828 — should a coarse stream-END auto-re-arm? Gate (architect ruling):
|
|
144
|
+
* - `null` (aborted teardown/rebind) → no
|
|
145
|
+
* - `gone` (404 — maestro torn down; a re-sub just 404s) → no (terminal by design)
|
|
146
|
+
* - `reconnecting` WITH a detail (the 401 auth path — tight-looping a
|
|
147
|
+
* guaranteed-fail) → no; keep the set-token hint
|
|
148
|
+
* - `reconnecting` WITHOUT a detail (generic stream-drop / normal-end) → yes
|
|
149
|
+
* Pure + exported for unit testing.
|
|
150
|
+
*/
|
|
151
|
+
export declare function shouldRearmOnStreamEnd(end: {
|
|
152
|
+
connection: 'gone' | 'reconnecting';
|
|
153
|
+
detail?: string;
|
|
154
|
+
} | null): boolean;
|
|
155
|
+
/**
|
|
156
|
+
* #826 — is the coarse stream stale (silent past {@link COARSE_STALE_MS})?
|
|
157
|
+
* `lastEventAt === 0` means "not connected yet" → never stale. Pure.
|
|
158
|
+
*/
|
|
159
|
+
export declare function isCoarseStale(lastEventAt: number, now: number): boolean;
|
|
96
160
|
/**
|
|
97
161
|
* The operator-command + board controller. Holds the model + the action client;
|
|
98
162
|
* command methods are independently unit-testable with a fake actions + ctx.
|
|
package/dist/pi/mission-control/extension.js
CHANGED
|
@@ -33,13 +33,17 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
33
33
|
};
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.Controller = void 0;
|
|
36
|
+
exports.Controller = exports.REARM_SETTLE_THRESHOLD = exports.COARSE_STALE_MS = exports.WATCHDOG_TICK_MS = void 0;
|
|
37
37
|
exports.buildAnswerWake = buildAnswerWake;
|
|
38
38
|
exports.parseEnsembleUpArgs = parseEnsembleUpArgs;
|
|
39
39
|
exports.parseRecruitArgs = parseRecruitArgs;
|
|
40
40
|
exports.parseReleaseArg = parseReleaseArg;
|
|
41
41
|
exports.formatOutcome = formatOutcome;
|
|
42
42
|
exports.classifyCoarseStreamEnd = classifyCoarseStreamEnd;
|
|
43
|
+
exports.rearmDelayMs = rearmDelayMs;
|
|
44
|
+
exports.reconnectDetailForAttempt = reconnectDetailForAttempt;
|
|
45
|
+
exports.shouldRearmOnStreamEnd = shouldRearmOnStreamEnd;
|
|
46
|
+
exports.isCoarseStale = isCoarseStale;
|
|
43
47
|
exports.registerPlannerTools = registerPlannerTools;
|
|
44
48
|
exports.createMissionControlExtension = createMissionControlExtension;
|
|
45
49
|
/**
|
|
@@ -217,6 +221,73 @@ function classifyCoarseStreamEnd(err, aborted) {
|
|
|
217
221
|
}
|
|
218
222
|
return { connection: 'reconnecting' };
|
|
219
223
|
}
|
|
224
|
+
// ── #826/#828 — coarse-stream watchdog + auto-re-arm ───────────────────────
|
|
225
|
+
/** #826 — watchdog poll cadence (how often we compare now − lastCoarseEventAt). */
|
|
226
|
+
exports.WATCHDOG_TICK_MS = 5_000;
|
|
227
|
+
/**
|
|
228
|
+
* #826 — board-level staleness threshold. The daemon emits a `heartbeat` SSE
|
|
229
|
+
* event every ≤10s on a live `/v1/events` stream, so >35s of TOTAL silence
|
|
230
|
+
* (3.5× heartbeat) means the stream is wedged/dead — a half-open socket from a
|
|
231
|
+
* hard `agent-tempo down` (ECONNREFUSED / dead TCP), which neither a 404 nor
|
|
232
|
+
* force-fetch's INTERNAL retry surfaces (that loop reconnects forever, never
|
|
233
|
+
* throws). Sits ABOVE the fetch loop's 30s internal backoff cap, so a healthy
|
|
234
|
+
* cycling loop still receiving heartbeats never trips it — this gap IS the
|
|
235
|
+
* no-double-retry boundary (watchdog = safety net ABOVE the transport).
|
|
236
|
+
*/
|
|
237
|
+
exports.COARSE_STALE_MS = 35_000;
|
|
238
|
+
/** #828 re-arm backoff: base 1s, ×2, cap 30s. */
|
|
239
|
+
const REARM_BASE_MS = 1_000;
|
|
240
|
+
const REARM_MAX_MS = 30_000;
|
|
241
|
+
/**
|
|
242
|
+
* #828 — after this many consecutive failed re-arms the board stops claiming
|
|
243
|
+
* it's actively "reconnecting" and settles to the honest "[STREAM DOWN] —
|
|
244
|
+
* retrying every 30s" wording. Re-arm itself NEVER stops (a permanently silent
|
|
245
|
+
* wedge is the #752 silent-wedge class); only the label changes. ~5 steps takes
|
|
246
|
+
* the backoff ramp to its 30s cap.
|
|
247
|
+
*/
|
|
248
|
+
exports.REARM_SETTLE_THRESHOLD = 5;
|
|
249
|
+
/**
|
|
250
|
+
* #828 — equal-jitter backoff for the Nth re-arm attempt: `b/2 + rand(0, b/2)`
|
|
251
|
+
* where `b = min(1s·2^attempt, 30s)`. `Math.random()` is fine here — this is
|
|
252
|
+
* client code, not workflow code (the determinism rule does not apply). Jitter
|
|
253
|
+
* spreads re-arms so a fleet of boards doesn't thundering-herd a recovering
|
|
254
|
+
* daemon. `randomFn` is injectable for deterministic tests.
|
|
255
|
+
*/
|
|
256
|
+
function rearmDelayMs(attempt, randomFn = Math.random) {
|
|
257
|
+
const b = Math.min(REARM_BASE_MS * 2 ** Math.max(0, attempt), REARM_MAX_MS);
|
|
258
|
+
return b / 2 + randomFn() * (b / 2);
|
|
259
|
+
}
|
|
260
|
+
/**
|
|
261
|
+
* #828 — the reconnecting sub-variant wording for the Nth re-arm attempt: still
|
|
262
|
+
* ramping (< {@link REARM_SETTLE_THRESHOLD}) → "attempting to reconnect…";
|
|
263
|
+
* settled (≥) → "retrying every 30s". Carried on the model's `connectionDetail`
|
|
264
|
+
* (NO new BoardConnection enum value) and read by the renderer to pick the
|
|
265
|
+
* marker. Pure + exported for unit testing.
|
|
266
|
+
*/
|
|
267
|
+
function reconnectDetailForAttempt(attempt) {
|
|
268
|
+
return attempt >= exports.REARM_SETTLE_THRESHOLD ? render_1.STREAM_DOWN_DETAIL : render_1.RECONNECT_ARMING_DETAIL;
|
|
269
|
+
}
|
|
270
|
+
/**
|
|
271
|
+
* #828 — should a coarse stream-END auto-re-arm? Gate (architect ruling):
|
|
272
|
+
* - `null` (aborted teardown/rebind) → no
|
|
273
|
+
* - `gone` (404 — maestro torn down; a re-sub just 404s) → no (terminal by design)
|
|
274
|
+
* - `reconnecting` WITH a detail (the 401 auth path — tight-looping a
|
|
275
|
+
* guaranteed-fail) → no; keep the set-token hint
|
|
276
|
+
* - `reconnecting` WITHOUT a detail (generic stream-drop / normal-end) → yes
|
|
277
|
+
* Pure + exported for unit testing.
|
|
278
|
+
*/
|
|
279
|
+
function shouldRearmOnStreamEnd(end) {
|
|
280
|
+
return end !== null && end.connection === 'reconnecting' && end.detail === undefined;
|
|
281
|
+
}
|
|
282
|
+
/**
|
|
283
|
+
* #826 — is the coarse stream stale (silent past {@link COARSE_STALE_MS})?
|
|
284
|
+
* `lastEventAt === 0` means "not connected yet" → never stale. Pure.
|
|
285
|
+
*/
|
|
286
|
+
function isCoarseStale(lastEventAt, now) {
|
|
287
|
+
if (lastEventAt === 0)
|
|
288
|
+
return false;
|
|
289
|
+
return now - lastEventAt > exports.COARSE_STALE_MS;
|
|
290
|
+
}
|
|
220
291
|
/**
|
|
221
292
|
* The operator-command + board controller. Holds the model + the action client;
|
|
222
293
|
* command methods are independently unit-testable with a fake actions + ctx.
|
|
@@ -783,6 +854,15 @@ function createMissionControlExtension(deps = {}) {
|
|
|
783
854
|
let renderTimer = null;
|
|
784
855
|
let lastRenderedRevision = -1;
|
|
785
856
|
let activeCtx = null;
|
|
857
|
+
// #826/#828 — coarse-stream liveness + auto-re-arm state.
|
|
858
|
+
// `lastCoarseEventAt` is stamped on EVERY received coarse event (incl. the
|
|
859
|
+
// daemon's ≤10s `heartbeat`), so the watchdog measures true silence. `0` =
|
|
860
|
+
// not connected yet. `rearmAttempt` drives the #828 backoff + settle wording;
|
|
861
|
+
// `rearmTimer` is the single pending re-arm (one at a time — no stacking).
|
|
862
|
+
let watchdogTimer = null;
|
|
863
|
+
let rearmTimer = null;
|
|
864
|
+
let rearmAttempt = 0;
|
|
865
|
+
let lastCoarseEventAt = 0;
|
|
786
866
|
// #790 — the CURRENT ensemble binding lives on `ctrl.model.ensemble`
|
|
787
867
|
// (re-keyed by Controller.rebind BEFORE onRebind fires), so the SSE
|
|
788
868
|
// closures below read it at (re)open time instead of capturing the
|
|
@@ -795,7 +875,58 @@ function createMissionControlExtension(deps = {}) {
|
|
|
795
875
|
lastRenderedRevision = ctrl.model.revision;
|
|
796
876
|
activeCtx.ui.setWidget(WIDGET_KEY, (0, render_1.renderBoard)(ctrl.model, ctrl.localHost), { placement: 'aboveEditor' });
|
|
797
877
|
};
|
|
878
|
+
// #823 — flip the board's connection state + render the banner immediately
|
|
879
|
+
// (don't wait for the throttle tick when the stream's liveness changes).
|
|
880
|
+
const markConnection = (state, detail) => {
|
|
881
|
+
(0, board_1.setConnection)(ctrl.model, state, detail);
|
|
882
|
+
renderNow();
|
|
883
|
+
};
|
|
884
|
+
// #828 — cancel the single pending re-arm timer (idempotent).
|
|
885
|
+
const cancelRearm = () => {
|
|
886
|
+
if (rearmTimer) {
|
|
887
|
+
clearTimeout(rearmTimer);
|
|
888
|
+
rearmTimer = null;
|
|
889
|
+
}
|
|
890
|
+
};
|
|
891
|
+
// #828 — schedule the next coarse re-arm with bounded equal-jitter backoff.
|
|
892
|
+
// At most ONE pending at a time (the `rearmTimer` guard stops the watchdog and
|
|
893
|
+
// a stream-end both stacking re-arms). Reflects the arming/settled wording on
|
|
894
|
+
// the banner immediately, then re-opens the stream after the delay. NEVER
|
|
895
|
+
// gives up — the delay caps at 30s and `rearmAttempt` keeps the cadence there.
|
|
896
|
+
const scheduleRearm = () => {
|
|
897
|
+
if (rearmTimer)
|
|
898
|
+
return;
|
|
899
|
+
markConnection('reconnecting', reconnectDetailForAttempt(rearmAttempt));
|
|
900
|
+
const delay = rearmDelayMs(rearmAttempt);
|
|
901
|
+
rearmAttempt++;
|
|
902
|
+
rearmTimer = setTimeout(() => {
|
|
903
|
+
rearmTimer = null;
|
|
904
|
+
startCoarse(); // aborts the old/wedged loop at its top, opens a fresh one
|
|
905
|
+
}, delay);
|
|
906
|
+
if (typeof rearmTimer.unref === 'function')
|
|
907
|
+
rearmTimer.unref();
|
|
908
|
+
};
|
|
909
|
+
// #828 — apply a classified coarse stream-END to the board. A generic
|
|
910
|
+
// stream-drop re-arms (backoff); `gone` (404) is terminal (clear roster +
|
|
911
|
+
// cancel any pending re-arm); a 401 keeps the auth hint WITHOUT auto-re-arm
|
|
912
|
+
// (tight-looping a guaranteed-fail). `null` = aborted teardown/rebind/re-arm.
|
|
913
|
+
const handleStreamEnd = (end) => {
|
|
914
|
+
if (!end)
|
|
915
|
+
return;
|
|
916
|
+
if (shouldRearmOnStreamEnd(end)) {
|
|
917
|
+
scheduleRearm();
|
|
918
|
+
return;
|
|
919
|
+
}
|
|
920
|
+
if (end.connection === 'gone')
|
|
921
|
+
cancelRearm();
|
|
922
|
+
markConnection(end.connection, end.detail);
|
|
923
|
+
};
|
|
798
924
|
const startCoarse = () => {
|
|
925
|
+
// #828 — guarantee EXACTLY ONE coarse loop alive: abort any prior stream (a
|
|
926
|
+
// wedged one being re-armed, or one that just ended) before opening a fresh
|
|
927
|
+
// one. The aborted prior loop exits via classifyCoarseStreamEnd(_, true)→null
|
|
928
|
+
// (no state change). session_start's first call has none (abort is a no-op).
|
|
929
|
+
coarseAbort?.abort();
|
|
799
930
|
// #54 — accurate posture: a tokenless board is FULLY functional against a
|
|
800
931
|
// local (loopback) daemon, which grants full trust. Only a REMOTE / 0.0.0.0
|
|
801
932
|
// daemon requires the admin token (it 401s tokenless reads + actions).
|
|
@@ -807,25 +938,33 @@ function createMissionControlExtension(deps = {}) {
|
|
|
807
938
|
// expected teardown abort log a spurious "coarse SSE ended: AbortError".
|
|
808
939
|
const ac = new AbortController();
|
|
809
940
|
coarseAbort = ac;
|
|
810
|
-
//
|
|
811
|
-
// (
|
|
812
|
-
|
|
941
|
+
// #826 — FORCE the fetch transport: only it throws on a permanent 401/404
|
|
942
|
+
// (native EventSource swallows those into a silent reconnect cycle), and the
|
|
943
|
+
// board needs that to flip to `gone` / surface the auth hint. H5: omit baseUrl
|
|
944
|
+
// → createSubscribe re-resolves the daemon port per (re)connect, so a daemon
|
|
945
|
+
// restart on a new port self-heals.
|
|
946
|
+
const subscribe = (deps.createSubscribeImpl ?? subscribe_1.createSubscribe)({
|
|
947
|
+
forceFetch: true,
|
|
813
948
|
...(deps.baseUrl ? { baseUrl: deps.baseUrl } : {}),
|
|
814
949
|
...(adminToken ? { token: adminToken } : {}),
|
|
815
950
|
});
|
|
816
|
-
// #
|
|
817
|
-
//
|
|
818
|
-
|
|
819
|
-
(0, board_1.setConnection)(ctrl.model, state, detail);
|
|
820
|
-
renderNow();
|
|
821
|
-
};
|
|
951
|
+
// #826 — fresh connect: reset the staleness clock so the watchdog measures
|
|
952
|
+
// silence on THIS attempt, not the gap accrued since the last dead stream.
|
|
953
|
+
lastCoarseEventAt = Date.now();
|
|
822
954
|
void (async () => {
|
|
823
955
|
try {
|
|
824
956
|
for await (const ev of subscribe(ctrl.model.ensemble, { signal: ac.signal })) {
|
|
825
|
-
// #
|
|
826
|
-
//
|
|
827
|
-
|
|
957
|
+
// #826 — stamp liveness on EVERY received event (incl. the no-op
|
|
958
|
+
// `heartbeat` the daemon emits ≤10s) so the watchdog sees the pulse.
|
|
959
|
+
lastCoarseEventAt = Date.now();
|
|
960
|
+
// #823/#828 — the first event on this connection proves the stream is
|
|
961
|
+
// live: clear a prior reconnecting banner AND reset the re-arm backoff
|
|
962
|
+
// (a recovered stream starts the next failure's ramp from scratch).
|
|
963
|
+
if (ctrl.model.connection !== 'live') {
|
|
828
964
|
markConnection('live');
|
|
965
|
+
rearmAttempt = 0;
|
|
966
|
+
cancelRearm();
|
|
967
|
+
}
|
|
829
968
|
// #700 P2 — an `answer` event isn't a board event; it WAKES the
|
|
830
969
|
// planner (its only inbound channel is this SSE stream). Inject via
|
|
831
970
|
// pi.sendMessage(triggerTurn) — feature-detected (a fake/older Pi
|
|
@@ -839,24 +978,41 @@ function createMissionControlExtension(deps = {}) {
|
|
|
839
978
|
}
|
|
840
979
|
(0, board_1.applyTempoEvent)(ctrl.model, ev);
|
|
841
980
|
}
|
|
842
|
-
//
|
|
843
|
-
|
|
844
|
-
// non-aborted normal end is a defensive transient path → reconnecting.
|
|
845
|
-
const end = classifyCoarseStreamEnd(undefined, ac.signal.aborted);
|
|
846
|
-
if (end)
|
|
847
|
-
markConnection(end.connection, end.detail);
|
|
981
|
+
// Stream ended without throwing — classify + (maybe) re-arm.
|
|
982
|
+
handleStreamEnd(classifyCoarseStreamEnd(undefined, ac.signal.aborted));
|
|
848
983
|
}
|
|
849
984
|
catch (err) {
|
|
850
|
-
//
|
|
851
|
-
// Aborted teardown/rebind → null (no change, no spurious log).
|
|
985
|
+
// Map the (permanent) stream error to a board transition.
|
|
986
|
+
// Aborted teardown/rebind/re-arm → null (no change, no spurious log).
|
|
852
987
|
const end = classifyCoarseStreamEnd(err, ac.signal.aborted);
|
|
853
988
|
if (!end)
|
|
854
989
|
return;
|
|
855
990
|
log(end.connection === 'gone' ? 'coarse SSE — ensemble gone:' : 'coarse SSE ended:', err instanceof Error ? err.message : err);
|
|
856
|
-
|
|
991
|
+
handleStreamEnd(end);
|
|
857
992
|
}
|
|
858
993
|
})();
|
|
859
994
|
};
|
|
995
|
+
// #826 — watchdog: a wedged/dead socket (half-open from a hard `agent-tempo
|
|
996
|
+
// down`) never throws and never ends the loop, so neither the 404 path nor
|
|
997
|
+
// force-fetch's internal retry catches it. Detect it by TOTAL silence past
|
|
998
|
+
// COARSE_STALE_MS (no heartbeat) and re-arm. Skips when already `gone`
|
|
999
|
+
// (terminal), in the 401 auth-ended state (no auto-re-arm by design — its
|
|
1000
|
+
// connectionDetail is the auth hint, not an arming/settled marker), or when a
|
|
1001
|
+
// re-arm is already pending (no stacking with the stream-end path).
|
|
1002
|
+
const checkStale = () => {
|
|
1003
|
+
if (ctrl.model.connection === 'gone')
|
|
1004
|
+
return;
|
|
1005
|
+
if (ctrl.model.connection === 'reconnecting' &&
|
|
1006
|
+
ctrl.model.connectionDetail !== render_1.RECONNECT_ARMING_DETAIL &&
|
|
1007
|
+
ctrl.model.connectionDetail !== render_1.STREAM_DOWN_DETAIL)
|
|
1008
|
+
return;
|
|
1009
|
+
if (rearmTimer)
|
|
1010
|
+
return;
|
|
1011
|
+
if (!isCoarseStale(lastCoarseEventAt, Date.now()))
|
|
1012
|
+
return;
|
|
1013
|
+
log(`coarse stream stale — no daemon heartbeat for >${exports.COARSE_STALE_MS / 1000}s; re-arming`);
|
|
1014
|
+
scheduleRearm();
|
|
1015
|
+
};
|
|
860
1016
|
const openTail = (playerId) => {
|
|
861
1017
|
tailAbort?.abort();
|
|
862
1018
|
tailAbort = null;
|
|
@@ -889,6 +1045,8 @@ function createMissionControlExtension(deps = {}) {
|
|
|
889
1045
|
coarseAbort = null;
|
|
890
1046
|
tailAbort?.abort();
|
|
891
1047
|
tailAbort = null;
|
|
1048
|
+
cancelRearm();
|
|
1049
|
+
rearmAttempt = 0; // #828 — fresh ensemble: drop any pending re-arm + reset the backoff ramp
|
|
892
1050
|
startCoarse(); // reads the already-re-keyed ctrl.model.ensemble
|
|
893
1051
|
renderNow(); // show the re-keyed (empty) board immediately, not at the next throttle tick
|
|
894
1052
|
};
|
|
@@ -897,6 +1055,11 @@ function createMissionControlExtension(deps = {}) {
|
|
|
897
1055
|
coarseAbort = null;
|
|
898
1056
|
tailAbort?.abort();
|
|
899
1057
|
tailAbort = null;
|
|
1058
|
+
cancelRearm(); // #828 — drop any pending re-arm
|
|
1059
|
+
if (watchdogTimer) {
|
|
1060
|
+
clearInterval(watchdogTimer);
|
|
1061
|
+
watchdogTimer = null;
|
|
1062
|
+
} // #826
|
|
900
1063
|
if (renderTimer) {
|
|
901
1064
|
clearInterval(renderTimer);
|
|
902
1065
|
renderTimer = null;
|
|
@@ -908,10 +1071,16 @@ function createMissionControlExtension(deps = {}) {
|
|
|
908
1071
|
pi.on('session_start', (_event, ctx) => {
|
|
909
1072
|
activeCtx = ctx;
|
|
910
1073
|
lastRenderedRevision = -1;
|
|
1074
|
+
rearmAttempt = 0; // #828 — fresh session
|
|
911
1075
|
startCoarse();
|
|
912
1076
|
renderTimer = setInterval(renderNow, throttleMs);
|
|
913
1077
|
if (typeof renderTimer.unref === 'function')
|
|
914
1078
|
renderTimer.unref();
|
|
1079
|
+
// #826 — coarse-stream staleness watchdog (catches a wedged/dead socket
|
|
1080
|
+
// that never throws). `.unref()` so it can't keep the process alive.
|
|
1081
|
+
watchdogTimer = setInterval(checkStale, exports.WATCHDOG_TICK_MS);
|
|
1082
|
+
if (typeof watchdogTimer.unref === 'function')
|
|
1083
|
+
watchdogTimer.unref();
|
|
915
1084
|
renderNow();
|
|
916
1085
|
});
|
|
917
1086
|
pi.on('session_shutdown', () => teardown());
|
|
package/dist/pi/mission-control/render.d.ts
CHANGED
|
@@ -1,4 +1,16 @@
|
|
|
1
1
|
import { type BoardModel } from './board';
|
|
2
|
+
/**
|
|
3
|
+
* #828 — `connectionDetail` sentinels that select the `reconnecting` sub-marker
|
|
4
|
+
* (the variant is carried on the model's `connectionDetail`, so NO new
|
|
5
|
+
* `BoardConnection` enum value is needed). The extension sets one of these while
|
|
6
|
+
* the coarse stream auto-re-arms:
|
|
7
|
+
* - {@link RECONNECT_ARMING_DETAIL} — still ramping (< settle threshold) → `[RECONNECTING]`
|
|
8
|
+
* - {@link STREAM_DOWN_DETAIL} — settled (re-arm capped at 30s, daemon still dead) → `[STREAM DOWN]`
|
|
9
|
+
* Any OTHER reconnecting detail (e.g. the 401 auth hint) renders as `[STREAM
|
|
10
|
+
* ENDED]` — that path does NOT auto-re-arm.
|
|
11
|
+
*/
|
|
12
|
+
export declare const RECONNECT_ARMING_DETAIL = "attempting to reconnect\u2026";
|
|
13
|
+
export declare const STREAM_DOWN_DETAIL = "retrying every 30s \u2014 /ensemble to rebind";
|
|
2
14
|
/**
|
|
3
15
|
* #836 — Pi's `InteractiveMode` hard-caps a widget at this many lines and, beyond
|
|
4
16
|
* it, naively slices the top N and appends its own dev-speak `... (widget
|
|
package/dist/pi/mission-control/render.js
CHANGED
|
@@ -1,10 +1,22 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.MAX_WIDGET_LINES = void 0;
|
|
3
|
+
exports.MAX_WIDGET_LINES = exports.STREAM_DOWN_DETAIL = exports.RECONNECT_ARMING_DETAIL = void 0;
|
|
4
4
|
exports.renderBoard = renderBoard;
|
|
5
5
|
const board_1 = require("./board");
|
|
6
6
|
/** How many recent fine-tail frames to show under the selected player. */
|
|
7
7
|
const TAIL_RENDER_LINES = 12;
|
|
8
|
+
/**
|
|
9
|
+
* #828 — `connectionDetail` sentinels that select the `reconnecting` sub-marker
|
|
10
|
+
* (the variant is carried on the model's `connectionDetail`, so NO new
|
|
11
|
+
* `BoardConnection` enum value is needed). The extension sets one of these while
|
|
12
|
+
* the coarse stream auto-re-arms:
|
|
13
|
+
* - {@link RECONNECT_ARMING_DETAIL} — still ramping (< settle threshold) → `[RECONNECTING]`
|
|
14
|
+
* - {@link STREAM_DOWN_DETAIL} — settled (re-arm capped at 30s, daemon still dead) → `[STREAM DOWN]`
|
|
15
|
+
* Any OTHER reconnecting detail (e.g. the 401 auth hint) renders as `[STREAM
|
|
16
|
+
* ENDED]` — that path does NOT auto-re-arm.
|
|
17
|
+
*/
|
|
18
|
+
exports.RECONNECT_ARMING_DETAIL = 'attempting to reconnect…';
|
|
19
|
+
exports.STREAM_DOWN_DETAIL = 'retrying every 30s — /ensemble to rebind';
|
|
8
20
|
/**
|
|
9
21
|
* #836 — Pi's `InteractiveMode` hard-caps a widget at this many lines and, beyond
|
|
10
22
|
* it, naively slices the top N and appends its own dev-speak `... (widget
|
|
@@ -137,23 +149,36 @@ function renderBoard(model, localHost) {
|
|
|
137
149
|
// A dropped stream outranks PAUSED/HELD: the suspension flags below are then
|
|
138
150
|
// last-known-only and the operator needs to know the view itself may be stale.
|
|
139
151
|
//
|
|
140
|
-
//
|
|
141
|
-
//
|
|
142
|
-
//
|
|
143
|
-
//
|
|
144
|
-
//
|
|
145
|
-
//
|
|
146
|
-
//
|
|
147
|
-
//
|
|
148
|
-
//
|
|
152
|
+
// #828 — the coarse stream now AUTO-RE-ARMS (bounded-backoff re-subscribe), so
|
|
153
|
+
// the `reconnecting` state has three honest variants, discriminated by
|
|
154
|
+
// `connectionDetail` (no new `BoardConnection` enum value):
|
|
155
|
+
// - arming (re-arm in flight, < settle) → `[RECONNECTING]` "attempting to reconnect…"
|
|
156
|
+
// - settled (re-arm capped at 30s, still dead) → `[STREAM DOWN]` "retrying every 30s"
|
|
157
|
+
// - other detail (the 401 auth path — does NOT auto-re-arm) → `[STREAM ENDED]` + hint
|
|
158
|
+
// The #827 "STREAM ENDED, reopens on re-bind" honest-label note is now reversed
|
|
159
|
+
// for the arming/settled variants because re-arm is real (#828); only the 401
|
|
160
|
+
// path keeps the stream-ended wording (it genuinely needs a manual re-bind /
|
|
161
|
+
// new token).
|
|
149
162
|
let marker = '';
|
|
150
163
|
let what = '';
|
|
151
164
|
if (model.connection === 'reconnecting') {
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
165
|
+
if (model.connectionDetail === exports.RECONNECT_ARMING_DETAIL) {
|
|
166
|
+
marker = ' · [RECONNECTING]';
|
|
167
|
+
what = `RECONNECTING — ${exports.RECONNECT_ARMING_DETAIL}`;
|
|
168
|
+
}
|
|
169
|
+
else if (model.connectionDetail === exports.STREAM_DOWN_DETAIL) {
|
|
170
|
+
marker = ' · [STREAM DOWN]';
|
|
171
|
+
what = `STREAM DOWN — ${exports.STREAM_DOWN_DETAIL}`;
|
|
172
|
+
}
|
|
173
|
+
else {
|
|
174
|
+
// 401 auth (or any non-re-arming reconnecting): the stream ended and only a
|
|
175
|
+
// manual re-bind / new token recovers it — keep the honest stream-ended copy.
|
|
176
|
+
marker = ' · [STREAM ENDED]';
|
|
177
|
+
const tail = 'last-known state, reopens on /ensemble re-bind';
|
|
178
|
+
what = model.connectionDetail
|
|
179
|
+
? `STREAM ENDED — ${model.connectionDetail}; ${tail}`
|
|
180
|
+
: `STREAM ENDED — coarse stream dropped; ${tail}`;
|
|
181
|
+
}
|
|
157
182
|
}
|
|
158
183
|
else if (model.paused) {
|
|
159
184
|
marker = ' · [PAUSED]';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-tempo",
|
|
3
|
-
"version": "1.7.0-beta.11",
|
|
3
|
+
"version": "1.7.0-beta.12",
|
|
4
4
|
"description": "Many agents, one tempo. Durable coordination for multi-agent work via Temporal.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"mcp",
|
|
@@ -82,13 +82,14 @@
|
|
|
82
82
|
"test": "mocha && vitest run",
|
|
83
83
|
"lint:surface-drift": "node scripts/check-surface-drift.js",
|
|
84
84
|
"lint:no-stale-scaffold": "node scripts/check-no-stale-scaffold.js",
|
|
85
|
+
"lint:no-stray-src-js": "node scripts/check-no-stray-src-js.js",
|
|
85
86
|
"lint:test-ensemble-literals": "bash scripts/check-test-ensemble-literals.sh",
|
|
86
87
|
"lint:skip-reasons": "node scripts/lint-skip-reasons.js",
|
|
87
88
|
"lint:lockstep-version": "node -e \"const r=require('./package.json').version,d=require('./dashboard/package.json').version;if(r!==d){console.error('Version drift: root='+r+' dashboard='+d+'. Bump dashboard/package.json#version to match root.');process.exit(1);}console.log('Lockstep OK: '+r);\"",
|
|
88
89
|
"lint:lockfile-canonical": "bash scripts/check-lockfile-canonical.sh",
|
|
89
90
|
"lint:dashboard-css-sync": "npm run build:scripts && node dist/scripts/check-components-css-sync.js",
|
|
90
91
|
"lint:pi-drift": "node scripts/check-pi-drift.js",
|
|
91
|
-
"check:all": "npm run lint:test-ensemble-literals && npm run lint:skip-reasons && npm run lint:lockstep-version && npm run lint:lockfile-canonical && npm run lint:surface-drift && npm run lint:no-stale-scaffold && npm run build && npm run lint:pi-drift && npm run lint:dashboard-css-sync && npm test && npm --prefix dashboard run lint && npm --prefix dashboard run test && npm run size-limit && npm run verify-tarball"
|
|
92
|
+
"check:all": "npm run lint:test-ensemble-literals && npm run lint:skip-reasons && npm run lint:lockstep-version && npm run lint:lockfile-canonical && npm run lint:surface-drift && npm run lint:no-stale-scaffold && npm run lint:no-stray-src-js && npm run build && npm run lint:pi-drift && npm run lint:dashboard-css-sync && npm test && npm --prefix dashboard run lint && npm --prefix dashboard run test && npm run size-limit && npm run verify-tarball"
|
|
92
93
|
},
|
|
93
94
|
"optionalDependencies": {
|
|
94
95
|
"@anthropic-ai/sdk": "~0.91.1",
|