agent-tempo 1.0.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +8 -2
- package/dashboard/dist/assets/index-D6Xyje_n.js +62 -0
- package/dashboard/dist/assets/index-D6Xyje_n.js.map +1 -0
- package/dashboard/dist/index.html +1 -1
- package/dashboard/package.json +1 -1
- package/dist/activities/claude-stop.d.ts +21 -0
- package/dist/activities/claude-stop.js +94 -0
- package/dist/cli/commands.d.ts +39 -0
- package/dist/cli/commands.js +83 -2
- package/dist/cli/legacy-migration.js +8 -2
- package/dist/cli/sa-preflight.d.ts +27 -3
- package/dist/cli/sa-preflight.js +169 -9
- package/dist/cli/startup.js +34 -8
- package/dist/client/core.js +9 -0
- package/dist/client/interface.d.ts +21 -0
- package/dist/daemon.js +1 -0
- package/dist/http/catalog.js +17 -3
- package/dist/http/event-types.d.ts +41 -0
- package/dist/http/orphans.d.ts +76 -0
- package/dist/http/orphans.js +93 -0
- package/dist/http/server.js +13 -0
- package/dist/reconcile/orphans.d.ts +37 -27
- package/dist/reconcile/orphans.js +93 -6
- package/dist/tui/index.js +1 -0
- package/dist/utils/bg-preflight.d.ts +25 -0
- package/dist/utils/bg-preflight.js +154 -0
- package/package.json +5 -4
- package/dashboard/dist/assets/index-_5jV0Znu.js +0 -62
- package/dashboard/dist/assets/index-_5jV0Znu.js.map +0 -1
package/dist/client/core.js
CHANGED
|
@@ -941,6 +941,15 @@ function createTempoClientCore(client, opts = {}) {
|
|
|
941
941
|
taskQueue,
|
|
942
942
|
});
|
|
943
943
|
},
|
|
944
|
+
async listAllOrphans(opts = {}) {
|
|
945
|
+
// Lazy import — `reconcile/orphans` pulls validation/visibility
|
|
946
|
+
// helpers we don't need on every TempoClient consumer.
|
|
947
|
+
const { listAllOrphansCached } = await Promise.resolve().then(() => __importStar(require('../reconcile/orphans')));
|
|
948
|
+
return listAllOrphansCached(client, {
|
|
949
|
+
force: Boolean(opts.force),
|
|
950
|
+
...(opts.ensemble ? { ensemble: opts.ensemble } : {}),
|
|
951
|
+
});
|
|
952
|
+
},
|
|
944
953
|
async recall(ensemble, playerId) {
|
|
945
954
|
// #128: direct session queries, no maestro round-trip. Throws rather
|
|
946
955
|
// than returning empties so the CLI / TUI wrappers can surface a
|
|
@@ -344,6 +344,27 @@ export interface TempoClientCore {
|
|
|
344
344
|
listHosts(opts?: {
|
|
345
345
|
force?: boolean;
|
|
346
346
|
}): Promise<HostInfo[]>;
|
|
347
|
+
/**
|
|
348
|
+
* #579 — cluster-wide cross-host orphan listing (readonly). Used by the
|
|
349
|
+
* dashboard's `/orphans` screen via the daemon's `GET /v1/orphans`
|
|
350
|
+
* endpoint. Thin wrapper over `queryOrphanedSessions` with `allHosts: true`.
|
|
351
|
+
* Returns raw `OrphanCandidate` rows so callers can join with their own
|
|
352
|
+
* host-liveness / migrate-command rendering.
|
|
353
|
+
*
|
|
354
|
+
* `opts.ensemble` narrows the visibility query to a single ensemble.
|
|
355
|
+
* `opts.force` bypasses the 3-second in-process cache (mirrors
|
|
356
|
+
* `listHosts`). Result is cached keyed on the ensemble filter so
|
|
357
|
+
* `?ensemble=foo` and the unfiltered call don't clobber each other.
|
|
358
|
+
*
|
|
359
|
+
* Never throws on per-candidate failure — partial-tolerant by design
|
|
360
|
+
* (the underlying `queryOrphanedSessions` already skips unreachable
|
|
361
|
+
* candidates and returns what it could resolve). Visibility-query timeout
|
|
362
|
+
* also returns partial.
|
|
363
|
+
*/
|
|
364
|
+
listAllOrphans(opts?: {
|
|
365
|
+
ensemble?: string;
|
|
366
|
+
force?: boolean;
|
|
367
|
+
}): Promise<import('../reconcile/orphans').OrphanCandidate[]>;
|
|
347
368
|
/** Get active schedules for an ensemble. */
|
|
348
369
|
getSchedules(ensemble: string): Promise<ScheduleEntry[]>;
|
|
349
370
|
/** Cancel a named schedule in an ensemble. */
|
package/dist/daemon.js
CHANGED
|
@@ -770,6 +770,7 @@ async function main() {
|
|
|
770
770
|
const result = await verifySearchAttributes({
|
|
771
771
|
temporalAddress: config.temporalAddress,
|
|
772
772
|
temporalNamespace: config.temporalNamespace,
|
|
773
|
+
temporalApiKey: config.temporalApiKey,
|
|
773
774
|
});
|
|
774
775
|
if (!result.ok && !result.probeError) {
|
|
775
776
|
process.stderr.write('ERROR: ' + result.message + '\n');
|
package/dist/http/catalog.js
CHANGED
|
@@ -108,9 +108,23 @@ async function handleCreateEnsemble(req, res, client) {
|
|
|
108
108
|
}
|
|
109
109
|
const held = startMode === 'hold';
|
|
110
110
|
const allowed = (0, body_1.allowedAgentsForCurrentMode)();
|
|
111
|
-
//
|
|
112
|
-
// `
|
|
113
|
-
//
|
|
111
|
+
// 0. Bootstrap the maestro session + hub workflow for this ensemble.
|
|
112
|
+
// `client.recruit()` submits an outbox entry on the maestro workflow —
|
|
113
|
+
// it must exist before we can signal it. The CLI's `agent-tempo up` path
|
|
114
|
+
// pre-creates the conductor workflow which bootstraps the maestro, but
|
|
115
|
+
// the dashboard create-ensemble flow goes directly through the client.
|
|
116
|
+
try {
|
|
117
|
+
await client.ensureMaestroSession(name);
|
|
118
|
+
}
|
|
119
|
+
catch (err) {
|
|
120
|
+
return (0, responses_1.errorResponse)(res, 500, {
|
|
121
|
+
error: 'maestro-bootstrap-failed',
|
|
122
|
+
message: err instanceof Error ? err.message : String(err),
|
|
123
|
+
});
|
|
124
|
+
}
|
|
125
|
+
// 1. Recruit the conductor — lineup's `conductor` block (if present)
|
|
126
|
+
// wins on name + agent + part; the top-level `host` is the conductor's
|
|
127
|
+
// spawn target either way.
|
|
114
128
|
const conductorBlock = resolved?.conductor;
|
|
115
129
|
const conductorName = conductorBlock?.name ?? 'conductor';
|
|
116
130
|
const conductorAgent = pickAgent(conductorBlock?.agent, allowed);
|
|
@@ -212,6 +212,47 @@ export interface PlayerSummaryV1 {
|
|
|
212
212
|
* reason `overflow`.
|
|
213
213
|
*/
|
|
214
214
|
export type EventIdToken = string;
|
|
215
|
+
/**
|
|
216
|
+
* §4.x — single row in the `/v1/orphans` cluster-wide orphan listing.
|
|
217
|
+
*
|
|
218
|
+
* An orphan is a session workflow whose `attachmentInfo.phase ∈ {detached,
|
|
219
|
+
* draining, attached, processing, awaiting}` but whose home-host daemon
|
|
220
|
+
* isn't running an adapter for it — typically because the home host is
|
|
221
|
+
* down or the adapter crashed without orderly destroy. The dashboard
|
|
222
|
+
* `/orphans` screen surfaces these so an operator on a live host can
|
|
223
|
+
* migrate the player over.
|
|
224
|
+
*
|
|
225
|
+
* `hostLiveness` is joined server-side against `listHosts()` so the
|
|
226
|
+
* dashboard doesn't have to re-issue a hosts query per row:
|
|
227
|
+
* - `'live'` — `preferredHost` matches a host with `freshness === 'live'`
|
|
228
|
+
* - `'stale'` — matches a host record whose `freshness` is anything other than `'live'`
|
|
229
|
+
* - `'missing'` — `preferredHost` is null OR no matching host record
|
|
230
|
+
*
|
|
231
|
+
* `migrateCommand` is the TUI slash-command the operator pastes into their
|
|
232
|
+
* own session on the migrate target. `--yes-steal=` (NOT
|
|
233
|
+
* `--confirm-steal-from-host`) is the actual flag accepted by
|
|
234
|
+
* `src/tui/commands.ts:handleMigrate`. When `preferredHost` is null the
|
|
235
|
+
* command targets the local host and includes the steal guard pre-filled
|
|
236
|
+
* with the last-known host (or a literal `'(unknown)'` when even that is
|
|
237
|
+
* missing — the operator must edit it before submit).
|
|
238
|
+
*/
|
|
239
|
+
export interface OrphanV1 {
|
|
240
|
+
playerId: string;
|
|
241
|
+
ensemble: string;
|
|
242
|
+
workflowId: string;
|
|
243
|
+
preferredHost: string | null;
|
|
244
|
+
hostLiveness: 'live' | 'stale' | 'missing';
|
|
245
|
+
phase: AttachmentPhase;
|
|
246
|
+
detachedSince: string | null;
|
|
247
|
+
lastHeartbeatAt: string | null;
|
|
248
|
+
migrateCommand: string;
|
|
249
|
+
}
|
|
250
|
+
/** §4.x — response shape for `GET /v1/orphans[?ensemble=<name>]`. */
|
|
251
|
+
export interface OrphansV1 {
|
|
252
|
+
v: 1;
|
|
253
|
+
capturedAt: string;
|
|
254
|
+
orphans: OrphanV1[];
|
|
255
|
+
}
|
|
215
256
|
/**
|
|
216
257
|
* The PR-1 sentinel `lastEventId` value — emitted on `/v1/state/:ensemble`
|
|
217
258
|
* before PR-2 lights up the aggregate poll loop. Subscribers passing this
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `GET /v1/orphans[?ensemble=<name>]` handler — surfaces cluster-wide
|
|
3
|
+
* cross-host orphans to the dashboard (#579).
|
|
4
|
+
*
|
|
5
|
+
* Pipeline:
|
|
6
|
+
* 1. `TempoClient.listAllOrphans` → `OrphanCandidate[]` (visibility query
|
|
7
|
+
* with `allHosts: true`, 3-second daemon-edge cache, partial-tolerant
|
|
8
|
+
* on per-candidate failures).
|
|
9
|
+
* 2. `TempoClient.listHosts` → `HostInfo[]` for the freshness join
|
|
10
|
+
* (also independently cached for 3s).
|
|
11
|
+
* 3. Map each candidate to the `OrphanV1` wire shape, joining
|
|
12
|
+
* `hostLiveness` from the hosts snapshot and rendering the operator
|
|
13
|
+
* `migrateCommand` via {@link renderMigrateCommand}.
|
|
14
|
+
*
|
|
15
|
+
* **NOT a wrapper around `restoreOrphansOnce`** — see ADR follow-up
|
|
16
|
+
* 2026-05-16 (architect option 3): the readonly branch of that helper
|
|
17
|
+
* collapses each candidate down to `{ playerId, ensemble, outcome }` and
|
|
18
|
+
* loses the wire fields we need (`workflowId`, `phase`, `detachedSince`,
|
|
19
|
+
* `lastHeartbeatAt`, `preferredHost`). The dashboard handler therefore
|
|
20
|
+
* calls `queryOrphanedSessions` directly (via `listAllOrphansCached`)
|
|
21
|
+
* and shares the cross-host detail formatter with the readonly branch
|
|
22
|
+
* via {@link buildCrossHostDetail} so the two surfaces never drift.
|
|
23
|
+
*
|
|
24
|
+
* Auth: same bearer + CORS gates as every other `/v1/*` read — applied
|
|
25
|
+
* at the dispatcher in `src/http/server.ts`, not here.
|
|
26
|
+
*/
|
|
27
|
+
import * as http from 'http';
|
|
28
|
+
import type { OrphansV1, OrphanV1 } from './event-types';
|
|
29
|
+
import type { TempoClient } from '../client/interface';
|
|
30
|
+
import type { HostInfo } from '../types';
|
|
31
|
+
import type { OrphanCandidate } from '../reconcile/orphans';
|
|
32
|
+
/**
|
|
33
|
+
* Render the TUI `/migrate` slash command the operator pastes into their
|
|
34
|
+
* own session to recover an orphan. Wording mirrors
|
|
35
|
+
* `src/tui/commands.ts:handleMigrate` exactly:
|
|
36
|
+
* - positional `<playerId> <host>`
|
|
37
|
+
* - flag form `--force --yes-steal=<currentHost>` (NOT `--confirm-steal-from-host`), emitted only when `preferredHost` is null
|
|
38
|
+
*
|
|
39
|
+
* When `preferredHost` is non-null the command can target it directly: the
|
|
40
|
+
* operator just runs `/migrate <player> <preferredHost>` from any
|
|
41
|
+
* session. When it's null we don't know where the player was last seen,
|
|
42
|
+
* so the rendered command targets `<dashboardHost>` and pre-fills the
|
|
43
|
+
* steal guard with the candidate's last-known adapter host (from
|
|
44
|
+
* `OrphanSummary.lastAdapter.hostname`) — falling through to the literal
|
|
45
|
+
* `(unknown)` when even that's missing. The operator MUST edit the
|
|
46
|
+
* placeholder before submit; rendering it literally guarantees the
|
|
47
|
+
* `/migrate` validator catches the slip rather than silently steaming
|
|
48
|
+
* ahead.
|
|
49
|
+
*/
|
|
50
|
+
export declare function renderMigrateCommand(args: {
|
|
51
|
+
playerId: string;
|
|
52
|
+
preferredHost: string | null;
|
|
53
|
+
dashboardHost: string;
|
|
54
|
+
lastAdapterHost: string | null;
|
|
55
|
+
}): string;
|
|
56
|
+
/**
|
|
57
|
+
* Map a single `OrphanCandidate` to its `OrphanV1` wire shape.
|
|
58
|
+
*
|
|
59
|
+
* Pure / side-effect-free — exposed for unit tests that want to exercise
|
|
60
|
+
* the join logic without spinning up a Temporal client.
|
|
61
|
+
*/
|
|
62
|
+
export declare function buildOrphanRow(args: {
|
|
63
|
+
candidate: OrphanCandidate;
|
|
64
|
+
hostsByName: Map<string, HostInfo>;
|
|
65
|
+
dashboardHost: string;
|
|
66
|
+
}): OrphanV1;
|
|
67
|
+
/** Build the full `OrphansV1` payload. Exposed for unit tests. */
|
|
68
|
+
export declare function buildOrphansResponse(client: TempoClient, ensembleFilter: string | undefined, dashboardHost: string): Promise<OrphansV1>;
|
|
69
|
+
/**
|
|
70
|
+
* Request handler. Bearer + CORS already enforced upstream — by the time
|
|
71
|
+
* we're called the caller is authorized.
|
|
72
|
+
*/
|
|
73
|
+
export declare function handleOrphans(res: http.ServerResponse, ctx: {
|
|
74
|
+
client: TempoClient;
|
|
75
|
+
dashboardHost: string;
|
|
76
|
+
}, ensembleFilter: string | undefined): Promise<void>;
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.renderMigrateCommand = renderMigrateCommand;
|
|
4
|
+
exports.buildOrphanRow = buildOrphanRow;
|
|
5
|
+
exports.buildOrphansResponse = buildOrphansResponse;
|
|
6
|
+
exports.handleOrphans = handleOrphans;
|
|
7
|
+
const responses_1 = require("./responses");
|
|
8
|
+
/**
 * Render the TUI `/migrate` slash command the operator pastes into their
 * own session to recover an orphan. Per the original note the wording is
 * meant to mirror `src/tui/commands.ts:handleMigrate`.
 *
 * Two shapes are produced:
 *   - `preferredHost` is set (non-empty):
 *       `/migrate <playerId> <preferredHost>`
 *   - `preferredHost` is null/empty — the command targets `dashboardHost`
 *     and carries the steal guard:
 *       `/migrate <playerId> <dashboardHost> --force --yes-steal=<host>`
 *     where `<host>` is `lastAdapterHost`, or the literal `(unknown)` when
 *     no last adapter host is known. The flag is `--yes-steal=` (NOT
 *     `--confirm-steal-from-host`).
 *
 * The `(unknown)` placeholder is rendered literally on purpose: the operator
 * MUST replace it before submitting.
 *
 * @param args.playerId        Player to migrate.
 * @param args.preferredHost   Host the player prefers, or null when unknown.
 * @param args.dashboardHost   Host serving the dashboard; fallback target.
 * @param args.lastAdapterHost Last-known adapter host, or null.
 * @returns The slash command as a single-line string.
 */
function renderMigrateCommand(args) {
    const { playerId, preferredHost, dashboardHost, lastAdapterHost } = args;
    if (preferredHost) {
        return `/migrate ${playerId} ${preferredHost}`;
    }
    // Truthiness (not `??`) so an empty-string host is treated as missing,
    // matching the `preferredHost` check above; otherwise the command would
    // end in a dangling `--yes-steal=` with no value.
    const stealFrom = lastAdapterHost || '(unknown)';
    return `/migrate ${playerId} ${dashboardHost} --force --yes-steal=${stealFrom}`;
}
|
|
34
|
+
/**
 * Translate a host's freshness into the `OrphanV1.hostLiveness` value.
 *
 * Yields `'missing'` when there is no preferred host or no record for it,
 * `'live'` when the record's `freshness` is `'live'`, and `'stale'` for any
 * other freshness value.
 */
function deriveLiveness(preferredHost, hostsByName) {
    const record = preferredHost ? hostsByName.get(preferredHost) : undefined;
    if (!record) {
        return 'missing';
    }
    if (record.freshness === 'live') {
        return 'live';
    }
    return 'stale';
}
|
|
43
|
+
/**
 * Convert one `OrphanCandidate` into the `OrphanV1` wire row.
 *
 * Only reads its arguments and calls the two local helpers
 * (`deriveLiveness`, `renderMigrateCommand`); exported so the join logic
 * can be unit-tested without a Temporal client.
 */
function buildOrphanRow(args) {
    const { candidate, hostsByName, dashboardHost } = args;
    const { summary } = candidate;
    const { playerId } = summary;
    // Absent optional fields are normalised to `null` for the wire shape.
    const preferredHost = summary.preferredHost ?? null;
    const lastAdapterHost = summary.lastAdapter?.hostname ?? null;
    const hostLiveness = deriveLiveness(preferredHost, hostsByName);
    const { info } = candidate;
    const phase = info.phase;
    const detachedSince = summary.detachedSince ?? null;
    const lastHeartbeatAt = info.currentAttachment?.lastHeartbeatAt ?? null;
    const migrateCommand = renderMigrateCommand({
        playerId,
        preferredHost,
        dashboardHost,
        lastAdapterHost,
    });
    return {
        playerId,
        ensemble: summary.ensemble,
        workflowId: candidate.workflowId,
        preferredHost,
        hostLiveness,
        phase,
        detachedSince,
        lastHeartbeatAt,
        migrateCommand,
    };
}
|
|
71
|
+
/**
 * Assemble the complete `OrphansV1` payload: fetch orphan candidates and the
 * host list, index hosts by hostname, and map every candidate to a wire row.
 * Exported for unit tests.
 */
async function buildOrphansResponse(client, ensembleFilter, dashboardHost) {
    const orphanOpts = ensembleFilter ? { ensemble: ensembleFilter } : {};
    // The two reads do not depend on each other, so they are issued together.
    const [candidates, hosts] = await Promise.all([
        client.listAllOrphans(orphanOpts),
        client.listHosts(),
    ]);
    const hostEntries = hosts.map((host) => [host.hostname, host]);
    const hostsByName = new Map(hostEntries);
    const toRow = (candidate) => buildOrphanRow({ candidate, hostsByName, dashboardHost });
    const orphans = candidates.map((candidate) => toRow(candidate));
    const capturedAt = new Date().toISOString();
    return { v: 1, capturedAt, orphans };
}
|
|
86
|
+
/**
 * HTTP handler for the orphans listing: builds the payload and writes it as
 * a 200 JSON response. Per the original note, bearer + CORS checks are
 * applied by the dispatcher before this function is reached.
 */
async function handleOrphans(res, ctx, ensembleFilter) {
    const { client, dashboardHost } = ctx;
    const body = await buildOrphansResponse(client, ensembleFilter, dashboardHost);
    (0, responses_1.jsonResponse)(res, 200, body);
}
|
package/dist/http/server.js
CHANGED
|
@@ -65,6 +65,7 @@ const catalog_1 = require("./catalog");
|
|
|
65
65
|
const port_file_1 = require("./port-file");
|
|
66
66
|
const responses_1 = require("./responses");
|
|
67
67
|
const snapshot_1 = require("./snapshot");
|
|
68
|
+
const orphans_1 = require("./orphans");
|
|
68
69
|
const sse_handler_1 = require("./sse-handler");
|
|
69
70
|
const log = (...args) => console.error(`[agent-tempo:http ${new Date().toISOString()}]`, ...args);
|
|
70
71
|
/** Default bind addr per SSE-PROTOCOL.md §1. */
|
|
@@ -302,6 +303,18 @@ async function handle(req, res, ctx) {
|
|
|
302
303
|
if (pathname === '/v1/hosts') {
|
|
303
304
|
return handleHosts(res, ctx);
|
|
304
305
|
}
|
|
306
|
+
// #579 — cluster-wide cross-host orphan listing for the dashboard.
|
|
307
|
+
// Same bearer + CORS gate as `/v1/hosts`; optional `?ensemble=<name>`
|
|
308
|
+
// narrows to one ensemble.
|
|
309
|
+
if (pathname === '/v1/orphans') {
|
|
310
|
+
const ensembleFilter = url.searchParams.get('ensemble') ?? undefined;
|
|
311
|
+
return (0, orphans_1.handleOrphans)(res, {
|
|
312
|
+
client: ctx.client,
|
|
313
|
+
// os.hostname() is OS-cached + sub-millisecond — no need to thread
|
|
314
|
+
// through HandleContext just for one rendering site.
|
|
315
|
+
dashboardHost: require('os').hostname(),
|
|
316
|
+
}, ensembleFilter);
|
|
317
|
+
}
|
|
305
318
|
// Catalog reads (issue #400) — `listAgentTypes` / `listLineups`
|
|
306
319
|
// touch local fs only, no Temporal calls; cheap to serve per-request.
|
|
307
320
|
if (pathname === '/v1/agent-types') {
|
|
@@ -1,30 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Orphan-session query — shared by `reconcileOnBoot()` in `src/daemon.ts` and
|
|
3
|
-
* the `agent-tempo restore` CLI command (`src/cli/commands.ts`).
|
|
4
|
-
*
|
|
5
|
-
* Design §10.1: a session is an **orphan** when the workflow is `Running` but
|
|
6
|
-
* no adapter process is alive to own its attachment. Two candidate shapes
|
|
7
|
-
* matter:
|
|
8
|
-
*
|
|
9
|
-
* 1. **Active-host sessions** — `AgentTempoAttachedHost = local` AND phase
|
|
10
|
-
* is `attached` / `processing` / `awaiting` / `draining`. The attachment
|
|
11
|
-
* exists but the adapter process may have died.
|
|
12
|
-
* 2. **Detached-home sessions** — `AgentTempoAttachmentState = detached` AND
|
|
13
|
-
* `AgentTempoHostname = local`. No adapter at all; the home host is us.
|
|
14
|
-
*
|
|
15
|
-
* For each candidate we query `attachmentInfo` + `orphanSummary`. If the
|
|
16
|
-
* adapter process is alive (`isAdapterProcessAlive` returns true) we skip —
|
|
17
|
-
* that's the daemon-restarted-under-a-live-adapter case, not an orphan.
|
|
18
|
-
*
|
|
19
|
-
* `isAdapterProcessAlive` is stubbed as `() => false` for v0.25.0-beta.1 per
|
|
20
|
-
* PR-E engineer brief §8 answer 1. No adapter PID file convention exists yet
|
|
21
|
-
* (only the daemon process has `daemon.pid`; Copilot bridges write their own
|
|
22
|
-
* per-session file but Claude Code CLI does not). A conservative always-dead
|
|
23
|
-
* stub is safe: false negatives cost an extra `claimAttachment` attempt,
|
|
24
|
-
* which the caller catches as `AttachmentConflict` and backs off silently
|
|
25
|
-
* (design §10.6). False positives — skipping a session that needs restore —
|
|
26
|
-
* are the worse failure mode.
|
|
27
|
-
*/
|
|
28
1
|
import type { Client } from '@temporalio/client';
|
|
29
2
|
import type { AttachmentInfo, AttachmentPhase, OrphanSummary } from '../types';
|
|
30
3
|
/**
|
|
@@ -123,6 +96,43 @@ export declare function buildOrphanQuery(opts: BuildOrphanQueryOpts): string;
|
|
|
123
96
|
* so the boot path can call `restoreOrphansOnce` again on a subsequent
|
|
124
97
|
* cycle to pick up missed candidates.
|
|
125
98
|
*/
|
|
99
|
+
/**
|
|
100
|
+
* 3-second cache TTL — mirrors `src/utils/hosts.ts:CACHE_TTL_MS`. Keeps
|
|
101
|
+
* rapid-fire dashboard refreshes cheap (the React query layer also has
|
|
102
|
+
* its own staleTime, this is defense in depth at the daemon edge).
|
|
103
|
+
*/
|
|
104
|
+
export declare const ORPHANS_CACHE_TTL_MS = 3000;
|
|
105
|
+
/**
|
|
106
|
+
* Test hook — never call from production code. Convention per
|
|
107
|
+
* `docs/adr/0006-test-hooks-naming.md`.
|
|
108
|
+
*/
|
|
109
|
+
export declare function __resetOrphansCacheForTests(): void;
|
|
110
|
+
/**
|
|
111
|
+
* Cluster-wide cross-host orphan listing for the dashboard (#579 / ADR
|
|
112
|
+
* follow-up). Thin wrapper over {@link queryOrphanedSessions} with
|
|
113
|
+
* `allHosts: true` and a 3-second in-process cache keyed by ensemble
|
|
114
|
+
* filter. Cache shape mirrors `src/utils/hosts.ts:listHosts`.
|
|
115
|
+
*
|
|
116
|
+
* Uses the `cleanup` deadline (30s) rather than the `boot` deadline (60s)
|
|
117
|
+
* since this is called from a user-facing read endpoint where partial
|
|
118
|
+
* results are preferable to a 60s wait.
|
|
119
|
+
*/
|
|
120
|
+
export declare function listAllOrphansCached(client: Client, opts?: {
|
|
121
|
+
ensemble?: string;
|
|
122
|
+
force?: boolean;
|
|
123
|
+
}, log?: (...args: unknown[]) => void): Promise<OrphanCandidate[]>;
|
|
124
|
+
/**
|
|
125
|
+
* Cross-host detail resolver shared by `restoreOrphansOnce`'s readonly
|
|
126
|
+
* branch (#151 `agent-tempo restore --all-hosts`) and the dashboard's
|
|
127
|
+
* `/v1/orphans` handler (#579).
|
|
128
|
+
*
|
|
129
|
+
* Returns `null` when an `ensembleFilter` is provided and the candidate
|
|
130
|
+
* is outside that ensemble (caller should `continue`). Otherwise returns
|
|
131
|
+
* the preferred-host string used by both surfaces to group / annotate:
|
|
132
|
+
* `OrphanSummary.preferredHost` first, falling back to the candidate's
|
|
133
|
+
* last-known adapter host, finally `'(unknown)'`.
|
|
134
|
+
*/
|
|
135
|
+
export declare function buildCrossHostDetail(candidate: OrphanCandidate, ensembleFilter?: string): string | null;
|
|
126
136
|
export declare function queryOrphanedSessions(client: Client, filter: OrphanQueryFilter, log?: (...args: unknown[]) => void, deadlineMs?: number): Promise<OrphanCandidate[]>;
|
|
127
137
|
/**
|
|
128
138
|
* Options for {@link restoreOrphansOnce}. `policy: 'never'` is NOT included
|
|
@@ -1,10 +1,42 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ORPHANS_CACHE_TTL_MS = void 0;
|
|
3
4
|
exports.isAdapterProcessAliveStub = isAdapterProcessAliveStub;
|
|
4
5
|
exports.buildOrphanQuery = buildOrphanQuery;
|
|
6
|
+
exports.__resetOrphansCacheForTests = __resetOrphansCacheForTests;
|
|
7
|
+
exports.listAllOrphansCached = listAllOrphansCached;
|
|
8
|
+
exports.buildCrossHostDetail = buildCrossHostDetail;
|
|
5
9
|
exports.queryOrphanedSessions = queryOrphanedSessions;
|
|
6
10
|
exports.formatRestoreOutcome = formatRestoreOutcome;
|
|
7
11
|
exports.restoreOrphansOnce = restoreOrphansOnce;
|
|
12
|
+
/**
|
|
13
|
+
* Orphan-session query — shared by `reconcileOnBoot()` in `src/daemon.ts` and
|
|
14
|
+
* the `agent-tempo restore` CLI command (`src/cli/commands.ts`).
|
|
15
|
+
*
|
|
16
|
+
* Design §10.1: a session is an **orphan** when the workflow is `Running` but
|
|
17
|
+
* no adapter process is alive to own its attachment. Two candidate shapes
|
|
18
|
+
* matter:
|
|
19
|
+
*
|
|
20
|
+
* 1. **Active-host sessions** — `AgentTempoAttachedHost = local` AND phase
|
|
21
|
+
* is `attached` / `processing` / `awaiting` / `draining`. The attachment
|
|
22
|
+
* exists but the adapter process may have died.
|
|
23
|
+
* 2. **Detached-home sessions** — `AgentTempoAttachmentState = detached` AND
|
|
24
|
+
* `AgentTempoHostname = local`. No adapter at all; the home host is us.
|
|
25
|
+
*
|
|
26
|
+
* For each candidate we query `attachmentInfo` + `orphanSummary`. If the
|
|
27
|
+
* adapter process is alive (`isAdapterProcessAlive` returns true) we skip —
|
|
28
|
+
* that's the daemon-restarted-under-a-live-adapter case, not an orphan.
|
|
29
|
+
*
|
|
30
|
+
* `isAdapterProcessAlive` is stubbed as `() => false` for v0.25.0-beta.1 per
|
|
31
|
+
* PR-E engineer brief §8 answer 1. No adapter PID file convention exists yet
|
|
32
|
+
* (only the daemon process has `daemon.pid`; Copilot bridges write their own
|
|
33
|
+
* per-session file but Claude Code CLI does not). A conservative always-dead
|
|
34
|
+
* stub is safe: false negatives cost an extra `claimAttachment` attempt,
|
|
35
|
+
* which the caller catches as `AttachmentConflict` and backs off silently
|
|
36
|
+
* (design §10.6). False positives — skipping a session that needs restore —
|
|
37
|
+
* are the worse failure mode.
|
|
38
|
+
*/
|
|
39
|
+
const os_1 = require("os");
|
|
8
40
|
const signals_1 = require("../workflows/signals");
|
|
9
41
|
const config_1 = require("../config");
|
|
10
42
|
const client_1 = require("../client");
|
|
@@ -119,6 +151,64 @@ function buildOrphanQuery(opts) {
|
|
|
119
151
|
* so the boot path can call `restoreOrphansOnce` again on a subsequent
|
|
120
152
|
* cycle to pick up missed candidates.
|
|
121
153
|
*/
|
|
154
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
155
|
+
// #579 — cached cluster-wide orphan listing for the dashboard
|
|
156
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
157
|
+
/**
|
|
158
|
+
* 3-second cache TTL — mirrors `src/utils/hosts.ts:CACHE_TTL_MS`. Keeps
|
|
159
|
+
* rapid-fire dashboard refreshes cheap (the React query layer also has
|
|
160
|
+
* its own staleTime, this is defense in depth at the daemon edge).
|
|
161
|
+
*/
|
|
162
|
+
exports.ORPHANS_CACHE_TTL_MS = 3_000;
|
|
163
|
+
const orphansCache = new Map();
|
|
164
|
+
/**
 * Test-only hook: empties the in-process orphans cache so each test starts
 * from a cold cache. Not for production callers (naming convention per
 * `docs/adr/0006-test-hooks-naming.md`).
 */
function __resetOrphansCacheForTests() {
    orphansCache.clear();
}
|
|
171
|
+
/**
 * Cluster-wide cross-host orphan listing for the dashboard (#579 / ADR
 * follow-up). Thin wrapper over {@link queryOrphanedSessions} with
 * `allHosts: true` and a short in-process cache (`ORPHANS_CACHE_TTL_MS`)
 * keyed by the ensemble filter, so a filtered call and the unfiltered call
 * never overwrite each other.
 *
 * The query runs under `VISIBILITY_DEADLINES_MS.orphanQueryCleanup` rather
 * than the default boot deadline — per the original note, a user-facing
 * read endpoint prefers partial results over a long wait.
 *
 * @param client        Temporal client handed to `queryOrphanedSessions`.
 * @param opts.ensemble Optional ensemble name to narrow the query.
 * @param opts.force    When truthy, bypass the cache and re-query.
 * @param log           Optional logger forwarded to the query.
 * @returns The orphan candidates (possibly served from cache).
 */
async function listAllOrphansCached(client, opts = {}, log = () => { }) {
    const ttlMs = exports.ORPHANS_CACHE_TTL_MS;
    // Filtered keys are prefixed so an ensemble literally named `__all__`
    // cannot alias the unfiltered entry.
    const key = opts.ensemble == null ? '__all__' : `ensemble:${opts.ensemble}`;
    const now = Date.now();
    const cached = orphansCache.get(key);
    if (!opts.force && cached && now - cached.timestamp < ttlMs) {
        return cached.candidates;
    }
    const candidates = await queryOrphanedSessions(client, {
        hostname: (0, os_1.hostname)(),
        allHosts: true,
        ...(opts.ensemble !== undefined ? { ensemble: opts.ensemble } : {}),
    }, log, visibility_deadline_1.VISIBILITY_DEADLINES_MS.orphanQueryCleanup);
    // Keys come from a caller-supplied filter string, so drop expired entries
    // on every write to keep the map from growing without bound.
    const pruneAt = Date.now();
    for (const [staleKey, entry] of orphansCache) {
        if (pruneAt - entry.timestamp >= ttlMs) {
            orphansCache.delete(staleKey);
        }
    }
    // Entry age is still measured from when the request started (unchanged).
    orphansCache.set(key, { timestamp: now, candidates });
    return candidates;
}
|
|
196
|
+
/**
 * Resolve the host label used to group / annotate a cross-host orphan.
 * Called by the readonly branch of `restoreOrphansOnce`; exported for other
 * surfaces that need the same label.
 *
 * Returns `null` when `ensembleFilter` is given and the candidate belongs to
 * a different ensemble (the caller skips it). Otherwise returns, in order of
 * preference: `summary.preferredHost`, `summary.lastAdapter.hostname`, or
 * the literal `'(unknown)'`.
 */
function buildCrossHostDetail(candidate, ensembleFilter) {
    const filteredOut = Boolean(ensembleFilter) && candidate.summary.ensemble !== ensembleFilter;
    if (filteredOut) {
        return null;
    }
    const { summary } = candidate;
    const preferred = summary.preferredHost;
    if (preferred !== null && preferred !== undefined) {
        return preferred;
    }
    const lastSeen = summary.lastAdapter?.hostname;
    return lastSeen ?? '(unknown)';
}
|
|
122
212
|
async function queryOrphanedSessions(client, filter, log = () => { }, deadlineMs = visibility_deadline_1.VISIBILITY_DEADLINES_MS.orphanQueryBoot) {
|
|
123
213
|
const isAlive = filter.isAdapterProcessAlive ?? isAdapterProcessAliveStub;
|
|
124
214
|
const query = buildOrphanQuery({
|
|
@@ -267,12 +357,9 @@ async function restoreOrphansOnce(client, opts, log = () => { }) {
|
|
|
267
357
|
// as orphan candidate) and ensemble narrowing are both applied here too.
|
|
268
358
|
if (allHostsReadonly) {
|
|
269
359
|
for (const o of orphans) {
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
// candidate's home hostname (from `OrphanSummary.lastAdapter.hostname`)
|
|
274
|
-
// and finally `(unknown)`. The CLI groups by this value.
|
|
275
|
-
const detail = o.summary.preferredHost ?? o.summary.lastAdapter?.hostname ?? '(unknown)';
|
|
360
|
+
const detail = buildCrossHostDetail(o, opts.ensemble);
|
|
361
|
+
if (detail === null)
|
|
362
|
+
continue; // ensemble narrowing excluded it
|
|
276
363
|
record(o, { kind: 'skipped', reason: 'crossHost', detail });
|
|
277
364
|
}
|
|
278
365
|
return summary;
|
package/dist/tui/index.js
CHANGED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test hook — never call from production code. Convention per
|
|
3
|
+
* `docs/adr/0006-test-hooks-naming.md`.
|
|
4
|
+
*/
|
|
5
|
+
export declare function __resetBgPreflightCacheForTests(): void;
|
|
6
|
+
export interface BgPreflightResult {
|
|
7
|
+
ok: boolean;
|
|
8
|
+
/** Populated when `ok === false`. Single line, ready to surface to the user. */
|
|
9
|
+
error?: string;
|
|
10
|
+
/** True when the result was served from the daemon-lifetime cache. */
|
|
11
|
+
cached: boolean;
|
|
12
|
+
}
|
|
13
|
+
/**
|
|
14
|
+
* Probe whether `claude --bg` can spawn in the given cwd without prompting
|
|
15
|
+
* the operator for permission acceptance. First call probes; subsequent
|
|
16
|
+
* calls for the same `(host, cwd)` hit the in-process cache.
|
|
17
|
+
*
|
|
18
|
+
* Returns `{ ok: true }` when the dry-run succeeded (exit 0). On any other
|
|
19
|
+
* exit code (or spawn-side ENOENT), returns `{ ok: false, error: ... }`
|
|
20
|
+
* with an actionable message. Never throws — callers handle the result.
|
|
21
|
+
*/
|
|
22
|
+
export declare function bgPreflight(cwd: string, options?: {
|
|
23
|
+
claudeBin?: string;
|
|
24
|
+
host?: string;
|
|
25
|
+
}): BgPreflightResult;
|