agent-tempo 1.7.0-beta.12 → 1.7.0-beta.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/package.json +1 -1
- package/dist/activities/outbox.js +10 -0
- package/dist/activities/resolve.d.ts +83 -18
- package/dist/activities/resolve.js +139 -26
- package/dist/cli/commands.js +26 -2
- package/dist/spawn.d.ts +27 -23
- package/dist/spawn.js +40 -32
- package/dist/tools/cue.js +12 -0
- package/dist/tools/ensemble.js +33 -2
- package/package.json +1 -1
package/dashboard/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-tempo-dashboard",
|
|
3
3
|
"private": true,
|
|
4
|
-
"version": "1.7.0-beta.12",
|
|
4
|
+
"version": "1.7.0-beta.13",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"description": "Web dashboard for agent-tempo. Bundled into the npm package; served by the daemon at /dashboard/*.",
|
|
7
7
|
"scripts": {
|
package/dist/activities/outbox.js
CHANGED
|
@@ -46,6 +46,7 @@ const git_info_1 = require("../git-info");
|
|
|
46
46
|
const spawn_1 = require("../spawn");
|
|
47
47
|
const config_2 = require("../config");
|
|
48
48
|
const resolve_1 = require("./resolve");
|
|
49
|
+
const visibility_deadline_1 = require("../utils/visibility-deadline");
|
|
49
50
|
const action_counters_1 = require("../utils/action-counters");
|
|
50
51
|
const search_attributes_1 = require("../utils/search-attributes");
|
|
51
52
|
const agent_types_1 = require("../ensemble/agent-types");
|
|
@@ -86,6 +87,15 @@ function isRetryableTemporalError(err) {
|
|
|
86
87
|
// to call unconditionally.
|
|
87
88
|
if (err instanceof activity_1.ApplicationFailure)
|
|
88
89
|
return false;
|
|
90
|
+
// #845 Mode A: a truncated visibility scan (resolveSession's deadline
|
|
91
|
+
// tripped mid-scan, #336/#529) is a LATENCY failure — the target may well
|
|
92
|
+
// exist; the scan just didn't finish. Treat it as transient so the
|
|
93
|
+
// activity retry policy re-runs resolveSession with a fresh 10s deadline
|
|
94
|
+
// (backoff-bounded) instead of collapsing it into a permanent
|
|
95
|
+
// "No active session found". This must NOT re-add an in-resolver retry
|
|
96
|
+
// loop — the bounding lives in Temporal's policy, not a hot-path scan.
|
|
97
|
+
if ((0, visibility_deadline_1.isVisibilityTimeout)(err))
|
|
98
|
+
return true;
|
|
89
99
|
const e = err;
|
|
90
100
|
const name = e?.name ?? '';
|
|
91
101
|
const msg = e?.message ?? '';
|
package/dist/activities/resolve.d.ts
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
import { Client, WorkflowHandle } from '@temporalio/client';
|
|
2
2
|
import { AttachmentPhase } from '../types';
|
|
3
|
+
/**
|
|
4
|
+
* Mode-B describe-by-id timeout (#845). The strongly-consistent
|
|
5
|
+
* `describe()` fallback on `resolveSession`'s not-found branch is a single
|
|
6
|
+
* O(1) RPC; 2s mirrors {@link DEFAULT_QUERY_TIMEOUT_MS} — two orders of
|
|
7
|
+
* magnitude over a healthy describe — so a wedged frontend can't re-hang
|
|
8
|
+
* the outbox loop the visibility deadline (#336/#529) was added to bound.
|
|
9
|
+
*/
|
|
10
|
+
export declare const RESOLVE_DESCRIBE_TIMEOUT_MS = 2000;
|
|
3
11
|
/** Shared query for listing running session workflows. Exported for the
|
|
4
12
|
* ensemble-scoped variants in `client/core.ts` (#751). */
|
|
5
13
|
export declare const SESSION_LIST_QUERY = "WorkflowType = \"agentSessionWorkflow\" AND ExecutionStatus = \"Running\"";
|
|
@@ -18,15 +26,36 @@ export declare const SESSION_LIST_QUERY = "WorkflowType = \"agentSessionWorkflow
|
|
|
18
26
|
* in `scanEnsembleSessionsCloud`. Enforced by
|
|
19
27
|
* tests/conformance/decision-path-fence.test.ts.
|
|
20
28
|
*
|
|
21
|
-
* **
|
|
22
|
-
* `VISIBILITY_DEADLINES_MS.resolveSession` (default 10s). On
|
|
23
|
-
* throws `VisibilityIteratorTimeoutError` rather than returning
|
|
24
|
-
* — silent `null` on a partially-scanned set would be
|
|
25
|
-
* from "definitely not found
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
*
|
|
29
|
+
* **Mode A — deadline truncation (#336/#529):** the visibility iterator is
|
|
30
|
+
* bounded by `VISIBILITY_DEADLINES_MS.resolveSession` (default 10s). On
|
|
31
|
+
* timeout it throws `VisibilityIteratorTimeoutError` rather than returning
|
|
32
|
+
* `null` — silent `null` on a partially-scanned set would be
|
|
33
|
+
* indistinguishable from "definitely not found." The throw is classified
|
|
34
|
+
* **retryable** by the outbox activity (`isRetryableTemporalError`), so
|
|
35
|
+
* Temporal's activity retry policy re-runs the lookup with a fresh
|
|
36
|
+
* deadline rather than collapsing it to a permanent "player not found."
|
|
37
|
+
* Synchronous tool/CLI callers surface it as a distinct "resolution
|
|
38
|
+
* incomplete — retry," never "not found."
|
|
39
|
+
*
|
|
40
|
+
* **Mode B — visibility-index lag (#845):** `list()` can complete normally
|
|
41
|
+
* (no throw) yet miss a freshly-started workflow because the visibility
|
|
42
|
+
* index trails the workflow store (observed live as a 3/8→8/8 roster
|
|
43
|
+
* during post-restart worker warmup). An early-exhausting scan is NOT
|
|
44
|
+
* proof of absence. So on the not-found branch we do **exactly one**
|
|
45
|
+
* strongly-consistent `describe()` against the *derived* workflow id —
|
|
46
|
+
* an O(1) read by primary key that bypasses the lagging index. This is a
|
|
47
|
+
* point lookup, NOT a re-scan: it cannot re-introduce the unbounded-scan
|
|
48
|
+
* hang the deadline guard was added to prevent.
|
|
49
|
+
*
|
|
50
|
+
* **Documented Mode-B limitation:** the derived id
|
|
51
|
+
* `agent-session-{ensemble}-{playerName}` is minted from a player's
|
|
52
|
+
* INITIAL name at spawn; `set_name` does not change the workflow id. So
|
|
53
|
+
* describe-by-derived-id false-negatives for a player that was both
|
|
54
|
+
* RENAMED and is currently index-lagged — it falls back to `null` (looks
|
|
55
|
+
* absent) for that narrow intersection. Accepted by design: it closes the
|
|
56
|
+
* gap for the cold-boot/warmup incident class (nobody renames mid-boot),
|
|
57
|
+
* and a second full re-scan to cover renamed∩lagged would put scan cost on
|
|
58
|
+
* every genuine typo'd-name lookup. See issue #845.
|
|
30
59
|
*/
|
|
31
60
|
export declare function resolveSession(client: Client, ensemble: string, playerName: string): Promise<WorkflowHandle | null>;
|
|
32
61
|
/** Info returned for each session by scanEnsembleSessions. */
|
|
@@ -82,19 +111,55 @@ export interface EnsembleSessionInfo {
|
|
|
82
111
|
*/
|
|
83
112
|
export declare function scanEnsembleSessionsCloud(client: Client, ensemble: string, log?: (...args: unknown[]) => void): Promise<EnsembleSessionInfo[]>;
|
|
84
113
|
/**
|
|
85
|
-
*
|
|
86
|
-
*
|
|
87
|
-
*
|
|
114
|
+
* Result of {@link scanEnsembleSessionsWithStatus} — the session rows plus
|
|
115
|
+
* whether the visibility scan completed or was cut short (#845).
|
|
116
|
+
*
|
|
117
|
+
* `truncated` is the Mode-A signal: a `VisibilityIteratorTimeoutError`
|
|
118
|
+
* fired (the wall-clock deadline tripped mid-scan), so `sessions` is a
|
|
119
|
+
* partial snapshot, NOT the full roster. Callers that render a roster (the
|
|
120
|
+
* `ensemble` tool) MUST surface this so a partial set is never mistaken
|
|
121
|
+
* for a complete one. NOTE: this does NOT cover Mode B (visibility-index
|
|
122
|
+
* lag) — there the scan completes normally and `truncated` is `false` even
|
|
123
|
+
* though a freshly-started workflow may be missing; that's best-effort by
|
|
124
|
+
* design and self-heals on the next tick.
|
|
125
|
+
*
|
|
126
|
+
* `scanned` is the number of running workflows the iterator visited before
|
|
127
|
+
* completing or timing out — useful for warn logs ("partial: 3 of ≥N").
|
|
128
|
+
*/
|
|
129
|
+
export interface EnsembleScanResult {
|
|
130
|
+
sessions: EnsembleSessionInfo[];
|
|
131
|
+
truncated: boolean;
|
|
132
|
+
scanned: number;
|
|
133
|
+
}
|
|
134
|
+
/**
|
|
135
|
+
* Scan all running session workflows in an ensemble, reporting whether the
|
|
136
|
+
* scan completed or was truncated by the visibility deadline (#845).
|
|
137
|
+
*
|
|
138
|
+
* This is the single source of truth for the local-profile ensemble scan;
|
|
139
|
+
* {@link scanEnsembleSessions} is a thin array-facade over it that drops
|
|
140
|
+
* the status fields for the many callers that don't need them.
|
|
88
141
|
*
|
|
89
142
|
* **Deadline (#336/#529):** the iterator is bounded by
|
|
90
|
-
* `VISIBILITY_DEADLINES_MS.scanEnsembleSessions` (default 15s). On
|
|
91
|
-
*
|
|
92
|
-
*
|
|
93
|
-
*
|
|
94
|
-
* best-effort snapshot that the next tick / re-invocation will fill in.
|
|
143
|
+
* `VISIBILITY_DEADLINES_MS.scanEnsembleSessions` (default 15s). On timeout
|
|
144
|
+
* the accumulated rows are returned with `truncated: true` and a warn log
|
|
145
|
+
* — the scan is **partial-tolerant by design**, but the truncation is now
|
|
146
|
+
* SIGNALLED rather than silent so a roster renderer can flag it.
|
|
95
147
|
*
|
|
96
148
|
* T0.1 (#748): this legacy shape is the `costProfile: 'local'` path —
|
|
97
|
-
* byte-identical to pre-#748 behavior. The cloud profile uses
|
|
149
|
+
* byte-identical row data to pre-#748 behavior. The cloud profile uses
|
|
98
150
|
* {@link scanEnsembleSessionsCloud}.
|
|
99
151
|
*/
|
|
152
|
+
export declare function scanEnsembleSessionsWithStatus(client: Client, ensemble: string, log?: (...args: unknown[]) => void): Promise<EnsembleScanResult>;
|
|
153
|
+
/**
|
|
154
|
+
* Scan all running session workflows in an ensemble — array facade over
|
|
155
|
+
* {@link scanEnsembleSessionsWithStatus}.
|
|
156
|
+
*
|
|
157
|
+
* Returns just the session rows; the truncation/scan-status fields are
|
|
158
|
+
* dropped. This is the byte-identical shape the maestro refresh activity,
|
|
159
|
+
* the #785 upgrade-snapshot, and the other roster consumers already depend
|
|
160
|
+
* on — keeping it a thin delegate means the truncation-signalling work
|
|
161
|
+
* (#845) does NOT ripple through those call sites. Callers that need to
|
|
162
|
+
* know whether the scan was complete (the `ensemble` tool) call the rich
|
|
163
|
+
* sibling directly.
|
|
164
|
+
*/
|
|
100
165
|
export declare function scanEnsembleSessions(client: Client, ensemble: string, log?: (...args: unknown[]) => void): Promise<EnsembleSessionInfo[]>;
|
package/dist/activities/resolve.js
CHANGED
|
@@ -1,13 +1,23 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.SESSION_LIST_QUERY = void 0;
|
|
3
|
+
exports.SESSION_LIST_QUERY = exports.RESOLVE_DESCRIBE_TIMEOUT_MS = void 0;
|
|
4
4
|
exports.resolveSession = resolveSession;
|
|
5
5
|
exports.scanEnsembleSessionsCloud = scanEnsembleSessionsCloud;
|
|
6
|
+
exports.scanEnsembleSessionsWithStatus = scanEnsembleSessionsWithStatus;
|
|
6
7
|
exports.scanEnsembleSessions = scanEnsembleSessions;
|
|
8
|
+
const config_1 = require("../config");
|
|
7
9
|
const search_attributes_1 = require("../utils/search-attributes");
|
|
8
10
|
const signals_1 = require("../workflows/signals");
|
|
9
11
|
const query_timeout_1 = require("../utils/query-timeout");
|
|
10
12
|
const visibility_deadline_1 = require("../utils/visibility-deadline");
|
|
13
|
+
/**
|
|
14
|
+
* Mode-B describe-by-id timeout (#845). The strongly-consistent
|
|
15
|
+
* `describe()` fallback on `resolveSession`'s not-found branch is a single
|
|
16
|
+
* O(1) RPC; 2s mirrors {@link DEFAULT_QUERY_TIMEOUT_MS} — two orders of
|
|
17
|
+
* magnitude over a healthy describe — so a wedged frontend can't re-hang
|
|
18
|
+
* the outbox loop the visibility deadline (#336/#529) was added to bound.
|
|
19
|
+
*/
|
|
20
|
+
exports.RESOLVE_DESCRIBE_TIMEOUT_MS = 2000;
|
|
11
21
|
/** Shared query for listing running session workflows. Exported for the
|
|
12
22
|
* ensemble-scoped variants in `client/core.ts` (#751). */
|
|
13
23
|
exports.SESSION_LIST_QUERY = `WorkflowType = "agentSessionWorkflow" AND ExecutionStatus = "Running"`;
|
|
@@ -26,15 +36,36 @@ exports.SESSION_LIST_QUERY = `WorkflowType = "agentSessionWorkflow" AND Executio
|
|
|
26
36
|
* in `scanEnsembleSessionsCloud`. Enforced by
|
|
27
37
|
* tests/conformance/decision-path-fence.test.ts.
|
|
28
38
|
*
|
|
29
|
-
* **
|
|
30
|
-
* `VISIBILITY_DEADLINES_MS.resolveSession` (default 10s). On
|
|
31
|
-
* throws `VisibilityIteratorTimeoutError` rather than returning
|
|
32
|
-
* — silent `null` on a partially-scanned set would be
|
|
33
|
-
* from "definitely not found
|
|
34
|
-
*
|
|
35
|
-
*
|
|
36
|
-
*
|
|
37
|
-
*
|
|
39
|
+
* **Mode A — deadline truncation (#336/#529):** the visibility iterator is
|
|
40
|
+
* bounded by `VISIBILITY_DEADLINES_MS.resolveSession` (default 10s). On
|
|
41
|
+
* timeout it throws `VisibilityIteratorTimeoutError` rather than returning
|
|
42
|
+
* `null` — silent `null` on a partially-scanned set would be
|
|
43
|
+
* indistinguishable from "definitely not found." The throw is classified
|
|
44
|
+
* **retryable** by the outbox activity (`isRetryableTemporalError`), so
|
|
45
|
+
* Temporal's activity retry policy re-runs the lookup with a fresh
|
|
46
|
+
* deadline rather than collapsing it to a permanent "player not found."
|
|
47
|
+
* Synchronous tool/CLI callers surface it as a distinct "resolution
|
|
48
|
+
* incomplete — retry," never "not found."
|
|
49
|
+
*
|
|
50
|
+
* **Mode B — visibility-index lag (#845):** `list()` can complete normally
|
|
51
|
+
* (no throw) yet miss a freshly-started workflow because the visibility
|
|
52
|
+
* index trails the workflow store (observed live as a 3/8→8/8 roster
|
|
53
|
+
* during post-restart worker warmup). An early-exhausting scan is NOT
|
|
54
|
+
* proof of absence. So on the not-found branch we do **exactly one**
|
|
55
|
+
* strongly-consistent `describe()` against the *derived* workflow id —
|
|
56
|
+
* an O(1) read by primary key that bypasses the lagging index. This is a
|
|
57
|
+
* point lookup, NOT a re-scan: it cannot re-introduce the unbounded-scan
|
|
58
|
+
* hang the deadline guard was added to prevent.
|
|
59
|
+
*
|
|
60
|
+
* **Documented Mode-B limitation:** the derived id
|
|
61
|
+
* `agent-session-{ensemble}-{playerName}` is minted from a player's
|
|
62
|
+
* INITIAL name at spawn; `set_name` does not change the workflow id. So
|
|
63
|
+
* describe-by-derived-id false-negatives for a player that was both
|
|
64
|
+
* RENAMED and is currently index-lagged — it falls back to `null` (looks
|
|
65
|
+
* absent) for that narrow intersection. Accepted by design: it closes the
|
|
66
|
+
* gap for the cold-boot/warmup incident class (nobody renames mid-boot),
|
|
67
|
+
* and a second full re-scan to cover renamed∩lagged would put scan cost on
|
|
68
|
+
* every genuine typo'd-name lookup. See issue #845.
|
|
38
69
|
*/
|
|
39
70
|
async function resolveSession(client, ensemble, playerName) {
|
|
40
71
|
for await (const wf of (0, visibility_deadline_1.iterateWithDeadline)(client.workflow.list({ query: exports.SESSION_LIST_QUERY }), visibility_deadline_1.VISIBILITY_DEADLINES_MS.resolveSession, 'resolveSession')) {
|
|
@@ -49,16 +80,77 @@ async function resolveSession(client, ensemble, playerName) {
|
|
|
49
80
|
}
|
|
50
81
|
}
|
|
51
82
|
catch (err) {
|
|
52
|
-
// Re-throw deadline timeouts — callers that wrap us in
|
|
53
|
-
//
|
|
54
|
-
//
|
|
55
|
-
// outbox logs / user-facing tool errors.
|
|
83
|
+
// Re-throw deadline timeouts (Mode A) — callers that wrap us in
|
|
84
|
+
// try/catch treat the typed throw as a soft "lookup timed out" path,
|
|
85
|
+
// distinct from the not-found `null` below.
|
|
56
86
|
if ((0, visibility_deadline_1.isVisibilityTimeout)(err))
|
|
57
87
|
throw err;
|
|
58
88
|
// Workflow may have just completed, or worker is wedged (#433) — skip
|
|
59
89
|
}
|
|
60
90
|
}
|
|
61
|
-
|
|
91
|
+
// Mode B (#845): the scan completed without a match, but the visibility
|
|
92
|
+
// index may simply be lagging a just-started workflow. One strongly-
|
|
93
|
+
// consistent describe-by-derived-id disambiguates "index lag" from
|
|
94
|
+
// "genuinely absent" without a second scan.
|
|
95
|
+
return resolveByDerivedId(client, ensemble, playerName);
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Mode-B (#845) strongly-consistent fallback for {@link resolveSession}.
|
|
99
|
+
*
|
|
100
|
+
* Reads the session workflow by its *derived* id
|
|
101
|
+
* (`agent-session-{ensemble}-{playerName}`) via a single bounded
|
|
102
|
+
* `describe()` — a primary-key lookup that bypasses the eventually-
|
|
103
|
+
* consistent visibility index. Returns the handle whenever the execution
|
|
104
|
+
* is `RUNNING`; otherwise `null` (genuinely absent, terminated/completed,
|
|
105
|
+
* renamed-false-negative, or describe timed out).
|
|
106
|
+
*
|
|
107
|
+
* Deliberately RUNNING-only — NO attachment-phase filter (#845 JC2): a
|
|
108
|
+
* `gone` player has a LIVE workflow with a terminal adapter, which the
|
|
109
|
+
* #822/#834 deliverability contract handles as warn-but-queue, not
|
|
110
|
+
* "not found". Filtering it here would regress #834 for the lagged-gone
|
|
111
|
+
* window and diverge from the main scan loop (which has no phase filter).
|
|
112
|
+
*/
|
|
113
|
+
async function resolveByDerivedId(client, ensemble, playerName) {
|
|
114
|
+
let timer;
|
|
115
|
+
try {
|
|
116
|
+
// `getHandle` is a lazy, no-RPC handle construction in the real client;
|
|
117
|
+
// kept inside the try purely so a defensive throw can never escape the
|
|
118
|
+
// fallback (it must only ever upgrade a null to a handle, never error).
|
|
119
|
+
const handle = client.workflow.getHandle((0, config_1.sessionWorkflowId)(ensemble, playerName));
|
|
120
|
+
const timeout = new Promise((_, reject) => {
|
|
121
|
+
timer = setTimeout(() => reject(new Error('describe-by-id timed out')), exports.RESOLVE_DESCRIBE_TIMEOUT_MS);
|
|
122
|
+
timer.unref?.();
|
|
123
|
+
});
|
|
124
|
+
const desc = await Promise.race([handle.describe(), timeout]);
|
|
125
|
+
// Only a live (RUNNING) execution is a valid resolve target. A
|
|
126
|
+
// COMPLETED/TERMINATED latest run at this id means the player is gone,
|
|
127
|
+
// or the id was reused by a since-closed run → null. A RUNNING run
|
|
128
|
+
// under a reused id is legitimately the current player → return it.
|
|
129
|
+
//
|
|
130
|
+
// No attachment-phase filter (#845 JC2, architect ruling): the main
|
|
131
|
+
// scan loop returns the handle for ANY running session — phase=`gone`
|
|
132
|
+
// included — and #822/#834 treat `gone` as warn-but-QUEUE (the cue
|
|
133
|
+
// durably queues and auto-redelivers on re-attach), NOT "not found".
|
|
134
|
+
// Returning null for a lagged-`gone` player would bypass #822, re-
|
|
135
|
+
// introduce the false-not-found #834 fixed, and make resolution depend
|
|
136
|
+
// on visibility-index timing. The "don't deliver to a torn-down
|
|
137
|
+
// adapter" concern lives at the deliverability layer, not here.
|
|
138
|
+
if (desc.status.name !== 'RUNNING')
|
|
139
|
+
return null;
|
|
140
|
+
return handle;
|
|
141
|
+
}
|
|
142
|
+
catch {
|
|
143
|
+
// NotFound → genuinely absent (or the renamed∩lagged false-negative
|
|
144
|
+
// documented on resolveSession). Timeout/other → treat as absent; the
|
|
145
|
+
// caller's not-found path (or the activity retry policy for Mode A)
|
|
146
|
+
// handles it. We never throw from the fallback — it can only upgrade a
|
|
147
|
+
// null to a found handle, never turn a clean lookup into an error.
|
|
148
|
+
return null;
|
|
149
|
+
}
|
|
150
|
+
finally {
|
|
151
|
+
if (timer)
|
|
152
|
+
clearTimeout(timer);
|
|
153
|
+
}
|
|
62
154
|
}
|
|
63
155
|
/**
|
|
64
156
|
* T0.1 (#748) — cloud-profile ensemble scan. Observation path ONLY (see the
|
|
@@ -162,25 +254,30 @@ async function scanEnsembleSessionsCloud(client, ensemble, log = () => { }) {
|
|
|
162
254
|
return sessions;
|
|
163
255
|
}
|
|
164
256
|
/**
|
|
165
|
-
* Scan all running session workflows in an ensemble
|
|
166
|
-
*
|
|
167
|
-
*
|
|
257
|
+
* Scan all running session workflows in an ensemble, reporting whether the
|
|
258
|
+
* scan completed or was truncated by the visibility deadline (#845).
|
|
259
|
+
*
|
|
260
|
+
* This is the single source of truth for the local-profile ensemble scan;
|
|
261
|
+
* {@link scanEnsembleSessions} is a thin array-facade over it that drops
|
|
262
|
+
* the status fields for the many callers that don't need them.
|
|
168
263
|
*
|
|
169
264
|
* **Deadline (#336/#529):** the iterator is bounded by
|
|
170
|
-
* `VISIBILITY_DEADLINES_MS.scanEnsembleSessions` (default 15s). On
|
|
171
|
-
*
|
|
172
|
-
*
|
|
173
|
-
*
|
|
174
|
-
* best-effort snapshot that the next tick / re-invocation will fill in.
|
|
265
|
+
* `VISIBILITY_DEADLINES_MS.scanEnsembleSessions` (default 15s). On timeout
|
|
266
|
+
* the accumulated rows are returned with `truncated: true` and a warn log
|
|
267
|
+
* — the scan is **partial-tolerant by design**, but the truncation is now
|
|
268
|
+
* SIGNALLED rather than silent so a roster renderer can flag it.
|
|
175
269
|
*
|
|
176
270
|
* T0.1 (#748): this legacy shape is the `costProfile: 'local'` path —
|
|
177
|
-
* byte-identical to pre-#748 behavior. The cloud profile uses
|
|
271
|
+
* byte-identical row data to pre-#748 behavior. The cloud profile uses
|
|
178
272
|
* {@link scanEnsembleSessionsCloud}.
|
|
179
273
|
*/
|
|
180
|
-
async function scanEnsembleSessions(client, ensemble, log = () => { }) {
|
|
274
|
+
async function scanEnsembleSessionsWithStatus(client, ensemble, log = () => { }) {
|
|
181
275
|
const sessions = [];
|
|
276
|
+
let truncated = false;
|
|
277
|
+
let scanned = 0;
|
|
182
278
|
try {
|
|
183
279
|
for await (const workflow of (0, visibility_deadline_1.iterateWithDeadline)(client.workflow.list({ query: exports.SESSION_LIST_QUERY }), visibility_deadline_1.VISIBILITY_DEADLINES_MS.scanEnsembleSessions, 'scanEnsembleSessions')) {
|
|
280
|
+
scanned++;
|
|
184
281
|
try {
|
|
185
282
|
const handle = client.workflow.getHandle(workflow.workflowId);
|
|
186
283
|
// Issue #433 — bound the metadata + part queries so a single wedged
|
|
@@ -234,11 +331,27 @@ async function scanEnsembleSessions(client, ensemble, log = () => { }) {
|
|
|
234
331
|
}
|
|
235
332
|
catch (err) {
|
|
236
333
|
if ((0, visibility_deadline_1.isVisibilityTimeout)(err)) {
|
|
334
|
+
truncated = true;
|
|
237
335
|
log(`scanEnsembleSessions: ${err.message} — returning partial (${sessions.length} sessions)`);
|
|
238
336
|
}
|
|
239
337
|
else {
|
|
240
338
|
throw err;
|
|
241
339
|
}
|
|
242
340
|
}
|
|
243
|
-
return sessions;
|
|
341
|
+
return { sessions, truncated, scanned };
|
|
342
|
+
}
|
|
343
|
+
/**
|
|
344
|
+
* Scan all running session workflows in an ensemble — array facade over
|
|
345
|
+
* {@link scanEnsembleSessionsWithStatus}.
|
|
346
|
+
*
|
|
347
|
+
* Returns just the session rows; the truncation/scan-status fields are
|
|
348
|
+
* dropped. This is the byte-identical shape the maestro refresh activity,
|
|
349
|
+
* the #785 upgrade-snapshot, and the other roster consumers already depend
|
|
350
|
+
* on — keeping it a thin delegate means the truncation-signalling work
|
|
351
|
+
* (#845) does NOT ripple through those call sites. Callers that need to
|
|
352
|
+
* know whether the scan was complete (the `ensemble` tool) call the rich
|
|
353
|
+
* sibling directly.
|
|
354
|
+
*/
|
|
355
|
+
async function scanEnsembleSessions(client, ensemble, log = () => { }) {
|
|
356
|
+
return (await scanEnsembleSessionsWithStatus(client, ensemble, log)).sessions;
|
|
244
357
|
}
|
package/dist/cli/commands.js
CHANGED
|
@@ -65,6 +65,7 @@ const croner_1 = require("croner");
|
|
|
65
65
|
const client_1 = require("@temporalio/client");
|
|
66
66
|
const spawn_1 = require("../spawn");
|
|
67
67
|
const probe_1 = require("../pi/probe");
|
|
68
|
+
const install_1 = require("../pi/install");
|
|
68
69
|
const config_1 = require("../config");
|
|
69
70
|
const git_info_1 = require("../git-info");
|
|
70
71
|
const connection_1 = require("../connection");
|
|
@@ -1318,10 +1319,33 @@ async function up(opts) {
|
|
|
1318
1319
|
if (!process.env.ANTHROPIC_API_KEY) {
|
|
1319
1320
|
out.warn('ANTHROPIC_API_KEY is not set — the Pi conductor will fall back to Pi\'s own auth/default model. Set it if Pi needs an Anthropic key.');
|
|
1320
1321
|
}
|
|
1322
|
+
// #825 — extension-registration guard (mirrors command-center's #820 Bug-2
|
|
1323
|
+
// guard). `up --agent pi` no longer passes an inline `-e` (that risked a
|
|
1324
|
+
// divergent-copy double-load, #825); it now relies on the player extension
|
|
1325
|
+
// being registered in Pi's settings.json. On a box that never ran `install-pi`,
|
|
1326
|
+
// a plain `pi` would launch with NO extension — no claim/heartbeat, a silent
|
|
1327
|
+
// non-conductor (the #820 Bug-2 failure, transplanted to the conductor). So
|
|
1328
|
+
// auto-install idempotently before spawning; fail loud with the manual command
|
|
1329
|
+
// if the write fails. (Checks the GLOBAL settings.json, like command-center; a
|
|
1330
|
+
// user who ran `install-pi --project` still works — `pi` loads the project
|
|
1331
|
+
// path and the same realpath dedupes, so the redundant global install is a
|
|
1332
|
+
// harmless idempotent write, never a second load.)
|
|
1333
|
+
if (!(0, install_1.arePiExtensionsRegistered)()) {
|
|
1334
|
+
try {
|
|
1335
|
+
const result = (0, install_1.installPiExtensions)();
|
|
1336
|
+
out.log(out.dim(` Registered the Pi extensions in ${result.settingsPath} (first-run install-pi).`));
|
|
1337
|
+
}
|
|
1338
|
+
catch (err) {
|
|
1339
|
+
out.error('Cannot start Pi conductor — the Pi extensions are not registered and auto-install failed: ' +
|
|
1340
|
+
`${err instanceof Error ? err.message : String(err)}. Run \`agent-tempo install-pi\` manually, then retry.`);
|
|
1341
|
+
process.exit(1);
|
|
1342
|
+
}
|
|
1343
|
+
}
|
|
1321
1344
|
let piSpawn;
|
|
1322
1345
|
try {
|
|
1323
|
-
// resolvePiInteractiveBinary
|
|
1324
|
-
//
|
|
1346
|
+
// resolvePiInteractiveBinary throws fail-clean (Pi CLI missing) — caught
|
|
1347
|
+
// here, no terminal launched. #825: no more `-e`/extension resolution — the
|
|
1348
|
+
// player extension loads from settings.json, registered + guarded just above.
|
|
1325
1349
|
piSpawn = (0, spawn_1.buildPiConductorSpawn)({
|
|
1326
1350
|
ensemble: opts.ensemble,
|
|
1327
1351
|
sessionName,
|
package/dist/spawn.d.ts
CHANGED
|
@@ -129,18 +129,6 @@ export declare function resolvePiInteractiveBinary(deps?: {
|
|
|
129
129
|
cmd: string;
|
|
130
130
|
args: string[];
|
|
131
131
|
};
|
|
132
|
-
/**
|
|
133
|
-
* Resolve the absolute path to the BUNDLED `dist/pi/extension.js` for `pi -e <abs>`
|
|
134
|
-
* (#666). Pi loads the BUILT CommonJS extension even in dev. Mirrors
|
|
135
|
-
* {@link resolvePiPath}'s dev/prod `__dirname` split: prod `__dirname` = `dist/`
|
|
136
|
-
* (→ `dist/pi/extension.js`); dev `__dirname` = `src/` (→ sibling `dist/pi/…`).
|
|
137
|
-
* Existence-checked + fail-clean ("run npm run build"). Injectable for tests.
|
|
138
|
-
*/
|
|
139
|
-
export declare function resolvePiExtensionPath(deps?: {
|
|
140
|
-
exists?: (p: string) => boolean;
|
|
141
|
-
isDev?: boolean;
|
|
142
|
-
baseDir?: string;
|
|
143
|
-
}): string;
|
|
144
132
|
/** Inputs for {@link buildPiConductorSpawn} (pure — unit-tested without spawning). */
|
|
145
133
|
export interface PiConductorSpawnOpts {
|
|
146
134
|
ensemble: string;
|
|
@@ -154,20 +142,28 @@ export interface PiConductorSpawnOpts {
|
|
|
154
142
|
conductorTypeName?: string;
|
|
155
143
|
/** Forwarded if set (warn-not-fail upstream when unset). */
|
|
156
144
|
anthropicApiKey?: string;
|
|
157
|
-
/** Injectable
|
|
145
|
+
/** Injectable binary resolver (defaults to the real one, which fails-clean on miss). */
|
|
158
146
|
resolveBinary?: () => {
|
|
159
147
|
cmd: string;
|
|
160
148
|
args: string[];
|
|
161
149
|
};
|
|
162
|
-
resolveExtension?: () => string;
|
|
163
150
|
}
|
|
164
151
|
/**
|
|
165
152
|
* Build the interactive Pi conductor spawn spec — `{ cmd, args, env }` for
|
|
166
153
|
* {@link launchInTerminal} (#666 C3). PURE + injectable so the env/args mapping is
|
|
167
|
-
* unit-tested. The default
|
|
168
|
-
*
|
|
169
|
-
*
|
|
170
|
-
*
|
|
154
|
+
* unit-tested. The default binary resolver THROWS fail-clean (binary missing)
|
|
155
|
+
* BEFORE a terminal is launched.
|
|
156
|
+
*
|
|
157
|
+
* #825 — NO inline `-e <ext>`. `up --agent pi` now relies on the player extension
|
|
158
|
+
* being registered in Pi's `settings.json` (by `installPiExtensions`, guarded
|
|
159
|
+
* before launch in the `up` pi branch) + the `resolvePiRole`→`'player'` gate
|
|
160
|
+
* (`PLAYER_NAME` is set in the env below). This collapses the two Pi-launch paths
|
|
161
|
+
* onto ONE registration source, so no divergent on-disk copy (e.g. dev `node
|
|
162
|
+
* dist/cli.js`'s repo `dist/pi/extension.js` vs the global settings.json copy) can
|
|
163
|
+
* escape Pi's realpath-dedup and double-load the player factory. Mirrors
|
|
164
|
+
* {@link buildPiCommandCenterSpawn}. `args` = `[...binArgs]`; conductor
|
|
165
|
+
* INSTRUCTIONS arrive via the lineup-baked workflow messages → cue pump (no
|
|
166
|
+
* `--system-prompt` for the MVP).
|
|
171
167
|
*/
|
|
172
168
|
export declare function buildPiConductorSpawn(opts: PiConductorSpawnOpts): {
|
|
173
169
|
cmd: string;
|
|
@@ -196,11 +192,19 @@ export interface PiCommandCenterSpawnOpts {
|
|
|
196
192
|
* Build the interactive Pi COMMAND-CENTER (mission-control) spawn spec —
|
|
197
193
|
* `{ cmd, args, env }` for {@link launchInTerminal} (#729). PURE + injectable.
|
|
198
194
|
*
|
|
199
|
-
*
|
|
200
|
-
* registers BOTH Pi extensions in `~/.pi/agent/settings.json`, so a
|
|
201
|
-
* auto-loads them and {@link resolvePiRole} (via the env below) picks
|
|
202
|
-
*
|
|
203
|
-
*
|
|
195
|
+
* Like {@link buildPiConductorSpawn} (post-#825), this passes NO `-e <ext>`:
|
|
196
|
+
* install-pi registers BOTH Pi extensions in `~/.pi/agent/settings.json`, so a
|
|
197
|
+
* plain `pi` auto-loads them and {@link resolvePiRole} (via the env below) picks
|
|
198
|
+
* exactly one.
|
|
199
|
+
*
|
|
200
|
+
* #825 (comment correction): a SAME-path `-e` would NOT cause a re-registration
|
|
201
|
+
* error — the #825 spike found Pi realpath-dedupes CLI `-e` paths against
|
|
202
|
+
* `settings.json` (`mergePaths` → `canonicalizePath`/`realpathSync`), and even an
|
|
203
|
+
* un-deduped duplicate is first-registration-wins at the tool layer (no throw,
|
|
204
|
+
* Pi 0.79.x). The real reason both spawn specs OMIT `-e` is a SINGLE registration
|
|
205
|
+
* source: it prevents a DIVERGENT on-disk copy (a different physical path that
|
|
206
|
+
* escapes realpath-dedup) from double-loading the extension factory. The env
|
|
207
|
+
* carries the OPERATOR subset only:
|
|
204
208
|
* - `AGENT_TEMPO_PI_ROLE=command-center` → the DETERMINISTIC role force (top of
|
|
205
209
|
* {@link resolvePiRole}'s precedence — beats an inherited `PLAYER_NAME`).
|
|
206
210
|
* - `AGENT_TEMPO_MISSION_CONTROL=1` → the role opt-in (kept for legacy parity /
|
package/dist/spawn.js
CHANGED
|
@@ -14,7 +14,6 @@ exports.buildTerminalCommand = buildTerminalCommand;
|
|
|
14
14
|
exports.launchInTerminal = launchInTerminal;
|
|
15
15
|
exports.spawnInTerminal = spawnInTerminal;
|
|
16
16
|
exports.resolvePiInteractiveBinary = resolvePiInteractiveBinary;
|
|
17
|
-
exports.resolvePiExtensionPath = resolvePiExtensionPath;
|
|
18
17
|
exports.buildPiConductorSpawn = buildPiConductorSpawn;
|
|
19
18
|
exports.buildPiCommandCenterSpawn = buildPiCommandCenterSpawn;
|
|
20
19
|
exports.spawnCopilotBridge = spawnCopilotBridge;
|
|
@@ -305,7 +304,18 @@ function writeSecretEnvFile(secretEnv, opts) {
|
|
|
305
304
|
content = keys.map((k) => `set -gx ${k} ${fishQuote(secretEnv[k])}`).join('\n') + '\n';
|
|
306
305
|
}
|
|
307
306
|
else if (opts.syntax === 'cmd') {
|
|
308
|
-
|
|
307
|
+
// #847 — `@`-prefix EVERY line at the GENERATOR level (a structural map, not a
|
|
308
|
+
// per-line author choice). cmd `call`s this file into the persistent `cmd /k`
|
|
309
|
+
// session under its default echo-ON, which echoes each line of a called batch
|
|
310
|
+
// FILE to the terminal — so an un-prefixed `set "ANTHROPIC_API_KEY=…"` printed
|
|
311
|
+
// the SECRET VALUE to scrollback (the #847 leak; #689 had closed only the
|
|
312
|
+
// command-line/history vector). The per-line `@` is self-contained: unlike
|
|
313
|
+
// `@echo off`, it does NOT persist echo state to the caller (`call` shares the
|
|
314
|
+
// parent echo scope, and neither `call` nor `setlocal` scopes echo), so the
|
|
315
|
+
// trailing `del`, the bin launch, and the user's prompt still echo normally.
|
|
316
|
+
// Mapping at the generator means any FUTURE non-`set` line is suppressed too.
|
|
317
|
+
const lines = keys.map((k) => `set "${k}=${cmdEscape(secretEnv[k])}"`);
|
|
318
|
+
content = lines.map((line) => `@${line}`).join('\r\n') + '\r\n';
|
|
309
319
|
}
|
|
310
320
|
else {
|
|
311
321
|
content = keys.map((k) => `export ${k}=${shellQuote(secretEnv[k])}`).join('\n') + '\n';
|
|
@@ -627,37 +637,27 @@ function resolvePiInteractiveBinary(deps = {}) {
|
|
|
627
637
|
throw new Error('Pi CLI not found. Install it with `npm install -g pi-ai` and ensure `pi` is on PATH ' +
|
|
628
638
|
'(or add the @earendil-works/pi-coding-agent package). The conductor needs the interactive Pi CLI.');
|
|
629
639
|
}
|
|
630
|
-
/**
|
|
631
|
-
* Resolve the absolute path to the BUNDLED `dist/pi/extension.js` for `pi -e <abs>`
|
|
632
|
-
* (#666). Pi loads the BUILT CommonJS extension even in dev. Mirrors
|
|
633
|
-
* {@link resolvePiPath}'s dev/prod `__dirname` split: prod `__dirname` = `dist/`
|
|
634
|
-
* (→ `dist/pi/extension.js`); dev `__dirname` = `src/` (→ sibling `dist/pi/…`).
|
|
635
|
-
* Existence-checked + fail-clean ("run npm run build"). Injectable for tests.
|
|
636
|
-
*/
|
|
637
|
-
function resolvePiExtensionPath(deps = {}) {
|
|
638
|
-
const exists = deps.exists ?? fs_1.existsSync;
|
|
639
|
-
const isDev = deps.isDev ?? __filename.endsWith('.ts');
|
|
640
|
-
const base = deps.baseDir ?? __dirname;
|
|
641
|
-
const extPath = isDev
|
|
642
|
-
? (0, path_1.resolve)(base, '..', 'dist', 'pi', 'extension.js') // dev: src/ → repo/dist/pi/extension.js
|
|
643
|
-
: (0, path_1.resolve)(base, 'pi', 'extension.js'); // prod: dist/ → dist/pi/extension.js
|
|
644
|
-
if (!exists(extPath)) {
|
|
645
|
-
throw new Error(`Pi conductor extension not found at ${extPath}. Run \`npm run build\` first.`);
|
|
646
|
-
}
|
|
647
|
-
return extPath;
|
|
648
|
-
}
|
|
649
640
|
/**
|
|
650
641
|
* Build the interactive Pi conductor spawn spec — `{ cmd, args, env }` for
|
|
651
642
|
* {@link launchInTerminal} (#666 C3). PURE + injectable so the env/args mapping is
|
|
652
|
-
* unit-tested. The default
|
|
653
|
-
*
|
|
654
|
-
*
|
|
655
|
-
*
|
|
643
|
+
* unit-tested. The default binary resolver THROWS fail-clean (binary missing)
|
|
644
|
+
* BEFORE a terminal is launched.
|
|
645
|
+
*
|
|
646
|
+
* #825 — NO inline `-e <ext>`. `up --agent pi` now relies on the player extension
|
|
647
|
+
* being registered in Pi's `settings.json` (by `installPiExtensions`, guarded
|
|
648
|
+
* before launch in the `up` pi branch) + the `resolvePiRole`→`'player'` gate
|
|
649
|
+
* (`PLAYER_NAME` is set in the env below). This collapses the two Pi-launch paths
|
|
650
|
+
* onto ONE registration source, so no divergent on-disk copy (e.g. dev `node
|
|
651
|
+
* dist/cli.js`'s repo `dist/pi/extension.js` vs the global settings.json copy) can
|
|
652
|
+
* escape Pi's realpath-dedup and double-load the player factory. Mirrors
|
|
653
|
+
* {@link buildPiCommandCenterSpawn}. `args` = `[...binArgs]`; conductor
|
|
654
|
+
* INSTRUCTIONS arrive via the lineup-baked workflow messages → cue pump (no
|
|
655
|
+
* `--system-prompt` for the MVP).
|
|
656
656
|
*/
|
|
657
657
|
function buildPiConductorSpawn(opts) {
|
|
658
658
|
const { cmd, args: binArgs } = (opts.resolveBinary ?? resolvePiInteractiveBinary)();
|
|
659
|
-
|
|
660
|
-
const args = [...binArgs
|
|
659
|
+
// #825 — single registration source: no inline `-e` (see the doc-comment above).
|
|
660
|
+
const args = [...binArgs];
|
|
661
661
|
const env = {
|
|
662
662
|
...opts.temporalEnvVars,
|
|
663
663
|
[config_1.ENV.TASK_QUEUE]: opts.taskQueue,
|
|
@@ -678,11 +678,19 @@ function buildPiConductorSpawn(opts) {
|
|
|
678
678
|
* Build the interactive Pi COMMAND-CENTER (mission-control) spawn spec —
|
|
679
679
|
* `{ cmd, args, env }` for {@link launchInTerminal} (#729). PURE + injectable.
|
|
680
680
|
*
|
|
681
|
-
*
|
|
682
|
-
* registers BOTH Pi extensions in `~/.pi/agent/settings.json`, so a
|
|
683
|
-
* auto-loads them and {@link resolvePiRole} (via the env below) picks
|
|
684
|
-
*
|
|
685
|
-
*
|
|
681
|
+
* Like {@link buildPiConductorSpawn} (post-#825), this passes NO `-e <ext>`:
|
|
682
|
+
* install-pi registers BOTH Pi extensions in `~/.pi/agent/settings.json`, so a
|
|
683
|
+
* plain `pi` auto-loads them and {@link resolvePiRole} (via the env below) picks
|
|
684
|
+
* exactly one.
|
|
685
|
+
*
|
|
686
|
+
* #825 (comment correction): a SAME-path `-e` would NOT cause a re-registration
|
|
687
|
+
* error — the #825 spike found Pi realpath-dedupes CLI `-e` paths against
|
|
688
|
+
* `settings.json` (`mergePaths` → `canonicalizePath`/`realpathSync`), and even an
|
|
689
|
+
* un-deduped duplicate is first-registration-wins at the tool layer (no throw,
|
|
690
|
+
* Pi 0.79.x). The real reason both spawn specs OMIT `-e` is a SINGLE registration
|
|
691
|
+
* source: it prevents a DIVERGENT on-disk copy (a different physical path that
|
|
692
|
+
* escapes realpath-dedup) from double-loading the extension factory. The env
|
|
693
|
+
* carries the OPERATOR subset only:
|
|
686
694
|
* - `AGENT_TEMPO_PI_ROLE=command-center` → the DETERMINISTIC role force (top of
|
|
687
695
|
* {@link resolvePiRole}'s precedence — beats an inherited `PLAYER_NAME`).
|
|
688
696
|
* - `AGENT_TEMPO_MISSION_CONTROL=1` → the role opt-in (kept for legacy parity /
|
package/dist/tools/cue.js
CHANGED
|
@@ -10,6 +10,7 @@ const resolve_1 = require("./resolve");
|
|
|
10
10
|
const resolve_2 = require("../activities/resolve");
|
|
11
11
|
const signals_1 = require("../workflows/signals");
|
|
12
12
|
const query_timeout_1 = require("../utils/query-timeout");
|
|
13
|
+
const visibility_deadline_1 = require("../utils/visibility-deadline");
|
|
13
14
|
const descriptor_1 = require("./descriptor");
|
|
14
15
|
const validation_1 = require("../utils/validation");
|
|
15
16
|
const suspension_1 = require("../utils/suspension");
|
|
@@ -139,6 +140,17 @@ function buildCueTool(client, config, getPlayerId, handle) {
|
|
|
139
140
|
return (0, descriptor_1.ok)(`Message sent to ${playerId}. (outbox: ${entryId})`);
|
|
140
141
|
}
|
|
141
142
|
catch (err) {
|
|
143
|
+
// #845 Mode A: a truncated roster scan is NOT "player not found" —
|
|
144
|
+
// the target may exist; the visibility scan just hit its deadline
|
|
145
|
+
// (e.g. post-restart worker warmup). Surface a DISTINCT, actionable
|
|
146
|
+
// "resolution incomplete — retry" so the operator doesn't conclude
|
|
147
|
+
// the player vanished. (The `if (!resolved)` not-found path above
|
|
148
|
+
// only fires on a clean `null`; the timeout throws past it to here.)
|
|
149
|
+
if ((0, visibility_deadline_1.isVisibilityTimeout)(err)) {
|
|
150
|
+
return (0, descriptor_1.fail)(`Could not resolve "${playerId}": roster resolution incomplete — the ` +
|
|
151
|
+
`visibility scan hit its deadline (likely worker warmup), not a ` +
|
|
152
|
+
`"player not found". Retry in a moment.`);
|
|
153
|
+
}
|
|
142
154
|
return (0, descriptor_1.fail)(`Failed to send message to ${playerId}: ${(0, descriptor_1.formatError)(err)}`);
|
|
143
155
|
}
|
|
144
156
|
},
|
package/dist/tools/ensemble.js
CHANGED
|
@@ -42,6 +42,7 @@ const resolve_1 = require("../activities/resolve");
|
|
|
42
42
|
const descriptor_1 = require("./descriptor");
|
|
43
43
|
const duration_1 = require("../utils/duration");
|
|
44
44
|
const suspension_1 = require("../utils/suspension");
|
|
45
|
+
const visibility_deadline_1 = require("../utils/visibility-deadline");
|
|
45
46
|
/**
|
|
46
47
|
* Default dormancy threshold (1 hour). Per #563: a `detached` player whose
|
|
47
48
|
* last activity is older than this is considered dormant. `phase === 'gone'`
|
|
@@ -102,12 +103,29 @@ function buildEnsembleTool(client, config, getPlayerId, ownWorkflowId) {
|
|
|
102
103
|
self: client.workflow.getHandle(ownWorkflowId),
|
|
103
104
|
});
|
|
104
105
|
let sessions;
|
|
106
|
+
let truncated = false;
|
|
107
|
+
let scanned = 0;
|
|
105
108
|
try {
|
|
106
|
-
|
|
109
|
+
const scan = await (0, resolve_1.scanEnsembleSessionsWithStatus)(client, config.ensemble);
|
|
110
|
+
sessions = scan.sessions;
|
|
111
|
+
truncated = scan.truncated;
|
|
112
|
+
scanned = scan.scanned;
|
|
107
113
|
}
|
|
108
114
|
catch (err) {
|
|
109
115
|
return (0, descriptor_1.fail)(`Error listing workflows: ${(0, descriptor_1.formatError)(err)}`);
|
|
110
116
|
}
|
|
117
|
+
// #845 Mode A — when the visibility scan hit its wall-clock deadline,
|
|
118
|
+
// `sessions` is a PARTIAL roster. Surface that explicitly so an
|
|
119
|
+
// operator never mistakes a mid-scan snapshot for the full ensemble
|
|
120
|
+
// (the incident: a 3/8 roster read as "5 players vanished"). Report
|
|
121
|
+
// `scanned` (workflows enumerated before the deadline) rather than the
|
|
122
|
+
// shown-row count — the shown count is post scope/dormancy filtering,
|
|
123
|
+
// so "N shown" would understate how far the scan actually got.
|
|
124
|
+
const partialBanner = truncated
|
|
125
|
+
? `⚠ partial roster — ${scanned} workflow(s) enumerated before the ` +
|
|
126
|
+
`${Math.round(visibility_deadline_1.VISIBILITY_DEADLINES_MS.scanEnsembleSessions / 1000)}s visibility deadline ` +
|
|
127
|
+
`(likely worker warmup); some players may be missing — re-run to refresh.`
|
|
128
|
+
: undefined;
|
|
111
129
|
// Apply scope filters
|
|
112
130
|
let ownGitRoot;
|
|
113
131
|
if (scope === 'repo') {
|
|
@@ -138,12 +156,25 @@ function buildEnsembleTool(client, config, getPlayerId, ownWorkflowId) {
|
|
|
138
156
|
// #752: PAUSED/HELD banner leads the output so it can't be missed.
|
|
139
157
|
const banner = (0, suspension_1.formatSuspensionBanner)(await suspensionPromise, config.ensemble);
|
|
140
158
|
if (active.length === 0 && dormant.length === 0) {
|
|
159
|
+
// #845 CRITICAL: check truncation FIRST. A truncated scan that
|
|
160
|
+
// yielded zero rows must NOT render as "No active sessions found" —
|
|
161
|
+
// false-empty is the most dangerous case (an operator concludes the
|
|
162
|
+
// whole ensemble died and takes destructive action). Surface the
|
|
163
|
+
// partial banner instead.
|
|
164
|
+
if (partialBanner) {
|
|
165
|
+
return (0, descriptor_1.ok)([banner, partialBanner].filter(Boolean).join('\n\n'));
|
|
166
|
+
}
|
|
141
167
|
return (0, descriptor_1.ok)(banner ? `${banner}\n\nNo active sessions found.` : 'No active sessions found.');
|
|
142
168
|
}
|
|
143
169
|
// #563 summary line — surface both counts so operators can see what's
|
|
144
170
|
// being hidden behind the dormant filter without re-running.
|
|
145
171
|
const summary = `**${config.ensemble}**: ${active.length} active, ${dormant.length} dormant`;
|
|
146
|
-
|
|
172
|
+
// Lead banners (suspension #752 + partial-roster #845) precede the
|
|
173
|
+
// summary so neither can be missed above the roster.
|
|
174
|
+
const sections = [
|
|
175
|
+
...[banner, partialBanner].filter(Boolean),
|
|
176
|
+
summary,
|
|
177
|
+
];
|
|
147
178
|
const showActive = dormantFilter !== 'show-only';
|
|
148
179
|
const showDormant = dormantFilter !== 'hide';
|
|
149
180
|
if (showActive) {
|