agent-tempo 1.0.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +8 -2
- package/dashboard/dist/assets/index-D6Xyje_n.js +62 -0
- package/dashboard/dist/assets/index-D6Xyje_n.js.map +1 -0
- package/dashboard/dist/index.html +1 -1
- package/dashboard/package.json +1 -1
- package/dist/activities/claude-stop.d.ts +21 -0
- package/dist/activities/claude-stop.js +94 -0
- package/dist/cli/commands.d.ts +39 -0
- package/dist/cli/commands.js +83 -2
- package/dist/cli/legacy-migration.js +8 -2
- package/dist/cli/sa-preflight.d.ts +27 -3
- package/dist/cli/sa-preflight.js +169 -9
- package/dist/cli/startup.js +34 -8
- package/dist/client/core.js +9 -0
- package/dist/client/interface.d.ts +21 -0
- package/dist/daemon.js +1 -0
- package/dist/http/catalog.js +17 -3
- package/dist/http/event-types.d.ts +41 -0
- package/dist/http/orphans.d.ts +76 -0
- package/dist/http/orphans.js +93 -0
- package/dist/http/server.js +13 -0
- package/dist/reconcile/orphans.d.ts +37 -27
- package/dist/reconcile/orphans.js +93 -6
- package/dist/tui/index.js +1 -0
- package/dist/utils/bg-preflight.d.ts +25 -0
- package/dist/utils/bg-preflight.js +154 -0
- package/package.json +5 -4
- package/dashboard/dist/assets/index-_5jV0Znu.js +0 -62
- package/dashboard/dist/assets/index-_5jV0Znu.js.map +0 -1
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
rel="stylesheet"
|
|
13
13
|
href="https://fonts.googleapis.com/css2?family=Instrument+Sans:wght@400;500;600;700&family=Instrument+Serif:ital@0;1&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
14
14
|
/>
|
|
15
|
-
<script type="module" crossorigin src="/dashboard/assets/index-
|
|
15
|
+
<script type="module" crossorigin src="/dashboard/assets/index-D6Xyje_n.js"></script>
|
|
16
16
|
<link rel="stylesheet" crossorigin href="/dashboard/assets/index-CB78ToNE.css">
|
|
17
17
|
</head>
|
|
18
18
|
<body>
|
package/dashboard/package.json
CHANGED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
export interface ClaudeStopInput {
|
|
2
|
+
/** Supervisor's 8-char short id from `SessionMetadata.bgShortId`. */
|
|
3
|
+
shortId: string;
|
|
4
|
+
/** Custom claude binary path (defaults to `'claude'` on PATH). */
|
|
5
|
+
claudeBin?: string;
|
|
6
|
+
}
|
|
7
|
+
export interface ClaudeStopResult {
|
|
8
|
+
success: boolean;
|
|
9
|
+
/** One of `'stopped'` | `'already-gone'` | `'error'`. */
|
|
10
|
+
outcome: 'stopped' | 'already-gone' | 'error';
|
|
11
|
+
/** Populated on `outcome === 'error'` — the raw stderr/stdout snippet for diagnostics. */
|
|
12
|
+
detail?: string;
|
|
13
|
+
/** Exit code from `claude stop`; `-1` when the process ended without an exit status. Undefined on spawn-side failure. */
|
|
14
|
+
exitCode?: number;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Invoke `claude stop <shortId>` synchronously. Throws only for a malformed `shortId` (non-retryable `ApplicationFailure`); every other outcome returns a
|
|
18
|
+
* structured `ClaudeStopResult` the workflow can act on. Routes through
|
|
19
|
+
* `resolveClaudePath` so the `claudeBin` config option is honoured.
|
|
20
|
+
*/
|
|
21
|
+
export declare function claudeStop(input: ClaudeStopInput): Promise<ClaudeStopResult>;
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.claudeStop = claudeStop;
|
|
4
|
+
/**
|
|
5
|
+
* #596 / ADR 0016 — `claude stop <shortId>` per-host activity.
|
|
6
|
+
*
|
|
7
|
+
* `claude --bg` hands the session to Anthropic's per-user Claude Code
|
|
8
|
+
* supervisor. We don't own the PID anymore (the supervisor does), so the
|
|
9
|
+
* existing `hard-terminate` PID-scan helper would always return
|
|
10
|
+
* `strategy: 'none'` and leak the supervisor job. Instead, ask the
|
|
11
|
+
* supervisor to stop the job via its documented CLI verb.
|
|
12
|
+
*
|
|
13
|
+
* **Routing**: registered on the per-host activity queue
|
|
14
|
+
* (`agent-tempo-{hostname}`) so the `claude` CLI runs on the same machine
|
|
15
|
+
* the supervisor lives on. The workflow proxy in `src/workflows/session.ts`
|
|
16
|
+
* picks the right host via `AgentTempoHostname` exactly the same way
|
|
17
|
+
* `hardTerminateAttachment` already does.
|
|
18
|
+
*
|
|
19
|
+
* **Idempotency**: `claude stop` exits 1 with `No job matching '<id>'` when
|
|
20
|
+
* the job is already gone. We classify that as success — destroy must be
|
|
21
|
+
* idempotent and a re-run of an already-stopped session is a normal flow.
|
|
22
|
+
* Any other non-zero exit (network/socket failure, supervisor itself wedged,
|
|
23
|
+
* unexpected CLI shape) is reported as `success: false` so the workflow's
|
|
24
|
+
* destroy path can fall back to the existing `hardTerminateAttachment`
|
|
25
|
+
* PID-scan as defense in depth.
|
|
26
|
+
*
|
|
27
|
+
* **Timing**: 15-second timeout. The supervisor responds in <100ms on a
|
|
28
|
+
* healthy host, but Windows process-spawn overhead and antivirus scanners
|
|
29
|
+
* can stretch the worst case. 15s leaves slack without holding up destroy.
|
|
30
|
+
*/
|
|
31
|
+
const child_process_1 = require("child_process");
|
|
32
|
+
const activity_1 = require("@temporalio/activity");
|
|
33
|
+
const spawn_1 = require("../spawn");
|
|
34
|
+
const log = (...args) => console.error('[agent-tempo:claude-stop]', ...args);
|
|
35
|
+
/**
 * Gatekeeper for the supervisor short id before it is placed into the
 * `claude stop` argv.
 *
 * Only a string made of exactly eight lowercase hexadecimal characters is
 * accepted (per the original author's note, the supervisor derives the id
 * from the first 8 chars of the full UUID). Anything else — wrong type,
 * wrong length, uppercase, stray whitespace — is rejected so that
 * user-influenced text never reaches the command line.
 *
 * @param s Candidate short id (any value).
 * @returns `true` when `s` is an 8-char lowercase hex string, else `false`.
 */
function isValidShortId(s) {
    if (typeof s !== 'string') {
        return false;
    }
    return /^[0-9a-f]{8}$/.test(s);
}
|
|
43
|
+
/**
 * Invoke `claude stop <shortId>` synchronously and classify what happened.
 *
 * The binary is resolved through `resolveClaudePath` so the `claudeBin`
 * config option is honoured. The call blocks for at most 15 seconds.
 *
 * Outcomes (all returned, not thrown):
 *  - exit 0                                   → `{ success: true,  outcome: 'stopped' }`
 *  - non-zero exit whose output contains
 *    "no job matching" (case-insensitive)      → `{ success: true,  outcome: 'already-gone' }`
 *  - spawn failure / timeout (`result.error`)  → `{ success: false, outcome: 'error' }`, no `exitCode`
 *  - any other non-zero exit, or termination
 *    by a signal                               → `{ success: false, outcome: 'error' }`
 *
 * When the child ends without an exit status (killed by a signal),
 * `exitCode` is reported as `-1` and the signal name is included in the
 * log line and in the fallback `detail`, so the cause is not lost.
 *
 * @param input `{ shortId, claudeBin? }` — see `ClaudeStopInput`.
 * @returns A structured `ClaudeStopResult` the workflow can act on.
 * @throws A non-retryable `ApplicationFailure` when `shortId` is not eight
 *   lowercase hex characters. This is the only path that throws; it signals
 *   corrupt session metadata rather than a failed stop.
 */
async function claudeStop(input) {
    const { shortId, claudeBin } = input;
    if (!isValidShortId(shortId)) {
        throw activity_1.ApplicationFailure.nonRetryable(`claudeStop: invalid shortId "${shortId}" (must be 8 lowercase hex chars). ` +
            `This indicates corrupt SessionMetadata.bgShortId — check the recruit path.`);
    }
    const bin = (0, spawn_1.resolveClaudePath)(claudeBin);
    let result;
    try {
        // NOTE(review): with `shell: true` (Windows) Node joins the command and
        // args into one shell string without quoting. `shortId` is validated
        // above, but a `bin` path containing spaces would need quoting —
        // confirm what `resolveClaudePath` returns on Windows.
        result = (0, child_process_1.spawnSync)(bin, ['stop', shortId], {
            encoding: 'utf8',
            timeout: 15_000,
            shell: process.platform === 'win32',
        });
    }
    catch (err) {
        const detail = err instanceof Error ? err.message : String(err);
        log(`claude stop ${shortId} failed to launch: ${detail}`);
        return { success: false, outcome: 'error', detail };
    }
    // spawnSync reports launch failures and timeouts via `result.error`
    // instead of throwing.
    if (result.error) {
        log(`claude stop ${shortId} spawn error: ${result.error.message}`);
        return { success: false, outcome: 'error', detail: result.error.message };
    }
    // `status` is null when the child was terminated by a signal; keep the
    // historical `-1` sentinel for callers but remember the signal name.
    const exitCode = result.status ?? -1;
    const signal = result.status == null ? (result.signal ?? null) : null;
    const stderr = (result.stderr || '').toString();
    const stdout = (result.stdout || '').toString();
    const combined = `${stderr}\n${stdout}`.trim();
    if (exitCode === 0) {
        log(`claude stop ${shortId} → stopped`);
        return { success: true, outcome: 'stopped', exitCode };
    }
    // "Already gone" signal — `No job matching '<id>'` somewhere in the
    // combined output. Matched case-insensitively to absorb small phrasing
    // drift across CLI versions. Treated as success so destroy is idempotent.
    if (/no job matching/i.test(combined)) {
        log(`claude stop ${shortId} → already-gone (exit ${exitCode}, idempotent success)`);
        return { success: true, outcome: 'already-gone', exitCode };
    }
    // Describe how the process ended: a real exit code, or the signal that
    // killed it (instead of a misleading "exit -1").
    const ending = signal ? `terminated by signal ${signal}` : `exit ${exitCode}`;
    log(`claude stop ${shortId} → error (${ending}): ${combined || '(no output)'}`);
    return {
        success: false,
        outcome: 'error',
        exitCode,
        detail: combined || `${ending} with no output`,
    };
}
|
package/dist/cli/commands.d.ts
CHANGED
|
@@ -132,6 +132,45 @@ export type StopTemporalResult = {
|
|
|
132
132
|
* profile collateral damage.
|
|
133
133
|
*/
|
|
134
134
|
export declare function stopTemporalServer(opts: StopTemporalServerOpts): StopTemporalResult;
|
|
135
|
+
/**
|
|
136
|
+
* Minimal child handle {@link startTemporalForDestroy} needs — `ChildProcess`
|
|
137
|
+
* satisfies it. Kept narrow so unit tests can inject a fake without spawning.
|
|
138
|
+
*
|
|
139
|
+
* @internal
|
|
140
|
+
*/
|
|
141
|
+
export interface SpawnedTemporalChild {
|
|
142
|
+
kill(): void;
|
|
143
|
+
unref(): void;
|
|
144
|
+
}
|
|
145
|
+
/**
|
|
146
|
+
* Dependency seam for {@link startTemporalForDestroy} — production callers
|
|
147
|
+
* pass nothing and get the real spawn + reachability probe. Tests inject
|
|
148
|
+
* stubs plus a tiny `pollDelayMs` so the readiness loop runs instantly.
|
|
149
|
+
*
|
|
150
|
+
* @internal
|
|
151
|
+
*/
|
|
152
|
+
export interface StartTemporalForDestroyDeps {
|
|
153
|
+
/** Readiness probe — defaults to {@link isTemporalReachable} for `config`. */
|
|
154
|
+
isReachable?: () => Promise<boolean>;
|
|
155
|
+
/** Spawn hook — defaults to a detached `temporal server start-dev`. */
|
|
156
|
+
spawn?: () => SpawnedTemporalChild;
|
|
157
|
+
/** Readiness poll attempts. Default 20. */
|
|
158
|
+
attempts?: number;
|
|
159
|
+
/** Delay between readiness polls, ms. Default 500 (→ 20×500ms = 10s). */
|
|
160
|
+
pollDelayMs?: number;
|
|
161
|
+
}
|
|
162
|
+
/**
|
|
163
|
+
* Start a temporary Temporal dev server just long enough for `down --destroy`
|
|
164
|
+
* to terminate workflows when Temporal happened to be down. Polls for
|
|
165
|
+
* readiness; on timeout it kills the child it spawned so `down` never leaves
|
|
166
|
+
* a stray Temporal process booting in the background. Exported for unit
|
|
167
|
+
* tests — production callers pass only `config`.
|
|
168
|
+
*
|
|
169
|
+
* @internal
|
|
170
|
+
*/
|
|
171
|
+
export declare function startTemporalForDestroy(config: Config, deps?: StartTemporalForDestroyDeps): Promise<{
|
|
172
|
+
started: boolean;
|
|
173
|
+
}>;
|
|
135
174
|
export declare function down(opts: DownOpts): Promise<void>;
|
|
136
175
|
interface AgentTypesCommandOpts {
|
|
137
176
|
subcommand?: string;
|
package/dist/cli/commands.js
CHANGED
|
@@ -40,6 +40,7 @@ exports.up = up;
|
|
|
40
40
|
exports.formatScheduleRecurrence = formatScheduleRecurrence;
|
|
41
41
|
exports.lineupScheduleToEntry = lineupScheduleToEntry;
|
|
42
42
|
exports.stopTemporalServer = stopTemporalServer;
|
|
43
|
+
exports.startTemporalForDestroy = startTemporalForDestroy;
|
|
43
44
|
exports.down = down;
|
|
44
45
|
exports.agentTypesCommand = agentTypesCommand;
|
|
45
46
|
exports.broadcast = broadcast;
|
|
@@ -1547,6 +1548,44 @@ function stopTemporalServer(opts) {
|
|
|
1547
1548
|
return { action: 'failed', error: err };
|
|
1548
1549
|
}
|
|
1549
1550
|
}
|
|
1551
|
+
/**
 * Start a temporary Temporal dev server just long enough for `down --destroy`
 * to terminate workflows when Temporal happened to be down.
 *
 * Spawns the server (detached, unref'd), then polls the readiness probe up
 * to `attempts` times, sleeping `pollDelayMs` before each probe. If the
 * server never becomes reachable — or the probe itself rejects — the child
 * that was spawned here is killed, so `down` never leaves a stray Temporal
 * process booting in the background. Exported for unit tests — production
 * callers pass only `config`.
 *
 * @param config Resolved config; only `temporalAddress` is read here, and
 *   only by the default spawn hook (port defaults to `7233`).
 * @param deps Optional test seams: `isReachable`, `spawn`, `attempts`
 *   (default 20) and `pollDelayMs` (default 500).
 * @returns `{ started: true }` once the probe succeeds, `{ started: false }`
 *   after the last attempt fails.
 * @throws Whatever the readiness probe or spawn hook throws; a probe
 *   failure still kills the spawned child before propagating.
 *
 * @internal
 */
async function startTemporalForDestroy(config, deps = {}) {
    const attempts = deps.attempts ?? 20;
    const pollDelayMs = deps.pollDelayMs ?? 500;
    const isReachable = deps.isReachable ?? (() => isTemporalReachable(config));
    const spawn = deps.spawn ?? (() => {
        (0, fs_1.mkdirSync)(config_1.AGENT_TEMPO_HOME, { recursive: true });
        const port = config.temporalAddress.split(':')[1] || '7233';
        return (0, child_process_1.spawn)('temporal', [
            'server', 'start-dev',
            '--port', port,
            '--db-filename', DEFAULT_DB_PATH,
        ], { detached: true, stdio: 'ignore' });
    });
    const child = spawn();
    child.unref();
    let started = false;
    try {
        for (let i = 0; i < attempts; i++) {
            await new Promise(r => setTimeout(r, pollDelayMs));
            if (await isReachable()) {
                started = true;
                break;
            }
        }
    }
    finally {
        // Timed out, or the probe threw. The detached child may still be
        // booting and would come up orphaned moments after we give up — kill
        // the process we spawned so `down` doesn't leave a stray server behind.
        if (!started) {
            try {
                child.kill();
            }
            catch { /* already exited */ }
        }
    }
    return { started };
}
|
|
1550
1589
|
async function down(opts) {
|
|
1551
1590
|
const config = (0, config_1.getConfig)(opts);
|
|
1552
1591
|
out.heading('agent-tempo teardown');
|
|
@@ -1555,7 +1594,35 @@ async function down(opts) {
|
|
|
1555
1594
|
: ` Stopping daemon + Temporal. Workflows stay parked for the next ${out.dim('agent-tempo up')}.`);
|
|
1556
1595
|
// Step 1 (destroy mode only): enumerate + terminate workflows across every
|
|
1557
1596
|
// ensemble, after a typed confirmation showing the user what's at stake.
|
|
1558
|
-
|
|
1597
|
+
let temporalUp = await isTemporalReachable(config);
|
|
1598
|
+
// `--destroy` can only terminate workflows while Temporal is reachable.
|
|
1599
|
+
// Workflow state lives durably on disk in ~/.agent-tempo/, so if Temporal
|
|
1600
|
+
// happens to be down when the user runs `down --destroy`, skipping the
|
|
1601
|
+
// destroy step here silently leaves every workflow to be resurrected the
|
|
1602
|
+
// next time anything starts the daemon (an `up`, a `status`, or the TUI).
|
|
1603
|
+
// To make `--destroy` actually mean it, start Temporal temporarily just
|
|
1604
|
+
// long enough to run the terminations — Step 4 below stops it again.
|
|
1605
|
+
let startedTemporalForDestroy = false;
|
|
1606
|
+
if (opts.destroy && !temporalUp) {
|
|
1607
|
+
if (!temporalCliExists()) {
|
|
1608
|
+
out.warn('temporal CLI not found — cannot destroy workflows; they will persist on disk.');
|
|
1609
|
+
}
|
|
1610
|
+
else {
|
|
1611
|
+
out.log(` ${out.dim('...')} Temporal is down — starting it temporarily to destroy workflows...`);
|
|
1612
|
+
const { started } = await startTemporalForDestroy(config);
|
|
1613
|
+
if (started) {
|
|
1614
|
+
temporalUp = true;
|
|
1615
|
+
startedTemporalForDestroy = true;
|
|
1616
|
+
out.success('Temporal started for cleanup');
|
|
1617
|
+
}
|
|
1618
|
+
else {
|
|
1619
|
+
out.warn('Could not start Temporal within 10s — workflows may survive teardown. ' +
|
|
1620
|
+
'Re-run `agent-tempo down --destroy` once Temporal is up. ' +
|
|
1621
|
+
'A stray Temporal process may have been left starting — check with ' +
|
|
1622
|
+
'`agent-tempo status` and stop it manually if one is still running.');
|
|
1623
|
+
}
|
|
1624
|
+
}
|
|
1625
|
+
}
|
|
1559
1626
|
if (opts.destroy && temporalUp) {
|
|
1560
1627
|
try {
|
|
1561
1628
|
const connection = await (0, connection_1.createTemporalConnection)(config);
|
|
@@ -1625,6 +1692,15 @@ async function down(opts) {
|
|
|
1625
1692
|
const confirmed = await typedConfirmPrompt(` This terminates every workflow (${totalTargets}) and cannot be undone.`, 'destroy');
|
|
1626
1693
|
if (!confirmed) {
|
|
1627
1694
|
out.log('Aborted.');
|
|
1695
|
+
// We may have started Temporal solely to run this destroy.
|
|
1696
|
+
// Aborting at the confirmation prompt must not leave that
|
|
1697
|
+
// server orphaned — stop it before the hard exit. We own it
|
|
1698
|
+
// outright, so force past the cross-profile guard.
|
|
1699
|
+
if (startedTemporalForDestroy) {
|
|
1700
|
+
if (stopTemporalServer({ killSharedTemporal: true }).action === 'killed') {
|
|
1701
|
+
out.log(` ${out.dim('Temporal server stopped')}`);
|
|
1702
|
+
}
|
|
1703
|
+
}
|
|
1628
1704
|
process.exit(0);
|
|
1629
1705
|
}
|
|
1630
1706
|
}
|
|
@@ -1676,7 +1752,12 @@ async function down(opts) {
|
|
|
1676
1752
|
// skips the kill when the OPPOSITE profile is likely active;
|
|
1677
1753
|
// `--kill-shared-temporal` is the explicit opt-in to override.
|
|
1678
1754
|
if (temporalUp) {
|
|
1679
|
-
|
|
1755
|
+
// When we started Temporal ourselves just for the destroy step, always
|
|
1756
|
+
// stop it again — the cross-profile guard is about not killing a server
|
|
1757
|
+
// the *other* profile owns, but this one we own outright.
|
|
1758
|
+
const result = stopTemporalServer({
|
|
1759
|
+
killSharedTemporal: opts.killSharedTemporal || startedTemporalForDestroy,
|
|
1760
|
+
});
|
|
1680
1761
|
switch (result.action) {
|
|
1681
1762
|
case 'killed':
|
|
1682
1763
|
out.success('Temporal server stopped');
|
|
@@ -55,10 +55,16 @@ exports.formatMigrationResult = formatMigrationResult;
|
|
|
55
55
|
* 5. **Partial-copy resume.** Per-file SHA-256 in the marker — re-running
|
|
56
56
|
* a partially-completed run finishes only the missing/changed files.
|
|
57
57
|
* 6. **Files copied.** Allowlist — `config.json`, `.bootstrap-cache.json`,
|
|
58
|
-
* any `*.yaml` user-stashed lineup files, plus subdirs `
|
|
58
|
+
* any `*.yaml` user-stashed lineup files, plus subdirs `ensembles/`,
|
|
59
59
|
* `state/`, `coat-check/` (forward-compat — fine if absent). The
|
|
60
60
|
* volatile runtime trio (`daemon.pid`, `daemon.port`, `daemon.log`)
|
|
61
61
|
* is intentionally skipped — let the daemon recreate them.
|
|
62
|
+
* (Historical note: the original brief named the lineup subdir
|
|
63
|
+
* `lineups/`, but the actual on-disk name has always been
|
|
64
|
+
* `ensembles/` — see `src/ensemble/{saver,loader}.ts`. The original
|
|
65
|
+
* implementation copied the brief's typo verbatim, which silently
|
|
66
|
+
* stranded user lineups across the v0.x → v1.x migration. Fixed by
|
|
67
|
+
* pointing the allowlist at the real directory name.)
|
|
62
68
|
* 7. **Volatile-state guard.** If `daemon.pid` is present in the legacy
|
|
63
69
|
* home (likely-running daemon), refuses unless `force: true`.
|
|
64
70
|
*
|
|
@@ -80,7 +86,7 @@ const VOLATILE_FILES = new Set(['daemon.pid', 'daemon.port', 'daemon.log']);
|
|
|
80
86
|
/** Allowlisted top-level files. `*.yaml` is matched as a glob. */
|
|
81
87
|
const ALLOWLIST_FILES = new Set(['config.json', '.bootstrap-cache.json']);
|
|
82
88
|
/** Allowlisted top-level subdirs (recursive copy). Forward-compat — fine if absent. */
|
|
83
|
-
const ALLOWLIST_SUBDIRS = ['
|
|
89
|
+
const ALLOWLIST_SUBDIRS = ['ensembles', 'state', 'coat-check'];
|
|
84
90
|
function legacyHomeFor(profile, home) {
|
|
85
91
|
return path.join(home, profile === 'dev' ? LEGACY_DEV_HOME_DIR_NAME : LEGACY_PROD_HOME_DIR_NAME);
|
|
86
92
|
}
|
|
@@ -6,10 +6,13 @@ export declare const REQUIRED_SEARCH_ATTRIBUTES: ReadonlyArray<{
|
|
|
6
6
|
export interface SearchAttributePreflightOpts {
|
|
7
7
|
temporalAddress: string;
|
|
8
8
|
temporalNamespace: string;
|
|
9
|
+
/** API key for Temporal Cloud — triggers SDK-based probe when set. */
|
|
10
|
+
temporalApiKey?: string;
|
|
9
11
|
/**
|
|
10
12
|
* Optional test seam — given a namespace, return the set of search
|
|
11
13
|
* attribute names that ARE currently registered. Defaults to
|
|
12
|
-
* {@link
|
|
14
|
+
* {@link sdkProbeRegisteredAttributes} when `temporalApiKey` is set,
|
|
15
|
+
* otherwise {@link defaultProbeRegisteredAttributes} which shells out to
|
|
13
16
|
* `temporal operator search-attribute list`.
|
|
14
17
|
*/
|
|
15
18
|
probe?: (opts: {
|
|
@@ -35,15 +38,36 @@ export declare function defaultProbeRegisteredAttributes(opts: {
|
|
|
35
38
|
temporalAddress: string;
|
|
36
39
|
temporalNamespace: string;
|
|
37
40
|
}): Promise<Set<string>>;
|
|
41
|
+
/** Returns true if the address looks like a Temporal Cloud endpoint. */
|
|
42
|
+
export declare function isTemporalCloud(address: string): boolean;
|
|
43
|
+
/**
|
|
44
|
+
* SDK-based probe — uses the Temporal Client SDK to verify search attribute
|
|
45
|
+
* existence by issuing a visibility query. Works with Temporal Cloud API keys
|
|
46
|
+
* where the `temporal operator` CLI commands are unauthorized.
|
|
47
|
+
*
|
|
48
|
+
* Strategy: for each required attribute, issue `listWorkflowExecutions` with
|
|
49
|
+
* a query referencing that attribute. If the attribute is registered the query
|
|
50
|
+
* returns (possibly empty) results. If not registered, Temporal responds with
|
|
51
|
+
* INVALID_ARGUMENT containing "is not a valid search attribute".
|
|
52
|
+
*/
|
|
53
|
+
export declare function sdkProbeRegisteredAttributes(opts: {
|
|
54
|
+
temporalAddress: string;
|
|
55
|
+
temporalNamespace: string;
|
|
56
|
+
temporalApiKey?: string;
|
|
57
|
+
}): Promise<Set<string>>;
|
|
38
58
|
/**
|
|
39
59
|
* Format the missing-SA error message. Paste-friendly: operators copy the
|
|
40
|
-
*
|
|
60
|
+
* registration commands verbatim. Cloud-aware: shows `tcld` commands for
|
|
61
|
+
* Temporal Cloud namespaces.
|
|
41
62
|
*/
|
|
42
|
-
export declare function formatPreflightError(missing: ReadonlyArray<typeof REQUIRED_SEARCH_ATTRIBUTES[number]>, namespace: string, probeError?: string): string;
|
|
63
|
+
export declare function formatPreflightError(missing: ReadonlyArray<typeof REQUIRED_SEARCH_ATTRIBUTES[number]>, namespace: string, probeError?: string, cloud?: boolean): string;
|
|
43
64
|
/**
|
|
44
65
|
* Verify all {@link REQUIRED_SEARCH_ATTRIBUTES} are registered on the
|
|
45
66
|
* given namespace. Returns a structured result — callers decide whether
|
|
46
67
|
* to log+continue (boot bootstrap step) or exit non-zero (daemon start).
|
|
68
|
+
*
|
|
69
|
+
* When `temporalApiKey` is set (or address is a Cloud endpoint), uses the
|
|
70
|
+
* SDK-based probe instead of shelling out to `temporal operator`.
|
|
47
71
|
*/
|
|
48
72
|
export declare function verifySearchAttributes(opts: SearchAttributePreflightOpts): Promise<SearchAttributePreflightResult>;
|
|
49
73
|
/**
|
package/dist/cli/sa-preflight.js
CHANGED
|
@@ -1,7 +1,42 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
2
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
36
|
exports.REQUIRED_SEARCH_ATTRIBUTES = void 0;
|
|
4
37
|
exports.defaultProbeRegisteredAttributes = defaultProbeRegisteredAttributes;
|
|
38
|
+
exports.isTemporalCloud = isTemporalCloud;
|
|
39
|
+
exports.sdkProbeRegisteredAttributes = sdkProbeRegisteredAttributes;
|
|
5
40
|
exports.formatPreflightError = formatPreflightError;
|
|
6
41
|
exports.verifySearchAttributes = verifySearchAttributes;
|
|
7
42
|
exports.classifyRegistrationOutput = classifyRegistrationOutput;
|
|
@@ -29,6 +64,16 @@ exports.assertSearchAttributesOrExit = assertSearchAttributesOrExit;
|
|
|
29
64
|
* - `src/daemon.ts` boot path calls {@link verifySearchAttributes}
|
|
30
65
|
* directly to fail fast on `agent-tempo daemon start` before the worker
|
|
31
66
|
* tries to register workflows.
|
|
67
|
+
*
|
|
68
|
+
* Cloud support:
|
|
69
|
+
* Temporal Cloud's operator gRPC service is not accessible with namespace
|
|
70
|
+
* API keys, causing `temporal operator search-attribute list/create` to
|
|
71
|
+
* fail with "Request unauthorized". When a Cloud namespace is detected
|
|
72
|
+
* (address contains `.tmprl.cloud` or an API key is configured), the
|
|
73
|
+
* preflight uses an SDK-based probe: issue a visibility query referencing
|
|
74
|
+
* each required attribute — a registered attribute returns an empty result
|
|
75
|
+
* set; an unregistered one throws INVALID_ARGUMENT. Registration
|
|
76
|
+
* instructions surface `tcld` commands instead of `temporal operator`.
|
|
32
77
|
*/
|
|
33
78
|
const child_process_1 = require("child_process");
|
|
34
79
|
/** Single source of truth — must match `SEARCH_ATTRIBUTES` in `src/cli/startup.ts`. */
|
|
@@ -73,22 +118,123 @@ async function defaultProbeRegisteredAttributes(opts) {
|
|
|
73
118
|
}
|
|
74
119
|
return names;
|
|
75
120
|
}
|
|
121
|
+
/**
 * Returns true if the address looks like a Temporal Cloud endpoint.
 *
 * Recognises both namespace endpoints (`<ns>.<acct>.tmprl.cloud:7233`) and
 * regional endpoints (`<region>.<provider>.api.temporal.io:7233`). The match
 * is case-insensitive because host names are.
 *
 * @param address Temporal frontend address, typically `host:port`.
 * @returns `true` when the address contains a known Temporal Cloud domain.
 */
function isTemporalCloud(address) {
    const normalized = address.toLowerCase();
    return normalized.includes('.tmprl.cloud') || normalized.includes('.api.temporal.io');
}
|
|
125
|
+
/**
 * Substrings Temporal servers use to signal "this search attribute is not
 * registered". The exact wording varies across server versions and storage
 * backends, so a set is matched rather than a single phrase — a wording
 * change must not cause the probe to misclassify an unregistered SA as an
 * unexpected error and re-throw. All entries are lowercase; messages are
 * lowercased before comparison.
 */
const UNREGISTERED_SA_MARKERS = [
    'is not a valid search attribute',
    'is not defined',
    'no mapping defined for the field',
    'unknown or unindexed search attribute',
];
/** gRPC status code for INVALID_ARGUMENT. */
const GRPC_INVALID_ARGUMENT = 3;
/**
 * Classify a visibility-query error as "search attribute not registered".
 *
 * An error qualifies when EITHER its numeric `code` is gRPC INVALID_ARGUMENT
 * (wording-independent) OR its message contains one of
 * {@link UNREGISTERED_SA_MARKERS} (for transports/versions that don't
 * surface a code). Errors without a string `message` and without that code
 * are never classified as "unregistered", so the caller re-throws them
 * unchanged instead of this classifier failing on them.
 *
 * NOTE(review): INVALID_ARGUMENT is also what a malformed query would
 * produce, so the code check is deliberately broad — confirm the probe
 * query stays syntactically valid for every required attribute type.
 *
 * @param err Anything caught from the visibility query (may be null/undefined).
 * @returns `true` when the error means the attribute is not registered.
 */
function isUnregisteredAttributeError(err) {
    if (err?.code === GRPC_INVALID_ARGUMENT)
        return true;
    const rawMessage = err?.message;
    // A non-string `message` cannot carry a marker; bail out rather than
    // calling string methods on it.
    if (typeof rawMessage !== 'string')
        return false;
    const msg = rawMessage.toLowerCase();
    return UNREGISTERED_SA_MARKERS.some((m) => msg.includes(m));
}
|
|
159
|
+
/**
 * SDK-based probe: determine which required search attributes exist on a
 * namespace by running one visibility query per attribute through the
 * Temporal client connection, instead of shelling out to the
 * `temporal operator` CLI.
 *
 * For every entry in `REQUIRED_SEARCH_ATTRIBUTES` a `listWorkflowExecutions`
 * call is issued with a query of the form `<name> = <literal>`. A call that
 * resolves (even with no results) marks the attribute as registered. A call
 * that fails with an error recognised by `isUnregisteredAttributeError`
 * marks it as missing. Any other failure is re-thrown. The connection is
 * always closed before returning or propagating.
 *
 * @param opts `temporalAddress`, `temporalNamespace` and optional
 *   `temporalApiKey`; TLS is switched on only when an API key is supplied.
 * @returns The set of required attribute names that are registered.
 */
async function sdkProbeRegisteredAttributes(opts) {
    const temporalClient = await Promise.resolve().then(() => __importStar(require('@temporalio/client')));
    const connection = await temporalClient.Connection.connect({
        address: opts.temporalAddress,
        tls: opts.temporalApiKey ? true : undefined,
        apiKey: opts.temporalApiKey,
    });
    const found = new Set();
    try {
        for (const attr of exports.REQUIRED_SEARCH_ATTRIBUTES) {
            // The literal only needs to parse for the attribute's type:
            // Bool gets `true`, everything else (Keyword) a quoted string.
            // A new attribute type would need its own literal form here.
            let literal = '"__probe__"';
            if (attr.type === 'Bool') {
                literal = 'true';
            }
            try {
                await connection.workflowService.listWorkflowExecutions({
                    namespace: opts.temporalNamespace,
                    query: `${attr.name} = ${literal}`,
                    pageSize: 1,
                });
            }
            catch (err) {
                // Anything other than "attribute not registered" is unexpected.
                if (!isUnregisteredAttributeError(err)) {
                    throw err;
                }
                // Not registered — leave it out of the result set.
                continue;
            }
            found.add(attr.name);
        }
    }
    finally {
        await connection.close();
    }
    return found;
}
|
|
76
210
|
/**
|
|
77
211
|
* Format the missing-SA error message. Paste-friendly: operators copy the
|
|
78
|
-
*
|
|
212
|
+
* registration commands verbatim. Cloud-aware: shows `tcld` commands for
|
|
213
|
+
* Temporal Cloud namespaces.
|
|
79
214
|
*/
|
|
80
|
-
function formatPreflightError(missing, namespace, probeError) {
|
|
215
|
+
function formatPreflightError(missing, namespace, probeError, cloud) {
|
|
81
216
|
const lines = [];
|
|
82
217
|
lines.push(`Required search attributes not registered on namespace '${namespace}'.`);
|
|
83
218
|
if (probeError) {
|
|
84
219
|
lines.push(`(Could not probe namespace state: ${probeError})`);
|
|
85
220
|
}
|
|
86
221
|
lines.push('');
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
222
|
+
if (cloud) {
|
|
223
|
+
lines.push('Register via tcld (Temporal Cloud CLI) or the Cloud UI, then restart the daemon:');
|
|
224
|
+
lines.push('');
|
|
225
|
+
const saFlags = missing.map((attr) => `--sa "${attr.name}=${attr.type}"`).join(' \\\n ');
|
|
226
|
+
lines.push(` tcld namespace search-attributes add --namespace ${namespace} \\\n ${saFlags}`);
|
|
227
|
+
lines.push('');
|
|
228
|
+
lines.push('Or add them manually in the Temporal Cloud UI:');
|
|
229
|
+
lines.push(` https://cloud.temporal.io → Namespaces → ${namespace} → Search Attributes`);
|
|
230
|
+
}
|
|
231
|
+
else {
|
|
232
|
+
lines.push('Run these commands once per Temporal namespace, then restart the daemon:');
|
|
233
|
+
lines.push('');
|
|
234
|
+
for (const attr of missing) {
|
|
235
|
+
lines.push(` temporal operator search-attribute create ` +
|
|
236
|
+
`--name ${attr.name} --type ${attr.type} --namespace ${namespace}`);
|
|
237
|
+
}
|
|
92
238
|
}
|
|
93
239
|
lines.push('');
|
|
94
240
|
lines.push('(See docs/ops/v1.0-migration.md for the full upgrade walkthrough.)');
|
|
@@ -98,9 +244,23 @@ function formatPreflightError(missing, namespace, probeError) {
|
|
|
98
244
|
* Verify all {@link REQUIRED_SEARCH_ATTRIBUTES} are registered on the
|
|
99
245
|
* given namespace. Returns a structured result — callers decide whether
|
|
100
246
|
* to log+continue (boot bootstrap step) or exit non-zero (daemon start).
|
|
247
|
+
*
|
|
248
|
+
* When `temporalApiKey` is set (or address is a Cloud endpoint), uses the
|
|
249
|
+
* SDK-based probe instead of shelling out to `temporal operator`.
|
|
101
250
|
*/
|
|
102
251
|
async function verifySearchAttributes(opts) {
|
|
103
|
-
const
|
|
252
|
+
const cloud = isTemporalCloud(opts.temporalAddress) || !!opts.temporalApiKey;
|
|
253
|
+
const defaultProbe = cloud
|
|
254
|
+
? () => sdkProbeRegisteredAttributes({
|
|
255
|
+
temporalAddress: opts.temporalAddress,
|
|
256
|
+
temporalNamespace: opts.temporalNamespace,
|
|
257
|
+
temporalApiKey: opts.temporalApiKey,
|
|
258
|
+
})
|
|
259
|
+
: () => defaultProbeRegisteredAttributes({
|
|
260
|
+
temporalAddress: opts.temporalAddress,
|
|
261
|
+
temporalNamespace: opts.temporalNamespace,
|
|
262
|
+
});
|
|
263
|
+
const probe = opts.probe ?? defaultProbe;
|
|
104
264
|
let registered;
|
|
105
265
|
let probeError;
|
|
106
266
|
try {
|
|
@@ -121,7 +281,7 @@ async function verifySearchAttributes(opts) {
|
|
|
121
281
|
ok: false,
|
|
122
282
|
missing,
|
|
123
283
|
probeError,
|
|
124
|
-
message: formatPreflightError(missing, opts.temporalNamespace, probeError),
|
|
284
|
+
message: formatPreflightError(missing, opts.temporalNamespace, probeError, cloud),
|
|
125
285
|
};
|
|
126
286
|
}
|
|
127
287
|
/**
|
package/dist/cli/startup.js
CHANGED
|
@@ -376,14 +376,40 @@ async function stepSearchAttrs(cache, config, now) {
|
|
|
376
376
|
if (isCacheFresh(cache.steps.searchAttrs, TTL_24H, now)) {
|
|
377
377
|
return { status: 'skipped', durationMs: 0 };
|
|
378
378
|
}
|
|
379
|
-
const
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
379
|
+
const cloud = (0, sa_preflight_1.isTemporalCloud)(config.temporalAddress) || !!config.temporalApiKey;
|
|
380
|
+
const { result: outcome, durationMs } = await timed(async () => {
|
|
381
|
+
if (cloud) {
|
|
382
|
+
// For Temporal Cloud, use SDK probe to verify SAs are present.
|
|
383
|
+
// Registration must be done via tcld or the Cloud UI — we cannot
|
|
384
|
+
// use `temporal operator search-attribute create`.
|
|
385
|
+
try {
|
|
386
|
+
const registered = await (0, sa_preflight_1.sdkProbeRegisteredAttributes)({
|
|
387
|
+
temporalAddress: config.temporalAddress,
|
|
388
|
+
temporalNamespace: config.temporalNamespace,
|
|
389
|
+
temporalApiKey: config.temporalApiKey,
|
|
390
|
+
});
|
|
391
|
+
const missing = sa_preflight_1.REQUIRED_SEARCH_ATTRIBUTES.filter((a) => !registered.has(a.name));
|
|
392
|
+
if (missing.length > 0) {
|
|
393
|
+
const saFlags = missing.map((a) => `--sa "${a.name}=${a.type}"`).join(' ');
|
|
394
|
+
return {
|
|
395
|
+
status: 'failed',
|
|
396
|
+
durationMs: 0,
|
|
397
|
+
detail: `${missing.length} search attribute(s) not registered on Temporal Cloud.\n` +
|
|
398
|
+
` Register via: tcld namespace search-attributes add --namespace ${config.temporalNamespace} ${saFlags}\n` +
|
|
399
|
+
` Or add them in the Cloud UI: https://cloud.temporal.io`,
|
|
400
|
+
};
|
|
401
|
+
}
|
|
402
|
+
return { status: 'ok', durationMs: 0 };
|
|
403
|
+
}
|
|
404
|
+
catch (err) {
|
|
405
|
+
return {
|
|
406
|
+
status: 'failed',
|
|
407
|
+
durationMs: 0,
|
|
408
|
+
detail: `SDK probe failed: ${err?.message || err}`,
|
|
409
|
+
};
|
|
410
|
+
}
|
|
411
|
+
}
|
|
412
|
+
// Self-hosted path: per-attr classification via `registerSearchAttribute` (#605)
|
|
387
413
|
const failures = [];
|
|
388
414
|
for (const attr of sa_preflight_1.REQUIRED_SEARCH_ATTRIBUTES) {
|
|
389
415
|
const r = (0, sa_preflight_1.registerSearchAttribute)(attr, config.temporalAddress, config.temporalNamespace);
|