agent-tempo 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,7 @@
12
12
  rel="stylesheet"
13
13
  href="https://fonts.googleapis.com/css2?family=Instrument+Sans:wght@400;500;600;700&family=Instrument+Serif:ital@0;1&family=JetBrains+Mono:wght@400;500&display=swap"
14
14
  />
15
- <script type="module" crossorigin src="/dashboard/assets/index-_5jV0Znu.js"></script>
15
+ <script type="module" crossorigin src="/dashboard/assets/index-D6Xyje_n.js"></script>
16
16
  <link rel="stylesheet" crossorigin href="/dashboard/assets/index-CB78ToNE.css">
17
17
  </head>
18
18
  <body>
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "agent-tempo-dashboard",
3
3
  "private": true,
4
- "version": "1.0.1",
4
+ "version": "1.2.0",
5
5
  "type": "module",
6
6
  "description": "Web dashboard for agent-tempo. Bundled into the npm package; served by the daemon at /dashboard/*.",
7
7
  "scripts": {
@@ -0,0 +1,21 @@
1
+ export interface ClaudeStopInput {
2
+ /** Supervisor's 8-char short id from `SessionMetadata.bgShortId`. */
3
+ shortId: string;
4
+ /** Custom claude binary path (defaults to `'claude'` on PATH). */
5
+ claudeBin?: string;
6
+ }
7
+ export interface ClaudeStopResult {
8
+ success: boolean;
9
+ /** One of `'stopped'` | `'already-gone'` | `'error'`. */
10
+ outcome: 'stopped' | 'already-gone' | 'error';
11
+ /** Populated on `outcome === 'error'` — the raw stderr/stdout snippet for diagnostics. */
12
+ detail?: string;
13
+ /** Exit code from `claude stop`. Undefined on spawn-side failure. */
14
+ exitCode?: number;
15
+ }
16
+ /**
17
+ * Invoke `claude stop <shortId>` synchronously. Returns a structured
18
+ * `ClaudeStopResult` for every CLI outcome; throws (non-retryable) only when
19
+ * `shortId` is malformed. Routes through `resolveClaudePath` so `claudeBin` is honoured.
20
+ */
21
+ export declare function claudeStop(input: ClaudeStopInput): Promise<ClaudeStopResult>;
@@ -0,0 +1,94 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.claudeStop = claudeStop;
4
+ /**
5
+ * #596 / ADR 0016 — `claude stop <shortId>` per-host activity.
6
+ *
7
+ * `claude --bg` hands the session to Anthropic's per-user Claude Code
8
+ * supervisor. We don't own the PID anymore (the supervisor does), so the
9
+ * existing `hard-terminate` PID-scan helper would always return
10
+ * `strategy: 'none'` and leak the supervisor job. Instead, ask the
11
+ * supervisor to stop the job via its documented CLI verb.
12
+ *
13
+ * **Routing**: registered on the per-host activity queue
14
+ * (`agent-tempo-{hostname}`) so the `claude` CLI runs on the same machine
15
+ * the supervisor lives on. The workflow proxy in `src/workflows/session.ts`
16
+ * picks the right host via `AgentTempoHostname` exactly the same way
17
+ * `hardTerminateAttachment` already does.
18
+ *
19
+ * **Idempotency**: `claude stop` exits 1 with `No job matching '<id>'` when
20
+ * the job is already gone. We classify that as success — destroy must be
21
+ * idempotent and a re-run of an already-stopped session is a normal flow.
22
+ * Any other non-zero exit (network/socket failure, supervisor itself wedged,
23
+ * unexpected CLI shape) is reported as `success: false` so the workflow's
24
+ * destroy path can fall back to the existing `hardTerminateAttachment`
25
+ * PID-scan as defense in depth.
26
+ *
27
+ * **Timing**: 15-second timeout. The supervisor responds in <100ms on a
28
+ * healthy host, but Windows process-spawn overhead and antivirus scanners
29
+ * can stretch the worst case. 15s leaves slack without holding up destroy.
30
+ */
31
+ const child_process_1 = require("child_process");
32
+ const activity_1 = require("@temporalio/activity");
33
+ const spawn_1 = require("../spawn");
34
+ const log = (...args) => console.error('[agent-tempo:claude-stop]', ...args);
35
+ /**
36
+ * Validate the supervisor short id before shelling out. Anthropic uses
37
+ * lowercase hex from the full UUID's first 8 chars; reject anything else
38
+ * to avoid passing user-influenced strings into argv.
39
+ */
40
+ function isValidShortId(s) {
41
+ return typeof s === 'string' && /^[0-9a-f]{8}$/.test(s);
42
+ }
43
+ /**
44
+ * Invoke `claude stop <shortId>` synchronously. Returns a structured
45
+ * `ClaudeStopResult` for every CLI outcome; throws (non-retryable) only when
46
+ * `shortId` is malformed. Routes through `resolveClaudePath` so `claudeBin` is honoured.
47
+ */
48
+ async function claudeStop(input) {
49
+ const { shortId, claudeBin } = input;
50
+ if (!isValidShortId(shortId)) {
51
+ throw activity_1.ApplicationFailure.nonRetryable(`claudeStop: invalid shortId "${shortId}" (must be 8 lowercase hex chars). ` +
52
+ `This indicates corrupt SessionMetadata.bgShortId — check the recruit path.`);
53
+ }
54
+ const bin = (0, spawn_1.resolveClaudePath)(claudeBin);
55
+ let result;
56
+ try {
57
+ result = (0, child_process_1.spawnSync)(bin, ['stop', shortId], {
58
+ encoding: 'utf8',
59
+ timeout: 15_000,
60
+ shell: process.platform === 'win32',
61
+ });
62
+ }
63
+ catch (err) {
64
+ const detail = err instanceof Error ? err.message : String(err);
65
+ log(`claude stop ${shortId} failed to launch: ${detail}`);
66
+ return { success: false, outcome: 'error', detail };
67
+ }
68
+ if (result.error) {
69
+ log(`claude stop ${shortId} spawn error: ${result.error.message}`);
70
+ return { success: false, outcome: 'error', detail: result.error.message };
71
+ }
72
+ const exitCode = result.status ?? -1;
73
+ const stderr = (result.stderr || '').toString();
74
+ const stdout = (result.stdout || '').toString();
75
+ const combined = `${stderr}\n${stdout}`.trim();
76
+ if (exitCode === 0) {
77
+ log(`claude stop ${shortId} → stopped`);
78
+ return { success: true, outcome: 'stopped', exitCode };
79
+ }
80
+ // Anthropic's "already gone" signal — exit 1 with `No job matching '<id>'`
81
+ // somewhere in the combined output. Match case-insensitively to absorb
82
+ // small phrasing drift across CLI versions.
83
+ if (/no job matching/i.test(combined)) {
84
+ log(`claude stop ${shortId} → already-gone (exit ${exitCode}, idempotent success)`);
85
+ return { success: true, outcome: 'already-gone', exitCode };
86
+ }
87
+ log(`claude stop ${shortId} → error (exit ${exitCode}): ${combined || '(no output)'}`);
88
+ return {
89
+ success: false,
90
+ outcome: 'error',
91
+ exitCode,
92
+ detail: combined || `exit ${exitCode} with no output`,
93
+ };
94
+ }
@@ -132,6 +132,45 @@ export type StopTemporalResult = {
132
132
  * profile collateral damage.
133
133
  */
134
134
  export declare function stopTemporalServer(opts: StopTemporalServerOpts): StopTemporalResult;
135
+ /**
136
+ * Minimal child handle {@link startTemporalForDestroy} needs — `ChildProcess`
137
+ * satisfies it. Kept narrow so unit tests can inject a fake without spawning.
138
+ *
139
+ * @internal
140
+ */
141
+ export interface SpawnedTemporalChild {
142
+ kill(): void;
143
+ unref(): void;
144
+ }
145
+ /**
146
+ * Dependency seam for {@link startTemporalForDestroy} — production callers
147
+ * pass nothing and get the real spawn + reachability probe. Tests inject
148
+ * stubs plus a tiny `pollDelayMs` so the readiness loop runs instantly.
149
+ *
150
+ * @internal
151
+ */
152
+ export interface StartTemporalForDestroyDeps {
153
+ /** Readiness probe — defaults to {@link isTemporalReachable} for `config`. */
154
+ isReachable?: () => Promise<boolean>;
155
+ /** Spawn hook — defaults to a detached `temporal server start-dev`. */
156
+ spawn?: () => SpawnedTemporalChild;
157
+ /** Readiness poll attempts. Default 20. */
158
+ attempts?: number;
159
+ /** Delay between readiness polls, ms. Default 500 (→ 20×500ms = 10s). */
160
+ pollDelayMs?: number;
161
+ }
162
+ /**
163
+ * Start a temporary Temporal dev server just long enough for `down --destroy`
164
+ * to terminate workflows when Temporal happened to be down. Polls for
165
+ * readiness; on timeout it kills the child it spawned so `down` never leaves
166
+ * a stray Temporal process booting in the background. Exported for unit
167
+ * tests — production callers pass only `config`.
168
+ *
169
+ * @internal
170
+ */
171
+ export declare function startTemporalForDestroy(config: Config, deps?: StartTemporalForDestroyDeps): Promise<{
172
+ started: boolean;
173
+ }>;
135
174
  export declare function down(opts: DownOpts): Promise<void>;
136
175
  interface AgentTypesCommandOpts {
137
176
  subcommand?: string;
@@ -40,6 +40,7 @@ exports.up = up;
40
40
  exports.formatScheduleRecurrence = formatScheduleRecurrence;
41
41
  exports.lineupScheduleToEntry = lineupScheduleToEntry;
42
42
  exports.stopTemporalServer = stopTemporalServer;
43
+ exports.startTemporalForDestroy = startTemporalForDestroy;
43
44
  exports.down = down;
44
45
  exports.agentTypesCommand = agentTypesCommand;
45
46
  exports.broadcast = broadcast;
@@ -1547,6 +1548,44 @@ function stopTemporalServer(opts) {
1547
1548
  return { action: 'failed', error: err };
1548
1549
  }
1549
1550
  }
1551
+ /**
1552
+ * Start a temporary Temporal dev server just long enough for `down --destroy`
1553
+ * to terminate workflows when Temporal happened to be down. Polls for
1554
+ * readiness; on timeout it kills the child it spawned so `down` never leaves
1555
+ * a stray Temporal process booting in the background. Exported for unit
1556
+ * tests — production callers pass only `config`.
1557
+ *
1558
+ * @internal
1559
+ */
1560
+ async function startTemporalForDestroy(config, deps = {}) {
1561
+ const attempts = deps.attempts ?? 20;
1562
+ const pollDelayMs = deps.pollDelayMs ?? 500;
1563
+ const isReachable = deps.isReachable ?? (() => isTemporalReachable(config));
1564
+ const spawn = deps.spawn ?? (() => {
1565
+ (0, fs_1.mkdirSync)(config_1.AGENT_TEMPO_HOME, { recursive: true });
1566
+ const port = config.temporalAddress.split(':')[1] || '7233';
1567
+ return (0, child_process_1.spawn)('temporal', [
1568
+ 'server', 'start-dev',
1569
+ '--port', port,
1570
+ '--db-filename', DEFAULT_DB_PATH,
1571
+ ], { detached: true, stdio: 'ignore' });
1572
+ });
1573
+ const child = spawn();
1574
+ child.unref();
1575
+ for (let i = 0; i < attempts; i++) {
1576
+ await new Promise(r => setTimeout(r, pollDelayMs));
1577
+ if (await isReachable())
1578
+ return { started: true };
1579
+ }
1580
+ // Timed out. The detached child may still be booting and would come up
1581
+ // orphaned moments after we give up — kill the process we spawned so
1582
+ // `down` doesn't leave a stray Temporal server behind.
1583
+ try {
1584
+ child.kill();
1585
+ }
1586
+ catch { /* already exited */ }
1587
+ return { started: false };
1588
+ }
1550
1589
  async function down(opts) {
1551
1590
  const config = (0, config_1.getConfig)(opts);
1552
1591
  out.heading('agent-tempo teardown');
@@ -1555,7 +1594,35 @@ async function down(opts) {
1555
1594
  : ` Stopping daemon + Temporal. Workflows stay parked for the next ${out.dim('agent-tempo up')}.`);
1556
1595
  // Step 1 (destroy mode only): enumerate + terminate workflows across every
1557
1596
  // ensemble, after a typed confirmation showing the user what's at stake.
1558
- const temporalUp = await isTemporalReachable(config);
1597
+ let temporalUp = await isTemporalReachable(config);
1598
+ // `--destroy` can only terminate workflows while Temporal is reachable.
1599
+ // Workflow state lives durably on disk in ~/.agent-tempo/, so if Temporal
1600
+ // happens to be down when the user runs `down --destroy`, skipping the
1601
+ // destroy step here silently leaves every workflow to be resurrected the
1602
+ // next time anything starts the daemon (an `up`, a `status`, or the TUI).
1603
+ // To make `--destroy` actually mean it, start Temporal temporarily just
1604
+ // long enough to run the terminations — Step 4 below stops it again.
1605
+ let startedTemporalForDestroy = false;
1606
+ if (opts.destroy && !temporalUp) {
1607
+ if (!temporalCliExists()) {
1608
+ out.warn('temporal CLI not found — cannot destroy workflows; they will persist on disk.');
1609
+ }
1610
+ else {
1611
+ out.log(` ${out.dim('...')} Temporal is down — starting it temporarily to destroy workflows...`);
1612
+ const { started } = await startTemporalForDestroy(config);
1613
+ if (started) {
1614
+ temporalUp = true;
1615
+ startedTemporalForDestroy = true;
1616
+ out.success('Temporal started for cleanup');
1617
+ }
1618
+ else {
1619
+ out.warn('Could not start Temporal within 10s — workflows may survive teardown. ' +
1620
+ 'Re-run `agent-tempo down --destroy` once Temporal is up. ' +
1621
+ 'A stray Temporal process may have been left starting — check with ' +
1622
+ '`agent-tempo status` and stop it manually if one is still running.');
1623
+ }
1624
+ }
1625
+ }
1559
1626
  if (opts.destroy && temporalUp) {
1560
1627
  try {
1561
1628
  const connection = await (0, connection_1.createTemporalConnection)(config);
@@ -1625,6 +1692,15 @@ async function down(opts) {
1625
1692
  const confirmed = await typedConfirmPrompt(` This terminates every workflow (${totalTargets}) and cannot be undone.`, 'destroy');
1626
1693
  if (!confirmed) {
1627
1694
  out.log('Aborted.');
1695
+ // We may have started Temporal solely to run this destroy.
1696
+ // Aborting at the confirmation prompt must not leave that
1697
+ // server orphaned — stop it before the hard exit. We own it
1698
+ // outright, so force past the cross-profile guard.
1699
+ if (startedTemporalForDestroy) {
1700
+ if (stopTemporalServer({ killSharedTemporal: true }).action === 'killed') {
1701
+ out.log(` ${out.dim('Temporal server stopped')}`);
1702
+ }
1703
+ }
1628
1704
  process.exit(0);
1629
1705
  }
1630
1706
  }
@@ -1676,7 +1752,12 @@ async function down(opts) {
1676
1752
  // skips the kill when the OPPOSITE profile is likely active;
1677
1753
  // `--kill-shared-temporal` is the explicit opt-in to override.
1678
1754
  if (temporalUp) {
1679
- const result = stopTemporalServer({ killSharedTemporal: opts.killSharedTemporal });
1755
+ // When we started Temporal ourselves just for the destroy step, always
1756
+ // stop it again — the cross-profile guard is about not killing a server
1757
+ // the *other* profile owns, but this one we own outright.
1758
+ const result = stopTemporalServer({
1759
+ killSharedTemporal: opts.killSharedTemporal || startedTemporalForDestroy,
1760
+ });
1680
1761
  switch (result.action) {
1681
1762
  case 'killed':
1682
1763
  out.success('Temporal server stopped');
@@ -55,10 +55,16 @@ exports.formatMigrationResult = formatMigrationResult;
55
55
  * 5. **Partial-copy resume.** Per-file SHA-256 in the marker — re-running
56
56
  * a partially-completed run finishes only the missing/changed files.
57
57
  * 6. **Files copied.** Allowlist — `config.json`, `.bootstrap-cache.json`,
58
- * any `*.yaml` user-stashed lineup files, plus subdirs `lineups/`,
58
+ * any `*.yaml` user-stashed lineup files, plus subdirs `ensembles/`,
59
59
  * `state/`, `coat-check/` (forward-compat — fine if absent). The
60
60
  * volatile runtime trio (`daemon.pid`, `daemon.port`, `daemon.log`)
61
61
  * is intentionally skipped — let the daemon recreate them.
62
+ * (Historical note: the original brief named the lineup subdir
63
+ * `lineups/`, but the actual on-disk name has always been
64
+ * `ensembles/` — see `src/ensemble/{saver,loader}.ts`. The original
65
+ * implementation copied the brief's typo verbatim, which silently
66
+ * stranded user lineups across the v0.x → v1.x migration. Fixed by
67
+ * pointing the allowlist at the real directory name.)
62
68
  * 7. **Volatile-state guard.** If `daemon.pid` is present in the legacy
63
69
  * home (likely-running daemon), refuses unless `force: true`.
64
70
  *
@@ -80,7 +86,7 @@ const VOLATILE_FILES = new Set(['daemon.pid', 'daemon.port', 'daemon.log']);
80
86
  /** Allowlisted top-level files. `*.yaml` is matched as a glob. */
81
87
  const ALLOWLIST_FILES = new Set(['config.json', '.bootstrap-cache.json']);
82
88
  /** Allowlisted top-level subdirs (recursive copy). Forward-compat — fine if absent. */
83
- const ALLOWLIST_SUBDIRS = ['lineups', 'state', 'coat-check'];
89
+ const ALLOWLIST_SUBDIRS = ['ensembles', 'state', 'coat-check'];
84
90
  function legacyHomeFor(profile, home) {
85
91
  return path.join(home, profile === 'dev' ? LEGACY_DEV_HOME_DIR_NAME : LEGACY_PROD_HOME_DIR_NAME);
86
92
  }
@@ -6,10 +6,13 @@ export declare const REQUIRED_SEARCH_ATTRIBUTES: ReadonlyArray<{
6
6
  export interface SearchAttributePreflightOpts {
7
7
  temporalAddress: string;
8
8
  temporalNamespace: string;
9
+ /** API key for Temporal Cloud — triggers SDK-based probe when set. */
10
+ temporalApiKey?: string;
9
11
  /**
10
12
  * Optional test seam — given a namespace, return the set of search
11
13
  * attribute names that ARE currently registered. Defaults to
12
- * {@link defaultProbeRegisteredAttributes} which shells out to
14
+ * {@link sdkProbeRegisteredAttributes} when `temporalApiKey` is set,
15
+ * otherwise {@link defaultProbeRegisteredAttributes} which shells out to
13
16
  * `temporal operator search-attribute list`.
14
17
  */
15
18
  probe?: (opts: {
@@ -35,15 +38,36 @@ export declare function defaultProbeRegisteredAttributes(opts: {
35
38
  temporalAddress: string;
36
39
  temporalNamespace: string;
37
40
  }): Promise<Set<string>>;
41
+ /** Returns true if the address looks like a Temporal Cloud endpoint. */
42
+ export declare function isTemporalCloud(address: string): boolean;
43
+ /**
44
+ * SDK-based probe — uses the Temporal Client SDK to verify search attribute
45
+ * existence by issuing a visibility query. Works with Temporal Cloud API keys
46
+ * where the `temporal operator` CLI commands are unauthorized.
47
+ *
48
+ * Strategy: for each required attribute, issue `listWorkflowExecutions` with
49
+ * a query referencing that attribute. If the attribute is registered the query
50
+ * returns (possibly empty) results. If not registered, Temporal responds with
51
+ * INVALID_ARGUMENT containing "is not a valid search attribute".
52
+ */
53
+ export declare function sdkProbeRegisteredAttributes(opts: {
54
+ temporalAddress: string;
55
+ temporalNamespace: string;
56
+ temporalApiKey?: string;
57
+ }): Promise<Set<string>>;
38
58
  /**
39
59
  * Format the missing-SA error message. Paste-friendly: operators copy the
40
- * `temporal operator search-attribute create` block verbatim.
60
+ * registration commands verbatim. Cloud-aware: shows `tcld` commands for
61
+ * Temporal Cloud namespaces.
41
62
  */
42
- export declare function formatPreflightError(missing: ReadonlyArray<typeof REQUIRED_SEARCH_ATTRIBUTES[number]>, namespace: string, probeError?: string): string;
63
+ export declare function formatPreflightError(missing: ReadonlyArray<typeof REQUIRED_SEARCH_ATTRIBUTES[number]>, namespace: string, probeError?: string, cloud?: boolean): string;
43
64
  /**
44
65
  * Verify all {@link REQUIRED_SEARCH_ATTRIBUTES} are registered on the
45
66
  * given namespace. Returns a structured result — callers decide whether
46
67
  * to log+continue (boot bootstrap step) or exit non-zero (daemon start).
68
+ *
69
+ * When `temporalApiKey` is set (or address is a Cloud endpoint), uses the
70
+ * SDK-based probe instead of shelling out to `temporal operator`.
47
71
  */
48
72
  export declare function verifySearchAttributes(opts: SearchAttributePreflightOpts): Promise<SearchAttributePreflightResult>;
49
73
  /**
@@ -1,7 +1,42 @@
1
1
  "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
2
35
  Object.defineProperty(exports, "__esModule", { value: true });
3
36
  exports.REQUIRED_SEARCH_ATTRIBUTES = void 0;
4
37
  exports.defaultProbeRegisteredAttributes = defaultProbeRegisteredAttributes;
38
+ exports.isTemporalCloud = isTemporalCloud;
39
+ exports.sdkProbeRegisteredAttributes = sdkProbeRegisteredAttributes;
5
40
  exports.formatPreflightError = formatPreflightError;
6
41
  exports.verifySearchAttributes = verifySearchAttributes;
7
42
  exports.classifyRegistrationOutput = classifyRegistrationOutput;
@@ -29,6 +64,16 @@ exports.assertSearchAttributesOrExit = assertSearchAttributesOrExit;
29
64
  * - `src/daemon.ts` boot path calls {@link verifySearchAttributes}
30
65
  * directly to fail fast on `agent-tempo daemon start` before the worker
31
66
  * tries to register workflows.
67
+ *
68
+ * Cloud support:
69
+ * Temporal Cloud's operator gRPC service is not accessible with namespace
70
+ * API keys, causing `temporal operator search-attribute list/create` to
71
+ * fail with "Request unauthorized". When a Cloud namespace is detected
72
+ * (address contains `.tmprl.cloud` or an API key is configured), the
73
+ * preflight uses an SDK-based probe: issue a visibility query referencing
74
+ * each required attribute — a registered attribute returns an empty result
75
+ * set; an unregistered one throws INVALID_ARGUMENT. Registration
76
+ * instructions surface `tcld` commands instead of `temporal operator`.
32
77
  */
33
78
  const child_process_1 = require("child_process");
34
79
  /** Single source of truth — must match `SEARCH_ATTRIBUTES` in `src/cli/startup.ts`. */
@@ -73,22 +118,123 @@ async function defaultProbeRegisteredAttributes(opts) {
73
118
  }
74
119
  return names;
75
120
  }
121
+ /** Returns true if the address looks like a Temporal Cloud endpoint. */
122
+ function isTemporalCloud(address) {
123
+ return address.includes('.tmprl.cloud');
124
+ }
125
+ /**
126
+ * Substrings Temporal servers use to signal "this search attribute is not
127
+ * registered". The exact wording varies across server versions and storage
128
+ * backends (e.g. 'is not a valid search attribute', 'is not defined',
129
+ * 'no mapping defined for the field'), so we match a set rather than a
130
+ * single phrase — a wording change must not cause the probe to misclassify
131
+ * an unregistered SA as an unexpected error and re-throw.
132
+ */
133
+ const UNREGISTERED_SA_MARKERS = [
134
+ 'is not a valid search attribute',
135
+ 'is not defined',
136
+ 'no mapping defined for the field',
137
+ 'unknown or unindexed search attribute',
138
+ ];
139
+ /** gRPC status code for INVALID_ARGUMENT. */
140
+ const GRPC_INVALID_ARGUMENT = 3;
141
+ /**
142
+ * Classify a visibility-query error as "search attribute not registered".
143
+ *
144
+ * A registered-but-empty attribute returns results; an unregistered one
145
+ * fails. Temporal reports the failure as gRPC INVALID_ARGUMENT — we match
146
+ * known "not registered" message substrings first, then fall back to that
147
+ * status code (wording-independent) when the message wording is unrecognized.
148
+ */
149
+ function isUnregisteredAttributeError(err) {
150
+ const msg = (err?.message || '').toLowerCase();
151
+ if (UNREGISTERED_SA_MARKERS.some((m) => msg.includes(m)))
152
+ return true;
153
+ // gRPC ServiceError exposes a numeric `code`; INVALID_ARGUMENT is taken to mean
154
+ // the query referenced an attribute the namespace doesn't know about.
155
+ if (err?.code === GRPC_INVALID_ARGUMENT)
156
+ return true;
157
+ return false;
158
+ }
159
+ /**
160
+ * SDK-based probe — uses the Temporal Client SDK to verify search attribute
161
+ * existence by issuing a visibility query. Works with Temporal Cloud API keys
162
+ * where the `temporal operator` CLI commands are unauthorized.
163
+ *
164
+ * Strategy: for each required attribute, issue `listWorkflowExecutions` with
165
+ * a query referencing that attribute. If the attribute is registered the query
166
+ * returns (possibly empty) results. If not registered, Temporal responds with
167
+ * INVALID_ARGUMENT containing "is not a valid search attribute".
168
+ */
169
+ async function sdkProbeRegisteredAttributes(opts) {
170
+ const { Connection } = await Promise.resolve().then(() => __importStar(require('@temporalio/client')));
171
+ const tls = opts.temporalApiKey ? true : undefined;
172
+ const conn = await Connection.connect({
173
+ address: opts.temporalAddress,
174
+ tls: tls,
175
+ apiKey: opts.temporalApiKey,
176
+ });
177
+ const registered = new Set();
178
+ try {
179
+ for (const attr of exports.REQUIRED_SEARCH_ATTRIBUTES) {
180
+ // The probe value only has to be syntactically valid for the
181
+ // attribute's type so the visibility query parses. We support
182
+ // Keyword (quoted string literal) and Bool (`true`) here — the only
183
+ // two types in REQUIRED_SEARCH_ATTRIBUTES. A new SA type would need
184
+ // its own literal form added below.
185
+ const testValue = attr.type === 'Bool' ? 'true' : '"__probe__"';
186
+ try {
187
+ await conn.workflowService.listWorkflowExecutions({
188
+ namespace: opts.temporalNamespace,
189
+ query: `${attr.name} = ${testValue}`,
190
+ pageSize: 1,
191
+ });
192
+ registered.add(attr.name);
193
+ }
194
+ catch (err) {
195
+ if (isUnregisteredAttributeError(err)) {
196
+ // Attribute not registered — don't add to set
197
+ }
198
+ else {
199
+ // Unexpected error — re-throw
200
+ throw err;
201
+ }
202
+ }
203
+ }
204
+ }
205
+ finally {
206
+ await conn.close();
207
+ }
208
+ return registered;
209
+ }
76
210
  /**
77
211
  * Format the missing-SA error message. Paste-friendly: operators copy the
78
- * `temporal operator search-attribute create` block verbatim.
212
+ * registration commands verbatim. Cloud-aware: shows `tcld` commands for
213
+ * Temporal Cloud namespaces.
79
214
  */
80
- function formatPreflightError(missing, namespace, probeError) {
215
+ function formatPreflightError(missing, namespace, probeError, cloud) {
81
216
  const lines = [];
82
217
  lines.push(`Required search attributes not registered on namespace '${namespace}'.`);
83
218
  if (probeError) {
84
219
  lines.push(`(Could not probe namespace state: ${probeError})`);
85
220
  }
86
221
  lines.push('');
87
- lines.push('Run these commands once per Temporal namespace, then restart the daemon:');
88
- lines.push('');
89
- for (const attr of missing) {
90
- lines.push(` temporal operator search-attribute create ` +
91
- `--name ${attr.name} --type ${attr.type} --namespace ${namespace}`);
222
+ if (cloud) {
223
+ lines.push('Register via tcld (Temporal Cloud CLI) or the Cloud UI, then restart the daemon:');
224
+ lines.push('');
225
+ const saFlags = missing.map((attr) => `--sa "${attr.name}=${attr.type}"`).join(' \\\n ');
226
+ lines.push(` tcld namespace search-attributes add --namespace ${namespace} \\\n ${saFlags}`);
227
+ lines.push('');
228
+ lines.push('Or add them manually in the Temporal Cloud UI:');
229
+ lines.push(` https://cloud.temporal.io → Namespaces → ${namespace} → Search Attributes`);
230
+ }
231
+ else {
232
+ lines.push('Run these commands once per Temporal namespace, then restart the daemon:');
233
+ lines.push('');
234
+ for (const attr of missing) {
235
+ lines.push(` temporal operator search-attribute create ` +
236
+ `--name ${attr.name} --type ${attr.type} --namespace ${namespace}`);
237
+ }
92
238
  }
93
239
  lines.push('');
94
240
  lines.push('(See docs/ops/v1.0-migration.md for the full upgrade walkthrough.)');
@@ -98,9 +244,23 @@ function formatPreflightError(missing, namespace, probeError) {
98
244
  * Verify all {@link REQUIRED_SEARCH_ATTRIBUTES} are registered on the
99
245
  * given namespace. Returns a structured result — callers decide whether
100
246
  * to log+continue (boot bootstrap step) or exit non-zero (daemon start).
247
+ *
248
+ * When `temporalApiKey` is set (or address is a Cloud endpoint), uses the
249
+ * SDK-based probe instead of shelling out to `temporal operator`.
101
250
  */
102
251
  async function verifySearchAttributes(opts) {
103
- const probe = opts.probe ?? defaultProbeRegisteredAttributes;
252
+ const cloud = isTemporalCloud(opts.temporalAddress) || !!opts.temporalApiKey;
253
+ const defaultProbe = cloud
254
+ ? () => sdkProbeRegisteredAttributes({
255
+ temporalAddress: opts.temporalAddress,
256
+ temporalNamespace: opts.temporalNamespace,
257
+ temporalApiKey: opts.temporalApiKey,
258
+ })
259
+ : () => defaultProbeRegisteredAttributes({
260
+ temporalAddress: opts.temporalAddress,
261
+ temporalNamespace: opts.temporalNamespace,
262
+ });
263
+ const probe = opts.probe ?? defaultProbe;
104
264
  let registered;
105
265
  let probeError;
106
266
  try {
@@ -121,7 +281,7 @@ async function verifySearchAttributes(opts) {
121
281
  ok: false,
122
282
  missing,
123
283
  probeError,
124
- message: formatPreflightError(missing, opts.temporalNamespace, probeError),
284
+ message: formatPreflightError(missing, opts.temporalNamespace, probeError, cloud),
125
285
  };
126
286
  }
127
287
  /**
@@ -376,14 +376,40 @@ async function stepSearchAttrs(cache, config, now) {
376
376
  if (isCacheFresh(cache.steps.searchAttrs, TTL_24H, now)) {
377
377
  return { status: 'skipped', durationMs: 0 };
378
378
  }
379
- const { result: outcome, durationMs } = await timed(() => {
380
- // Per-attr classification via `registerSearchAttribute` (#605)
381
- // distinguishes `already-exists` (idempotent expected case) from real
382
- // failures. Pre-#605 every non-zero exit was swallowed as "already
383
- // registered", masking the SQLite dev-server's 10-Keyword-per-namespace
384
- // cap and other genuine errors until a downstream workflow start failed
385
- // with the confusing `INVALID_ARGUMENT: search attribute ... is not
386
- // defined`.
379
+ const cloud = (0, sa_preflight_1.isTemporalCloud)(config.temporalAddress) || !!config.temporalApiKey;
380
+ const { result: outcome, durationMs } = await timed(async () => {
381
+ if (cloud) {
382
+ // For Temporal Cloud, use SDK probe to verify SAs are present.
383
+ // Registration must be done via tcld or the Cloud UI — we cannot
384
+ // use `temporal operator search-attribute create`.
385
+ try {
386
+ const registered = await (0, sa_preflight_1.sdkProbeRegisteredAttributes)({
387
+ temporalAddress: config.temporalAddress,
388
+ temporalNamespace: config.temporalNamespace,
389
+ temporalApiKey: config.temporalApiKey,
390
+ });
391
+ const missing = sa_preflight_1.REQUIRED_SEARCH_ATTRIBUTES.filter((a) => !registered.has(a.name));
392
+ if (missing.length > 0) {
393
+ const saFlags = missing.map((a) => `--sa "${a.name}=${a.type}"`).join(' ');
394
+ return {
395
+ status: 'failed',
396
+ durationMs: 0,
397
+ detail: `${missing.length} search attribute(s) not registered on Temporal Cloud.\n` +
398
+ ` Register via: tcld namespace search-attributes add --namespace ${config.temporalNamespace} ${saFlags}\n` +
399
+ ` Or add them in the Cloud UI: https://cloud.temporal.io`,
400
+ };
401
+ }
402
+ return { status: 'ok', durationMs: 0 };
403
+ }
404
+ catch (err) {
405
+ return {
406
+ status: 'failed',
407
+ durationMs: 0,
408
+ detail: `SDK probe failed: ${err?.message || err}`,
409
+ };
410
+ }
411
+ }
412
+ // Self-hosted path: per-attr classification via `registerSearchAttribute` (#605)
387
413
  const failures = [];
388
414
  for (const attr of sa_preflight_1.REQUIRED_SEARCH_ATTRIBUTES) {
389
415
  const r = (0, sa_preflight_1.registerSearchAttribute)(attr, config.temporalAddress, config.temporalNamespace);