claude-tempo 0.26.0-beta.2 → 0.26.0-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/CLAUDE.md +11 -6
  2. package/dist/activities/outbox.js +155 -44
  3. package/dist/activities/resolve.js +2 -4
  4. package/dist/adapters/base.d.ts +95 -5
  5. package/dist/adapters/base.js +463 -184
  6. package/dist/adapters/claude-code/adapter.d.ts +22 -8
  7. package/dist/adapters/claude-code/adapter.js +63 -16
  8. package/dist/adapters/copilot/adapter.js +7 -5
  9. package/dist/adapters/terminal-error.d.ts +27 -0
  10. package/dist/adapters/terminal-error.js +39 -0
  11. package/dist/cli/commands.d.ts +0 -10
  12. package/dist/cli/commands.js +19 -506
  13. package/dist/cli/config-command.js +10 -2
  14. package/dist/cli/daemon-command.d.ts +47 -0
  15. package/dist/cli/daemon-command.js +356 -0
  16. package/dist/cli/daemon.d.ts +52 -0
  17. package/dist/cli/daemon.js +148 -2
  18. package/dist/cli/help-text.d.ts +1 -0
  19. package/dist/cli/help-text.js +142 -0
  20. package/dist/cli/upgrade-command.d.ts +5 -0
  21. package/dist/cli/upgrade-command.js +240 -0
  22. package/dist/cli.js +105 -47
  23. package/dist/client/index.js +5 -7
  24. package/dist/daemon.d.ts +11 -11
  25. package/dist/daemon.js +47 -75
  26. package/dist/scripts/run-shard.js +121 -0
  27. package/dist/scripts/verify-daemon-isolation-guard.js +128 -0
  28. package/dist/tools/broadcast.js +2 -2
  29. package/dist/tools/ensemble.js +2 -0
  30. package/dist/tui/commands.d.ts +36 -0
  31. package/dist/tui/commands.js +71 -6
  32. package/dist/tui/components/PlayerDetailView.js +7 -2
  33. package/dist/tui/store.js +32 -6
  34. package/dist/types.d.ts +19 -1
  35. package/dist/utils/search-attributes.d.ts +76 -0
  36. package/dist/utils/search-attributes.js +86 -0
  37. package/dist/utils/validation.d.ts +14 -0
  38. package/dist/utils/validation.js +15 -1
  39. package/dist/workflows/attachment-math.d.ts +56 -0
  40. package/dist/workflows/attachment-math.js +47 -0
  41. package/dist/workflows/session.js +92 -27
  42. package/dist/workflows/signals.d.ts +1 -0
  43. package/dist/workflows/signals.js +16 -1
  44. package/package.json +10 -4
  45. package/workflow-bundle.js +167 -29
package/CLAUDE.md CHANGED
@@ -20,12 +20,15 @@ src/
20
20
  ├── cli.ts # CLI entry point (claude-tempo command)
21
21
  ├── daemon.ts # Daemon entry point — runs Temporal workers as a detached background process
22
22
  ├── cli/
23
- │ ├── commands.ts # CLI command implementations (up, start, conduct, status, stop, upgrade, …)
24
- │ ├── config-command.ts # config subcommand (interactive + set/show)
25
- │ ├── daemon.ts # Daemon management utilities (start, stop, status, logs, isDaemonRunning)
23
+ │ ├── commands.ts # CLI command implementations (up, start, conduct, status, stop, …)
24
+ │ ├── config-command.ts # config subcommand (interactive + set/show) — crash-proof for show/set
25
+ │ ├── daemon.ts # Daemon management utilities (start, stop, status, heartbeat, isDaemonRunning)
26
+ │ ├── daemon-command.ts # daemon subcommand handler — crash-proof, no Temporal deps
27
+ │ ├── help-text.ts # help output — crash-proof, no Temporal deps
26
28
  │ ├── mcp.ts # MCP server registration helpers (init, global vs project)
27
29
  │ ├── output.ts # Shared CLI output formatting helpers
28
- │ └── preflight.ts # Environment preflight checks
30
+ │ ├── preflight.ts # Environment preflight checks
31
+ │ └── upgrade-command.ts # upgrade subcommand — crash-proof; dynamic-imports Temporal only for active-session warning
29
32
  ├── adapters/
30
33
  │ ├── README.md # Adapter contract documentation
31
34
  │ ├── index.ts # Adapter registry bootstrap + barrel exports
@@ -44,6 +47,7 @@ src/
44
47
  │ ├── session.ts # claude-session workflow
45
48
  │ ├── scheduler.ts # durable scheduler workflow (one per ensemble)
46
49
  │ ├── maestro.ts # Maestro workflows — per-ensemble hub and global hub
50
+ │ ├── attachment-math.ts # Pure CAN-boundary lease-extension helper (no Temporal imports)
47
51
  │ ├── maestro-signals.ts / scheduler-signals.ts / signals.ts # Signal/query/update type defs
48
52
  │ └── index.ts # Workflow re-exports for worker bundle
49
53
  ├── activities/
@@ -73,7 +77,7 @@ src/
73
77
  │ ├── components/ # Ink components — see docs/tui.md for inventory
74
78
  │ └── utils/ # format, platform, theme, fullscreen, history
75
79
  ├── utils/
76
- │ ├── validation.ts / worktree.ts / safe-path.ts / duration.ts
80
+ │ ├── validation.ts / worktree.ts / safe-path.ts / duration.ts / search-attributes.ts
77
81
  ├── types.ts # Shared type definitions
78
82
  ├── git-info.ts # Git repository detection helper
79
83
  └── config.ts # Env var handling
@@ -86,7 +90,7 @@ touching `src/tui/`.
86
90
 
87
91
  ```bash
88
92
  npm install
89
- npm run build # compiles TS + pre-bundles workflow code into workflow-bundle.js
93
+ npm run build # compiles TS, scripts/*.ts → dist/scripts/, and pre-bundles workflow code into workflow-bundle.js
90
94
  npm test
91
95
  ```
92
96
 
@@ -113,6 +117,7 @@ daemon worker notes, `npx ts-node` dev runner).
113
117
  - **Part**: A player's description of what it's working on
114
118
  - **Outbox**: Outbound requests (cue, report, recruit, restart, detach, destroy, …) go through the session's workflow outbox instead of directly signaling other workflows. The dispatch loop processes entries via activities, decoupling tools from cross-workflow signaling.
115
119
  - **Attachment phase** (v0.26): Seven phases tracked on the session workflow — `booting → attached → processing | awaiting → draining → detached → gone`. The phase is authoritative for lifecycle truth: adapters drive it via `claimAttachment` / `adapterExited` / `forceDetach` / `destroy`, and the workflow publishes it on the `ClaudeTempoAttachmentState` search attribute. Replaced the v0.25 `ClaudeTempoStatus` heuristic (removed in v0.26). See [docs/concepts.md](docs/concepts.md) for the phase table and [docs/ops/v0.26-migration.md](docs/ops/v0.26-migration.md) for the upgrade path.
120
+ - **Adapter heartbeat observability** (#249): After `claimAttachment`, the base adapter logs `first heartbeat scheduled in Xms` then `heartbeat#1 delivered` on the first tick. Every 10 ticks it emits `heartbeats-delivered=N / phase-ticks=N` breadcrumbs. Any silent guard trip in `tickHeartbeat` / `tickPhaseWatcher` now emits a structured `guard tripped: {stopped, reconnecting, …}` log instead of silently orphaning the timer. The phase-watcher emits `WARNING: heartbeat staleness` when `lastHeartbeatAt` falls more than 2× `heartbeatMs` behind `now`. Grep `[claude-tempo:adapter]` to confirm loop health without parsing Temporal history.
116
121
  - **Per-host task queues**: `host` param on `recruit`/`restart`/`migrate` routes to `claude-tempo-{hostname}` task queue. See [docs/concepts.md](docs/concepts.md) for cross-machine recruiting details.
117
122
  - **Wire protocol**: All signal/query/update names are documented in [`docs/WIRE-PROTOCOL.md`](docs/WIRE-PROTOCOL.md) and are stable — renaming or removing any is a breaking change. **Process**: update `docs/WIRE-PROTOCOL.md` in the same commit as any new signal, query, or update.
118
123
  - **Daemon**: Standalone background process (`src/daemon.ts`) that runs all Temporal workers. Auto-started by any `claude-tempo` command. PID at `~/.claude-tempo/daemon.pid`; logs at `~/.claude-tempo/daemon.log`.
@@ -51,6 +51,87 @@ const adapters_1 = require("../adapters");
51
51
  const hard_terminate_1 = require("./hard-terminate");
52
52
  const signals_1 = require("../workflows/signals");
53
53
  const log = (...args) => console.error('[claude-tempo:outbox]', ...args);
54
+ /**
55
+ * Classify a Temporal client error raised by `handle.query` / `handle.signal`
56
+ * / `handle.executeUpdate` as retryable (transient) vs permanent (#140).
57
+ *
58
+ * ## Contract
59
+ * - Returns `true` → caller should **re-throw the underlying Error** so the
60
+ * activity's retry policy can back off and retry (per-worker config).
61
+ * - Returns `false` → caller should wrap in `ApplicationFailure.nonRetryable`
62
+ * so the outbox surfaces a permanent failure and stops retrying.
63
+ *
64
+ * ## Safety posture
65
+ * **Conservative default: unknown → non-retryable.** Over-classifying as
66
+ * retryable causes infinite retry loops on genuinely permanent errors. The
67
+ * activity will fail fast on unknown cases; a follow-up PR can whitelist more
68
+ * transient signatures if we see false-permanent rates in the wild.
69
+ *
70
+ * ## Why name/message sniffing, not `instanceof`
71
+ * Matches the established pattern in `src/adapters/terminal-error.ts`
72
+ * `isTerminalWorkflowError`: the Temporal Node SDK surfaces slightly different
73
+ * error shapes between `@temporalio/client`, the gRPC layer, and
74
+ * `WorkflowUpdateFailedError` wrappers. Sniffing on name + message is resilient
75
+ * across those shapes. Activity-side classification is kept separate here so
76
+ * `src/activities/` has no adapter-module dependency.
77
+ */
78
+ function isRetryableTemporalError(err) {
79
+ // ApplicationFailure instances have already been classified by the thrower
80
+ // (nonRetryable=true/false). The calling code paths in this module only ask
81
+ // about non-ApplicationFailure errors, but this guard makes the helper safe
82
+ // to call unconditionally.
83
+ if (err instanceof activity_1.ApplicationFailure)
84
+ return false;
85
+ const e = err;
86
+ const name = e?.name ?? '';
87
+ const msg = e?.message ?? '';
88
+ // ── Permanent: workflow is genuinely gone or validator rejected the op. ──
89
+ if (name.includes('WorkflowNotFound') ||
90
+ name.includes('WorkflowExecutionAlreadyCompleted') ||
91
+ // Update rejected by the workflow-side validator (e.g. `WorkflowGone`
92
+ // thrown from `claimAttachment`'s validator on a destroyed session).
93
+ // A retry won't make the validator change its mind.
94
+ name.includes('WorkflowUpdateFailed') ||
95
+ msg.includes('WorkflowGone') ||
96
+ msg.includes('workflow execution already completed'))
97
+ return false;
98
+ // ── Transient: RPC / network / temporary SDK unavailability. ──
99
+ if (name.includes('TransportError') ||
100
+ name.includes('TimeoutError') ||
101
+ msg.includes('DEADLINE_EXCEEDED') ||
102
+ msg.includes('UNAVAILABLE') ||
103
+ msg.includes('RESOURCE_EXHAUSTED') ||
104
+ msg.includes('CANCELLED') ||
105
+ /\bECONNRESET\b/.test(msg) ||
106
+ /\bECONNREFUSED\b/.test(msg) ||
107
+ /\bETIMEDOUT\b/.test(msg) ||
108
+ /\bENOTFOUND\b/.test(msg) ||
109
+ /\bEAI_AGAIN\b/.test(msg))
110
+ return true;
111
+ // Unknown shape — stay permanent (see "Safety posture" above).
112
+ return false;
113
+ }
114
+ /**
115
+ * Standard shape for the 3 §8.2 deliver activities' catch-all tail.
116
+ * Centralises the branch so each activity body stays concise.
117
+ *
118
+ * - If `err` is already an `ApplicationFailure` (typed permanent — e.g. the
119
+ * explicit "not found" / "destroyed" throws), re-throw as-is.
120
+ * - If `err` is retryable per {@link isRetryableTemporalError}, re-throw the
121
+ * original `Error` so the activity retry policy handles it.
122
+ * - Otherwise wrap in `ApplicationFailure.nonRetryable` with a caller-supplied
123
+ * context prefix (e.g. `Detach failed for "alice"`).
124
+ */
125
+ function classifyAndRethrow(err, contextPrefix) {
126
+ if (err instanceof activity_1.ApplicationFailure)
127
+ throw err;
128
+ if (isRetryableTemporalError(err)) {
129
+ // Re-throw the original so the activity retry policy backs off and retries.
130
+ // Normalise non-Error throwables (extremely rare) into Error form.
131
+ throw err instanceof Error ? err : new Error(String(err));
132
+ }
133
+ throw activity_1.ApplicationFailure.nonRetryable(`${contextPrefix}: ${err instanceof Error ? err.message : String(err)}`);
134
+ }
54
135
  /**
55
136
  * Create outbox delivery activities bound to a Temporal client and config.
56
137
  * The returned object is registered with the worker as activities.
@@ -59,12 +140,20 @@ function createOutboxActivities(client, config) {
59
140
  return {
60
141
  async deliverCue(input) {
61
142
  const { ensemble, fromPlayerId, targetPlayerId, message } = input;
62
- const handle = await (0, resolve_1.resolveSession)(client, ensemble, targetPlayerId);
63
- if (!handle) {
64
- throw activity_1.ApplicationFailure.nonRetryable(`No active session found for "${targetPlayerId}"`);
143
+ try {
144
+ const handle = await (0, resolve_1.resolveSession)(client, ensemble, targetPlayerId);
145
+ if (!handle) {
146
+ throw activity_1.ApplicationFailure.nonRetryable(`No active session found for "${targetPlayerId}"`);
147
+ }
148
+ await handle.signal('receiveMessage', { from: fromPlayerId, text: message });
149
+ return { success: true };
150
+ }
151
+ catch (err) {
152
+ // #236: transient RPC errors (e.g. DEADLINE_EXCEEDED on the signal call)
153
+ // retry per the activity policy; WorkflowNotFound / validator rejections
154
+ // stay permanent. Unknown errors default to non-retryable.
155
+ classifyAndRethrow(err, `Cue failed for "${targetPlayerId}"`);
65
156
  }
66
- await handle.signal('receiveMessage', { from: fromPlayerId, text: message });
67
- return { success: true };
68
157
  },
69
158
  async deliverReport(input) {
70
159
  const { ensemble, fromPlayerId, text, reportType } = input;
@@ -76,37 +165,44 @@ function createOutboxActivities(client, config) {
76
165
  return { success: true };
77
166
  }
78
167
  catch (err) {
79
- if (err instanceof activity_1.ApplicationFailure)
80
- throw err;
81
- throw activity_1.ApplicationFailure.nonRetryable(`Failed to deliver report to conductor: ${err instanceof Error ? err.message : String(err)}`);
168
+ // #236: describe() / signal() hitting a transient RPC error now retries;
169
+ // WorkflowNotFound (conductor gone) stays permanent as before.
170
+ classifyAndRethrow(err, 'Failed to deliver report to conductor');
82
171
  }
83
172
  },
84
173
  async terminateSession(input) {
85
174
  const { ensemble, targetPlayerId, terminatedBy } = input;
86
- const handle = await (0, resolve_1.resolveSession)(client, ensemble, targetPlayerId);
87
- if (!handle) {
88
- throw activity_1.ApplicationFailure.nonRetryable(`No active session found for "${targetPlayerId}"`);
89
- }
90
- // PR-C commit 4: use the V2 `destroy` update — explicit operator termination
91
- // per §2.5 (abandon in-flight, phase=gone, COMPLETE). The former
92
- // `updateMetadata({ status: 'terminated' })` signal path was retired.
93
- await handle.executeUpdate('destroy', {
94
- args: [{ reason: 'stop via tool', terminatedBy }],
95
- });
96
- // Notify conductor about the termination (best effort)
97
175
  try {
98
- const conductorId = (0, config_1.conductorWorkflowId)(ensemble);
99
- const conductorHandle = client.workflow.getHandle(conductorId);
100
- await conductorHandle.signal('receiveMessage', {
101
- from: 'system',
102
- text: `Session "${targetPlayerId}" was terminated by ${terminatedBy}.`,
103
- responseRequested: false,
176
+ const handle = await (0, resolve_1.resolveSession)(client, ensemble, targetPlayerId);
177
+ if (!handle) {
178
+ throw activity_1.ApplicationFailure.nonRetryable(`No active session found for "${targetPlayerId}"`);
179
+ }
180
+ // PR-C commit 4: use the V2 `destroy` update — explicit operator termination
181
+ // per §2.5 (abandon in-flight, phase=gone, COMPLETE). The former
182
+ // `updateMetadata({ status: 'terminated' })` signal path was retired.
183
+ await handle.executeUpdate('destroy', {
184
+ args: [{ reason: 'stop via tool', terminatedBy }],
104
185
  });
186
+ // Notify conductor about the termination (best effort)
187
+ try {
188
+ const conductorId = (0, config_1.conductorWorkflowId)(ensemble);
189
+ const conductorHandle = client.workflow.getHandle(conductorId);
190
+ await conductorHandle.signal('receiveMessage', {
191
+ from: 'system',
192
+ text: `Session "${targetPlayerId}" was terminated by ${terminatedBy}.`,
193
+ responseRequested: false,
194
+ });
195
+ }
196
+ catch {
197
+ // Conductor may not exist — that's fine
198
+ }
199
+ return { success: true };
105
200
  }
106
- catch {
107
- // Conductor may not exist — that's fine
201
+ catch (err) {
202
+ // #236: transient RPC on the destroy update now retries; validator rejection
203
+ // (WorkflowGone, AttachmentMismatch) stays permanent.
204
+ classifyAndRethrow(err, `Terminate failed for "${targetPlayerId}"`);
108
205
  }
109
- return { success: true };
110
206
  },
111
207
  async startRecruitedSession(input) {
112
208
  const { ensemble, targetName, workDir, isConductor, initialMessage, fromPlayerId, agent, systemPrompt, taskQueue, agentDefinition, agentDefinitionDescription, held } = input;
@@ -177,7 +273,11 @@ function createOutboxActivities(client, config) {
177
273
  return { success: true, sessionId };
178
274
  }
179
275
  catch (err) {
180
- throw activity_1.ApplicationFailure.nonRetryable(`Failed to start recruited session "${targetName}": ${err instanceof Error ? err.message : String(err)}`);
276
+ // #236: transient RPC during workflow.start (e.g. temporal server flap)
277
+ // now retries; WorkflowNotFound / validation / auth failures stay permanent.
278
+ // Note: this activity's pre-#236 catch was missing the ApplicationFailure
279
+ // passthrough guard — `classifyAndRethrow` restores it for free.
280
+ classifyAndRethrow(err, `Failed to start recruited session "${targetName}"`);
181
281
  }
182
282
  },
183
283
  async spawnProcess(input) {
@@ -267,7 +367,14 @@ function createOutboxActivities(client, config) {
267
367
  return { success: true };
268
368
  }
269
369
  catch (err) {
270
- throw activity_1.ApplicationFailure.nonRetryable(`Failed to spawn process for "${targetName}": ${err instanceof Error ? err.message : String(err)}`);
370
+ // #236: spawnProcess throws predominantly OS-side errors (ENOENT/EACCES
371
+ // on the claude binary, EAGAIN on process-table overflow). The classifier
372
+ // is tuned for Temporal RPC; OS errors don't match its transient
373
+ // signatures, so they still flow through as non-retryable — byte-for-byte
374
+ // behavior preservation. The upside of going through the helper: if a
375
+ // future OS error surfaces a transient shape we add to the classifier,
376
+ // spawnProcess benefits automatically.
377
+ classifyAndRethrow(err, `Failed to spawn process for "${targetName}"`);
271
378
  }
272
379
  },
273
380
  async releasePlayer(input) {
@@ -288,9 +395,9 @@ function createOutboxActivities(client, config) {
288
395
  return { success: true };
289
396
  }
290
397
  catch (err) {
291
- if (err instanceof activity_1.ApplicationFailure)
292
- throw err;
293
- throw activity_1.ApplicationFailure.nonRetryable(`Release failed for "${targetPlayerId}": ${err instanceof Error ? err.message : String(err)}`);
398
+ // #236: transient RPC on outboxLocked query / releaseHeld signal now
399
+ // retries; WorkflowNotFound / not-held validation stay permanent.
400
+ classifyAndRethrow(err, `Release failed for "${targetPlayerId}"`);
294
401
  }
295
402
  },
296
403
  /**
@@ -318,9 +425,10 @@ function createOutboxActivities(client, config) {
318
425
  return { success: true };
319
426
  }
320
427
  catch (err) {
321
- if (err instanceof activity_1.ApplicationFailure)
322
- throw err;
323
- throw activity_1.ApplicationFailure.nonRetryable(`Detach failed for "${targetPlayerId}": ${err instanceof Error ? err.message : String(err)}`);
428
+ // #140: re-throw transient RPC/network errors so the activity retry
429
+ // policy handles them; permanent cases (validator rejection, workflow
430
+ // gone, unknown) become `ApplicationFailure.nonRetryable`.
431
+ classifyAndRethrow(err, `Detach failed for "${targetPlayerId}"`);
324
432
  }
325
433
  },
326
434
  /**
@@ -359,9 +467,10 @@ function createOutboxActivities(client, config) {
359
467
  return { success: true };
360
468
  }
361
469
  catch (err) {
362
- if (err instanceof activity_1.ApplicationFailure)
363
- throw err;
364
- throw activity_1.ApplicationFailure.nonRetryable(`Destroy failed for "${targetPlayerId}": ${err instanceof Error ? err.message : String(err)}`);
470
+ // #140: transient errors (network, RPC timeout) become retryable;
471
+ // permanent cases (WorkflowNotFound, validator rejection) stay
472
+ // non-retryable. Unknown errors default to non-retryable.
473
+ classifyAndRethrow(err, `Destroy failed for "${targetPlayerId}"`);
365
474
  }
366
475
  },
367
476
  /**
@@ -393,7 +502,7 @@ function createOutboxActivities(client, config) {
393
502
  try {
394
503
  await handle.signal(signals_1.requestDetachSignal, {
395
504
  reason: 'restart',
396
- deadlineMs: 5_000,
505
+ deadlineMs: validation_1.DEFAULT_RESTART_DETACH_DEADLINE_MS,
397
506
  });
398
507
  }
399
508
  catch {
@@ -433,7 +542,7 @@ function createOutboxActivities(client, config) {
433
542
  host: targetHost,
434
543
  adapterId,
435
544
  adapterClass,
436
- leaseMs: 90_000,
545
+ leaseMs: validation_1.DEFAULT_RESTART_LEASE_MS,
437
546
  }],
438
547
  });
439
548
  // Step 5 — optional context replay.
@@ -503,9 +612,11 @@ function createOutboxActivities(client, config) {
503
612
  return { success: true };
504
613
  }
505
614
  catch (err) {
506
- if (err instanceof activity_1.ApplicationFailure)
507
- throw err;
508
- throw activity_1.ApplicationFailure.nonRetryable(`Restart failed for "${targetPlayerId}": ${err instanceof Error ? err.message : String(err)}`);
615
+ // #140: the §8.2 restart algorithm fires many RPCs; any of them may
616
+ // hit a transient network/RPC error. Those get retried. Validator
617
+ // rejections (e.g. claim race), workflow-gone, and unknown errors
618
+ // stay permanent to avoid wedging the outbox on a dead target.
619
+ classifyAndRethrow(err, `Restart failed for "${targetPlayerId}"`);
509
620
  }
510
621
  },
511
622
  /**
@@ -2,6 +2,7 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.resolveSession = resolveSession;
4
4
  exports.scanEnsembleSessions = scanEnsembleSessions;
5
+ const search_attributes_1 = require("../utils/search-attributes");
5
6
  /** Shared query for listing running session workflows. */
6
7
  const SESSION_LIST_QUERY = `WorkflowType = "claudeSessionWorkflow" AND ExecutionStatus = "Running"`;
7
8
  /**
@@ -43,10 +44,7 @@ async function scanEnsembleSessions(client, ensemble) {
43
44
  const part = await handle.query('getPart');
44
45
  // Attachment phase lives in the `ClaudeTempoAttachmentState` search
45
46
  // attribute (written by the workflow on every phase transition).
46
- const phaseArr = workflow.searchAttributes?.ClaudeTempoAttachmentState;
47
- const phase = Array.isArray(phaseArr) && phaseArr.length > 0
48
- ? phaseArr[0]
49
- : undefined;
47
+ const phase = (0, search_attributes_1.getAttachmentPhase)(workflow);
50
48
  sessions.push({
51
49
  workflowId: workflow.workflowId,
52
50
  playerId: metadata.playerId,
@@ -65,6 +65,27 @@ export declare abstract class BaseAttachment {
65
65
  private stopped;
66
66
  private terminalFired;
67
67
  private knownPhase;
68
+ /**
69
+ * `true` once a heartbeat has successfully landed on the current attachment (or rebind).
70
+ * Cleared on `startV2Lifecycle`, reconnect-loop success, and CAN rebind so each freshly
71
+ * live attachment emits its own `heartbeat#1 delivered` diagnostic. Added in #249 to
72
+ * distinguish "claim OK but heartbeat loop died" from "adapter just hasn't ticked yet."
73
+ */
74
+ private firstHeartbeatLogged;
75
+ /**
76
+ * Monotonic heartbeat counter for the current attachment cycle. Reset on
77
+ * claim/reconnect/CAN-rebind. Emitted periodically (every {@link HEARTBEAT_SUMMARY_EVERY}
78
+ * ticks) so a long-running session leaves breadcrumbs in the log proving the loop is
79
+ * alive — operators can `grep 'heartbeats-delivered='` to confirm health without
80
+ * parsing Temporal history. Added in #249.
81
+ */
82
+ private heartbeatsSent;
83
+ /**
84
+ * Mirror of {@link heartbeatsSent} for the phase-watcher loop. Same emission cadence,
85
+ * same rationale — the watcher is the only self-heal surface when the heartbeat loop
86
+ * dies silently, so a summary log line proves it's still live too.
87
+ */
88
+ private phaseTicksDone;
68
89
  private readonly phaseChangeListeners;
69
90
  private readonly leaseRevokedListeners;
70
91
  private readonly terminalListeners;
@@ -133,17 +154,68 @@ export declare abstract class BaseAttachment {
133
154
  */
134
155
  protected stopV2Lifecycle(reason?: DetachReason, graceful?: boolean): Promise<void>;
135
156
  private scheduleHeartbeat;
157
+ /**
158
+ * Emit a loud diagnostic when a tick early-returns via one of its guard paths (#249).
159
+ * Pre-#249 these returns were silent — the only observable effect was "heartbeats stop
160
+ * arriving." Now operators can grep `adapter.*guard tripped` to confirm or rule out
161
+ * tick-orphan as a failure mode without needing workflow history.
162
+ *
163
+ * `terminalFired=true` / `stopped=true` guards are load-bearing on the terminal path
164
+ * (don't want to re-enter terminal) so they're expected during teardown; we still log
165
+ * them but at the same level — operators can correlate timestamps against the preceding
166
+ * `terminal (...) — stopping delivery poll permanently` line.
167
+ */
168
+ private logGuardTrip;
169
+ /**
170
+ * Single tick of the heartbeat loop. Try/finally scaffolding (#249) guarantees
171
+ * reschedule in every path except genuinely terminal state (`stopped`,
172
+ * `terminalFired`) or when the reconnect loop has taken ownership of scheduling
173
+ * (`reconnecting`). Pre-#249 the three early-return paths at the top + the
174
+ * handled-terminal-error path silently orphaned the timer forever; a transient
175
+ * `reconnecting=true` window or a null-handle race was enough to kill the loop
176
+ * with no log and no teardown.
177
+ *
178
+ * Handled terminals (CAN rebind, destroy) still short-circuit via `return` —
179
+ * the `finally` block re-checks `reconnecting` / `terminalFired` before
180
+ * rescheduling, so the reconnect/terminal machinery keeps ownership of
181
+ * whatever comes next.
182
+ */
136
183
  private tickHeartbeat;
137
184
  private schedulePhaseWatcher;
185
+ /**
186
+ * Single tick of the phase-watcher loop. Same orphan-resistance scaffolding as
187
+ * {@link tickHeartbeat} (#249): try/finally reschedule, unconditional unless
188
+ * `stopped` / `terminalFired` / `reconnecting`. When the heartbeat loop dies
189
+ * silently, the watcher is the only remaining self-heal surface — losing it
190
+ * too meant the adapter had no path back to a healthy state short of process
191
+ * restart.
192
+ */
138
193
  private tickPhaseWatcher;
139
194
  /**
140
- * Classify an error as terminal (WorkflowNotFound / ExecutionAlreadyCompleted / phase gone).
195
+ * Shared error-classification path for the heartbeat + phase-watcher ticks (#226).
196
+ *
197
+ * Returns `true` if the error was a terminal-class (handled inline: CAN rebind
198
+ * kicked off, or destroy fired). Returns `false` when the caller should treat
199
+ * the error as transient and continue its backoff.
200
+ *
201
+ * Always consults `fetchHistory` on any terminal-class error, because the
202
+ * Temporal SDK can't distinguish CAN-close from true-complete at the error
203
+ * level — see {@link isTerminalWorkflowError}. The history lookup is cheap
204
+ * (only runs on terminal, so at most once per adapter lifetime per terminal)
205
+ * and safer than re-querying by workflow id (which could race a fresh session
206
+ * reusing the id).
207
+ */
208
+ private handleRunEndError;
209
+ /**
210
+ * Fetch the closed pinned run's history and return the runId of a CAN successor
211
+ * if present, else `null`. Scoped to the pinned (old) run via `this.pinnedHandle`,
212
+ * so it can't be fooled by a fresh session that happens to reuse the workflow id.
141
213
  *
142
- * Uses name-sniffing rather than `instanceof` to avoid tight coupling to
143
- * `@temporalio/client` internals — errors surface through both the Client SDK
144
- * and the server's gRPC layer with slightly different shapes.
214
+ * Called only on the terminal path from {@link handleRunEndError}, so the cost
215
+ * of `fetchHistory` (a full event stream for the closed run) is paid at most
216
+ * once per terminal — not on every tick.
145
217
  */
146
- private isWorkflowGone;
218
+ private findCanSuccessorRunId;
147
219
  private fireTerminal;
148
220
  /**
149
221
  * Opt-in reconnect policy. Default: return `false` — the base class behaves
@@ -194,6 +266,24 @@ export declare abstract class BaseAttachment {
194
266
  * when the reason is potentially recoverable.
195
267
  */
196
268
  private fireTerminalOrReconnect;
269
+ /**
270
+ * #226 CAN rebind. Transparently repoints `pinnedHandle` at the successor run,
271
+ * keeps the existing `attachmentId` / `leaseMs` (the workflow extended the lease
272
+ * by one heartbeat interval during the CAN transition per §2.3, so the lease is
273
+ * still live on the new run), notifies the subclass to restart its delivery
274
+ * loop, and resumes heartbeat + phase-watcher.
275
+ *
276
+ * Why this is safe without re-claiming:
277
+ * - The new run carries forward `currentAttachment` verbatim from the old run.
278
+ * - The adapter's `attachmentId` still matches, so the next `heartbeat` /
279
+ * `markDelivered` / `adapterExited` signal on the new pinned handle will be
280
+ * accepted unchanged by the workflow's handlers.
281
+ * - If the lease actually did expire before we got here (e.g. adapter was
282
+ * offline through multiple CAN cycles), the next phase-watcher tick on the
283
+ * new pinned handle will see `phase=detached` + no current attachment and
284
+ * fall through to the existing #201 reclaim path — belt-and-suspenders.
285
+ */
286
+ private runCanRebind;
197
287
  /**
198
288
  * Budget-bounded reconnect loop.
199
289
  *