yzcode-cli 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/assistant/sessionHistory.ts +87 -0
  2. package/bootstrap/state.ts +1769 -0
  3. package/bridge/bridgeApi.ts +539 -0
  4. package/bridge/bridgeConfig.ts +48 -0
  5. package/bridge/bridgeDebug.ts +135 -0
  6. package/bridge/bridgeEnabled.ts +202 -0
  7. package/bridge/bridgeMain.ts +2999 -0
  8. package/bridge/bridgeMessaging.ts +461 -0
  9. package/bridge/bridgePermissionCallbacks.ts +43 -0
  10. package/bridge/bridgePointer.ts +210 -0
  11. package/bridge/bridgeStatusUtil.ts +163 -0
  12. package/bridge/bridgeUI.ts +530 -0
  13. package/bridge/capacityWake.ts +56 -0
  14. package/bridge/codeSessionApi.ts +168 -0
  15. package/bridge/createSession.ts +384 -0
  16. package/bridge/debugUtils.ts +141 -0
  17. package/bridge/envLessBridgeConfig.ts +165 -0
  18. package/bridge/flushGate.ts +71 -0
  19. package/bridge/inboundAttachments.ts +175 -0
  20. package/bridge/inboundMessages.ts +80 -0
  21. package/bridge/initReplBridge.ts +569 -0
  22. package/bridge/jwtUtils.ts +256 -0
  23. package/bridge/pollConfig.ts +110 -0
  24. package/bridge/pollConfigDefaults.ts +82 -0
  25. package/bridge/remoteBridgeCore.ts +1008 -0
  26. package/bridge/replBridge.ts +2406 -0
  27. package/bridge/replBridgeHandle.ts +36 -0
  28. package/bridge/replBridgeTransport.ts +370 -0
  29. package/bridge/sessionIdCompat.ts +57 -0
  30. package/bridge/sessionRunner.ts +550 -0
  31. package/bridge/trustedDevice.ts +210 -0
  32. package/bridge/types.ts +262 -0
  33. package/bridge/workSecret.ts +127 -0
  34. package/buddy/CompanionSprite.tsx +371 -0
  35. package/buddy/companion.ts +133 -0
  36. package/buddy/prompt.ts +36 -0
  37. package/buddy/sprites.ts +514 -0
  38. package/buddy/types.ts +148 -0
  39. package/buddy/useBuddyNotification.tsx +98 -0
  40. package/coordinator/coordinatorMode.ts +369 -0
  41. package/memdir/findRelevantMemories.ts +141 -0
  42. package/memdir/memdir.ts +507 -0
  43. package/memdir/memoryAge.ts +53 -0
  44. package/memdir/memoryScan.ts +94 -0
  45. package/memdir/memoryTypes.ts +271 -0
  46. package/memdir/paths.ts +278 -0
  47. package/memdir/teamMemPaths.ts +292 -0
  48. package/memdir/teamMemPrompts.ts +100 -0
  49. package/migrations/migrateAutoUpdatesToSettings.ts +61 -0
  50. package/migrations/migrateBypassPermissionsAcceptedToSettings.ts +40 -0
  51. package/migrations/migrateEnableAllProjectMcpServersToSettings.ts +118 -0
  52. package/migrations/migrateFennecToOpus.ts +45 -0
  53. package/migrations/migrateLegacyOpusToCurrent.ts +57 -0
  54. package/migrations/migrateOpusToOpus1m.ts +43 -0
  55. package/migrations/migrateReplBridgeEnabledToRemoteControlAtStartup.ts +22 -0
  56. package/migrations/migrateSonnet1mToSonnet45.ts +48 -0
  57. package/migrations/migrateSonnet45ToSonnet46.ts +67 -0
  58. package/migrations/resetAutoModeOptInForDefaultOffer.ts +51 -0
  59. package/migrations/resetProToOpusDefault.ts +51 -0
  60. package/native-ts/color-diff/index.ts +999 -0
  61. package/native-ts/file-index/index.ts +370 -0
  62. package/native-ts/yoga-layout/enums.ts +134 -0
  63. package/native-ts/yoga-layout/index.ts +2578 -0
  64. package/outputStyles/loadOutputStylesDir.ts +98 -0
  65. package/package.json +22 -5
  66. package/plugins/builtinPlugins.ts +159 -0
  67. package/plugins/bundled/index.ts +23 -0
  68. package/schemas/hooks.ts +222 -0
  69. package/screens/Doctor.tsx +575 -0
  70. package/screens/REPL.tsx +5006 -0
  71. package/screens/ResumeConversation.tsx +399 -0
  72. package/server/createDirectConnectSession.ts +88 -0
  73. package/server/directConnectManager.ts +213 -0
  74. package/server/types.ts +57 -0
  75. package/skills/bundled/batch.ts +124 -0
  76. package/skills/bundled/claudeApi.ts +196 -0
  77. package/skills/bundled/claudeApiContent.ts +75 -0
  78. package/skills/bundled/claudeInChrome.ts +34 -0
  79. package/skills/bundled/debug.ts +103 -0
  80. package/skills/bundled/index.ts +79 -0
  81. package/skills/bundled/keybindings.ts +339 -0
  82. package/skills/bundled/loop.ts +92 -0
  83. package/skills/bundled/loremIpsum.ts +282 -0
  84. package/skills/bundled/remember.ts +82 -0
  85. package/skills/bundled/scheduleRemoteAgents.ts +447 -0
  86. package/skills/bundled/simplify.ts +69 -0
  87. package/skills/bundled/skillify.ts +197 -0
  88. package/skills/bundled/stuck.ts +79 -0
  89. package/skills/bundled/updateConfig.ts +475 -0
  90. package/skills/bundled/verify/SKILL.md +3 -0
  91. package/skills/bundled/verify/examples/cli.md +3 -0
  92. package/skills/bundled/verify/examples/server.md +3 -0
  93. package/skills/bundled/verify.ts +30 -0
  94. package/skills/bundled/verifyContent.ts +13 -0
  95. package/skills/bundledSkills.ts +220 -0
  96. package/skills/loadSkillsDir.ts +1086 -0
  97. package/skills/mcpSkillBuilders.ts +44 -0
  98. package/tasks/DreamTask/DreamTask.ts +157 -0
  99. package/tasks/InProcessTeammateTask/InProcessTeammateTask.tsx +126 -0
  100. package/tasks/InProcessTeammateTask/types.ts +121 -0
  101. package/tasks/LocalAgentTask/LocalAgentTask.tsx +683 -0
  102. package/tasks/LocalMainSessionTask.ts +479 -0
  103. package/tasks/LocalShellTask/LocalShellTask.tsx +523 -0
  104. package/tasks/LocalShellTask/guards.ts +41 -0
  105. package/tasks/LocalShellTask/killShellTasks.ts +76 -0
  106. package/tasks/RemoteAgentTask/RemoteAgentTask.tsx +856 -0
  107. package/tasks/pillLabel.ts +82 -0
  108. package/tasks/stopTask.ts +100 -0
  109. package/tasks/types.ts +46 -0
  110. package/upstreamproxy/relay.ts +455 -0
  111. package/upstreamproxy/upstreamproxy.ts +285 -0
  112. package/vim/motions.ts +82 -0
  113. package/vim/operators.ts +556 -0
  114. package/vim/textObjects.ts +186 -0
  115. package/vim/transitions.ts +490 -0
  116. package/vim/types.ts +199 -0
  117. package/voice/voiceModeEnabled.ts +54 -0
@@ -0,0 +1,2999 @@
1
+ import { feature } from 'bun:bundle'
2
+ import { randomUUID } from 'crypto'
3
+ import { hostname, tmpdir } from 'os'
4
+ import { basename, join, resolve } from 'path'
5
+ import { getRemoteSessionUrl } from '../constants/product.js'
6
+ import { shutdownDatadog } from '../services/analytics/datadog.js'
7
+ import { shutdown1PEventLogging } from '../services/analytics/firstPartyEventLogger.js'
8
+ import { checkGate_CACHED_OR_BLOCKING } from '../services/analytics/growthbook.js'
9
+ import {
10
+ type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
11
+ logEvent,
12
+ logEventAsync,
13
+ } from '../services/analytics/index.js'
14
+ import { isInBundledMode } from '../utils/bundledMode.js'
15
+ import { logForDebugging } from '../utils/debug.js'
16
+ import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
17
+ import { isEnvTruthy, isInProtectedNamespace } from '../utils/envUtils.js'
18
+ import { errorMessage } from '../utils/errors.js'
19
+ import { truncateToWidth } from '../utils/format.js'
20
+ import { logError } from '../utils/log.js'
21
+ import { sleep } from '../utils/sleep.js'
22
+ import { createAgentWorktree, removeAgentWorktree } from '../utils/worktree.js'
23
+ import {
24
+ BridgeFatalError,
25
+ createBridgeApiClient,
26
+ isExpiredErrorType,
27
+ isSuppressible403,
28
+ validateBridgeId,
29
+ } from './bridgeApi.js'
30
+ import { formatDuration } from './bridgeStatusUtil.js'
31
+ import { createBridgeLogger } from './bridgeUI.js'
32
+ import { createCapacityWake } from './capacityWake.js'
33
+ import { describeAxiosError } from './debugUtils.js'
34
+ import { createTokenRefreshScheduler } from './jwtUtils.js'
35
+ import { getPollIntervalConfig } from './pollConfig.js'
36
+ import { toCompatSessionId, toInfraSessionId } from './sessionIdCompat.js'
37
+ import { createSessionSpawner, safeFilenameId } from './sessionRunner.js'
38
+ import { getTrustedDeviceToken } from './trustedDevice.js'
39
+ import {
40
+ BRIDGE_LOGIN_ERROR,
41
+ type BridgeApiClient,
42
+ type BridgeConfig,
43
+ type BridgeLogger,
44
+ DEFAULT_SESSION_TIMEOUT_MS,
45
+ type SessionDoneStatus,
46
+ type SessionHandle,
47
+ type SessionSpawner,
48
+ type SessionSpawnOpts,
49
+ type SpawnMode,
50
+ } from './types.js'
51
+ import {
52
+ buildCCRv2SdkUrl,
53
+ buildSdkUrl,
54
+ decodeWorkSecret,
55
+ registerWorker,
56
+ sameSessionId,
57
+ } from './workSecret.js'
58
+
59
/**
 * Timing knobs for the bridge poll loop's retry behaviour. All values are
 * in milliseconds.
 *
 * The `conn*` and `general*` triples configure two separate backoff tracks
 * (the loop keeps separate `connBackoff` / `generalBackoff` state).
 * NOTE(review): judging by the names, each triple is first delay / maximum
 * delay / total error budget before giving up — confirm against the poll
 * loop's error handler.
 */
export type BackoffConfig = {
  // Connection-error track.
  connInitialMs: number
  connCapMs: number
  connGiveUpMs: number

  // General-error track.
  generalInitialMs: number
  generalCapMs: number
  generalGiveUpMs: number

  /** Grace period between SIGTERM and SIGKILL on shutdown. Default 30s. */
  shutdownGraceMs?: number
  /** Base delay for stopWorkWithRetry (1s/2s/4s backoff). Default 1000ms. */
  stopWorkBaseDelayMs?: number
}
71
+
72
/**
 * Backoff settings used by runBridgeLoop when the caller does not supply
 * its own BackoffConfig. The optional fields are left unset here.
 */
const DEFAULT_BACKOFF: BackoffConfig = {
  // Connection track: 2 seconds, 2 minutes, 10 minutes.
  connInitialMs: 2 * 1_000,
  connCapMs: 2 * 60 * 1_000,
  connGiveUpMs: 10 * 60 * 1_000,
  // General track: 500 milliseconds, 30 seconds, 10 minutes.
  generalInitialMs: 500,
  generalCapMs: 30 * 1_000,
  generalGiveUpMs: 10 * 60 * 1_000,
}
80
+
81
/** Period, in milliseconds, of the ticker that refreshes the live status display. */
const STATUS_UPDATE_INTERVAL_MS = 1000
// NOTE(review): presumably the default session capacity for the multi-session
// spawn modes — it is not referenced in the visible part of this file, so
// confirm against its use site.
const SPAWN_SESSIONS_DEFAULT = 32
84
+
85
/**
 * Whether the multi-session spawn modes (--spawn / --capacity /
 * --create-session-in-dir) are enabled, as decided by the GrowthBook gate
 * `tengu_ccr_bridge_multi_session`.
 *
 * This gate is the sibling of tengu_ccr_bridge_multi_environment: that one
 * allows multiple environments per host:dir, this one allows multiple
 * sessions per environment. Rollout is staged through targeting rules
 * (ants first, then gradual external).
 *
 * The blocking variant of the gate check is used on purpose, so a stale
 * disk-cache miss does not unfairly deny access. When the cache already
 * holds `true` the answer is instant; only a cold start waits for the
 * server fetch, which also seeds the disk cache for the next run.
 *
 * @returns the gate value.
 */
async function isMultiSessionSpawnEnabled(): Promise<boolean> {
  const gateOpen = await checkGate_CACHED_OR_BLOCKING(
    'tengu_ccr_bridge_multi_session',
  )
  return gateOpen
}
99
+
100
/**
 * Returns the threshold for detecting system sleep/wake in the poll loop.
 *
 * The threshold must exceed the largest delay the loop can legitimately
 * sleep for between polls — otherwise normal backoff delays trigger false
 * sleep detection (resetting the error budget indefinitely). It is 2× the
 * larger of the two backoff caps. For configurations where the connection
 * cap is the larger one (including the defaults) this is 2× the connection
 * backoff cap, matching the pattern in WebSocketTransport and replBridge.
 *
 * @param backoff backoff configuration in effect for the poll loop.
 * @returns the sleep-detection threshold in milliseconds.
 */
function pollSleepDetectionThresholdMs(backoff: BackoffConfig): number {
  // Consider both tracks: a custom config may give the general track the
  // larger cap, and using only connCapMs would then break the invariant.
  const largestCapMs = Math.max(backoff.connCapMs, backoff.generalCapMs)
  return largestCapMs * 2
}
110
+
111
/**
 * Builds the argument prefix that has to come before any CLI flags when a
 * child claude process is spawned.
 *
 * Compiled binaries: process.execPath is the claude binary itself, so flags
 * go straight to it and no prefix is needed. npm installs (node running
 * cli.js): process.execPath is the node runtime, so the script path must be
 * the first argument — otherwise node treats --sdk-url as one of its own
 * options and exits with "bad option: --sdk-url".
 * See anthropics/claude-code#28334.
 *
 * @returns `[]` in bundled mode or when process.argv[1] is empty/missing,
 *   otherwise a one-element array holding process.argv[1].
 */
function spawnScriptArgs(): string[] {
  if (isInBundledMode()) {
    return []
  }
  const scriptPath = process.argv[1]
  return scriptPath ? [scriptPath] : []
}
125
+
126
/**
 * Spawns a session without letting a throwing spawner escape to the caller.
 *
 * @param spawner spawner whose `spawn` method is invoked.
 * @param opts spawn options, forwarded unchanged.
 * @param dir directory argument, forwarded unchanged.
 * @returns the session handle on success; if `spawn` throws, the error is
 *   reported through logError and its message is returned as a string.
 */
function safeSpawn(
  spawner: SessionSpawner,
  opts: SessionSpawnOpts,
  dir: string,
): SessionHandle | string {
  let spawned: SessionHandle
  try {
    spawned = spawner.spawn(opts, dir)
  } catch (err) {
    const reason = errorMessage(err)
    logError(new Error(`Session spawn failed: ${reason}`))
    return reason
  }
  return spawned
}
140
+
141
+ export async function runBridgeLoop(
142
+ config: BridgeConfig,
143
+ environmentId: string,
144
+ environmentSecret: string,
145
+ api: BridgeApiClient,
146
+ spawner: SessionSpawner,
147
+ logger: BridgeLogger,
148
+ signal: AbortSignal,
149
+ backoffConfig: BackoffConfig = DEFAULT_BACKOFF,
150
+ initialSessionId?: string,
151
+ getAccessToken?: () => string | undefined | Promise<string | undefined>,
152
+ ): Promise<void> {
153
+ // Local abort controller so that onSessionDone can stop the poll loop.
154
+ // Linked to the incoming signal so external aborts also work.
155
+ const controller = new AbortController()
156
+ if (signal.aborted) {
157
+ controller.abort()
158
+ } else {
159
+ signal.addEventListener('abort', () => controller.abort(), { once: true })
160
+ }
161
+ const loopSignal = controller.signal
162
+
163
+ const activeSessions = new Map<string, SessionHandle>()
164
+ const sessionStartTimes = new Map<string, number>()
165
+ const sessionWorkIds = new Map<string, string>()
166
+ // Compat-surface ID (session_*) computed once at spawn and cached so
167
+ // cleanup and status-update ticks use the same key regardless of whether
168
+ // the tengu_bridge_repl_v2_cse_shim_enabled gate flips mid-session.
169
+ const sessionCompatIds = new Map<string, string>()
170
+ // Session ingress JWTs for heartbeat auth, keyed by sessionId.
171
+ // Stored separately from handle.accessToken because the token refresh
172
+ // scheduler overwrites that field with the OAuth token (~3h55m in).
173
+ const sessionIngressTokens = new Map<string, string>()
174
+ const sessionTimers = new Map<string, ReturnType<typeof setTimeout>>()
175
+ const completedWorkIds = new Set<string>()
176
+ const sessionWorktrees = new Map<
177
+ string,
178
+ {
179
+ worktreePath: string
180
+ worktreeBranch?: string
181
+ gitRoot?: string
182
+ hookBased?: boolean
183
+ }
184
+ >()
185
+ // Track sessions killed by the timeout watchdog so onSessionDone can
186
+ // distinguish them from server-initiated or shutdown interrupts.
187
+ const timedOutSessions = new Set<string>()
188
+ // Sessions that already have a title (server-set or bridge-derived) so
189
+ // onFirstUserMessage doesn't clobber a user-assigned --name / web rename.
190
+ // Keyed by compatSessionId to match logger.setSessionTitle's key.
191
+ const titledSessions = new Set<string>()
192
+ // Signal to wake the at-capacity sleep early when a session completes,
193
+ // so the bridge can immediately accept new work.
194
+ const capacityWake = createCapacityWake(loopSignal)
195
+
196
/**
 * Heartbeat all active work items, one at a time, then re-queue any session
 * whose heartbeat was rejected with 401/403.
 *
 * Sessions without a recorded work ID or ingress token are skipped.
 *
 * Result precedence (first match wins):
 *  - 'fatal'       — at least one heartbeat threw a BridgeFatalError whose
 *                    status is neither 401 nor 403.
 *  - 'auth_failed' — at least one heartbeat got a 401/403 (JWT expired —
 *                    re-queued via reconnectSession so the next poll
 *                    delivers fresh work).
 *  - 'ok'          — at least one heartbeat succeeded.
 *  - 'failed'      — nothing succeeded (including the case where no session
 *                    was eligible for a heartbeat).
 *
 * Errors that are not BridgeFatalError are only written to the debug log;
 * they count as neither auth failures nor fatal errors. A failing
 * reconnectSession call is logged and does not change the result.
 */
async function heartbeatActiveWorkItems(): Promise<
  'ok' | 'auth_failed' | 'fatal' | 'failed'
> {
  let anySuccess = false
  let anyFatal = false
  const authFailedSessions: string[] = []
  for (const [sessionId] of activeSessions) {
    const workId = sessionWorkIds.get(sessionId)
    const ingressToken = sessionIngressTokens.get(sessionId)
    // Both are needed to authenticate the heartbeat; skip sessions missing either.
    if (!workId || !ingressToken) {
      continue
    }
    try {
      await api.heartbeatWork(environmentId, workId, ingressToken)
      anySuccess = true
    } catch (err) {
      logForDebugging(
        `[bridge:heartbeat] Failed for sessionId=${sessionId} workId=${workId}: ${errorMessage(err)}`,
      )
      if (err instanceof BridgeFatalError) {
        logEvent('tengu_bridge_heartbeat_error', {
          status:
            err.status as unknown as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          error_type: (err.status === 401 || err.status === 403
            ? 'auth_failed'
            : 'fatal') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        })
        if (err.status === 401 || err.status === 403) {
          // Collected here; the reconnect calls happen after the loop.
          authFailedSessions.push(sessionId)
        } else {
          // 404/410 = environment expired or deleted — no point retrying
          anyFatal = true
        }
      }
    }
  }
  // JWT expired → trigger server-side re-dispatch. Without this, work stays
  // ACK'd out of the Redis PEL and poll returns empty forever (CC-1263).
  // The existingHandle path below delivers the fresh token to the child.
  // sessionId is already in the format /bridge/reconnect expects: it comes
  // from work.data.id, which matches the server's EnvironmentInstance store
  // (cse_* under the compat gate, session_* otherwise).
  for (const sessionId of authFailedSessions) {
    logger.logVerbose(
      `Session ${sessionId} token expired — re-queuing via bridge/reconnect`,
    )
    try {
      await api.reconnectSession(environmentId, sessionId)
      logForDebugging(
        `[bridge:heartbeat] Re-queued sessionId=${sessionId} via bridge/reconnect`,
      )
    } catch (err) {
      // Best effort: report the failure and continue with the next session.
      logger.logError(
        `Failed to refresh session ${sessionId} token: ${errorMessage(err)}`,
      )
      logForDebugging(
        `[bridge:heartbeat] reconnectSession(${sessionId}) failed: ${errorMessage(err)}`,
        { level: 'error' },
      )
    }
  }
  // Fatal outranks auth failure, which outranks success.
  if (anyFatal) {
    return 'fatal'
  }
  if (authFailedSessions.length > 0) {
    return 'auth_failed'
  }
  return anySuccess ? 'ok' : 'failed'
}
271
+
272
+ // Sessions spawned with CCR v2 env vars. v2 children cannot use OAuth
273
+ // tokens (CCR worker endpoints validate the JWT's session_id claim,
274
+ // register_worker.go:32), so onRefresh triggers server re-dispatch
275
+ // instead — the next poll delivers fresh work with a new JWT via the
276
+ // existingHandle path below.
277
+ const v2Sessions = new Set<string>()
278
+
279
+ // Proactive token refresh: schedules a timer 5min before the session
280
+ // ingress JWT expires. v1 delivers OAuth directly; v2 calls
281
+ // reconnectSession to trigger server re-dispatch (CC-1263: without
282
+ // this, v2 daemon sessions silently die at ~5h since the server does
283
+ // not auto-re-dispatch ACK'd work on lease expiry).
284
+ const tokenRefresh = getAccessToken
285
+ ? createTokenRefreshScheduler({
286
+ getAccessToken,
287
+ onRefresh: (sessionId, oauthToken) => {
288
+ const handle = activeSessions.get(sessionId)
289
+ if (!handle) {
290
+ return
291
+ }
292
+ if (v2Sessions.has(sessionId)) {
293
+ logger.logVerbose(
294
+ `Refreshing session ${sessionId} token via bridge/reconnect`,
295
+ )
296
+ void api
297
+ .reconnectSession(environmentId, sessionId)
298
+ .catch((err: unknown) => {
299
+ logger.logError(
300
+ `Failed to refresh session ${sessionId} token: ${errorMessage(err)}`,
301
+ )
302
+ logForDebugging(
303
+ `[bridge:token] reconnectSession(${sessionId}) failed: ${errorMessage(err)}`,
304
+ { level: 'error' },
305
+ )
306
+ })
307
+ } else {
308
+ handle.updateAccessToken(oauthToken)
309
+ }
310
+ },
311
+ label: 'bridge',
312
+ })
313
+ : null
314
+ const loopStartTime = Date.now()
315
// Track all in-flight cleanup promises (stopWork, worktree removal) so
// the shutdown sequence can await them before process.exit().
const pendingCleanups = new Set<Promise<unknown>>()

/**
 * Registers an in-flight cleanup promise and drops it from
 * `pendingCleanups` once it settles, whether it fulfils or rejects.
 *
 * The bookkeeping chain never rejects. A `.finally()` chain would re-reject
 * whenever `p` rejects, producing an unhandled rejection even when the
 * caller handles `p` itself. The rejection of `p` is left untouched for
 * whoever awaits it.
 *
 * @param p cleanup promise to track until it settles.
 */
function trackCleanup(p: Promise<unknown>): void {
  pendingCleanups.add(p)
  const untrack = (): void => {
    pendingCleanups.delete(p)
  }
  // Same callback on both paths: the derived promise always fulfils.
  void p.then(untrack, untrack)
}
322
+ let connBackoff = 0
323
+ let generalBackoff = 0
324
+ let connErrorStart: number | null = null
325
+ let generalErrorStart: number | null = null
326
+ let lastPollErrorTime: number | null = null
327
+ let statusUpdateTimer: ReturnType<typeof setInterval> | null = null
328
+ // Set by BridgeFatalError and give-up paths so the shutdown block can
329
+ // skip the resume message (resume is impossible after env expiry/auth
330
+ // failure/sustained connection errors).
331
+ let fatalExit = false
332
+
333
+ logForDebugging(
334
+ `[bridge:work] Starting poll loop spawnMode=${config.spawnMode} maxSessions=${config.maxSessions} environmentId=${environmentId}`,
335
+ )
336
+ logForDiagnosticsNoPII('info', 'bridge_loop_started', {
337
+ max_sessions: config.maxSessions,
338
+ spawn_mode: config.spawnMode,
339
+ })
340
+
341
+ // For ant users, show where session debug logs will land so they can tail them.
342
+ // sessionRunner.ts uses the same base path. File appears once a session spawns.
343
+ if (process.env.USER_TYPE === 'ant') {
344
+ let debugGlob: string
345
+ if (config.debugFile) {
346
+ const ext = config.debugFile.lastIndexOf('.')
347
+ debugGlob =
348
+ ext > 0
349
+ ? `${config.debugFile.slice(0, ext)}-*${config.debugFile.slice(ext)}`
350
+ : `${config.debugFile}-*`
351
+ } else {
352
+ debugGlob = join(tmpdir(), 'claude', 'bridge-session-*.log')
353
+ }
354
+ logger.setDebugLogPath(debugGlob)
355
+ }
356
+
357
+ logger.printBanner(config, environmentId)
358
+
359
+ // Seed the logger's session count + spawn mode before any render. Without
360
+ // this, setAttached() below renders with the logger's default sessionMax=1,
361
+ // showing "Capacity: 0/1" until the status ticker kicks in (which is gated
362
+ // by !initialSessionId and only starts after the poll loop picks up work).
363
+ logger.updateSessionCount(0, config.maxSessions, config.spawnMode)
364
+
365
+ // If an initial session was pre-created, show its URL from the start so
366
+ // the user can click through immediately (matching /remote-control behavior).
367
+ if (initialSessionId) {
368
+ logger.setAttached(initialSessionId)
369
+ }
370
+
371
/**
 * Refresh the inline status display. Shows idle or active depending on state.
 *
 * In order: pushes the current session count to the logger, pushes each
 * session's current activity (keyed by its cached compat ID when one
 * exists), then either shows the idle status (no sessions) or updates the
 * status line from the last entry of `activeSessions`.
 */
function updateStatusDisplay(): void {
  // Push the session count (no-op when maxSessions === 1) so the
  // next renderStatusLine tick shows the current count.
  logger.updateSessionCount(
    activeSessions.size,
    config.maxSessions,
    config.spawnMode,
  )

  // Push per-session activity into the multi-session display.
  for (const [sid, handle] of activeSessions) {
    const act = handle.currentActivity
    if (act) {
      logger.updateSessionActivity(sessionCompatIds.get(sid) ?? sid, act)
    }
  }

  if (activeSessions.size === 0) {
    logger.updateIdleStatus()
    return
  }

  // Use the last entry of the map, i.e. the most recently inserted session.
  // Sessions whose current activity is 'result' or 'error' are between
  // turns — the CLI emitted its result but the process stays alive waiting
  // for the next user message. Skip updating so the status line keeps
  // whatever state it had (Attached / session title).
  // NOTE(review): only the last entry is examined — if it is between turns,
  // an earlier session that is still working is not shown here. Confirm
  // this is the intended behaviour.
  const [sessionId, handle] = [...activeSessions.entries()].pop()!
  const startTime = sessionStartTimes.get(sessionId)
  // No recorded start time means no elapsed time can be shown.
  if (!startTime) return

  const activity = handle.currentActivity
  if (!activity || activity.type === 'result' || activity.type === 'error') {
    // Session is between turns — keep current status (Attached/titled).
    // In multi-session mode, still refresh so bullet-list activities stay current.
    if (config.maxSessions > 1) logger.refreshDisplay()
    return
  }

  const elapsed = formatDuration(Date.now() - startTime)

  // Build trail from recent tool activities (last 5)
  const trail = handle.activities
    .filter(a => a.type === 'tool_start')
    .slice(-5)
    .map(a => a.summary)

  // NOTE(review): this call passes the raw sessionId, whereas
  // updateSessionActivity above is keyed by the compat ID — verify which
  // key the logger expects.
  logger.updateSessionStatus(sessionId, elapsed, activity, trail)
}
421
+
422
/**
 * (Re)starts the periodic status refresh. Any ticker that is already
 * running is stopped first, so at most one interval is ever active.
 */
function startStatusUpdates(): void {
  stopStatusUpdates()
  // Render once right away so the first transition (e.g. Connecting → Ready)
  // is not delayed by a full interval and does not race a second timer.
  updateStatusDisplay()
  const ticker = setInterval(updateStatusDisplay, STATUS_UPDATE_INTERVAL_MS)
  statusUpdateTimer = ticker
}
433
+
434
/** Cancels the periodic status refresh; does nothing when no ticker is running. */
function stopStatusUpdates(): void {
  if (!statusUpdateTimer) {
    return
  }
  clearInterval(statusUpdateTimer)
  statusUpdateTimer = null
}
441
+
442
/**
 * Builds the completion callback for one session.
 *
 * The returned callback drops the session from the per-session maps and
 * sets, cancels its timers, logs the outcome, asks the server to stop the
 * work item, removes the session's worktree if one was recorded, and then
 * either archives the session (multi-session modes) or aborts the poll loop
 * (single-session mode).
 *
 * @param sessionId key under which the session is stored in the maps above.
 * @param startTime timestamp compared against Date.now() to compute the
 *   session duration.
 * @param handle session handle; only `lastStderr` is read, to build the
 *   failure message.
 * @returns callback to invoke with the session's final status.
 */
function onSessionDone(
  sessionId: string,
  startTime: number,
  handle: SessionHandle,
): (status: SessionDoneStatus) => void {
  return (rawStatus: SessionDoneStatus): void => {
    // Read the work ID before the bookkeeping entries are deleted.
    const workId = sessionWorkIds.get(sessionId)
    activeSessions.delete(sessionId)
    sessionStartTimes.delete(sessionId)
    sessionWorkIds.delete(sessionId)
    sessionIngressTokens.delete(sessionId)
    // Fall back to the raw ID when no compat ID was cached for this session.
    const compatId = sessionCompatIds.get(sessionId) ?? sessionId
    sessionCompatIds.delete(sessionId)
    logger.removeSession(compatId)
    titledSessions.delete(compatId)
    v2Sessions.delete(sessionId)
    // Clear per-session timeout timer
    const timer = sessionTimers.get(sessionId)
    if (timer) {
      clearTimeout(timer)
      sessionTimers.delete(sessionId)
    }
    // Clear token refresh timer
    tokenRefresh?.cancel(sessionId)
    // Wake the at-capacity sleep so the bridge can accept new work immediately
    capacityWake.wake()

    // If the session was killed by the timeout watchdog, treat it as a
    // failed session (not a server/shutdown interrupt) so we still call
    // stopWork and archiveSession below.
    // Set.delete returns true only if the ID was present, so this both
    // consumes the marker and reports whether it was set.
    const wasTimedOut = timedOutSessions.delete(sessionId)
    const status: SessionDoneStatus =
      wasTimedOut && rawStatus === 'interrupted' ? 'failed' : rawStatus
    const durationMs = Date.now() - startTime

    logForDebugging(
      `[bridge:session] sessionId=${sessionId} workId=${workId ?? 'unknown'} exited status=${status} duration=${formatDuration(durationMs)}`,
    )
    logEvent('tengu_bridge_session_done', {
      status:
        status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      duration_ms: durationMs,
    })
    logForDiagnosticsNoPII('info', 'bridge_session_done', {
      status,
      duration_ms: durationMs,
    })

    // Clear the status display before printing final log
    logger.clearStatus()
    stopStatusUpdates()

    // Build error message from stderr if available
    const stderrSummary =
      handle.lastStderr.length > 0 ? handle.lastStderr.join('\n') : undefined
    let failureMessage: string | undefined

    switch (status) {
      case 'completed':
        logger.logSessionComplete(sessionId, durationMs)
        break
      case 'failed':
        // Skip failure log during shutdown — the child exits non-zero when
        // killed, which is expected and not a real failure.
        // Also skip for timeout-killed sessions — the timeout watchdog
        // already logged a clear timeout message.
        if (!wasTimedOut && !loopSignal.aborted) {
          failureMessage = stderrSummary ?? 'Process exited with error'
          logger.logSessionFailed(sessionId, failureMessage)
          logError(new Error(`Bridge session failed: ${failureMessage}`))
        }
        break
      case 'interrupted':
        logger.logVerbose(`Session ${sessionId} interrupted`)
        break
    }

    // Notify the server that this work item is done. Skip for interrupted
    // sessions — interrupts are either server-initiated (the server already
    // knows) or caused by bridge shutdown (which calls stopWork() separately).
    if (status !== 'interrupted' && workId) {
      trackCleanup(
        stopWorkWithRetry(
          api,
          environmentId,
          workId,
          logger,
          backoffConfig.stopWorkBaseDelayMs,
        ),
      )
      completedWorkIds.add(workId)
    }

    // Clean up worktree if one was created for this session
    const wt = sessionWorktrees.get(sessionId)
    if (wt) {
      sessionWorktrees.delete(sessionId)
      // Removal failures are only reported in verbose output.
      trackCleanup(
        removeAgentWorktree(
          wt.worktreePath,
          wt.worktreeBranch,
          wt.gitRoot,
          wt.hookBased,
        ).catch((err: unknown) =>
          logger.logVerbose(
            `Failed to remove worktree ${wt.worktreePath}: ${errorMessage(err)}`,
          ),
        ),
      )
    }

    // Lifecycle decision: in multi-session mode, keep the bridge running
    // after a session completes. In single-session mode, abort the poll
    // loop so the bridge exits cleanly.
    if (status !== 'interrupted' && !loopSignal.aborted) {
      if (config.spawnMode !== 'single-session') {
        // Multi-session: archive the completed session so it doesn't linger
        // as stale in the web UI. archiveSession is idempotent (409 if already
        // archived), so double-archiving at shutdown is safe.
        // sessionId arrived as cse_* from the work poll (infrastructure-layer
        // tag). archiveSession hits /v1/sessions/{id}/archive which is the
        // compat surface and validates TagSession (session_*). Re-tag — same
        // UUID underneath.
        trackCleanup(
          api
            .archiveSession(compatId)
            .catch((err: unknown) =>
              logger.logVerbose(
                `Failed to archive session ${sessionId}: ${errorMessage(err)}`,
              ),
            ),
        )
        logForDebugging(
          `[bridge:session] Session ${status}, returning to idle (multi-session mode)`,
        )
      } else {
        // Single-session: coupled lifecycle — tear down environment
        logForDebugging(
          `[bridge:session] Session ${status}, aborting poll loop to tear down environment`,
        )
        controller.abort()
        // Return here so the status ticker is not restarted below.
        return
      }
    }

    // The ticker was stopped above; resume it unless the loop is shutting down.
    if (!loopSignal.aborted) {
      startStatusUpdates()
    }
  }
}
592
+
593
+ // Start the idle status display immediately — unless we have a pre-created
594
+ // session, in which case setAttached() already set up the display and the
595
+ // poll loop will start status updates when it picks up the session.
596
+ if (!initialSessionId) {
597
+ startStatusUpdates()
598
+ }
599
+
600
+ while (!loopSignal.aborted) {
601
+ // Fetched once per iteration — the GrowthBook cache refreshes every
602
+ // 5 min, so a loop running at the at-capacity rate picks up config
603
+ // changes within one sleep cycle.
604
+ const pollConfig = getPollIntervalConfig()
605
+
606
+ try {
607
+ const work = await api.pollForWork(
608
+ environmentId,
609
+ environmentSecret,
610
+ loopSignal,
611
+ pollConfig.reclaim_older_than_ms,
612
+ )
613
+
614
+ // Log reconnection if we were previously disconnected
615
+ const wasDisconnected =
616
+ connErrorStart !== null || generalErrorStart !== null
617
+ if (wasDisconnected) {
618
+ const disconnectedMs =
619
+ Date.now() - (connErrorStart ?? generalErrorStart ?? Date.now())
620
+ logger.logReconnected(disconnectedMs)
621
+ logForDebugging(
622
+ `[bridge:poll] Reconnected after ${formatDuration(disconnectedMs)}`,
623
+ )
624
+ logEvent('tengu_bridge_reconnected', {
625
+ disconnected_ms: disconnectedMs,
626
+ })
627
+ }
628
+
629
+ connBackoff = 0
630
+ generalBackoff = 0
631
+ connErrorStart = null
632
+ generalErrorStart = null
633
+ lastPollErrorTime = null
634
+
635
+ // Null response = no work available in the queue.
636
+ // Add a minimum delay to avoid hammering the server.
637
+ if (!work) {
638
+ // Use live check (not a snapshot) since sessions can end during poll.
639
+ const atCap = activeSessions.size >= config.maxSessions
640
+ if (atCap) {
641
+ const atCapMs = pollConfig.multisession_poll_interval_ms_at_capacity
642
+ // Heartbeat loops WITHOUT polling. When at-capacity polling is also
643
+ // enabled (atCapMs > 0), the loop tracks a deadline and breaks out
644
+ // to poll at that interval — heartbeat and poll compose instead of
645
+ // one suppressing the other. We break out to poll when:
646
+ // - Poll deadline reached (atCapMs > 0 only)
647
+ // - Auth fails (JWT expired → poll refreshes tokens)
648
+ // - Capacity wake fires (session ended → poll for new work)
649
+ // - Loop aborted (shutdown)
650
+ if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
651
+ logEvent('tengu_bridge_heartbeat_mode_entered', {
652
+ active_sessions: activeSessions.size,
653
+ heartbeat_interval_ms:
654
+ pollConfig.non_exclusive_heartbeat_interval_ms,
655
+ })
656
+ // Deadline computed once at entry — GB updates to atCapMs don't
657
+ // shift an in-flight deadline (next entry picks up the new value).
658
+ const pollDeadline = atCapMs > 0 ? Date.now() + atCapMs : null
659
+ let hbResult: 'ok' | 'auth_failed' | 'fatal' | 'failed' = 'ok'
660
+ let hbCycles = 0
661
+ while (
662
+ !loopSignal.aborted &&
663
+ activeSessions.size >= config.maxSessions &&
664
+ (pollDeadline === null || Date.now() < pollDeadline)
665
+ ) {
666
+ // Re-read config each cycle so GrowthBook updates take effect
667
+ const hbConfig = getPollIntervalConfig()
668
+ if (hbConfig.non_exclusive_heartbeat_interval_ms <= 0) break
669
+
670
+ // Capture capacity signal BEFORE the async heartbeat call so
671
+ // a session ending during the HTTP request is caught by the
672
+ // subsequent sleep (instead of being lost to a replaced controller).
673
+ const cap = capacityWake.signal()
674
+
675
+ hbResult = await heartbeatActiveWorkItems()
676
+ if (hbResult === 'auth_failed' || hbResult === 'fatal') {
677
+ cap.cleanup()
678
+ break
679
+ }
680
+
681
+ hbCycles++
682
+ await sleep(
683
+ hbConfig.non_exclusive_heartbeat_interval_ms,
684
+ cap.signal,
685
+ )
686
+ cap.cleanup()
687
+ }
688
+
689
+ // Determine exit reason for telemetry
690
+ const exitReason =
691
+ hbResult === 'auth_failed' || hbResult === 'fatal'
692
+ ? hbResult
693
+ : loopSignal.aborted
694
+ ? 'shutdown'
695
+ : activeSessions.size < config.maxSessions
696
+ ? 'capacity_changed'
697
+ : pollDeadline !== null && Date.now() >= pollDeadline
698
+ ? 'poll_due'
699
+ : 'config_disabled'
700
+ logEvent('tengu_bridge_heartbeat_mode_exited', {
701
+ reason:
702
+ exitReason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
703
+ heartbeat_cycles: hbCycles,
704
+ active_sessions: activeSessions.size,
705
+ })
706
+ if (exitReason === 'poll_due') {
707
+ // bridgeApi throttles empty-poll logs (EMPTY_POLL_LOG_INTERVAL=100)
708
+ // so the once-per-10min poll_due poll is invisible at counter=2.
709
+ // Log it here so verification runs see both endpoints in the debug log.
710
+ logForDebugging(
711
+ `[bridge:poll] Heartbeat poll_due after ${hbCycles} cycles — falling through to pollForWork`,
712
+ )
713
+ }
714
+
715
+ // On auth_failed or fatal, sleep before polling to avoid a tight
716
+ // poll+heartbeat loop. Auth_failed: heartbeatActiveWorkItems
717
+ // already called reconnectSession — the sleep gives the server
718
+ // time to propagate the re-queue. Fatal (404/410): may be a
719
+ // single work item GCd while the environment is still valid.
720
+ // Use atCapMs if enabled, else the heartbeat interval as a floor
721
+ // (guaranteed > 0 here) so heartbeat-only configs don't tight-loop.
722
+ if (hbResult === 'auth_failed' || hbResult === 'fatal') {
723
+ const cap = capacityWake.signal()
724
+ await sleep(
725
+ atCapMs > 0
726
+ ? atCapMs
727
+ : pollConfig.non_exclusive_heartbeat_interval_ms,
728
+ cap.signal,
729
+ )
730
+ cap.cleanup()
731
+ }
732
+ } else if (atCapMs > 0) {
733
+ // Heartbeat disabled: slow poll as liveness signal.
734
+ const cap = capacityWake.signal()
735
+ await sleep(atCapMs, cap.signal)
736
+ cap.cleanup()
737
+ }
738
+ } else {
739
+ const interval =
740
+ activeSessions.size > 0
741
+ ? pollConfig.multisession_poll_interval_ms_partial_capacity
742
+ : pollConfig.multisession_poll_interval_ms_not_at_capacity
743
+ await sleep(interval, loopSignal)
744
+ }
745
+ continue
746
+ }
747
+
748
+ // At capacity — we polled to keep the heartbeat alive, but cannot
749
+ // accept new work right now. We still enter the switch below so that
750
+ // token refreshes for existing sessions are processed (the case
751
+ // 'session' handler checks for existing sessions before the inner
752
+ // capacity guard).
753
+ const atCapacityBeforeSwitch = activeSessions.size >= config.maxSessions
754
+
755
+ // Skip work items that have already been completed and stopped.
756
+ // The server may re-deliver stale work before processing our stop
757
+ // request, which would otherwise cause a duplicate session spawn.
758
+ if (completedWorkIds.has(work.id)) {
759
+ logForDebugging(
760
+ `[bridge:work] Skipping already-completed workId=${work.id}`,
761
+ )
762
+ // Respect capacity throttle — without a sleep here, persistent stale
763
+ // redeliveries would tight-loop at poll-request speed (the !work
764
+ // branch above is the only sleep, and work != null skips it).
765
+ if (atCapacityBeforeSwitch) {
766
+ const cap = capacityWake.signal()
767
+ if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
768
+ await heartbeatActiveWorkItems()
769
+ await sleep(
770
+ pollConfig.non_exclusive_heartbeat_interval_ms,
771
+ cap.signal,
772
+ )
773
+ } else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
774
+ await sleep(
775
+ pollConfig.multisession_poll_interval_ms_at_capacity,
776
+ cap.signal,
777
+ )
778
+ }
779
+ cap.cleanup()
780
+ } else {
781
+ await sleep(1000, loopSignal)
782
+ }
783
+ continue
784
+ }
785
+
786
+ // Decode the work secret for session spawning and to extract the JWT
787
+ // used for the ack call below.
788
+ let secret
789
+ try {
790
+ secret = decodeWorkSecret(work.secret)
791
+ } catch (err) {
792
+ const errMsg = errorMessage(err)
793
+ logger.logError(
794
+ `Failed to decode work secret for workId=${work.id}: ${errMsg}`,
795
+ )
796
+ logEvent('tengu_bridge_work_secret_failed', {})
797
+ // Can't ack (needs the JWT we failed to decode). stopWork uses OAuth,
798
+ // so it's callable here — prevents XAUTOCLAIM from re-delivering this
799
+ // poisoned item every reclaim_older_than_ms cycle.
800
+ completedWorkIds.add(work.id)
801
+ trackCleanup(
802
+ stopWorkWithRetry(
803
+ api,
804
+ environmentId,
805
+ work.id,
806
+ logger,
807
+ backoffConfig.stopWorkBaseDelayMs,
808
+ ),
809
+ )
810
+ // Respect capacity throttle before retrying — without a sleep here,
811
+ // repeated decode failures at capacity would tight-loop at
812
+ // poll-request speed (work != null skips the !work sleep above).
813
+ if (atCapacityBeforeSwitch) {
814
+ const cap = capacityWake.signal()
815
+ if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
816
+ await heartbeatActiveWorkItems()
817
+ await sleep(
818
+ pollConfig.non_exclusive_heartbeat_interval_ms,
819
+ cap.signal,
820
+ )
821
+ } else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
822
+ await sleep(
823
+ pollConfig.multisession_poll_interval_ms_at_capacity,
824
+ cap.signal,
825
+ )
826
+ }
827
+ cap.cleanup()
828
+ }
829
+ continue
830
+ }
831
+
832
+ // Acknowledge this work item via api.acknowledgeWork, passing the session
832
+ // ingress token from the decoded work secret. Call this only AFTER
833
+ // committing to handle the work — NOT before: the at-capacity guard inside
834
+ // case 'session' can break without spawning, and acking there would
835
+ // permanently lose the work. Ack failures are non-fatal (caught and debug-logged below): server re-delivers, and existingHandle / completedWorkIds paths handle the dedup.
836
+ const ackWork = async (): Promise<void> => {
837
+ logForDebugging(`[bridge:work] Acknowledging workId=${work.id}`)
838
+ try {
839
+ await api.acknowledgeWork(
840
+ environmentId,
841
+ work.id,
842
+ secret.session_ingress_token, // token taken from the decoded work secret
843
+ )
844
+ } catch (err) { // swallow: the failure is only debug-logged, never rethrown
845
+ logForDebugging(
846
+ `[bridge:work] Acknowledge failed workId=${work.id}: ${errorMessage(err)}`,
847
+ )
848
+ }
849
+ }
851
+
852
+ const workType: string = work.data.type
853
+ switch (work.data.type) {
854
+ case 'healthcheck':
855
+ await ackWork()
856
+ logForDebugging('[bridge:work] Healthcheck received')
857
+ logger.logVerbose('Healthcheck received')
858
+ break
859
+ case 'session': {
860
+ const sessionId = work.data.id
861
+ try {
862
+ validateBridgeId(sessionId, 'session_id')
863
+ } catch {
864
+ await ackWork()
865
+ logger.logError(`Invalid session_id received: ${sessionId}`)
866
+ break
867
+ }
868
+
869
+ // If the session is already running, deliver the fresh token so
870
+ // the child process can reconnect its WebSocket with the new
871
+ // session ingress token. This handles the case where the server
872
+ // re-dispatches work for an existing session after the WS drops.
873
+ const existingHandle = activeSessions.get(sessionId)
874
+ if (existingHandle) {
875
+ existingHandle.updateAccessToken(secret.session_ingress_token)
876
+ sessionIngressTokens.set(sessionId, secret.session_ingress_token)
877
+ sessionWorkIds.set(sessionId, work.id)
878
+ // Re-schedule next refresh from the fresh JWT's expiry. onRefresh
879
+ // branches on v2Sessions so both v1 and v2 are safe here.
880
+ tokenRefresh?.schedule(sessionId, secret.session_ingress_token)
881
+ logForDebugging(
882
+ `[bridge:work] Updated access token for existing sessionId=${sessionId} workId=${work.id}`,
883
+ )
884
+ await ackWork()
885
+ break
886
+ }
887
+
888
+ // At capacity — token refresh for existing sessions is handled
889
+ // above, but we cannot spawn new ones. The post-switch capacity
890
+ // sleep will throttle the loop; just break here.
891
+ if (activeSessions.size >= config.maxSessions) {
892
+ logForDebugging(
893
+ `[bridge:work] At capacity (${activeSessions.size}/${config.maxSessions}), cannot spawn new session for workId=${work.id}`,
894
+ )
895
+ break
896
+ }
897
+
898
+ await ackWork()
899
+ const spawnStartTime = Date.now()
900
+
901
+ // CCR v2 path: register this bridge as the session worker, get the
902
+ // epoch, and point the child at /v1/code/sessions/{id}. The child
903
+ // already has the full v2 client (SSETransport + CCRClient) — same
904
+ // code path environment-manager launches in containers.
905
+ //
906
+ // v1 path: Session-Ingress WebSocket. Uses config.sessionIngressUrl
907
+ // (not secret.api_base_url, which may point to a remote proxy tunnel
908
+ // that doesn't know about locally-created sessions).
909
+ let sdkUrl: string
910
+ let useCcrV2 = false
911
+ let workerEpoch: number | undefined
912
+ // Server decides per-session via the work secret; env var is the
913
+ // ant-dev override (e.g. forcing v2 before the server flag is on).
914
+ if (
915
+ secret.use_code_sessions === true ||
916
+ isEnvTruthy(process.env.CLAUDE_BRIDGE_USE_CCR_V2)
917
+ ) {
918
+ sdkUrl = buildCCRv2SdkUrl(config.apiBaseUrl, sessionId)
919
+ // Retry once on transient failure (network blip, 500) before
920
+ // permanently giving up and killing the session.
921
+ for (let attempt = 1; attempt <= 2; attempt++) {
922
+ try {
923
+ workerEpoch = await registerWorker(
924
+ sdkUrl,
925
+ secret.session_ingress_token,
926
+ )
927
+ useCcrV2 = true
928
+ logForDebugging(
929
+ `[bridge:session] CCR v2: registered worker sessionId=${sessionId} epoch=${workerEpoch} attempt=${attempt}`,
930
+ )
931
+ break
932
+ } catch (err) {
933
+ const errMsg = errorMessage(err)
934
+ if (attempt < 2) {
935
+ logForDebugging(
936
+ `[bridge:session] CCR v2: registerWorker attempt ${attempt} failed, retrying: ${errMsg}`,
937
+ )
938
+ await sleep(2_000, loopSignal)
939
+ if (loopSignal.aborted) break
940
+ continue
941
+ }
942
+ logger.logError(
943
+ `CCR v2 worker registration failed for session ${sessionId}: ${errMsg}`,
944
+ )
945
+ logError(new Error(`registerWorker failed: ${errMsg}`))
946
+ completedWorkIds.add(work.id)
947
+ trackCleanup(
948
+ stopWorkWithRetry(
949
+ api,
950
+ environmentId,
951
+ work.id,
952
+ logger,
953
+ backoffConfig.stopWorkBaseDelayMs,
954
+ ),
955
+ )
956
+ }
957
+ }
958
+ if (!useCcrV2) break
959
+ } else {
960
+ sdkUrl = buildSdkUrl(config.sessionIngressUrl, sessionId)
961
+ }
962
+
963
+ // In worktree mode, on-demand sessions get an isolated git worktree
964
+ // so concurrent sessions don't interfere with each other's file
965
+ // changes. The pre-created initial session (if any) runs in
966
+ // config.dir so the user's first session lands in the directory they
967
+ // invoked `rc` from — matching the old single-session UX.
968
+ // In same-dir and single-session modes, all sessions share config.dir.
969
+ // Capture spawnMode before the await below — the `w` key handler
970
+ // mutates config.spawnMode directly, and createAgentWorktree can
971
+ // take 1-2s, so reading config.spawnMode after the await can
972
+ // produce contradictory analytics (spawn_mode:'same-dir', in_worktree:true).
973
+ const spawnModeAtDecision = config.spawnMode
974
+ let sessionDir = config.dir
975
+ let worktreeCreateMs = 0
976
+ if (
977
+ spawnModeAtDecision === 'worktree' &&
978
+ (initialSessionId === undefined ||
979
+ !sameSessionId(sessionId, initialSessionId))
980
+ ) {
981
+ const wtStart = Date.now()
982
+ try {
983
+ const wt = await createAgentWorktree(
984
+ `bridge-${safeFilenameId(sessionId)}`,
985
+ )
986
+ worktreeCreateMs = Date.now() - wtStart
987
+ sessionWorktrees.set(sessionId, {
988
+ worktreePath: wt.worktreePath,
989
+ worktreeBranch: wt.worktreeBranch,
990
+ gitRoot: wt.gitRoot,
991
+ hookBased: wt.hookBased,
992
+ })
993
+ sessionDir = wt.worktreePath
994
+ logForDebugging(
995
+ `[bridge:session] Created worktree for sessionId=${sessionId} at ${wt.worktreePath}`,
996
+ )
997
+ } catch (err) {
998
+ const errMsg = errorMessage(err)
999
+ logger.logError(
1000
+ `Failed to create worktree for session ${sessionId}: ${errMsg}`,
1001
+ )
1002
+ logError(new Error(`Worktree creation failed: ${errMsg}`))
1003
+ completedWorkIds.add(work.id)
1004
+ trackCleanup(
1005
+ stopWorkWithRetry(
1006
+ api,
1007
+ environmentId,
1008
+ work.id,
1009
+ logger,
1010
+ backoffConfig.stopWorkBaseDelayMs,
1011
+ ),
1012
+ )
1013
+ break
1014
+ }
1015
+ }
1016
+
1017
+ logForDebugging(
1018
+ `[bridge:session] Spawning sessionId=${sessionId} sdkUrl=${sdkUrl}`,
1019
+ )
1020
+
1021
+ // compat-surface session_* form for logger/Sessions-API calls.
1022
+ // Work poll returns cse_* under v2 compat; convert before spawn so
1023
+ // the onFirstUserMessage callback can close over it.
1024
+ const compatSessionId = toCompatSessionId(sessionId)
1025
+
1026
+ const spawnResult = safeSpawn(
1027
+ spawner,
1028
+ {
1029
+ sessionId,
1030
+ sdkUrl,
1031
+ accessToken: secret.session_ingress_token,
1032
+ useCcrV2,
1033
+ workerEpoch,
1034
+ onFirstUserMessage: text => {
1035
+ // Server-set titles (--name, web rename) win. fetchSessionTitle
1036
+ // runs concurrently; if it already populated titledSessions,
1037
+ // skip. If it hasn't resolved yet, the derived title sticks —
1038
+ // acceptable since the server had no title at spawn time.
1039
+ if (titledSessions.has(compatSessionId)) return
1040
+ titledSessions.add(compatSessionId)
1041
+ const title = deriveSessionTitle(text)
1042
+ logger.setSessionTitle(compatSessionId, title)
1043
+ logForDebugging(
1044
+ `[bridge:title] derived title for ${compatSessionId}: ${title}`,
1045
+ )
1046
+ void import('./createSession.js')
1047
+ .then(({ updateBridgeSessionTitle }) =>
1048
+ updateBridgeSessionTitle(compatSessionId, title, {
1049
+ baseUrl: config.apiBaseUrl,
1050
+ }),
1051
+ )
1052
+ .catch(err =>
1053
+ logForDebugging(
1054
+ `[bridge:title] failed to update title for ${compatSessionId}: ${err}`,
1055
+ { level: 'error' },
1056
+ ),
1057
+ )
1058
+ },
1059
+ },
1060
+ sessionDir,
1061
+ )
1062
+ if (typeof spawnResult === 'string') {
1063
+ logger.logError(
1064
+ `Failed to spawn session ${sessionId}: ${spawnResult}`,
1065
+ )
1066
+ // Clean up worktree if one was created for this session
1067
+ const wt = sessionWorktrees.get(sessionId)
1068
+ if (wt) {
1069
+ sessionWorktrees.delete(sessionId)
1070
+ trackCleanup(
1071
+ removeAgentWorktree(
1072
+ wt.worktreePath,
1073
+ wt.worktreeBranch,
1074
+ wt.gitRoot,
1075
+ wt.hookBased,
1076
+ ).catch((err: unknown) =>
1077
+ logger.logVerbose(
1078
+ `Failed to remove worktree ${wt.worktreePath}: ${errorMessage(err)}`,
1079
+ ),
1080
+ ),
1081
+ )
1082
+ }
1083
+ completedWorkIds.add(work.id)
1084
+ trackCleanup(
1085
+ stopWorkWithRetry(
1086
+ api,
1087
+ environmentId,
1088
+ work.id,
1089
+ logger,
1090
+ backoffConfig.stopWorkBaseDelayMs,
1091
+ ),
1092
+ )
1093
+ break
1094
+ }
1095
+ const handle = spawnResult
1096
+
1097
+ const spawnDurationMs = Date.now() - spawnStartTime
1098
+ logEvent('tengu_bridge_session_started', {
1099
+ active_sessions: activeSessions.size,
1100
+ spawn_mode:
1101
+ spawnModeAtDecision as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1102
+ in_worktree: sessionWorktrees.has(sessionId),
1103
+ spawn_duration_ms: spawnDurationMs,
1104
+ worktree_create_ms: worktreeCreateMs,
1105
+ inProtectedNamespace: isInProtectedNamespace(),
1106
+ })
1107
+ logForDiagnosticsNoPII('info', 'bridge_session_started', {
1108
+ spawn_mode: spawnModeAtDecision,
1109
+ in_worktree: sessionWorktrees.has(sessionId),
1110
+ spawn_duration_ms: spawnDurationMs,
1111
+ worktree_create_ms: worktreeCreateMs,
1112
+ })
1113
+
1114
+ activeSessions.set(sessionId, handle)
1115
+ sessionWorkIds.set(sessionId, work.id)
1116
+ sessionIngressTokens.set(sessionId, secret.session_ingress_token)
1117
+ sessionCompatIds.set(sessionId, compatSessionId)
1118
+
1119
+ const startTime = Date.now()
1120
+ sessionStartTimes.set(sessionId, startTime)
1121
+
1122
+ // Use a generic prompt description since we no longer get startup_context
1123
+ logger.logSessionStart(sessionId, `Session ${sessionId}`)
1124
+
1125
+ // Compute the actual debug file path (mirrors sessionRunner.ts logic)
1126
+ const safeId = safeFilenameId(sessionId)
1127
+ let sessionDebugFile: string | undefined
1128
+ if (config.debugFile) {
1129
+ const ext = config.debugFile.lastIndexOf('.')
1130
+ if (ext > 0) {
1131
+ sessionDebugFile = `${config.debugFile.slice(0, ext)}-${safeId}${config.debugFile.slice(ext)}`
1132
+ } else {
1133
+ sessionDebugFile = `${config.debugFile}-${safeId}`
1134
+ }
1135
+ } else if (config.verbose || process.env.USER_TYPE === 'ant') {
1136
+ sessionDebugFile = join(
1137
+ tmpdir(),
1138
+ 'claude',
1139
+ `bridge-session-${safeId}.log`,
1140
+ )
1141
+ }
1142
+
1143
+ if (sessionDebugFile) {
1144
+ logger.logVerbose(`Debug log: ${sessionDebugFile}`)
1145
+ }
1146
+
1147
+ // Register in the sessions Map before starting status updates so the
1148
+ // first render tick shows the correct count and bullet list in sync.
1149
+ logger.addSession(
1150
+ compatSessionId,
1151
+ getRemoteSessionUrl(compatSessionId, config.sessionIngressUrl),
1152
+ )
1153
+
1154
+ // Start live status updates and transition to "Attached" state.
1155
+ startStatusUpdates()
1156
+ logger.setAttached(compatSessionId)
1157
+
1158
+ // One-shot title fetch. If the session already has a title (set via
1159
+ // --name, web rename, or /remote-control), display it and mark as
1160
+ // titled so the first-user-message fallback doesn't overwrite it.
1161
+ // Otherwise onFirstUserMessage derives one from the first prompt.
1162
+ void fetchSessionTitle(compatSessionId, config.apiBaseUrl)
1163
+ .then(title => {
1164
+ if (title && activeSessions.has(sessionId)) {
1165
+ titledSessions.add(compatSessionId)
1166
+ logger.setSessionTitle(compatSessionId, title)
1167
+ logForDebugging(
1168
+ `[bridge:title] server title for ${compatSessionId}: ${title}`,
1169
+ )
1170
+ }
1171
+ })
1172
+ .catch(err =>
1173
+ logForDebugging(
1174
+ `[bridge:title] failed to fetch title for ${compatSessionId}: ${err}`,
1175
+ { level: 'error' },
1176
+ ),
1177
+ )
1178
+
1179
+ // Start per-session timeout watchdog
1180
+ const timeoutMs =
1181
+ config.sessionTimeoutMs ?? DEFAULT_SESSION_TIMEOUT_MS
1182
+ if (timeoutMs > 0) {
1183
+ const timer = setTimeout(
1184
+ onSessionTimeout,
1185
+ timeoutMs,
1186
+ sessionId,
1187
+ timeoutMs,
1188
+ logger,
1189
+ timedOutSessions,
1190
+ handle,
1191
+ )
1192
+ sessionTimers.set(sessionId, timer)
1193
+ }
1194
+
1195
+ // Schedule proactive token refresh before the JWT expires.
1196
+ // onRefresh branches on v2Sessions: v1 delivers OAuth to the
1197
+ // child, v2 triggers server re-dispatch via reconnectSession.
1198
+ if (useCcrV2) {
1199
+ v2Sessions.add(sessionId)
1200
+ }
1201
+ tokenRefresh?.schedule(sessionId, secret.session_ingress_token)
1202
+
1203
+ void handle.done.then(onSessionDone(sessionId, startTime, handle))
1204
+ break
1205
+ }
1206
+ default:
1207
+ await ackWork()
1208
+ // Gracefully ignore unknown work types. The backend may send new
1209
+ // types before the bridge client is updated.
1210
+ logForDebugging(
1211
+ `[bridge:work] Unknown work type: ${workType}, skipping`,
1212
+ )
1213
+ break
1214
+ }
1215
+
1216
+ // When at capacity, throttle the loop. The switch above still runs so
1217
+ // existing-session token refreshes are processed, but we sleep here
1218
+ // to avoid busy-looping. Include the capacity wake signal so the
1219
+ // sleep is interrupted immediately when a session completes.
1220
+ if (atCapacityBeforeSwitch) {
1221
+ const cap = capacityWake.signal()
1222
+ if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
1223
+ await heartbeatActiveWorkItems()
1224
+ await sleep(
1225
+ pollConfig.non_exclusive_heartbeat_interval_ms,
1226
+ cap.signal,
1227
+ )
1228
+ } else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
1229
+ await sleep(
1230
+ pollConfig.multisession_poll_interval_ms_at_capacity,
1231
+ cap.signal,
1232
+ )
1233
+ }
1234
+ cap.cleanup()
1235
+ }
1236
+ } catch (err) {
1237
+ if (loopSignal.aborted) {
1238
+ break
1239
+ }
1240
+
1241
+ // Fatal errors (401/403) — no point retrying, auth won't fix itself
1242
+ if (err instanceof BridgeFatalError) {
1243
+ fatalExit = true
1244
+ // Server-enforced expiry gets a clean status message, not an error
1245
+ if (isExpiredErrorType(err.errorType)) {
1246
+ logger.logStatus(err.message)
1247
+ } else if (isSuppressible403(err)) {
1248
+ // Cosmetic 403 errors (e.g., external_poll_sessions scope,
1249
+ // environments:manage permission) — don't show to user
1250
+ logForDebugging(`[bridge:work] Suppressed 403 error: ${err.message}`)
1251
+ } else {
1252
+ logger.logError(err.message)
1253
+ logError(err)
1254
+ }
1255
+ logEvent('tengu_bridge_fatal_error', {
1256
+ status: err.status,
1257
+ error_type:
1258
+ err.errorType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1259
+ })
1260
+ logForDiagnosticsNoPII(
1261
+ isExpiredErrorType(err.errorType) ? 'info' : 'error',
1262
+ 'bridge_fatal_error',
1263
+ { status: err.status, error_type: err.errorType },
1264
+ )
1265
+ break
1266
+ }
1267
+
1268
+ const errMsg = describeAxiosError(err)
1269
+
1270
+ if (isConnectionError(err) || isServerError(err)) {
1271
+ const now = Date.now()
1272
+
1273
+ // Detect system sleep/wake: if the gap since the last poll error
1274
+ // greatly exceeds the expected backoff, the machine likely slept.
1275
+ // Reset error tracking so the bridge retries with a fresh budget.
1276
+ if (
1277
+ lastPollErrorTime !== null &&
1278
+ now - lastPollErrorTime > pollSleepDetectionThresholdMs(backoffConfig)
1279
+ ) {
1280
+ logForDebugging(
1281
+ `[bridge:work] Detected system sleep (${Math.round((now - lastPollErrorTime) / 1000)}s gap), resetting error budget`,
1282
+ )
1283
+ logForDiagnosticsNoPII('info', 'bridge_poll_sleep_detected', {
1284
+ gapMs: now - lastPollErrorTime,
1285
+ })
1286
+ connErrorStart = null
1287
+ connBackoff = 0
1288
+ generalErrorStart = null
1289
+ generalBackoff = 0
1290
+ }
1291
+ lastPollErrorTime = now
1292
+
1293
+ if (!connErrorStart) {
1294
+ connErrorStart = now
1295
+ }
1296
+ const elapsed = now - connErrorStart
1297
+ if (elapsed >= backoffConfig.connGiveUpMs) {
1298
+ logger.logError(
1299
+ `Server unreachable for ${Math.round(elapsed / 60_000)} minutes, giving up.`,
1300
+ )
1301
+ logEvent('tengu_bridge_poll_give_up', {
1302
+ error_type:
1303
+ 'connection' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1304
+ elapsed_ms: elapsed,
1305
+ })
1306
+ logForDiagnosticsNoPII('error', 'bridge_poll_give_up', {
1307
+ error_type: 'connection',
1308
+ elapsed_ms: elapsed,
1309
+ })
1310
+ fatalExit = true
1311
+ break
1312
+ }
1313
+
1314
+ // Reset the other track when switching error types
1315
+ generalErrorStart = null
1316
+ generalBackoff = 0
1317
+
1318
+ connBackoff = connBackoff
1319
+ ? Math.min(connBackoff * 2, backoffConfig.connCapMs)
1320
+ : backoffConfig.connInitialMs
1321
+ const delay = addJitter(connBackoff)
1322
+ logger.logVerbose(
1323
+ `Connection error, retrying in ${formatDelay(delay)} (${Math.round(elapsed / 1000)}s elapsed): ${errMsg}`,
1324
+ )
1325
+ logger.updateReconnectingStatus(
1326
+ formatDelay(delay),
1327
+ formatDuration(elapsed),
1328
+ )
1329
+ // The poll_due heartbeat-loop exit leaves a healthy lease exposed to
1330
+ // this backoff path. Heartbeat before each sleep so /poll outages
1331
+ // (the VerifyEnvironmentSecretAuth DB path heartbeat was introduced
1332
+ // to avoid) don't kill the 300s lease TTL. No-op when activeSessions
1333
+ // is empty or heartbeat is disabled.
1334
+ if (getPollIntervalConfig().non_exclusive_heartbeat_interval_ms > 0) {
1335
+ await heartbeatActiveWorkItems()
1336
+ }
1337
+ await sleep(delay, loopSignal)
1338
+ } else {
1339
+ const now = Date.now()
1340
+
1341
+ // Sleep detection for general errors (same logic as connection errors)
1342
+ if (
1343
+ lastPollErrorTime !== null &&
1344
+ now - lastPollErrorTime > pollSleepDetectionThresholdMs(backoffConfig)
1345
+ ) {
1346
+ logForDebugging(
1347
+ `[bridge:work] Detected system sleep (${Math.round((now - lastPollErrorTime) / 1000)}s gap), resetting error budget`,
1348
+ )
1349
+ logForDiagnosticsNoPII('info', 'bridge_poll_sleep_detected', {
1350
+ gapMs: now - lastPollErrorTime,
1351
+ })
1352
+ connErrorStart = null
1353
+ connBackoff = 0
1354
+ generalErrorStart = null
1355
+ generalBackoff = 0
1356
+ }
1357
+ lastPollErrorTime = now
1358
+
1359
+ if (!generalErrorStart) {
1360
+ generalErrorStart = now
1361
+ }
1362
+ const elapsed = now - generalErrorStart
1363
+ if (elapsed >= backoffConfig.generalGiveUpMs) {
1364
+ logger.logError(
1365
+ `Persistent errors for ${Math.round(elapsed / 60_000)} minutes, giving up.`,
1366
+ )
1367
+ logEvent('tengu_bridge_poll_give_up', {
1368
+ error_type:
1369
+ 'general' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1370
+ elapsed_ms: elapsed,
1371
+ })
1372
+ logForDiagnosticsNoPII('error', 'bridge_poll_give_up', {
1373
+ error_type: 'general',
1374
+ elapsed_ms: elapsed,
1375
+ })
1376
+ fatalExit = true
1377
+ break
1378
+ }
1379
+
1380
+ // Reset the other track when switching error types
1381
+ connErrorStart = null
1382
+ connBackoff = 0
1383
+
1384
+ generalBackoff = generalBackoff
1385
+ ? Math.min(generalBackoff * 2, backoffConfig.generalCapMs)
1386
+ : backoffConfig.generalInitialMs
1387
+ const delay = addJitter(generalBackoff)
1388
+ logger.logVerbose(
1389
+ `Poll failed, retrying in ${formatDelay(delay)} (${Math.round(elapsed / 1000)}s elapsed): ${errMsg}`,
1390
+ )
1391
+ logger.updateReconnectingStatus(
1392
+ formatDelay(delay),
1393
+ formatDuration(elapsed),
1394
+ )
1395
+ if (getPollIntervalConfig().non_exclusive_heartbeat_interval_ms > 0) {
1396
+ await heartbeatActiveWorkItems()
1397
+ }
1398
+ await sleep(delay, loopSignal)
1399
+ }
1400
+ }
1401
+ }
1402
+
1403
+ // Clean up
1404
+ stopStatusUpdates()
1405
+ logger.clearStatus()
1406
+
1407
+ const loopDurationMs = Date.now() - loopStartTime
1408
+ logEvent('tengu_bridge_shutdown', {
1409
+ active_sessions: activeSessions.size,
1410
+ loop_duration_ms: loopDurationMs,
1411
+ })
1412
+ logForDiagnosticsNoPII('info', 'bridge_shutdown', {
1413
+ active_sessions: activeSessions.size,
1414
+ loop_duration_ms: loopDurationMs,
1415
+ })
1416
+
1417
+ // Graceful shutdown: kill active sessions, report them as interrupted,
1418
+ // archive sessions, then deregister the environment so the web UI shows
1419
+ // the bridge as offline.
1420
+
1421
+ // Collect all session IDs to archive on exit. This includes:
1422
+ // 1. Active sessions (snapshot before killing — onSessionDone clears maps)
1423
+ // 2. The initial auto-created session (may never have had work dispatched)
1424
+ // api.archiveSession is idempotent (409 if already archived), so
1425
+ // double-archiving is safe.
1426
+ const sessionsToArchive = new Set(activeSessions.keys())
1427
+ if (initialSessionId) {
1428
+ sessionsToArchive.add(initialSessionId)
1429
+ }
1430
+ // Snapshot before killing — onSessionDone clears sessionCompatIds.
1431
+ const compatIdSnapshot = new Map(sessionCompatIds)
1432
+
1433
+ if (activeSessions.size > 0) {
1434
+ logForDebugging(
1435
+ `[bridge:shutdown] Shutting down ${activeSessions.size} active session(s)`,
1436
+ )
1437
+ logger.logStatus(
1438
+ `Shutting down ${activeSessions.size} active session(s)\u2026`,
1439
+ )
1440
+
1441
+ // Snapshot work IDs before killing — onSessionDone clears the maps when
1442
+ // each child exits, so we need a copy for the stopWork calls below.
1443
+ const shutdownWorkIds = new Map(sessionWorkIds)
1444
+
1445
+ for (const [sessionId, handle] of activeSessions.entries()) {
1446
+ logForDebugging(
1447
+ `[bridge:shutdown] Sending SIGTERM to sessionId=${sessionId}`,
1448
+ )
1449
+ handle.kill()
1450
+ }
1451
+
1452
+ const timeout = new AbortController()
1453
+ await Promise.race([
1454
+ Promise.allSettled([...activeSessions.values()].map(h => h.done)),
1455
+ sleep(backoffConfig.shutdownGraceMs ?? 30_000, timeout.signal),
1456
+ ])
1457
+ timeout.abort()
1458
+
1459
+ // SIGKILL any processes that didn't respond to SIGTERM within the grace window
1460
+ for (const [sid, handle] of activeSessions.entries()) {
1461
+ logForDebugging(`[bridge:shutdown] Force-killing stuck sessionId=${sid}`)
1462
+ handle.forceKill()
1463
+ }
1464
+
1465
+ // Clear any remaining session timeout and refresh timers
1466
+ for (const timer of sessionTimers.values()) {
1467
+ clearTimeout(timer)
1468
+ }
1469
+ sessionTimers.clear()
1470
+ tokenRefresh?.cancelAll()
1471
+
1472
+ // Clean up any remaining worktrees from active sessions.
1473
+ // Snapshot and clear the map first so onSessionDone (which may fire
1474
+ // during the await below when handle.done resolves) won't try to
1475
+ // remove the same worktrees again.
1476
+ if (sessionWorktrees.size > 0) {
1477
+ const remainingWorktrees = [...sessionWorktrees.values()]
1478
+ sessionWorktrees.clear()
1479
+ logForDebugging(
1480
+ `[bridge:shutdown] Cleaning up ${remainingWorktrees.length} worktree(s)`,
1481
+ )
1482
+ await Promise.allSettled(
1483
+ remainingWorktrees.map(wt =>
1484
+ removeAgentWorktree(
1485
+ wt.worktreePath,
1486
+ wt.worktreeBranch,
1487
+ wt.gitRoot,
1488
+ wt.hookBased,
1489
+ ),
1490
+ ),
1491
+ )
1492
+ }
1493
+
1494
+ // Stop all active work items so the server knows they're done
1495
+ await Promise.allSettled(
1496
+ [...shutdownWorkIds.entries()].map(([sessionId, workId]) => {
1497
+ return api
1498
+ .stopWork(environmentId, workId, true)
1499
+ .catch(err =>
1500
+ logger.logVerbose(
1501
+ `Failed to stop work ${workId} for session ${sessionId}: ${errorMessage(err)}`,
1502
+ ),
1503
+ )
1504
+ }),
1505
+ )
1506
+ }
1507
+
1508
+ // Ensure all in-flight cleanup (stopWork, worktree removal) from
1509
+ // onSessionDone completes before deregistering — otherwise
1510
+ // process.exit() can kill them mid-flight.
1511
+ if (pendingCleanups.size > 0) {
1512
+ await Promise.allSettled([...pendingCleanups])
1513
+ }
1514
+
1515
+ // In single-session mode with a known session, leave the session and
1516
+ // environment alive so `claude remote-control --session-id=<id>` can resume.
1517
+ // The backend GCs stale environments via a 4h TTL (BRIDGE_LAST_POLL_TTL).
1518
+ // Archiving the session or deregistering the environment would make the
1519
+ // printed resume command a lie — deregister deletes Firestore + Redis stream.
1520
+ // Skip when the loop exited fatally (env expired, auth failed, give-up) —
1521
+ // resume is impossible in those cases and the message would contradict the
1522
+ // error already printed.
1523
+ // feature('KAIROS') gate: --session-id is ant-only; without the gate,
1524
+ // revert to the pre-PR behavior (archive + deregister on every shutdown).
1525
+ if (
1526
+ feature('KAIROS') &&
1527
+ config.spawnMode === 'single-session' &&
1528
+ initialSessionId &&
1529
+ !fatalExit
1530
+ ) {
1531
+ logger.logStatus(
1532
+ `Resume this session by running \`claude remote-control --continue\``,
1533
+ )
1534
+ logForDebugging(
1535
+ `[bridge:shutdown] Skipping archive+deregister to allow resume of session ${initialSessionId}`,
1536
+ )
1537
+ return
1538
+ }
1539
+
1540
+ // Archive all known sessions so they don't linger as idle/running on the
1541
+ // server after the bridge goes offline.
1542
+ if (sessionsToArchive.size > 0) {
1543
+ logForDebugging(
1544
+ `[bridge:shutdown] Archiving ${sessionsToArchive.size} session(s)`,
1545
+ )
1546
+ await Promise.allSettled(
1547
+ [...sessionsToArchive].map(sessionId =>
1548
+ api
1549
+ .archiveSession(
1550
+ compatIdSnapshot.get(sessionId) ?? toCompatSessionId(sessionId),
1551
+ )
1552
+ .catch(err =>
1553
+ logger.logVerbose(
1554
+ `Failed to archive session ${sessionId}: ${errorMessage(err)}`,
1555
+ ),
1556
+ ),
1557
+ ),
1558
+ )
1559
+ }
1560
+
1561
+ // Deregister the environment so the web UI shows the bridge as offline
1562
+ // and the Redis stream is cleaned up.
1563
+ try {
1564
+ await api.deregisterEnvironment(environmentId)
1565
+ logForDebugging(
1566
+ `[bridge:shutdown] Environment deregistered, bridge offline`,
1567
+ )
1568
+ logger.logVerbose('Environment deregistered.')
1569
+ } catch (err) {
1570
+ logger.logVerbose(`Failed to deregister environment: ${errorMessage(err)}`)
1571
+ }
1572
+
1573
+ // Clear the crash-recovery pointer — the env is gone, pointer would be
1574
+ // stale. The early return above (resumable SIGINT shutdown) skips this,
1575
+ // leaving the pointer in place so the printed `--continue` hint can find it.
1576
+ const { clearBridgePointer } = await import('./bridgePointer.js')
1577
+ await clearBridgePointer(config.dir)
1578
+
1579
+ logger.logVerbose('Environment offline.')
1580
+ }
1581
+
1582
/** Error `code` values that this module treats as connection failures. */
const CONNECTION_ERROR_CODES = new Set([
  'ECONNREFUSED',
  'ECONNRESET',
  'ETIMEDOUT',
  'ENETUNREACH',
  'EHOSTUNREACH',
])

/**
 * Report whether a thrown value looks like a connection failure.
 *
 * @param err - Any caught value; it does not need to be an `Error`.
 * @returns `true` only when `err` is a non-null object whose `code` property
 *   is a string contained in `CONNECTION_ERROR_CODES`.
 */
export function isConnectionError(err: unknown): boolean {
  // Anything that is not an object carrying a `code` cannot match.
  if (!err || typeof err !== 'object' || !('code' in err)) {
    return false
  }
  const code: unknown = err.code
  return typeof code === 'string' && CONNECTION_ERROR_CODES.has(code)
}
1602
+
1603
/**
 * Detect HTTP 5xx errors from axios, which are tagged with
 * `code: 'ERR_BAD_RESPONSE'`.
 *
 * @param err - Any caught value.
 * @returns `true` only when `err` is a non-null object whose `code` property
 *   is exactly the string `'ERR_BAD_RESPONSE'`.
 */
export function isServerError(err: unknown): boolean {
  if (!err || typeof err !== 'object' || !('code' in err)) {
    return false
  }
  // Strict equality against a string literal already implies a string type.
  return err.code === 'ERR_BAD_RESPONSE'
}
1613
+
1614
/**
 * Add ±25% random jitter to a delay value.
 *
 * @param ms - Base delay in milliseconds.
 * @returns A value drawn from `[0.75 * ms, 1.25 * ms]`, clamped so it is
 *   never negative.
 */
function addJitter(ms: number): number {
  // Maps Math.random()'s [0, 1) onto [-1, 1).
  const direction = 2 * Math.random() - 1
  const jittered = ms + ms * 0.25 * direction
  return Math.max(0, jittered)
}
1618
+
1619
/**
 * Format a delay for log output.
 *
 * @param ms - Delay in milliseconds; may be fractional (e.g. after jitter).
 * @returns Seconds with one decimal (`"1.5s"`) when the delay rounds to at
 *   least 1000ms, otherwise whole milliseconds (`"250ms"`).
 */
function formatDelay(ms: number): string {
  const wholeMs = Math.round(ms)
  // Pick the unit from the rounded value: otherwise a delay such as 999.6ms
  // stays in the millisecond branch and is printed as "1000ms".
  if (wholeMs >= 1000) {
    return `${(ms / 1000).toFixed(1)}s`
  }
  return `${wholeMs}ms`
}
1622
+
1623
/**
 * Tell the server that a work item has ended, retrying transient failures so
 * the work item does not linger as a server-side zombie.
 *
 * Makes up to 3 attempts. After a failed attempt that is not the last one it
 * sleeps for a jittered, exponentially growing delay (about `baseDelayMs`,
 * then about `2 * baseDelayMs`); there is no sleep after the final attempt.
 * A `BridgeFatalError` is never retried. Failures of `api.stopWork` are
 * logged rather than rethrown.
 *
 * @param api - Bridge API client used to issue `stopWork`.
 * @param environmentId - Environment the work item belongs to.
 * @param workId - Work item to stop.
 * @param logger - Receives user-visible verbose/error messages.
 * @param baseDelayMs - Delay before the first retry, before jitter.
 */
async function stopWorkWithRetry(
  api: BridgeApiClient,
  environmentId: string,
  workId: string,
  logger: BridgeLogger,
  baseDelayMs = 1000,
): Promise<void> {
  const MAX_ATTEMPTS = 3

  let attempt = 0
  while (attempt < MAX_ATTEMPTS) {
    attempt += 1
    try {
      await api.stopWork(environmentId, workId, false)
      logForDebugging(
        `[bridge:work] stopWork succeeded for workId=${workId} on attempt ${attempt}/${MAX_ATTEMPTS}`,
      )
      return
    } catch (err) {
      // Auth/permission errors won't be fixed by retrying.
      if (err instanceof BridgeFatalError) {
        if (!isSuppressible403(err)) {
          logger.logError(`Failed to stop work ${workId}: ${err.message}`)
        } else {
          logForDebugging(
            `[bridge:work] Suppressed stopWork 403 for ${workId}: ${err.message}`,
          )
        }
        logForDiagnosticsNoPII('error', 'bridge_stop_work_failed', {
          attempts: attempt,
          fatal: true,
        })
        return
      }

      const errMsg = errorMessage(err)

      // Out of attempts: report once and give up.
      if (attempt >= MAX_ATTEMPTS) {
        logger.logError(
          `Failed to stop work ${workId} after ${MAX_ATTEMPTS} attempts: ${errMsg}`,
        )
        logForDiagnosticsNoPII('error', 'bridge_stop_work_failed', {
          attempts: MAX_ATTEMPTS,
        })
        return
      }

      // Exponential backoff: the base delay doubles with every failed attempt.
      const backoffMs = addJitter(baseDelayMs * 2 ** (attempt - 1))
      logger.logVerbose(
        `Failed to stop work ${workId} (attempt ${attempt}/${MAX_ATTEMPTS}), retrying in ${formatDelay(backoffMs)}: ${errMsg}`,
      )
      await sleep(backoffMs)
    }
  }
}
1677
+
1678
/**
 * Handle a session that exceeded its time limit: log the timeout, emit the
 * analytics event, report the session as failed, and kill its process.
 *
 * @param sessionId - Session that timed out.
 * @param timeoutMs - The limit that was exceeded, in milliseconds.
 * @param logger - Receives the user-visible "session failed" message.
 * @param timedOutSessions - Set the session ID is added to before the kill;
 *   presumably lets the exit handling tell a timeout apart from other exits
 *   (verify against the caller).
 * @param handle - Handle of the running session; `kill()` is called on it.
 */
function onSessionTimeout(
  sessionId: string,
  timeoutMs: number,
  logger: BridgeLogger,
  timedOutSessions: Set<string>,
  handle: SessionHandle,
): void {
  const limit = formatDuration(timeoutMs)

  logForDebugging(
    `[bridge:session] sessionId=${sessionId} timed out after ${limit}`,
  )
  logEvent('tengu_bridge_session_timeout', { timeout_ms: timeoutMs })
  logger.logSessionFailed(sessionId, `Session timed out after ${limit}`)

  // Mark the session first so the timeout is already recorded when kill() runs.
  timedOutSessions.add(sessionId)
  handle.kill()
}
1698
+
1699
/**
 * Result of parsing the `claude remote-control` command line with `parseArgs`.
 * When `error` is set, the remaining fields hold whatever had been parsed up
 * to the point of failure.
 */
export type ParsedArgs = {
  /** Set by `--verbose` / `-v`. */
  verbose: boolean
  /** Set by `--sandbox`, cleared by `--no-sandbox`; false when neither is given. */
  sandbox: boolean
  /** Value of `--debug-file`, passed through `resolve()`. */
  debugFile?: string
  /** Value of `--session-timeout` (given in seconds) converted to milliseconds. */
  sessionTimeoutMs?: number
  /** Raw value of `--permission-mode`; not validated by `parseArgs`. */
  permissionMode?: string
  /** Value of `--name`. */
  name?: string
  /** Value passed to --spawn (if any); undefined if no --spawn flag was given. */
  spawnMode: SpawnMode | undefined
  /** Value passed to --capacity (if any); undefined if no --capacity flag was given. */
  capacity: number | undefined
  /** --[no-]create-session-in-dir override; undefined = use default (on). */
  createSessionInDir: boolean | undefined
  /** Resume an existing session instead of creating a new one. */
  sessionId?: string
  /** Resume the last session in this directory (reads bridge-pointer.json). */
  continueSession: boolean
  /** Set by `--help` / `-h`. */
  help: boolean
  /** Human-readable message describing why parsing or flag validation failed. */
  error?: string
}
1719
+
1720
/** Values accepted on the command line by `--spawn`. */
const SPAWN_FLAG_VALUES = ['session', 'same-dir', 'worktree'] as const

/**
 * Translate a raw `--spawn` value into a spawn mode.
 *
 * @param raw - Text supplied to `--spawn`, or `undefined` when it is missing.
 * @returns `'single-session'` for `session`, the value itself for `same-dir`
 *   and `worktree`, and otherwise an error message listing the accepted
 *   values. Callers tell the two apart by comparing against the mode literals.
 */
function parseSpawnValue(raw: string | undefined): SpawnMode | string {
  switch (raw) {
    case 'session':
      // The CLI spelling differs from the internal mode name.
      return 'single-session'
    case 'same-dir':
    case 'worktree':
      return raw
    default: {
      const accepted = SPAWN_FLAG_VALUES.join(', ')
      const received = raw ?? '<missing>'
      return `--spawn requires one of: ${accepted} (got: ${received})`
    }
  }
}
1728
+
1729
/**
 * Validate and convert a raw `--capacity` value.
 *
 * Only plain decimal integers are accepted (optional leading `+`, surrounding
 * whitespace ignored). Values such as `4x`, `2.5` or `1e3` are rejected
 * rather than being truncated to their numeric prefix.
 *
 * @param raw - Text supplied to `--capacity`, or `undefined` when it is missing.
 * @returns The capacity as a number (>= 1), or an error message string.
 */
function parseCapacityValue(raw: string | undefined): number | string {
  const text = raw?.trim() ?? ''
  // Check the whole token: parseInt() would silently accept trailing garbage.
  const n = /^\+?\d+$/.test(text) ? Number(text) : NaN
  if (!Number.isSafeInteger(n) || n < 1) {
    return `--capacity requires a positive integer (got: ${raw ?? '<missing>'})`
  }
  return n
}
1736
+
1737
/**
 * Parse the arguments of `claude remote-control`.
 *
 * Value-taking flags accept both `--flag value` and `--flag=value`. Parsing
 * never throws: on the first problem the function returns the values parsed
 * so far together with an `error` message. After the scan it cross-validates
 * flag combinations (`--capacity` vs. single-session mode, resume flags vs.
 * spawn flags, `--session-id` vs. `--continue`).
 *
 * `--session-id`, `--continue` and `-c` are only recognised when
 * `feature('KAIROS')` is on; otherwise they are reported as unknown arguments.
 *
 * @param args - Arguments following the `remote-control` subcommand.
 * @returns The parsed flags; `error` is set when the input was invalid.
 */
export function parseArgs(args: string[]): ParsedArgs {
  let verbose = false
  let sandbox = false
  let debugFile: string | undefined
  let sessionTimeoutMs: number | undefined
  let permissionMode: string | undefined
  let name: string | undefined
  let help = false
  let spawnMode: SpawnMode | undefined
  let capacity: number | undefined
  let createSessionInDir: boolean | undefined
  let sessionId: string | undefined
  let continueSession = false

  // Cursor into `args`; lives outside the loop so takeValue() can advance it.
  let i = 0

  /** True when `arg` is `flag` itself or its `flag=value` form. */
  const isFlag = (arg: string, flag: string): boolean =>
    arg === flag || arg.startsWith(`${flag}=`)

  /**
   * Value for a flag already matched with isFlag(): the inline part of
   * `flag=value`, else the next argument (consuming it), else undefined when
   * the flag is the last argument.
   */
  const takeValue = (arg: string, flag: string): string | undefined => {
    const inlinePrefix = `${flag}=`
    if (arg.startsWith(inlinePrefix)) return arg.slice(inlinePrefix.length)
    return i + 1 < args.length ? args[++i] : undefined
  }

  for (; i < args.length; i++) {
    const arg = args[i]!
    if (arg === '--help' || arg === '-h') {
      help = true
    } else if (arg === '--verbose' || arg === '-v') {
      verbose = true
    } else if (arg === '--sandbox') {
      sandbox = true
    } else if (arg === '--no-sandbox') {
      sandbox = false
    } else if (isFlag(arg, '--debug-file')) {
      const raw = takeValue(arg, '--debug-file')
      if (raw === undefined) {
        return makeError('--debug-file requires a value')
      }
      debugFile = resolve(raw)
    } else if (isFlag(arg, '--session-timeout')) {
      const raw = takeValue(arg, '--session-timeout')
      if (raw === undefined) {
        return makeError('--session-timeout requires a value')
      }
      const seconds = parseInt(raw, 10)
      // Reject NaN/negative values instead of passing a bogus timeout along.
      if (isNaN(seconds) || seconds < 0) {
        return makeError(
          `--session-timeout requires a non-negative number of seconds (got: ${raw})`,
        )
      }
      sessionTimeoutMs = seconds * 1000
    } else if (isFlag(arg, '--permission-mode')) {
      const raw = takeValue(arg, '--permission-mode')
      if (raw === undefined) {
        return makeError('--permission-mode requires a value')
      }
      permissionMode = raw
    } else if (isFlag(arg, '--name')) {
      const raw = takeValue(arg, '--name')
      if (raw === undefined) {
        return makeError('--name requires a value')
      }
      name = raw
    } else if (feature('KAIROS') && isFlag(arg, '--session-id')) {
      sessionId = takeValue(arg, '--session-id')
      // Covers both a missing value and an empty one (`--session-id=`).
      if (!sessionId) {
        return makeError('--session-id requires a value')
      }
    } else if (feature('KAIROS') && (arg === '--continue' || arg === '-c')) {
      continueSession = true
    } else if (isFlag(arg, '--spawn')) {
      if (spawnMode !== undefined) {
        return makeError('--spawn may only be specified once')
      }
      // parseSpawnValue() produces the "<missing>" message for undefined.
      const v = parseSpawnValue(takeValue(arg, '--spawn'))
      if (v === 'single-session' || v === 'same-dir' || v === 'worktree') {
        spawnMode = v
      } else {
        return makeError(v)
      }
    } else if (isFlag(arg, '--capacity')) {
      if (capacity !== undefined) {
        return makeError('--capacity may only be specified once')
      }
      const v = parseCapacityValue(takeValue(arg, '--capacity'))
      if (typeof v === 'number') capacity = v
      else return makeError(v)
    } else if (arg === '--create-session-in-dir') {
      createSessionInDir = true
    } else if (arg === '--no-create-session-in-dir') {
      createSessionInDir = false
    } else {
      return makeError(
        `Unknown argument: ${arg}\nRun 'claude remote-control --help' for usage.`,
      )
    }
  }

  // Note: gate check for --spawn/--capacity/--create-session-in-dir is in bridgeMain
  // (gate-aware error). Flag cross-validation happens here.

  // --capacity only makes sense for multi-session modes.
  if (spawnMode === 'single-session' && capacity !== undefined) {
    return makeError(
      `--capacity cannot be used with --spawn=session (single-session mode has fixed capacity 1).`,
    )
  }

  // --session-id / --continue resume a specific session on its original
  // environment; incompatible with spawn-related flags (which configure
  // fresh session creation), and mutually exclusive with each other.
  if (
    (sessionId || continueSession) &&
    (spawnMode !== undefined ||
      capacity !== undefined ||
      createSessionInDir !== undefined)
  ) {
    return makeError(
      `--session-id and --continue cannot be used with --spawn, --capacity, or --create-session-in-dir.`,
    )
  }
  if (sessionId && continueSession) {
    return makeError(`--session-id and --continue cannot be used together.`)
  }

  return snapshot()

  /** Current values of all parsed flags, without an `error` field. */
  function snapshot(): ParsedArgs {
    return {
      verbose,
      sandbox,
      debugFile,
      sessionTimeoutMs,
      permissionMode,
      name,
      spawnMode,
      capacity,
      createSessionInDir,
      sessionId,
      continueSession,
      help,
    }
  }

  /** Result for a failed parse: everything parsed so far plus the message. */
  function makeError(error: string): ParsedArgs {
    return { ...snapshot(), error }
  }
}
1888
+
1889
/**
 * Print the `claude remote-control` usage text to stdout.
 *
 * The multi-session options, description paragraph and worktree note are
 * included only when `isMultiSessionSpawnEnabled()` resolves truthy; the
 * resume options only when `feature('KAIROS')` is on. Option rows are built
 * with a computed column width so flags and descriptions stay aligned when
 * entries are added or renamed.
 */
async function printHelp(): Promise<void> {
  // Use EXTERNAL_PERMISSION_MODES for help text — internal modes (bubble)
  // are ant-only and auto is feature-gated; they're still accepted by validation.
  const { EXTERNAL_PERMISSION_MODES } = await import('../types/permissions.js')
  const modes = EXTERNAL_PERMISSION_MODES.join(', ')
  const showServer = await isMultiSessionSpawnEnabled()

  const INDENT = '  '
  // Wider than the longest flag spelling (`--[no-]create-session-in-dir`).
  const FLAG_COLUMN_WIDTH = 30

  /** One option entry: flag on the first row, description lines aligned. */
  const option = (flag: string, ...description: string[]): string[] =>
    description.map(
      (text, row) =>
        `${INDENT}${(row === 0 ? flag : '').padEnd(FLAG_COLUMN_WIDTH)} ${text}`,
    )

  const resumeOptions = feature('KAIROS')
    ? [
        ...option('-c, --continue', 'Resume the last session in this directory'),
        ...option(
          '--session-id <id>',
          'Resume a specific session by ID (cannot be',
          'used with spawn flags or --continue)',
        ),
      ]
    : []

  const serverOptions = showServer
    ? [
        ...option(
          '--spawn <mode>',
          'Spawn mode: same-dir, worktree, session',
          '(default: same-dir)',
        ),
        ...option(
          '--capacity <N>',
          'Max concurrent sessions in worktree or',
          `same-dir mode (default: ${SPAWN_SESSIONS_DEFAULT})`,
        ),
        ...option(
          '--[no-]create-session-in-dir',
          'Pre-create a session in the current',
          'directory; in worktree mode this session',
          'stays in cwd while on-demand sessions get',
          'isolated worktrees (default: on)',
        ),
      ]
    : []

  const serverDescription = showServer
    ? [
        '',
        `${INDENT}Remote Control runs as a persistent server that accepts multiple concurrent`,
        `${INDENT}sessions in the current directory. One session is pre-created on start so`,
        `${INDENT}you have somewhere to type immediately. Use --spawn=worktree to isolate`,
        `${INDENT}each on-demand session in its own git worktree, or --spawn=session for`,
        `${INDENT}the classic single-session mode (exits when that session ends). Press 'w'`,
        `${INDENT}during runtime to toggle between same-dir and worktree.`,
      ]
    : []

  const serverNote = showServer
    ? [
        `${INDENT}- Worktree mode requires a git repository or WorktreeCreate/WorktreeRemove hooks`,
      ]
    : []

  const help = [
    '',
    'Remote Control - Connect your local environment to claude.ai/code',
    '',
    'USAGE',
    `${INDENT}claude remote-control [options]`,
    'OPTIONS',
    ...option('--name <name>', 'Name for the session (shown in claude.ai/code)'),
    ...resumeOptions,
    ...option(
      '--permission-mode <mode>',
      'Permission mode for spawned sessions',
      `(${modes})`,
    ),
    ...option('--debug-file <path>', 'Write debug logs to file'),
    ...option('-v, --verbose', 'Enable verbose output'),
    ...option('-h, --help', 'Show this help'),
    ...serverOptions,
    '',
    'DESCRIPTION',
    `${INDENT}Remote Control allows you to control sessions on your local device from`,
    `${INDENT}claude.ai/code (https://claude.ai/code). Run this command in the`,
    `${INDENT}directory you want to work in, then connect from the Claude app or web.`,
    ...serverDescription,
    '',
    'NOTES',
    `${INDENT}- You must be logged in with a Claude account that has a subscription`,
    `${INDENT}- Run \`claude\` first in the directory to accept the workspace trust dialog`,
    ...serverNote,
    // Trailing empty entry keeps the final newline of the help text.
    '',
  ].join('\n')

  // biome-ignore lint/suspicious/noConsole: intentional help output
  console.log(help)
}
1952
+
1953
/** Width limit passed to `truncateToWidth` for derived session titles. */
const TITLE_MAX_LEN = 80

/**
 * Derive a session title from a user message: the whole message flattened
 * onto a single line, then truncated to `TITLE_MAX_LEN`.
 *
 * @param text - Raw user message; may span several lines.
 * @returns The single-line title.
 */
function deriveSessionTitle(text: string): string {
  // Newlines/tabs would break the single-line status display, so every run
  // of whitespace becomes one space and the ends are trimmed.
  const words = text.split(/\s+/).filter(word => word.length > 0)
  const singleLine = words.join(' ')
  return truncateToWidth(singleLine, TITLE_MAX_LEN)
}
1961
+
1962
/**
 * One-shot fetch of a session's title via GET /v1/sessions/{id}.
 *
 * Goes through `getBridgeSession` from createSession.ts (ccr-byoc headers +
 * org UUID) instead of the environments-level bridgeApi client, whose headers
 * make the Sessions API return 404.
 *
 * @param compatSessionId - Session ID in the form the Sessions API expects.
 * @param baseUrl - Base URL of the API to query.
 * @returns The session title, or `undefined` when the lookup yields no
 *   session or an empty title; the caller then falls back to deriving a title
 *   from the first user message. NOTE(review): nothing is caught here, so
 *   "undefined on fetch failure" relies on `getBridgeSession` returning a
 *   nullish value rather than throwing — confirm.
 */
async function fetchSessionTitle(
  compatSessionId: string,
  baseUrl: string,
): Promise<string | undefined> {
  // Loaded lazily, like the other bridge helpers in this file.
  const createSessionModule = await import('./createSession.js')
  const session = await createSessionModule.getBridgeSession(compatSessionId, {
    baseUrl,
  })
  const title = session?.title
  // Normalise every falsy title (missing, null, '') to undefined.
  return title ? title : undefined
}
1979
+
1980
+ export async function bridgeMain(args: string[]): Promise<void> {
1981
+ const parsed = parseArgs(args)
1982
+
1983
+ if (parsed.help) {
1984
+ await printHelp()
1985
+ return
1986
+ }
1987
+ if (parsed.error) {
1988
+ // biome-ignore lint/suspicious/noConsole: intentional error output
1989
+ console.error(`Error: ${parsed.error}`)
1990
+ // eslint-disable-next-line custom-rules/no-process-exit
1991
+ process.exit(1)
1992
+ }
1993
+
1994
+ const {
1995
+ verbose,
1996
+ sandbox,
1997
+ debugFile,
1998
+ sessionTimeoutMs,
1999
+ permissionMode,
2000
+ name,
2001
+ spawnMode: parsedSpawnMode,
2002
+ capacity: parsedCapacity,
2003
+ createSessionInDir: parsedCreateSessionInDir,
2004
+ sessionId: parsedSessionId,
2005
+ continueSession,
2006
+ } = parsed
2007
+ // Mutable so --continue can set it from the pointer file. The #20460
2008
+ // resume flow below then treats it the same as an explicit --session-id.
2009
+ let resumeSessionId = parsedSessionId
2010
+ // When --continue found a pointer, this is the directory it came from
2011
+ // (may be a worktree sibling, not `dir`). On resume-flow deterministic
2012
+ // failure, clear THIS file so --continue doesn't keep hitting the same
2013
+ // dead session. Undefined for explicit --session-id (leaves pointer alone).
2014
+ let resumePointerDir: string | undefined
2015
+
2016
+ const usedMultiSessionFeature =
2017
+ parsedSpawnMode !== undefined ||
2018
+ parsedCapacity !== undefined ||
2019
+ parsedCreateSessionInDir !== undefined
2020
+
2021
+ // Validate permission mode early so the user gets an error before
2022
+ // the bridge starts polling for work.
2023
+ if (permissionMode !== undefined) {
2024
+ const { PERMISSION_MODES } = await import('../types/permissions.js')
2025
+ const valid: readonly string[] = PERMISSION_MODES
2026
+ if (!valid.includes(permissionMode)) {
2027
+ // biome-ignore lint/suspicious/noConsole: intentional error output
2028
+ console.error(
2029
+ `Error: Invalid permission mode '${permissionMode}'. Valid modes: ${valid.join(', ')}`,
2030
+ )
2031
+ // eslint-disable-next-line custom-rules/no-process-exit
2032
+ process.exit(1)
2033
+ }
2034
+ }
2035
+
2036
+ const dir = resolve('.')
2037
+
2038
+ // The bridge fast-path bypasses init.ts, so we must enable config reading
2039
+ // before any code that transitively calls getGlobalConfig()
2040
+ const { enableConfigs, checkHasTrustDialogAccepted } = await import(
2041
+ '../utils/config.js'
2042
+ )
2043
+ enableConfigs()
2044
+
2045
+ // Initialize analytics and error reporting sinks. The bridge bypasses the
2046
+ // setup() init flow, so we call initSinks() directly to attach sinks here.
2047
+ const { initSinks } = await import('../utils/sinks.js')
2048
+ initSinks()
2049
+
2050
+ // Gate-aware validation: --spawn / --capacity / --create-session-in-dir require
2051
+ // the multi-session gate. parseArgs has already validated flag combinations;
2052
+ // here we only check the gate since that requires an async GrowthBook call.
2053
+ // Runs after enableConfigs() (GrowthBook cache reads global config) and after
2054
+ // initSinks() so the denial event can be enqueued.
2055
+ const multiSessionEnabled = await isMultiSessionSpawnEnabled()
2056
+ if (usedMultiSessionFeature && !multiSessionEnabled) {
2057
+ await logEventAsync('tengu_bridge_multi_session_denied', {
2058
+ used_spawn: parsedSpawnMode !== undefined,
2059
+ used_capacity: parsedCapacity !== undefined,
2060
+ used_create_session_in_dir: parsedCreateSessionInDir !== undefined,
2061
+ })
2062
+ // logEventAsync only enqueues — process.exit() discards buffered events.
2063
+ // Flush explicitly, capped at 500ms to match gracefulShutdown.ts.
2064
+ // (sleep() doesn't unref its timer, but process.exit() follows immediately
2065
+ // so the ref'd timer can't delay shutdown.)
2066
+ await Promise.race([
2067
+ Promise.all([shutdown1PEventLogging(), shutdownDatadog()]),
2068
+ sleep(500, undefined, { unref: true }),
2069
+ ]).catch(() => {})
2070
+ // biome-ignore lint/suspicious/noConsole: intentional error output
2071
+ console.error(
2072
+ 'Error: Multi-session Remote Control is not enabled for your account yet.',
2073
+ )
2074
+ // eslint-disable-next-line custom-rules/no-process-exit
2075
+ process.exit(1)
2076
+ }
2077
+
2078
+ // Set the bootstrap CWD so that trust checks, project config lookups, and
2079
+ // git utilities (getBranch, getRemoteUrl) resolve against the correct path.
2080
+ const { setOriginalCwd, setCwdState } = await import('../bootstrap/state.js')
2081
+ setOriginalCwd(dir)
2082
+ setCwdState(dir)
2083
+
2084
+ // The bridge bypasses main.tsx (which renders the interactive TrustDialog via showSetupScreens),
2085
+ // so we must verify trust was previously established by a normal `claude` session.
2086
+ if (!checkHasTrustDialogAccepted()) {
2087
+ // biome-ignore lint/suspicious/noConsole:: intentional console output
2088
+ console.error(
2089
+ `Error: Workspace not trusted. Please run \`claude\` in ${dir} first to review and accept the workspace trust dialog.`,
2090
+ )
2091
+ // eslint-disable-next-line custom-rules/no-process-exit
2092
+ process.exit(1)
2093
+ }
2094
+
2095
+ // Resolve auth
2096
+ const { clearOAuthTokenCache, checkAndRefreshOAuthTokenIfNeeded } =
2097
+ await import('../utils/auth.js')
2098
+ const { getBridgeAccessToken, getBridgeBaseUrl } = await import(
2099
+ './bridgeConfig.js'
2100
+ )
2101
+
2102
+ const bridgeToken = getBridgeAccessToken()
2103
+ if (!bridgeToken) {
2104
+ // biome-ignore lint/suspicious/noConsole:: intentional console output
2105
+ console.error(BRIDGE_LOGIN_ERROR)
2106
+ // eslint-disable-next-line custom-rules/no-process-exit
2107
+ process.exit(1)
2108
+ }
2109
+
2110
+ // First-time remote dialog — explain what bridge does and get consent
2111
+ const {
2112
+ getGlobalConfig,
2113
+ saveGlobalConfig,
2114
+ getCurrentProjectConfig,
2115
+ saveCurrentProjectConfig,
2116
+ } = await import('../utils/config.js')
2117
+ if (!getGlobalConfig().remoteDialogSeen) {
2118
+ const readline = await import('readline')
2119
+ const rl = readline.createInterface({
2120
+ input: process.stdin,
2121
+ output: process.stdout,
2122
+ })
2123
+ // biome-ignore lint/suspicious/noConsole:: intentional console output
2124
+ console.log(
2125
+ '\nRemote Control lets you access this CLI session from the web (claude.ai/code)\nor the Claude app, so you can pick up where you left off on any device.\n\nYou can disconnect remote access anytime by running /remote-control again.\n',
2126
+ )
2127
+ const answer = await new Promise<string>(resolve => {
2128
+ rl.question('Enable Remote Control? (y/n) ', resolve)
2129
+ })
2130
+ rl.close()
2131
+ saveGlobalConfig(current => {
2132
+ if (current.remoteDialogSeen) return current
2133
+ return { ...current, remoteDialogSeen: true }
2134
+ })
2135
+ if (answer.toLowerCase() !== 'y' && answer.toLowerCase() !== 'yes') {
2136
+ // eslint-disable-next-line custom-rules/no-process-exit
2137
+ process.exit(0)
2138
+ }
2139
+ }
2140
+
2141
+ // --continue: resolve the most recent session from the crash-recovery
2142
+ // pointer and chain into the #20460 --session-id flow. Worktree-aware:
2143
+ // checks current dir first (fast path, zero exec), then fans out to git
2144
+ // worktree siblings if that misses — the REPL bridge writes to
2145
+ // getOriginalCwd() which EnterWorktreeTool/activeWorktreeSession can
2146
+ // point at a worktree while the user's shell is at the repo root.
2147
+ // KAIROS-gated at parseArgs — continueSession is always false in external
2148
+ // builds, so this block tree-shakes.
2149
+ if (feature('KAIROS') && continueSession) {
2150
+ const { readBridgePointerAcrossWorktrees } = await import(
2151
+ './bridgePointer.js'
2152
+ )
2153
+ const found = await readBridgePointerAcrossWorktrees(dir)
2154
+ if (!found) {
2155
+ // biome-ignore lint/suspicious/noConsole: intentional error output
2156
+ console.error(
2157
+ `Error: No recent session found in this directory or its worktrees. Run \`claude remote-control\` to start a new one.`,
2158
+ )
2159
+ // eslint-disable-next-line custom-rules/no-process-exit
2160
+ process.exit(1)
2161
+ }
2162
+ const { pointer, dir: pointerDir } = found
2163
+ const ageMin = Math.round(pointer.ageMs / 60_000)
2164
+ const ageStr = ageMin < 60 ? `${ageMin}m` : `${Math.round(ageMin / 60)}h`
2165
+ const fromWt = pointerDir !== dir ? ` from worktree ${pointerDir}` : ''
2166
+ // biome-ignore lint/suspicious/noConsole: intentional info output
2167
+ console.error(
2168
+ `Resuming session ${pointer.sessionId} (${ageStr} ago)${fromWt}\u2026`,
2169
+ )
2170
+ resumeSessionId = pointer.sessionId
2171
+ // Track where the pointer came from so the #20460 exit(1) paths below
2172
+ // clear the RIGHT file on deterministic failure — otherwise --continue
2173
+ // would keep hitting the same dead session. May be a worktree sibling.
2174
+ resumePointerDir = pointerDir
2175
+ }
2176
+
2177
+ // In production, baseUrl is the Anthropic API (from OAuth config).
2178
+ // CLAUDE_BRIDGE_BASE_URL overrides this for ant local dev only.
2179
+ const baseUrl = getBridgeBaseUrl()
2180
+
2181
+ // For non-localhost targets, require HTTPS to protect credentials.
2182
+ if (
2183
+ baseUrl.startsWith('http://') &&
2184
+ !baseUrl.includes('localhost') &&
2185
+ !baseUrl.includes('127.0.0.1')
2186
+ ) {
2187
+ // biome-ignore lint/suspicious/noConsole:: intentional console output
2188
+ console.error(
2189
+ 'Error: Remote Control base URL uses HTTP. Only HTTPS or localhost HTTP is allowed.',
2190
+ )
2191
+ // eslint-disable-next-line custom-rules/no-process-exit
2192
+ process.exit(1)
2193
+ }
2194
+
2195
+ // Session ingress URL for WebSocket connections. In production this is the
2196
+ // same as baseUrl (Envoy routes /v1/session_ingress/* to session-ingress).
2197
+ // Locally, session-ingress runs on a different port (9413) than the
2198
+ // contain-provide-api (8211), so CLAUDE_BRIDGE_SESSION_INGRESS_URL must be
2199
+ // set explicitly. Ant-only, matching CLAUDE_BRIDGE_BASE_URL.
2200
+ const sessionIngressUrl =
2201
+ process.env.USER_TYPE === 'ant' &&
2202
+ process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
2203
+ ? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
2204
+ : baseUrl
2205
+
2206
+ const { getBranch, getRemoteUrl, findGitRoot } = await import(
2207
+ '../utils/git.js'
2208
+ )
2209
+
2210
+ // Precheck worktree availability for the first-run dialog and the `w`
2211
+ // toggle. Unconditional so we know upfront whether worktree is an option.
2212
+ const { hasWorktreeCreateHook } = await import('../utils/hooks.js')
2213
+ const worktreeAvailable = hasWorktreeCreateHook() || findGitRoot(dir) !== null
2214
+
2215
+ // Load saved per-project spawn-mode preference. Gated by multiSessionEnabled
2216
+ // so a GrowthBook rollback cleanly reverts users to single-session —
2217
+ // otherwise a saved pref would silently re-enable multi-session behavior
2218
+ // (worktree isolation, 32 max sessions, w toggle) despite the gate being off.
2219
+ // Also guard against a stale worktree pref left over from when this dir WAS
2220
+ // a git repo (or the user copied config) — clear it on disk so the warning
2221
+ // doesn't repeat on every launch.
2222
+ let savedSpawnMode = multiSessionEnabled
2223
+ ? getCurrentProjectConfig().remoteControlSpawnMode
2224
+ : undefined
2225
+ if (savedSpawnMode === 'worktree' && !worktreeAvailable) {
2226
+ // biome-ignore lint/suspicious/noConsole: intentional warning output
2227
+ console.error(
2228
+ 'Warning: Saved spawn mode is worktree but this directory is not a git repository. Falling back to same-dir.',
2229
+ )
2230
+ savedSpawnMode = undefined
2231
+ saveCurrentProjectConfig(current => {
2232
+ if (current.remoteControlSpawnMode === undefined) return current
2233
+ return { ...current, remoteControlSpawnMode: undefined }
2234
+ })
2235
+ }
2236
+
2237
+ // First-run spawn-mode choice: ask once per project when the choice is
2238
+ // meaningful (gate on, both modes available, no explicit override, not
2239
+ // resuming). Saves to ProjectConfig so subsequent runs skip this.
2240
+ if (
2241
+ multiSessionEnabled &&
2242
+ !savedSpawnMode &&
2243
+ worktreeAvailable &&
2244
+ parsedSpawnMode === undefined &&
2245
+ !resumeSessionId &&
2246
+ process.stdin.isTTY
2247
+ ) {
2248
+ const readline = await import('readline')
2249
+ const rl = readline.createInterface({
2250
+ input: process.stdin,
2251
+ output: process.stdout,
2252
+ })
2253
+ // biome-ignore lint/suspicious/noConsole: intentional dialog output
2254
+ console.log(
2255
+ `\nClaude Remote Control is launching in spawn mode which lets you create new sessions in this project from Claude Code on Web or your Mobile app. Learn more here: https://code.claude.com/docs/en/remote-control\n\n` +
2256
+ `Spawn mode for this project:\n` +
2257
+ ` [1] same-dir \u2014 sessions share the current directory (default)\n` +
2258
+ ` [2] worktree \u2014 each session gets an isolated git worktree\n\n` +
2259
+ `This can be changed later or explicitly set with --spawn=same-dir or --spawn=worktree.\n`,
2260
+ )
2261
+ const answer = await new Promise<string>(resolve => {
2262
+ rl.question('Choose [1/2] (default: 1): ', resolve)
2263
+ })
2264
+ rl.close()
2265
+ const chosen: 'same-dir' | 'worktree' =
2266
+ answer.trim() === '2' ? 'worktree' : 'same-dir'
2267
+ savedSpawnMode = chosen
2268
+ logEvent('tengu_bridge_spawn_mode_chosen', {
2269
+ spawn_mode:
2270
+ chosen as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2271
+ })
2272
+ saveCurrentProjectConfig(current => {
2273
+ if (current.remoteControlSpawnMode === chosen) return current
2274
+ return { ...current, remoteControlSpawnMode: chosen }
2275
+ })
2276
+ }
2277
+
2278
+ // Determine effective spawn mode.
2279
+ // Precedence: resume > explicit --spawn > saved project pref > gate default
2280
+ // - resuming via --continue / --session-id: always single-session (resume
2281
+ // targets one specific session in its original directory)
2282
+ // - explicit --spawn flag: use that value directly (does not persist)
2283
+ // - saved ProjectConfig.remoteControlSpawnMode: set by first-run dialog or `w`
2284
+ // - default with gate on: same-dir (persistent multi-session, shared cwd)
2285
+ // - default with gate off: single-session (unchanged legacy behavior)
2286
+ // Track how spawn mode was determined, for rollout analytics.
2287
+ type SpawnModeSource = 'resume' | 'flag' | 'saved' | 'gate_default'
2288
+ let spawnModeSource: SpawnModeSource
2289
+ let spawnMode: SpawnMode
2290
+ if (resumeSessionId) {
2291
+ spawnMode = 'single-session'
2292
+ spawnModeSource = 'resume'
2293
+ } else if (parsedSpawnMode !== undefined) {
2294
+ spawnMode = parsedSpawnMode
2295
+ spawnModeSource = 'flag'
2296
+ } else if (savedSpawnMode !== undefined) {
2297
+ spawnMode = savedSpawnMode
2298
+ spawnModeSource = 'saved'
2299
+ } else {
2300
+ spawnMode = multiSessionEnabled ? 'same-dir' : 'single-session'
2301
+ spawnModeSource = 'gate_default'
2302
+ }
2303
+ const maxSessions =
2304
+ spawnMode === 'single-session'
2305
+ ? 1
2306
+ : (parsedCapacity ?? SPAWN_SESSIONS_DEFAULT)
2307
+ // Pre-create an empty session on start so the user has somewhere to type
2308
+ // immediately, running in the current directory (exempted from worktree
2309
+ // creation in the spawn loop). On by default; --no-create-session-in-dir
2310
+ // opts out for a pure on-demand server where every session is isolated.
2311
+ // The effectiveResumeSessionId guard at the creation site handles the
2312
+ // resume case (skip creation when resume succeeded; fall through to
2313
+ // fresh creation on env-mismatch fallback).
2314
+ const preCreateSession = parsedCreateSessionInDir ?? true
2315
+
2316
+ // Without --continue: a leftover pointer means the previous run didn't
2317
+ // shut down cleanly (crash, kill -9, terminal closed). Clear it so the
2318
+ // stale env doesn't linger past its relevance. Runs in all modes
2319
+ // (clearBridgePointer is a no-op when no file exists) — covers the
2320
+ // gate-transition case where a user crashed in single-session mode then
2321
+ // starts fresh in worktree mode. Only single-session mode writes new
2322
+ // pointers.
2323
+ if (!resumeSessionId) {
2324
+ const { clearBridgePointer } = await import('./bridgePointer.js')
2325
+ await clearBridgePointer(dir)
2326
+ }
2327
+
2328
+ // Worktree mode requires either git or WorktreeCreate/WorktreeRemove hooks.
2329
+ // Only reachable via explicit --spawn=worktree (default is same-dir);
2330
+ // saved worktree pref was already guarded above.
2331
+ if (spawnMode === 'worktree' && !worktreeAvailable) {
2332
+ // biome-ignore lint/suspicious/noConsole: intentional error output
2333
+ console.error(
2334
+ `Error: Worktree mode requires a git repository or WorktreeCreate hooks configured. Use --spawn=session for single-session mode.`,
2335
+ )
2336
+ // eslint-disable-next-line custom-rules/no-process-exit
2337
+ process.exit(1)
2338
+ }
2339
+
2340
+ const branch = await getBranch()
2341
+ const gitRepoUrl = await getRemoteUrl()
2342
+ const machineName = hostname()
2343
+ const bridgeId = randomUUID()
2344
+
2345
+ const { handleOAuth401Error } = await import('../utils/auth.js')
2346
+ const api = createBridgeApiClient({
2347
+ baseUrl,
2348
+ getAccessToken: getBridgeAccessToken,
2349
+ runnerVersion: MACRO.VERSION,
2350
+ onDebug: logForDebugging,
2351
+ onAuth401: handleOAuth401Error,
2352
+ getTrustedDeviceToken,
2353
+ })
2354
+
2355
+ // When resuming a session via --session-id, fetch it to learn its
2356
+ // environment_id and reuse that for registration (idempotent on the
2357
+ // backend). Left undefined otherwise — the backend rejects
2358
+ // client-generated UUIDs and will allocate a fresh environment.
2359
+ // feature('KAIROS') gate: --session-id is ant-only; parseArgs already
2360
+ // rejects the flag when the gate is off, so resumeSessionId is always
2361
+ // undefined here in external builds — this guard is for tree-shaking.
2362
+ let reuseEnvironmentId: string | undefined
2363
+ if (feature('KAIROS') && resumeSessionId) {
2364
+ try {
2365
+ validateBridgeId(resumeSessionId, 'sessionId')
2366
+ } catch {
2367
+ // biome-ignore lint/suspicious/noConsole: intentional error output
2368
+ console.error(
2369
+ `Error: Invalid session ID "${resumeSessionId}". Session IDs must not contain unsafe characters.`,
2370
+ )
2371
+ // eslint-disable-next-line custom-rules/no-process-exit
2372
+ process.exit(1)
2373
+ }
2374
+ // Proactively refresh the OAuth token — getBridgeSession uses raw axios
2375
+ // without the withOAuthRetry 401-refresh logic. An expired-but-present
2376
+ // token would otherwise produce a misleading "not found" error.
2377
+ await checkAndRefreshOAuthTokenIfNeeded()
2378
+ clearOAuthTokenCache()
2379
+ const { getBridgeSession } = await import('./createSession.js')
2380
+ const session = await getBridgeSession(resumeSessionId, {
2381
+ baseUrl,
2382
+ getAccessToken: getBridgeAccessToken,
2383
+ })
2384
+ if (!session) {
2385
+ // Session gone on server → pointer is stale. Clear it so the user
2386
+ // isn't re-prompted next launch. (Explicit --session-id leaves the
2387
+ // pointer alone — it's an independent file they may not even have.)
2388
+ // resumePointerDir may be a worktree sibling — clear THAT file.
2389
+ if (resumePointerDir) {
2390
+ const { clearBridgePointer } = await import('./bridgePointer.js')
2391
+ await clearBridgePointer(resumePointerDir)
2392
+ }
2393
+ // biome-ignore lint/suspicious/noConsole: intentional error output
2394
+ console.error(
2395
+ `Error: Session ${resumeSessionId} not found. It may have been archived or expired, or your login may have lapsed (run \`claude /login\`).`,
2396
+ )
2397
+ // eslint-disable-next-line custom-rules/no-process-exit
2398
+ process.exit(1)
2399
+ }
2400
+ if (!session.environment_id) {
2401
+ if (resumePointerDir) {
2402
+ const { clearBridgePointer } = await import('./bridgePointer.js')
2403
+ await clearBridgePointer(resumePointerDir)
2404
+ }
2405
+ // biome-ignore lint/suspicious/noConsole: intentional error output
2406
+ console.error(
2407
+ `Error: Session ${resumeSessionId} has no environment_id. It may never have been attached to a bridge.`,
2408
+ )
2409
+ // eslint-disable-next-line custom-rules/no-process-exit
2410
+ process.exit(1)
2411
+ }
2412
+ reuseEnvironmentId = session.environment_id
2413
+ logForDebugging(
2414
+ `[bridge:init] Resuming session ${resumeSessionId} on environment ${reuseEnvironmentId}`,
2415
+ )
2416
+ }
2417
+
2418
+ const config: BridgeConfig = {
2419
+ dir,
2420
+ machineName,
2421
+ branch,
2422
+ gitRepoUrl,
2423
+ maxSessions,
2424
+ spawnMode,
2425
+ verbose,
2426
+ sandbox,
2427
+ bridgeId,
2428
+ workerType: 'claude_code',
2429
+ environmentId: randomUUID(),
2430
+ reuseEnvironmentId,
2431
+ apiBaseUrl: baseUrl,
2432
+ sessionIngressUrl,
2433
+ debugFile,
2434
+ sessionTimeoutMs,
2435
+ }
2436
+
2437
+ logForDebugging(
2438
+ `[bridge:init] bridgeId=${bridgeId}${reuseEnvironmentId ? ` reuseEnvironmentId=${reuseEnvironmentId}` : ''} dir=${dir} branch=${branch} gitRepoUrl=${gitRepoUrl} machine=${machineName}`,
2439
+ )
2440
+ logForDebugging(
2441
+ `[bridge:init] apiBaseUrl=${baseUrl} sessionIngressUrl=${sessionIngressUrl}`,
2442
+ )
2443
+ logForDebugging(
2444
+ `[bridge:init] sandbox=${sandbox}${debugFile ? ` debugFile=${debugFile}` : ''}`,
2445
+ )
2446
+
2447
+ // Register the bridge environment before entering the poll loop.
2448
+ let environmentId: string
2449
+ let environmentSecret: string
2450
+ try {
2451
+ const reg = await api.registerBridgeEnvironment(config)
2452
+ environmentId = reg.environment_id
2453
+ environmentSecret = reg.environment_secret
2454
+ } catch (err) {
2455
+ logEvent('tengu_bridge_registration_failed', {
2456
+ status: err instanceof BridgeFatalError ? err.status : undefined,
2457
+ })
2458
+ // Registration failures are fatal — print a clean message instead of a stack trace.
2459
+ // biome-ignore lint/suspicious/noConsole:: intentional console output
2460
+ console.error(
2461
+ err instanceof BridgeFatalError && err.status === 404
2462
+ ? 'Remote Control environments are not available for your account.'
2463
+ : `Error: ${errorMessage(err)}`,
2464
+ )
2465
+ // eslint-disable-next-line custom-rules/no-process-exit
2466
+ process.exit(1)
2467
+ }
2468
+
2469
+ // Tracks whether the --session-id resume flow completed successfully.
2470
+ // Used below to skip fresh session creation and seed initialSessionId.
2471
+ // Cleared on env mismatch so we gracefully fall back to a new session.
2472
+ let effectiveResumeSessionId: string | undefined
2473
+ if (feature('KAIROS') && resumeSessionId) {
2474
+ if (reuseEnvironmentId && environmentId !== reuseEnvironmentId) {
2475
+ // Backend returned a different environment_id — the original env
2476
+ // expired or was reaped. Reconnect won't work against the new env
2477
+ // (session is bound to the old one). Log to sentry for visibility
2478
+ // and fall through to fresh session creation on the new env.
2479
+ logError(
2480
+ new Error(
2481
+ `Bridge resume env mismatch: requested ${reuseEnvironmentId}, backend returned ${environmentId}. Falling back to fresh session.`,
2482
+ ),
2483
+ )
2484
+ // biome-ignore lint/suspicious/noConsole: intentional warning output
2485
+ console.warn(
2486
+ `Warning: Could not resume session ${resumeSessionId} — its environment has expired. Creating a fresh session instead.`,
2487
+ )
2488
+ // Don't deregister — we're going to use this new environment.
2489
+ // effectiveResumeSessionId stays undefined → fresh session path below.
2490
+ } else {
2491
+ // Force-stop any stale worker instances for this session and re-queue
2492
+ // it so our poll loop picks it up. Must happen after registration so
2493
+ // the backend knows a live worker exists for the environment.
2494
+ //
2495
+ // The pointer stores a session_* ID but /bridge/reconnect looks
2496
+ // sessions up by their infra tag (cse_*) when ccr_v2_compat_enabled
2497
+ // is on. Try both; the conversion is a no-op if already cse_*.
2498
+ const infraResumeId = toInfraSessionId(resumeSessionId)
2499
+ const reconnectCandidates =
2500
+ infraResumeId === resumeSessionId
2501
+ ? [resumeSessionId]
2502
+ : [resumeSessionId, infraResumeId]
2503
+ let reconnected = false
2504
+ let lastReconnectErr: unknown
2505
+ for (const candidateId of reconnectCandidates) {
2506
+ try {
2507
+ await api.reconnectSession(environmentId, candidateId)
2508
+ logForDebugging(
2509
+ `[bridge:init] Session ${candidateId} re-queued via bridge/reconnect`,
2510
+ )
2511
+ effectiveResumeSessionId = resumeSessionId
2512
+ reconnected = true
2513
+ break
2514
+ } catch (err) {
2515
+ lastReconnectErr = err
2516
+ logForDebugging(
2517
+ `[bridge:init] reconnectSession(${candidateId}) failed: ${errorMessage(err)}`,
2518
+ )
2519
+ }
2520
+ }
2521
+ if (!reconnected) {
2522
+ const err = lastReconnectErr
2523
+
2524
+ // Do NOT deregister on transient reconnect failure — at this point
2525
+ // environmentId IS the session's own environment. Deregistering
2526
+ // would make retry impossible. The backend's 4h TTL cleans up.
2527
+ const isFatal = err instanceof BridgeFatalError
2528
+ // Clear pointer only on fatal reconnect failure. Transient failures
2529
+ // ("try running the same command again") should keep the pointer so
2530
+ // next launch re-prompts — that IS the retry mechanism.
2531
+ if (resumePointerDir && isFatal) {
2532
+ const { clearBridgePointer } = await import('./bridgePointer.js')
2533
+ await clearBridgePointer(resumePointerDir)
2534
+ }
2535
+ // biome-ignore lint/suspicious/noConsole: intentional error output
2536
+ console.error(
2537
+ isFatal
2538
+ ? `Error: ${errorMessage(err)}`
2539
+ : `Error: Failed to reconnect session ${resumeSessionId}: ${errorMessage(err)}\nThe session may still be resumable — try running the same command again.`,
2540
+ )
2541
+ // eslint-disable-next-line custom-rules/no-process-exit
2542
+ process.exit(1)
2543
+ }
2544
+ }
2545
+ }
2546
+
2547
+ logForDebugging(
2548
+ `[bridge:init] Registered, server environmentId=${environmentId}`,
2549
+ )
2550
+ const startupPollConfig = getPollIntervalConfig()
2551
+ logEvent('tengu_bridge_started', {
2552
+ max_sessions: config.maxSessions,
2553
+ has_debug_file: !!config.debugFile,
2554
+ sandbox: config.sandbox,
2555
+ verbose: config.verbose,
2556
+ heartbeat_interval_ms:
2557
+ startupPollConfig.non_exclusive_heartbeat_interval_ms,
2558
+ spawn_mode:
2559
+ config.spawnMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2560
+ spawn_mode_source:
2561
+ spawnModeSource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2562
+ multi_session_gate: multiSessionEnabled,
2563
+ pre_create_session: preCreateSession,
2564
+ worktree_available: worktreeAvailable,
2565
+ })
2566
+ logForDiagnosticsNoPII('info', 'bridge_started', {
2567
+ max_sessions: config.maxSessions,
2568
+ sandbox: config.sandbox,
2569
+ spawn_mode: config.spawnMode,
2570
+ })
2571
+
2572
+ const spawner = createSessionSpawner({
2573
+ execPath: process.execPath,
2574
+ scriptArgs: spawnScriptArgs(),
2575
+ env: process.env,
2576
+ verbose,
2577
+ sandbox,
2578
+ debugFile,
2579
+ permissionMode,
2580
+ onDebug: logForDebugging,
2581
+ onActivity: (sessionId, activity) => {
2582
+ logForDebugging(
2583
+ `[bridge:activity] sessionId=${sessionId} ${activity.type} ${activity.summary}`,
2584
+ )
2585
+ },
2586
+ onPermissionRequest: (sessionId, request, _accessToken) => {
2587
+ logForDebugging(
2588
+ `[bridge:perm] sessionId=${sessionId} tool=${request.request.tool_name} request_id=${request.request_id} (not auto-approving)`,
2589
+ )
2590
+ },
2591
+ })
2592
+
2593
+ const logger = createBridgeLogger({ verbose })
2594
+ const { parseGitHubRepository } = await import('../utils/detectRepository.js')
2595
+ const ownerRepo = gitRepoUrl ? parseGitHubRepository(gitRepoUrl) : null
2596
+ // Use the repo name from the parsed owner/repo, or fall back to the dir basename
2597
+ const repoName = ownerRepo ? ownerRepo.split('/').pop()! : basename(dir)
2598
+ logger.setRepoInfo(repoName, branch)
2599
+
2600
+ // `w` toggle is available iff we're in a multi-session mode AND worktree
2601
+ // is a valid option. When unavailable, the mode suffix and hint are hidden.
2602
+ const toggleAvailable = spawnMode !== 'single-session' && worktreeAvailable
2603
+ if (toggleAvailable) {
2604
+ // Safe cast: spawnMode is not single-session (checked above), and the
2605
+ // saved-worktree-in-non-git guard + exit check above ensure worktree
2606
+ // is only reached when available.
2607
+ logger.setSpawnModeDisplay(spawnMode as 'same-dir' | 'worktree')
2608
+ }
2609
+
2610
+ // Listen for keys: space toggles QR code, w toggles spawn mode
2611
+ const onStdinData = (data: Buffer): void => {
2612
+ if (data[0] === 0x03 || data[0] === 0x04) {
2613
+ // Ctrl+C / Ctrl+D — trigger graceful shutdown
2614
+ process.emit('SIGINT')
2615
+ return
2616
+ }
2617
+ if (data[0] === 0x20 /* space */) {
2618
+ logger.toggleQr()
2619
+ return
2620
+ }
2621
+ if (data[0] === 0x77 /* 'w' */) {
2622
+ if (!toggleAvailable) return
2623
+ const newMode: 'same-dir' | 'worktree' =
2624
+ config.spawnMode === 'same-dir' ? 'worktree' : 'same-dir'
2625
+ config.spawnMode = newMode
2626
+ logEvent('tengu_bridge_spawn_mode_toggled', {
2627
+ spawn_mode:
2628
+ newMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2629
+ })
2630
+ logger.logStatus(
2631
+ newMode === 'worktree'
2632
+ ? 'Spawn mode: worktree (new sessions get isolated git worktrees)'
2633
+ : 'Spawn mode: same-dir (new sessions share the current directory)',
2634
+ )
2635
+ logger.setSpawnModeDisplay(newMode)
2636
+ logger.refreshDisplay()
2637
+ saveCurrentProjectConfig(current => {
2638
+ if (current.remoteControlSpawnMode === newMode) return current
2639
+ return { ...current, remoteControlSpawnMode: newMode }
2640
+ })
2641
+ return
2642
+ }
2643
+ }
2644
+ if (process.stdin.isTTY) {
2645
+ process.stdin.setRawMode(true)
2646
+ process.stdin.resume()
2647
+ process.stdin.on('data', onStdinData)
2648
+ }
2649
+
2650
+ const controller = new AbortController()
2651
+ const onSigint = (): void => {
2652
+ logForDebugging('[bridge:shutdown] SIGINT received, shutting down')
2653
+ controller.abort()
2654
+ }
2655
+ const onSigterm = (): void => {
2656
+ logForDebugging('[bridge:shutdown] SIGTERM received, shutting down')
2657
+ controller.abort()
2658
+ }
2659
+ process.on('SIGINT', onSigint)
2660
+ process.on('SIGTERM', onSigterm)
2661
+
2662
+ // Auto-create an empty session so the user has somewhere to type
2663
+ // immediately (matching /remote-control behavior). Controlled by
2664
+ // preCreateSession: on by default; --no-create-session-in-dir opts out.
2665
+ // When a --session-id resume succeeded, skip creation entirely — the
2666
+ // session already exists and bridge/reconnect has re-queued it.
2667
+ // When resume was requested but failed on env mismatch, effectiveResumeSessionId
2668
+ // is undefined, so we fall through to fresh session creation (honoring the
2669
+ // "Creating a fresh session instead" warning printed above).
2670
+ let initialSessionId: string | null =
2671
+ feature('KAIROS') && effectiveResumeSessionId
2672
+ ? effectiveResumeSessionId
2673
+ : null
2674
+ if (preCreateSession && !(feature('KAIROS') && effectiveResumeSessionId)) {
2675
+ const { createBridgeSession } = await import('./createSession.js')
2676
+ try {
2677
+ initialSessionId = await createBridgeSession({
2678
+ environmentId,
2679
+ title: name,
2680
+ events: [],
2681
+ gitRepoUrl,
2682
+ branch,
2683
+ signal: controller.signal,
2684
+ baseUrl,
2685
+ getAccessToken: getBridgeAccessToken,
2686
+ permissionMode,
2687
+ })
2688
+ if (initialSessionId) {
2689
+ logForDebugging(
2690
+ `[bridge:init] Created initial session ${initialSessionId}`,
2691
+ )
2692
+ }
2693
+ } catch (err) {
2694
+ logForDebugging(
2695
+ `[bridge:init] Session creation failed (non-fatal): ${errorMessage(err)}`,
2696
+ )
2697
+ }
2698
+ }
2699
+
2700
+ // Crash-recovery pointer: write immediately so kill -9 at any point
2701
+ // after this leaves a recoverable trail. Covers both fresh sessions and
2702
+ // resumed ones (so a second crash after resume is still recoverable).
2703
+ // Cleared when runBridgeLoop falls through to archive+deregister; left in
2704
+ // place on the SIGINT resumable-shutdown return (backup for when the user
2705
+ // closes the terminal before copying the printed --session-id hint).
2706
+ // Refreshed hourly so a 5h+ session that crashes still has a fresh
2707
+ // pointer (staleness checks file mtime, backend TTL is rolling-from-poll).
2708
+ let pointerRefreshTimer: ReturnType<typeof setInterval> | null = null
2709
+ // Single-session only: --continue forces single-session mode on resume,
2710
+ // so a pointer written in multi-session mode would contradict the user's
2711
+ // config when they try to resume. The resumable-shutdown path is also
2712
+ // gated to single-session (line ~1254) so the pointer would be orphaned.
2713
+ if (initialSessionId && spawnMode === 'single-session') {
2714
+ const { writeBridgePointer } = await import('./bridgePointer.js')
2715
+ const pointerPayload = {
2716
+ sessionId: initialSessionId,
2717
+ environmentId,
2718
+ source: 'standalone' as const,
2719
+ }
2720
+ await writeBridgePointer(config.dir, pointerPayload)
2721
+ pointerRefreshTimer = setInterval(
2722
+ writeBridgePointer,
2723
+ 60 * 60 * 1000,
2724
+ config.dir,
2725
+ pointerPayload,
2726
+ )
2727
+ // Don't let the interval keep the process alive on its own.
2728
+ pointerRefreshTimer.unref?.()
2729
+ }
2730
+
2731
+ try {
2732
+ await runBridgeLoop(
2733
+ config,
2734
+ environmentId,
2735
+ environmentSecret,
2736
+ api,
2737
+ spawner,
2738
+ logger,
2739
+ controller.signal,
2740
+ undefined,
2741
+ initialSessionId ?? undefined,
2742
+ async () => {
2743
+ // Clear the memoized OAuth token cache so we re-read from secure
2744
+ // storage, picking up tokens refreshed by child processes.
2745
+ clearOAuthTokenCache()
2746
+ // Proactively refresh the token if it's expired on disk too.
2747
+ await checkAndRefreshOAuthTokenIfNeeded()
2748
+ return getBridgeAccessToken()
2749
+ },
2750
+ )
2751
+ } finally {
2752
+ if (pointerRefreshTimer !== null) {
2753
+ clearInterval(pointerRefreshTimer)
2754
+ }
2755
+ process.off('SIGINT', onSigint)
2756
+ process.off('SIGTERM', onSigterm)
2757
+ process.stdin.off('data', onStdinData)
2758
+ if (process.stdin.isTTY) {
2759
+ process.stdin.setRawMode(false)
2760
+ }
2761
+ process.stdin.pause()
2762
+ }
2763
+
2764
+ // The bridge bypasses init.ts (and its graceful shutdown handler), so we
2765
+ // must exit explicitly.
2766
+ // eslint-disable-next-line custom-rules/no-process-exit
2767
+ process.exit(0)
2768
+ }
2769
+
2770
+ // ─── Headless bridge (daemon worker) ────────────────────────────────────────
2771
+
2772
/**
 * Error type for configuration problems that retrying cannot fix.
 *
 * runBridgeHeadless throws this for an untrusted workspace, an unavailable
 * worktree mode, or a non-HTTPS base URL. The daemon worker is expected to
 * catch it and exit with EXIT_CODE_PERMANENT, which tells the supervisor to
 * park the worker rather than respawn it on backoff.
 */
export class BridgeHeadlessPermanentError extends Error {
  // Overrides the inherited 'Error' name so logs identify this error type.
  override name = 'BridgeHeadlessPermanentError'

  // Explicit constructor keeps `message` a required string for callers.
  constructor(message: string) {
    super(message)
  }
}
2784
+
2785
/**
 * Caller-supplied configuration for runBridgeHeadless().
 */
export type HeadlessBridgeOpts = {
  // ── Workspace ──────────────────────────────────────────────────────────
  /** Directory the worker chdir()s into and registers as the bridge dir. */
  dir: string
  /** Optional title given to the session pre-created on start. */
  name?: string

  // ── Session spawning ───────────────────────────────────────────────────
  /** Multi-session spawn strategy; 'worktree' needs a git repo or WorktreeCreate hook. */
  spawnMode: 'same-dir' | 'worktree'
  /** Registered as the bridge's maxSessions. */
  capacity: number
  /** When true, an empty session is created before the poll loop starts. */
  createSessionOnStart: boolean
  /** Forwarded to the bridge config as sessionTimeoutMs. */
  sessionTimeoutMs?: number

  // ── Child-session behaviour ────────────────────────────────────────────
  /** Forwarded to the bridge config and the session spawner. */
  sandbox: boolean
  /** Forwarded to the session spawner and to the pre-created session. */
  permissionMode?: string

  // ── Auth and logging hooks ─────────────────────────────────────────────
  /** Returns the current access token, or undefined when none is available. */
  getAccessToken: () => string | undefined
  /** Handed to the bridge API client as its onAuth401 callback. */
  onAuth401: (failedToken: string) => Promise<boolean>
  /** Line-oriented log sink used for debug output and the headless logger. */
  log: (s: string) => void
}
2798
+
2799
/**
 * Headless bridge entrypoint for the `remoteControl` daemon worker.
 *
 * This is the straight-line subset of bridgeMain(): it has no readline
 * dialogs, no stdin key handling, no TUI, and never calls process.exit().
 * Configuration is supplied by the caller (daemon.json), auth arrives over
 * IPC from the supervisor's AuthManager, and log lines go to the worker's
 * stdout pipe via `opts.log`.
 *
 * @param opts - Caller-supplied configuration and auth/log hooks.
 * @param signal - Abort signal; the returned promise resolves once it aborts
 *   and the poll loop has torn down.
 * @throws BridgeHeadlessPermanentError when the workspace is not trusted, the
 *   base URL is plain HTTP to a non-local host, or worktree mode is requested
 *   without a git repository or WorktreeCreate hook.
 * @throws Error for conditions treated as transient: no access token yet, or
 *   a failed environment registration. The worker maps permanent vs transient
 *   failures to the appropriate exit code.
 */
export async function runBridgeHeadless(
  opts: HeadlessBridgeOpts,
  signal: AbortSignal,
): Promise<void> {
  const { dir, log, spawnMode, sandbox, permissionMode } = opts

  // The worker starts in the supervisor's CWD. Switch directories and seed
  // the bootstrap CWD state first: the git helpers used further down
  // (getBranch/getRemoteUrl) read that state to locate the repo.
  process.chdir(dir)
  const { setOriginalCwd, setCwdState } = await import('../bootstrap/state.js')
  setOriginalCwd(dir)
  setCwdState(dir)

  const { enableConfigs, checkHasTrustDialogAccepted } = await import(
    '../utils/config.js'
  )
  enableConfigs()
  const { initSinks } = await import('../utils/sinks.js')
  initSinks()

  const workspaceTrusted = checkHasTrustDialogAccepted()
  if (!workspaceTrusted) {
    throw new BridgeHeadlessPermanentError(
      `Workspace not trusted: ${dir}. Run \`claude\` in that directory first to accept the trust dialog.`,
    )
  }

  const hasToken = Boolean(opts.getAccessToken())
  if (!hasToken) {
    // Plain Error (transient): the supervisor's AuthManager may obtain a
    // token before the next retry cycle.
    throw new Error(BRIDGE_LOGIN_ERROR)
  }

  const { getBridgeBaseUrl } = await import('./bridgeConfig.js')
  const baseUrl = getBridgeBaseUrl()
  // NOTE(review): this is a substring match on the whole URL, not a check of
  // the parsed hostname — confirm that is strict enough for this guard.
  const isPlainHttp = baseUrl.startsWith('http://')
  const mentionsLoopback =
    baseUrl.includes('localhost') || baseUrl.includes('127.0.0.1')
  if (isPlainHttp && !mentionsLoopback) {
    throw new BridgeHeadlessPermanentError(
      'Remote Control base URL uses HTTP. Only HTTPS or localhost HTTP is allowed.',
    )
  }

  // The session-ingress override is honoured only when USER_TYPE is 'ant';
  // otherwise ingress traffic goes to the API base URL.
  let sessionIngressUrl = baseUrl
  if (
    process.env.USER_TYPE === 'ant' &&
    process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
  ) {
    sessionIngressUrl = process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
  }

  const { getBranch, getRemoteUrl, findGitRoot } = await import(
    '../utils/git.js'
  )
  const { hasWorktreeCreateHook } = await import('../utils/hooks.js')

  // Worktree mode is usable with either a WorktreeCreate hook or a git root.
  if (
    spawnMode === 'worktree' &&
    !hasWorktreeCreateHook() &&
    findGitRoot(dir) === null
  ) {
    throw new BridgeHeadlessPermanentError(
      `Worktree mode requires a git repository or WorktreeCreate hooks. Directory ${dir} has neither.`,
    )
  }

  const branch = await getBranch()
  const gitRepoUrl = await getRemoteUrl()
  const machineName = hostname()
  const bridgeId = randomUUID()

  const config: BridgeConfig = {
    dir,
    machineName,
    branch,
    gitRepoUrl,
    maxSessions: opts.capacity,
    spawnMode,
    verbose: false,
    sandbox,
    bridgeId,
    workerType: 'claude_code',
    environmentId: randomUUID(),
    apiBaseUrl: baseUrl,
    sessionIngressUrl,
    sessionTimeoutMs: opts.sessionTimeoutMs,
  }

  const api = createBridgeApiClient({
    baseUrl,
    getAccessToken: opts.getAccessToken,
    runnerVersion: MACRO.VERSION,
    onDebug: log,
    onAuth401: opts.onAuth401,
    getTrustedDeviceToken,
  })

  // Register the environment; the server-issued id and secret drive the loop.
  let environmentId: string
  let environmentSecret: string
  try {
    const registration = await api.registerBridgeEnvironment(config)
    environmentId = registration.environment_id
    environmentSecret = registration.environment_secret
  } catch (err) {
    // Rethrown as a plain Error (transient) so the supervisor backoff-retries.
    throw new Error(`Bridge registration failed: ${errorMessage(err)}`)
  }

  const spawner = createSessionSpawner({
    execPath: process.execPath,
    scriptArgs: spawnScriptArgs(),
    env: process.env,
    verbose: false,
    sandbox,
    permissionMode,
    onDebug: log,
  })

  const logger = createHeadlessBridgeLogger(log)
  logger.printBanner(config, environmentId)

  // Optionally pre-create an empty session. A failed creation call is logged
  // and ignored; the poll loop still starts without an initial session.
  let initialSessionId: string | undefined
  if (opts.createSessionOnStart) {
    const { createBridgeSession } = await import('./createSession.js')
    try {
      const createdId = await createBridgeSession({
        environmentId,
        title: opts.name,
        events: [],
        gitRepoUrl,
        branch,
        signal,
        baseUrl,
        getAccessToken: opts.getAccessToken,
        permissionMode,
      })
      if (createdId) {
        initialSessionId = createdId
        log(`created initial session ${createdId}`)
      }
    } catch (err) {
      log(`session pre-creation failed (non-fatal): ${errorMessage(err)}`)
    }
  }

  // Token getter handed to the loop; it simply re-asks the caller's hook.
  const readAccessToken = async () => opts.getAccessToken()

  await runBridgeLoop(
    config,
    environmentId,
    environmentSecret,
    api,
    spawner,
    logger,
    signal,
    undefined,
    initialSessionId,
    readAccessToken,
  )
}
2966
+
2967
/**
 * Builds a BridgeLogger for headless use: every textual event becomes one
 * line passed to `log`, and every display/TUI hook is a no-op.
 *
 * @param log - Sink that receives each formatted log line.
 * @returns A BridgeLogger backed solely by `log`.
 */
function createHeadlessBridgeLogger(log: (s: string) => void): BridgeLogger {
  // Shared no-op for hooks that only matter when a terminal UI is attached.
  const ignore = (): void => {}
  // All per-session lines share the "session <verb> <id><detail>" shape.
  const sessionLine = (verb: string, id: string, detail = '') =>
    log(`session ${verb} ${id}${detail}`)

  return {
    // Startup and general-purpose lines.
    printBanner: (config, environmentId) =>
      log(
        `registered environmentId=${environmentId} dir=${config.dir} spawnMode=${config.spawnMode} capacity=${config.maxSessions}`,
      ),
    logStatus: log,
    logVerbose: log,
    logError: message => log(`error: ${message}`),
    logReconnected: elapsedMs => log(`reconnected after ${elapsedMs}ms`),

    // Session lifecycle lines.
    logSessionStart: (id, _prompt) => sessionLine('start', id),
    logSessionComplete: (id, durationMs) =>
      sessionLine('complete', id, ` (${durationMs}ms)`),
    logSessionFailed: (id, reason) => sessionLine('failed', id, `: ${reason}`),
    addSession: (id, _url) => sessionLine('attached', id),
    removeSession: id => sessionLine('detached', id),

    // Display-only hooks: there is nothing to render in headless mode.
    updateIdleStatus: ignore,
    updateReconnectingStatus: ignore,
    updateSessionStatus: ignore,
    updateSessionActivity: ignore,
    updateSessionCount: ignore,
    updateFailedStatus: ignore,
    setSpawnModeDisplay: ignore,
    setRepoInfo: ignore,
    setDebugLogPath: ignore,
    setAttached: ignore,
    setSessionTitle: ignore,
    clearStatus: ignore,
    toggleQr: ignore,
    refreshDisplay: ignore,
  }
}