bedrock-agentcore 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/README.md +1 -1
  2. package/dist/src/memory/integrations/strands/factory.d.ts +78 -0
  3. package/dist/src/memory/integrations/strands/factory.d.ts.map +1 -0
  4. package/dist/src/memory/integrations/strands/factory.js +108 -0
  5. package/dist/src/memory/integrations/strands/factory.js.map +1 -0
  6. package/dist/src/memory/integrations/strands/format.d.ts +17 -0
  7. package/dist/src/memory/integrations/strands/format.d.ts.map +1 -0
  8. package/dist/src/memory/integrations/strands/format.js +29 -0
  9. package/dist/src/memory/integrations/strands/format.js.map +1 -0
  10. package/dist/src/memory/integrations/strands/index.d.ts +11 -0
  11. package/dist/src/memory/integrations/strands/index.d.ts.map +1 -0
  12. package/dist/src/memory/integrations/strands/index.js +6 -0
  13. package/dist/src/memory/integrations/strands/index.js.map +1 -0
  14. package/dist/src/memory/integrations/strands/logger.d.ts +19 -0
  15. package/dist/src/memory/integrations/strands/logger.d.ts.map +1 -0
  16. package/dist/src/memory/integrations/strands/logger.js +27 -0
  17. package/dist/src/memory/integrations/strands/logger.js.map +1 -0
  18. package/dist/src/memory/integrations/strands/sender.d.ts +79 -0
  19. package/dist/src/memory/integrations/strands/sender.d.ts.map +1 -0
  20. package/dist/src/memory/integrations/strands/sender.js +170 -0
  21. package/dist/src/memory/integrations/strands/sender.js.map +1 -0
  22. package/dist/src/memory/integrations/strands/store.d.ts +28 -0
  23. package/dist/src/memory/integrations/strands/store.d.ts.map +1 -0
  24. package/dist/src/memory/integrations/strands/store.js +154 -0
  25. package/dist/src/memory/integrations/strands/store.js.map +1 -0
  26. package/dist/src/memory/integrations/strands/types.d.ts +122 -0
  27. package/dist/src/memory/integrations/strands/types.d.ts.map +1 -0
  28. package/dist/src/memory/integrations/strands/types.js +73 -0
  29. package/dist/src/memory/integrations/strands/types.js.map +1 -0
  30. package/dist/src/runtime/client.d.ts +79 -11
  31. package/dist/src/runtime/client.d.ts.map +1 -1
  32. package/dist/src/runtime/client.js +230 -79
  33. package/dist/src/runtime/client.js.map +1 -1
  34. package/dist/src/runtime/index.d.ts +5 -0
  35. package/dist/src/runtime/index.d.ts.map +1 -1
  36. package/dist/src/runtime/index.js +3 -0
  37. package/dist/src/runtime/index.js.map +1 -1
  38. package/dist/src/runtime/shell/config.d.ts +81 -0
  39. package/dist/src/runtime/shell/config.d.ts.map +1 -0
  40. package/dist/src/runtime/shell/config.js +15 -0
  41. package/dist/src/runtime/shell/config.js.map +1 -0
  42. package/dist/src/runtime/shell/index.d.ts +9 -0
  43. package/dist/src/runtime/shell/index.d.ts.map +1 -0
  44. package/dist/src/runtime/shell/index.js +6 -0
  45. package/dist/src/runtime/shell/index.js.map +1 -0
  46. package/dist/src/runtime/shell/protocol.d.ts +45 -0
  47. package/dist/src/runtime/shell/protocol.d.ts.map +1 -0
  48. package/dist/src/runtime/shell/protocol.js +99 -0
  49. package/dist/src/runtime/shell/protocol.js.map +1 -0
  50. package/dist/src/runtime/shell/session.d.ts +240 -0
  51. package/dist/src/runtime/shell/session.d.ts.map +1 -0
  52. package/dist/src/runtime/shell/session.js +880 -0
  53. package/dist/src/runtime/shell/session.js.map +1 -0
  54. package/dist/src/runtime/shell/validation.d.ts +8 -0
  55. package/dist/src/runtime/shell/validation.d.ts.map +1 -0
  56. package/dist/src/runtime/shell/validation.js +17 -0
  57. package/dist/src/runtime/shell/validation.js.map +1 -0
  58. package/dist/src/runtime/types.d.ts +89 -0
  59. package/dist/src/runtime/types.d.ts.map +1 -1
  60. package/package.json +13 -4
@@ -0,0 +1,880 @@
1
+ /**
2
+ * ShellSession — async-iterable interactive PTY WebSocket session.
3
+ *
4
+ * Connects on `connect()`, reads the initial STATUS confirmation frame, and exposes
5
+ * typed `send()` / `resize()` / `[Symbol.asyncIterator]()` / `close()`.
6
+ *
7
+ * When `reconnectConfig` is provided, transparently reconnects on unexpected disconnects
8
+ * using the same `shellId` so the shell's working directory, environment, background jobs,
9
+ * and up to 256 KB of buffered output are preserved on the server.
10
+ *
11
+ * Reconnect restores the *connection* on its own (it is driven by the socket close event,
12
+ * not by your read loop, and `send()`/`resize()` wait for it). However, on reattach the
13
+ * server replays the buffered output as inbound frames — to receive that replay (and to see
14
+ * `bytesDropped` updated and `exitCode` set) you must be consuming the session with
15
+ * `for await (const frame of shell)`. A write-only caller that never iterates stays
16
+ * connected across drops but will not observe the replayed output. Keep a `for await` loop
17
+ * running for the life of the session.
18
+ *
19
+ * @example
20
+ * ```typescript
21
+ * const shell = await client.openShell({ runtimeArn })
22
+ * try {
23
+ * await shell.send('cat /etc/os-release\n')
24
+ * for await (const frame of shell) {
25
+ * if (frame.channel === ShellChannel.STDOUT) process.stdout.write(frame.text)
26
+ * }
27
+ * } finally {
28
+ * await shell.close()
29
+ * }
30
+ * ```
31
+ */
32
+ import WebSocket from 'ws';
33
+ import { once, on } from 'events';
34
+ import { randomUUID } from 'crypto';
35
+ import { Buffer } from 'buffer';
36
+ import { ShellFramer, ShellChannel } from './protocol.js';
37
+ import { validateShellId } from './validation.js';
38
+ import { DEFAULT_BASE_DELAY, DEFAULT_KEEPALIVE_INTERVAL, DEFAULT_MAX_DELAY, DEFAULT_MAX_RETRIES, DEFAULT_METADATA_TIMEOUT, DEFAULT_OUTER_LOOP_DELAY, DEFAULT_RECONNECT_WINDOW, noopLogger, } from './config.js';
39
+ /** Names of the 101 Switching Protocols response headers read in the 'upgrade' handler. They are written in lowercase because they are used directly as keys into `response.headers` (NOTE(review): Node's http module lower-cases incoming header names; HTTP itself treats header names case-insensitively — confirm). */
40
+ const SESSION_HEADER = 'x-amzn-bedrock-agentcore-runtime-session-id';
41
+ const SHELL_ID_HEADER = 'x-amzn-bedrock-agentcore-shell-id';
42
+ /**
43
+ * Async-iterable shell session wrapping a live PTY WebSocket.
44
+ *
45
+ * Read-only observable attributes (updated by the session as events arrive):
46
+ * - `shellId` — Server-confirmed shell identifier. Preserve to reconnect to the same PTY.
47
+ * - `sessionId` — Runtime session ID routing to the VM.
48
+ * - `reconnected` — True when the most recent connect reattached an existing PTY.
49
+ * - `kicked` — True when another client connected with the same shellId (close 4000).
50
+ * Check this after the `for await` loop exits to distinguish a kick from
51
+ * a clean shell exit.
52
+ * - `bytesDropped` — PTY ring-buffer bytes lost during the most recent disconnect, as
53
+ * reported by the server in the reconnect confirmation frame.
54
+ * Zero if no overflow occurred or on a fresh connection.
55
+ * - `exitCode` — Shell process exit code. `null` until the shell exits; `0` for a clean
56
+ * exit. Check this after the `for await` loop exits alongside `kicked`.
57
+ */
58
+ export class ShellSession {
59
+ _shellId; // Backing field for `shellId`: set in the constructor, then overwritten by the upgrade response header and by the STATUS confirmation frame.
60
+ _sessionId; // Backing field for `sessionId`: set in the constructor, then overwritten by the upgrade response header when present.
61
+ _reconnected = false; // Backing field for `reconnected`: cleared at the start of every connect attempt, set from the STATUS confirmation metadata.
62
+ _kicked = false; // Backing field for `kicked`: cleared at the start of every connect attempt (the code that sets it to true is outside this excerpt).
63
+ _bytesDropped = 0; // Backing field for `bytesDropped`: zeroed only on a fresh (non-reconnect) connect, assigned by _recordBytesDropped.
64
+ _exitCode = null; // Backing field for `exitCode`: cleared at the start of every connect attempt, set when a termination STATUS frame is iterated.
65
+ /** Server-confirmed shell identifier. */
66
+ get shellId() {
67
+ return this._shellId;
68
+ }
69
+ /** Runtime session ID routing to the VM. */
70
+ get sessionId() {
71
+ return this._sessionId;
72
+ }
73
+ /** True when the most recent connect reattached an existing PTY. */
74
+ get reconnected() {
75
+ return this._reconnected;
76
+ }
77
+ /**
78
+ * True when another client connected with the same shellId (close 4000).
79
+ * Check after the `for await` loop exits to distinguish a kick from a clean exit.
80
+ */
81
+ get kicked() {
82
+ return this._kicked;
83
+ }
84
+ /**
85
+ * PTY ring-buffer bytes lost during the most recent disconnect.
86
+ * Zero when no overflow occurred or on a fresh connection.
87
+ */
88
+ get bytesDropped() {
89
+ return this._bytesDropped;
90
+ }
91
+ /**
92
+ * Shell process exit code. `null` until the shell exits; `0` for a clean exit.
93
+ * Check after the `for await` loop exits alongside `kicked`.
94
+ */
95
+ get exitCode() {
96
+ return this._exitCode;
97
+ }
98
+ connectFn; // Caller-supplied async function invoked as connectFn(shellId, sessionId); its result supplies `url`, `protocols` and `headers` for the WebSocket.
99
+ reconnectConfig; // Copied from opts.reconnectConfig; may be undefined (it is consumed by reconnect code outside this excerpt).
100
+ keepaliveIntervalMs; // Ping interval in milliseconds; a value <= 0 disables the keepalive timer.
101
+ log; // Logger used via debug()/warn(); defaults to noopLogger.
102
+ framer = new ShellFramer(); // Encodes outbound frames (stdin, heartbeat, resize, close) and decodes inbound ones.
103
+ _wsFactory; // Creates the WebSocket; injectable through opts._wsFactory, otherwise wraps `new WebSocket(...)`.
104
+ _state = { status: 'idle' }; // Connection state; statuses used in this file: 'idle', 'connecting', 'open', 'reconnecting', 'closed'.
105
+ _abortController = null; // Per-connection controller: replaced on every connect attempt, aborted on socket close/error and by close().
106
+ _sessionController = new AbortController(); // Session-lifetime controller: aborted once, by close().
107
+ _closeError = null; // Error built by the socket 'close' handler (carries `closeCode`); reset at the start of every connect attempt.
108
+ /**
109
+ * Set while a reconnect is in flight, cleared when it settles. Shared so that the
110
+ * iterator, the close/dead-detection handler, and send()/resize() all await the same
111
+ * attempt rather than racing or each starting their own. Resolves to the reconnect
112
+ * outcome (true = recovered, false = gave up). This is what makes *connection* recovery
113
+ * iterator-independent — the socket is restored without a `for await` loop. Consuming the
114
+ * replayed output still requires an active iterator (see the class docstring).
115
+ */
116
+ _reconnectPromise = null;
117
+ constructor(opts) {
118
+ this.connectFn = opts.connectFn;
119
+ if (opts.shellId != null)
120
+ validateShellId(opts.shellId);
121
+ this._shellId = opts.shellId ?? randomUUID();
122
+ this._sessionId = opts.sessionId ?? randomUUID();
123
+ this.reconnectConfig = opts.reconnectConfig;
124
+ this.keepaliveIntervalMs = opts.keepaliveIntervalMs ?? DEFAULT_KEEPALIVE_INTERVAL;
125
+ this.log = opts.logger ?? noopLogger;
126
+ this._wsFactory =
127
+ opts._wsFactory ??
128
+ ((url, protocols, options) => protocols?.length ? new WebSocket(url, protocols, options) : new WebSocket(url, options));
129
+ }
130
+ /** Connect and read the initial STATUS metadata frame. */
131
+ async connect() {
132
+ if (this._state.status === 'closed')
133
+ throw new Error('ShellSession is closed');
134
+ if (this._state.status !== 'idle') {
135
+ throw new Error(`ShellSession.connect() requires idle state (current: ${this._state.status})`);
136
+ }
137
+ await this._connectWithUpgrade();
138
+ // _connectWithUpgrade returns void (not throw) when close() fires mid-flight.
139
+ if (this._isClosed())
140
+ throw new Error('ShellSession was closed during connect()');
141
+ return this;
142
+ }
143
+ /**
144
+ * Send text or raw bytes to the shell's stdin.
145
+ * Pass a string for text commands; pass a Buffer for binary/escape sequences.
146
+ *
147
+ * If a reconnect is in flight, this waits for it and sends on the recovered
148
+ * connection. Throws a descriptive `Error` (never the raw `ws` "readyState 3"
149
+ * error) when the session is closed or could not be recovered.
150
+ */
151
+ async send(data) {
152
+ await this._wsSend(await this._writableSocket(), this.framer.encodeStdin(data));
153
+ }
154
+ /** Send a HEARTBEAT frame (0x05) to the server. */
155
+ async sendHeartbeat() {
156
+ await this._wsSend(await this._writableSocket(), this.framer.encodeHeartbeat());
157
+ }
158
+ /** Resize the terminal PTY. */
159
+ async resize(width, height) {
160
+ await this._wsSend(await this._writableSocket(), this.framer.encodeResize(width, height));
161
+ }
162
+ /**
163
+ * Resolve the live socket for a write, healing first if needed. Awaits an in-flight
164
+ * reconnect (transparent recovery) and validates the *real* socket
165
+ * readyState — not just the `_state` flag, which can lag a silently-dropped socket.
166
+ * Throws a descriptive `Error` instead of leaking the raw `ws`
167
+ * "readyState 3 (CLOSED)" error.
168
+ */
169
+ async _writableSocket() {
170
+ // Wait out an in-flight reconnect, looping so back-to-back drops (a fresh reconnect
171
+ // starting while we awaited the previous one) are also awaited rather than throwing a
172
+ // spurious "not connected" mid-recovery. The reconnect passes through 'reconnecting'
173
+ // then 'connecting' before reaching 'open', so we wait whenever a reconnect promise is
174
+ // set AND we are not yet open/closed. Crucially we stop once status is 'open': an
175
+ // onReconnect callback that calls send()/resize() runs after the new socket is promoted
176
+ // to 'open' but before _reconnectPromise settles — gating on 'open' lets it through
177
+ // instead of awaiting its own in-flight promise and deadlocking.
178
+ while (this._reconnectPromise && this._state.status !== 'open' && this._state.status !== 'closed') {
179
+ await this._reconnectPromise;
180
+ }
181
+ if (this._isClosed())
182
+ throw new Error('ShellSession is closed');
183
+ if (this._state.status !== 'open') {
184
+ throw new Error(`ShellSession is not connected (status: ${this._state.status})`);
185
+ }
186
+ if (this._state.ws.readyState !== WebSocket.OPEN) {
187
+ throw new Error(`ShellSession connection is not open (readyState ${this._state.ws.readyState})`);
188
+ }
189
+ return this._state.ws;
190
+ }
191
+ /** Send a CLOSE frame (0xFF) to permanently kill the shell, then close the WebSocket.
192
+ * The server kills the shell process (SIGHUP → SIGKILL) and responds with its own [0xFF].
193
+ * Unlike dropping the WebSocket (which detaches and allows reconnection), this is permanent. */
194
+ async close() {
195
+ const prev = this._state;
196
+ if (prev.status === 'closed') {
197
+ this.log.debug(`ShellSession: close() called on already-closed session (shellId=${this.shellId})`);
198
+ return;
199
+ }
200
+ // Atomic transition — any concurrent code checks _state.status === 'closed'.
201
+ this._state = { status: 'closed' };
202
+ // Abort per-connection iterator (unblocks any suspended _recvRaw) and reconnect sleeps.
203
+ this._abortController?.abort();
204
+ this._sessionController.abort();
205
+ if (prev.status === 'connecting' && prev.ws !== null) {
206
+ // Terminate any in-progress TLS handshake — the socket is unreachable from
207
+ // the _connectWithUpgrade local, so close() must kill it here.
208
+ try {
209
+ prev.ws.terminate();
210
+ }
211
+ catch (err) {
212
+ this.log.debug(`ShellSession: ws.terminate() threw during connecting (shellId=${this.shellId}): ${String(err)}`);
213
+ }
214
+ }
215
+ else if (prev.status === 'open') {
216
+ this._stopKeepalive(prev.keepaliveTimer);
217
+ // ws.send() throws synchronously if the socket is already closing/closed.
218
+ // Swallow it — the intent is best-effort notification, not guaranteed delivery.
219
+ try {
220
+ prev.ws.send(this.framer.encodeClose());
221
+ }
222
+ catch (err) {
223
+ this.log.debug(`ShellSession: CLOSE frame not sent — socket already closing/closed (shellId=${this.shellId}): ${String(err)}`);
224
+ }
225
+ try {
226
+ prev.ws.close();
227
+ }
228
+ catch (err) {
229
+ this.log.debug(`ShellSession: ws.close() threw (shellId=${this.shellId}): ${String(err)}`);
230
+ }
231
+ }
232
+ }
233
+ /**
234
+ * Forcibly terminates the underlying WebSocket without a clean handshake.
235
+ * Useful in tests to simulate an abrupt network drop and trigger the reconnect path.
236
+ * Has no effect if the session is not currently open.
237
+ * @internal
238
+ */
239
+ _terminateConnection() {
240
+ if (this._state.status === 'open')
241
+ this._state.ws.terminate();
242
+ }
243
+ /**
244
+ * Async iterator — yields inbound ShellFrames, reconnecting on drop if configured.
245
+ *
246
+ * The loop exits silently (no throw) in three cases: shell exit, kicked by a new
247
+ * client, or reconnect budget exhausted. Check `exitCode`, `kicked`, and
248
+ * `bytesDropped` after the loop to distinguish them:
249
+ *
250
+ * ```typescript
251
+ * for await (const frame of shell) { ... }
252
+ * if (shell.kicked) { ... } // another client took over
253
+ * if (shell.exitCode !== null) { ... } // shell process exited
254
+ * if (shell.bytesDropped > 0) { ... } // ring-buffer overflow on reconnect
255
+ * ```
256
+ */
257
+ [Symbol.asyncIterator]() {
258
+ return this._iterate();
259
+ }
260
+ // ── Internal ──────────────────────────────────────────────────────────────
261
+ _startKeepalive(ws, signal) {
262
+ if (this.keepaliveIntervalMs <= 0)
263
+ return null;
264
+ // SDK clients MUST send a Ping every 30s AND treat "no Pong within ~60s" as a dead
265
+ // connection. ws emits 'pong' for each RFC 6455 Pong received. Without this,
266
+ // a silently-dropped socket reports readyState OPEN for up to ~60s (KARP idle timeout),
267
+ // during which the session would sit stale on a dead connection.
268
+ //
269
+ // Liveness is pong-only by design: a 'message' listener here would (a) double-dispatch
270
+ // on every inbound frame — the read path (`on(ws,'message')` iterator) already consumes
271
+ // them — and (b) mask a write-dead-but-read-alive half-close, since streamed output
272
+ // would keep refreshing the timer while our pings go unanswered. The 'pong' listener is
273
+ // removed when this connection's controller aborts (close or drop) so it never leaks.
274
+ let lastPongAt = Date.now();
275
+ const markAlive = () => {
276
+ lastPongAt = Date.now();
277
+ };
278
+ ws.on('pong', markAlive);
279
+ signal.addEventListener('abort', () => ws.removeListener('pong', markAlive), { once: true });
280
+ const deadAfterMs = this.keepaliveIntervalMs * 2; // ~60s with the 30s default
281
+ return globalThis.setInterval(() => {
282
+ if (ws.readyState !== WebSocket.OPEN)
283
+ return;
284
+ if (Date.now() - lastPongAt > deadAfterMs) {
285
+ // Silent death: pings went unanswered. Terminate so 'close' fires and reconnect
286
+ // engages — instead of leaving the session stale on a dead socket.
287
+ this.log.warn(`ShellSession: no Pong within ${deadAfterMs}ms — connection presumed dead, ` +
288
+ `terminating (shellId=${this.shellId})`);
289
+ ws.terminate();
290
+ return;
291
+ }
292
+ ws.ping();
293
+ }, this.keepaliveIntervalMs);
294
+ }
295
+ _stopKeepalive(timer) {
296
+ if (timer !== null)
297
+ globalThis.clearInterval(timer);
298
+ }
299
+ _wsSend(ws, data) {
300
+ return new Promise((resolve, reject) => {
301
+ ws.send(data, (err) => (err ? reject(err) : resolve()));
302
+ });
303
+ }
304
+ /** Open WebSocket, capture 101 upgrade headers, then read metadata frame. */
305
+ async _connectWithUpgrade() {
306
+ // Guard must be the very first check — any state mutation below would violate the
307
+ // closed invariant if close() has already fired.
308
+ if (this._isClosed())
309
+ return;
310
+ // Abort any previous per-connection iterator before creating a new one.
311
+ this._abortController?.abort();
312
+ // A reconnect attempt enters here with status 'reconnecting'; a fresh connect with 'idle'.
313
+ const isReconnect = this._state.status === 'reconnecting';
314
+ this._closeError = null;
315
+ this._reconnected = false;
316
+ this._kicked = false;
317
+ this._exitCode = null;
318
+ // bytesDropped reports loss for the most recent disconnect (per-disconnect, not
319
+ // cumulative). Reset it only on a FRESH connect — resetting on every reconnect attempt
320
+ // would clobber a value the iterator has not yet read when reconnects happen back-to-back.
321
+ if (!isReconnect)
322
+ this._bytesDropped = 0;
323
+ this._state = { status: 'connecting', ws: null };
324
+ let connectResult;
325
+ try {
326
+ connectResult = await this.connectFn(this.shellId, this.sessionId);
327
+ }
328
+ catch (err) {
329
+ // connectFn threw before any WebSocket was created — reset to idle so the
330
+ // state doesn't stay 'connecting' across the backoff sleep between retries.
331
+ if (!this._isClosed())
332
+ this._state = { status: 'idle' };
333
+ throw err;
334
+ }
335
+ // close() may have fired while connectFn was suspended — bail out before
336
+ // creating a new WebSocket so the session doesn't revive after an explicit close.
337
+ if (this._isClosed())
338
+ return;
339
+ const ws = this._wsFactory(connectResult.url, connectResult.protocols, { headers: connectResult.headers });
340
+ // close() may have fired in the gap between wsFactory() and this state update —
341
+ // terminate the socket and bail out instead of re-registering in 'connecting'.
342
+ if (this._isClosed()) {
343
+ ws.terminate();
344
+ return;
345
+ }
346
+ // Register ws in the connecting state so close() can terminate the socket
347
+ // during the TLS handshake, before the open race resolves.
348
+ this._state = { status: 'connecting', ws };
349
+ // Capture controller in a local variable so each WebSocket's handlers close over
350
+ // their own controller — not this._abortController, which is replaced on reconnect.
351
+ this._abortController = new AbortController();
352
+ const controller = this._abortController;
353
+ // Single iterator used for both the STATUS handshake and subsequent frame reads.
354
+ // On the timeout path, one frame may be lost (the abandoned .next() from the
355
+ // Promise.race in _readMetadataFrame consumes it), but that is a degraded scenario.
356
+ // Using two independent iterators would cause every pre-STATUS frame to be yielded
357
+ // twice (once via pendingFrames, once via the independent listener queue).
358
+ const messageIterator = on(ws, 'message', { signal: controller.signal });
359
+ ws.on('close', (code, reason) => {
360
+ const err = Object.assign(new Error(`WebSocket closed: ${code} ${reason?.toString() ?? ''}`), { closeCode: code });
361
+ this._closeError = err;
362
+ controller.abort(err);
363
+ // Trigger reconnect from the close event itself so recovery does not depend on a
364
+ // `for await` loop being active. Only act on closes for the *current*
365
+ // connection: ignore if we have already moved on (closed, reconnecting, or this
366
+ // socket is no longer the live one). The iterator, if running, joins the same
367
+ // in-flight attempt via _reconnectPromise rather than starting a second one.
368
+ if (this._state.status === 'open' && this._state.ws === ws) {
369
+ void this._ensureReconnect(code).catch(() => { });
370
+ }
371
+ });
372
+ ws.on('error', (err) => {
373
+ controller.abort(err);
374
+ // Do NOT trigger reconnect here. The `ws` library always emits 'close' after 'error'
375
+ // on a connected socket, and that close carries the authoritative close code (the
376
+ // close code drives the reconnect decision). An 'error' has no closeCode, so
377
+ // reconnecting from it (with a null code) would pre-empt a terminal close that must
378
+ // NOT reconnect — e.g. 4000 (kicked) or 1003 (text frames) — by flipping state to
379
+ // 'reconnecting' before the real code arrives. The 'close' handler below owns the
380
+ // decision; this listener exists only so the 'error' event is not unhandled.
381
+ this.log.debug(`ShellSession: WebSocket error (deferring to close, shellId=${this.shellId}): ${String(err)}`);
382
+ });
383
+ // Capture shellId/sessionId from 101 upgrade response headers before open fires.
384
+ ws.once('upgrade', (response) => {
385
+ const resHeaders = response.headers;
386
+ const hShellId = resHeaders[SHELL_ID_HEADER];
387
+ const hSessionId = resHeaders[SESSION_HEADER];
388
+ if (typeof hShellId === 'string')
389
+ this._shellId = hShellId;
390
+ if (typeof hSessionId === 'string')
391
+ this._sessionId = hSessionId;
392
+ });
393
+ // Race open against unexpected-response (non-101) and pre-open close.
394
+ // events.once(ws, 'open') rejects automatically if 'error' fires first.
395
+ // AbortController cleans up the two losing once() listeners immediately after the race settles.
396
+ const openRaceAc = new AbortController();
397
+ try {
398
+ await Promise.race([
399
+ once(ws, 'open', { signal: openRaceAc.signal }),
400
+ once(ws, 'unexpected-response', { signal: openRaceAc.signal }).then((args) => {
401
+ const res = args[1];
402
+ res.resume();
403
+ throw new Error(`Server rejected WebSocket connection: HTTP ${res.statusCode ?? 0}`);
404
+ }),
405
+ once(ws, 'close', { signal: openRaceAc.signal }).then((args) => {
406
+ const [code, reason] = args;
407
+ throw new Error(`WebSocket closed before open: ${code} ${reason?.toString() ?? ''}`);
408
+ }),
409
+ ]);
410
+ }
411
+ catch (err) {
412
+ ws.terminate();
413
+ if (!this._isClosed())
414
+ this._state = { status: 'idle' };
415
+ this.log.debug(`ShellSession: WebSocket upgrade failed (shellId=${this.shellId}): ${String(err)}`);
416
+ throw err;
417
+ }
418
+ finally {
419
+ openRaceAc.abort();
420
+ }
421
+ let pendingFrames;
422
+ try {
423
+ pendingFrames = await this._readMetadataFrame(messageIterator);
424
+ }
425
+ catch (err) {
426
+ // Server closed before sending STATUS, or close() fired during handshake.
427
+ ws.terminate();
428
+ if (!this._isClosed())
429
+ this._state = { status: 'idle' };
430
+ throw err;
431
+ }
432
+ // close() may have fired during _readMetadataFrame — terminate and bail out.
433
+ if (this._isClosed()) {
434
+ ws.terminate();
435
+ return;
436
+ }
437
+ const keepaliveTimer = this._startKeepalive(ws, controller.signal);
438
+ // Atomic promotion: all connection objects become available together.
439
+ this._state = { status: 'open', ws, messageIterator, keepaliveTimer, pendingFrames };
440
+ }
441
+ /** Receive one raw binary message from the WebSocket. */
442
+ async _recvRaw(messageIterator) {
443
+ try {
444
+ const { value, done } = await messageIterator.next();
445
+ if (done)
446
+ throw this._closeError ?? new Error('WebSocket closed');
447
+ const [data] = value;
448
+ if (Buffer.isBuffer(data))
449
+ return data;
450
+ if (Array.isArray(data))
451
+ return Buffer.concat(data);
452
+ return Buffer.from(data);
453
+ }
454
+ catch (err) {
455
+ throw this._closeError ?? err;
456
+ }
457
+ }
458
+ /**
459
+ * Consume frames until a STATUS confirmation is found, stashing others in pendingFrames.
460
+ * Returns the accumulated pending frames to be stored in the 'open' state.
461
+ */
462
+ async _readMetadataFrame(messageIterator) {
463
+ // Use an explicit AbortController so the timer can be cancelled as soon as
464
+ // STATUS arrives — AbortSignal.timeout() creates a timer that outlives the
465
+ // fast-path read and accumulates orphan wakeups on rapid reconnects.
466
+ const timeoutAc = new AbortController();
467
+ const timer = globalThis.setTimeout(() => timeoutAc.abort(new Error('timeout')), DEFAULT_METADATA_TIMEOUT);
468
+ const timeoutP = new Promise((_, rej) => timeoutAc.signal.addEventListener('abort', () => rej(timeoutAc.signal.reason), { once: true }));
469
+ const pendingFrames = [];
470
+ try {
471
+ while (true) {
472
+ let raw;
473
+ try {
474
+ raw = await Promise.race([this._recvRaw(messageIterator), timeoutP]);
475
+ }
476
+ catch (err) {
477
+ if (timeoutAc.signal.aborted) {
478
+ // If the WebSocket also closed concurrently (race between the 10s timer
479
+ // and a server close event), prefer the real close error — promoting a
480
+ // dead messageIterator to 'open' would mislead callers and lose the cause.
481
+ if (this._closeError !== null)
482
+ throw this._closeError;
483
+ this.log.warn(`ShellSession: Timed out waiting for STATUS confirmation (shellId=${this.shellId})`);
484
+ return pendingFrames;
485
+ }
486
+ throw err;
487
+ }
488
+ const frame = this.framer.decode(raw);
489
+ if (frame.channel === ShellChannel.STATUS) {
490
+ try {
491
+ const meta = (frame.json()['metadata'] ?? {});
492
+ if (meta['shellId']) {
493
+ // Confirmation frame — update shellId and we're done. On a reconnect this is
494
+ // the single reconnection confirmation ("sent once, before replay begins"),
495
+ // so read bytesDropped here — it is delivered on THIS frame and nowhere else.
496
+ this._shellId = String(meta['shellId']);
497
+ this._reconnected = Boolean(meta['reconnected']);
498
+ this._recordBytesDropped(meta);
499
+ return pendingFrames;
500
+ }
501
+ }
502
+ catch (err) {
503
+ this.log.debug(`ShellSession: malformed STATUS frame, proceeding with client-generated shellId=${this.shellId}: ${String(err)}`);
504
+ return pendingFrames;
505
+ }
506
+ // No shellId → termination frame (shell died before confirmation).
507
+ // Stash it so _iterate can set exitCode and return cleanly.
508
+ this.log.debug(`ShellSession: termination STATUS received before confirmation (shellId=${this.shellId})`);
509
+ pendingFrames.push(frame);
510
+ return pendingFrames;
511
+ }
512
+ pendingFrames.push(frame);
513
+ }
514
+ }
515
+ finally {
516
+ globalThis.clearTimeout(timer);
517
+ }
518
+ }
519
+ _isConfirmationStatus(status) {
520
+ const meta = status['metadata'];
521
+ return Boolean(meta?.['shellId']);
522
+ }
523
+ /**
524
+ * Record `bytesDropped` from a reconnection confirmation frame's metadata, if present.
525
+ * `bytesDropped` reports PTY output lost from ring-buffer overflow during THIS disconnect
526
+ * (per-disconnect, not session-cumulative — assign, don't accumulate). Present
527
+ * only when greater than 0; absent on a clean reconnect.
528
+ */
529
+ _recordBytesDropped(meta) {
530
+ const dropped = meta?.['bytesDropped'];
531
+ if (typeof dropped === 'number' && dropped > 0) {
532
+ this._bytesDropped = dropped;
533
+ this.log.warn(`ShellSession: ${dropped} bytes of PTY output lost during disconnect ` +
534
+ `(ring buffer overflow, shellId=${this.shellId})`);
535
+ }
536
+ }
537
+ _isTerminationStatus(status) {
538
+ if (this._isConfirmationStatus(status))
539
+ return false;
540
+ return status['status'] === 'Success' || status['status'] === 'Failure';
541
+ }
542
+ _parseExitCode(status) {
543
+ if (status['status'] === 'Success')
544
+ return 0;
545
+ const details = status['details'];
546
+ const causes = details?.['causes'];
547
+ for (const cause of causes ?? []) {
548
+ if (cause['reason'] === 'ExitCode') {
549
+ const n = parseInt(String(cause['message']), 10);
550
+ if (!isNaN(n))
551
+ return n;
552
+ }
553
+ }
554
+ // Platform error with no ExitCode cause — return null so callers can
555
+ // distinguish "exited cleanly" (0) from "no exit code available" (null).
556
+ return null;
557
+ }
558
+ async *_iterate() {
559
+ // Hoisted outside the loop so the finally block can reference the last known
560
+ // open state's keepaliveTimer even after _state has been transitioned away.
561
+ let state;
562
+ try {
563
+ while (true) {
564
+ if (this._state.status !== 'open')
565
+ return;
566
+ // Capture state snapshot before awaiting — close() may transition state
567
+ // concurrently while the generator is suspended at a yield or await point.
568
+ state = this._state;
569
+ // Drain frames buffered during the metadata handshake first.
570
+ while (state.pendingFrames.length > 0) {
571
+ if (this._isClosed())
572
+ return;
573
+ const frame = state.pendingFrames.shift();
574
+ // HEARTBEAT — server echo of client keepalive, not application data.
575
+ if (frame.channel === ShellChannel.HEARTBEAT)
576
+ continue;
577
+ if (frame.channel === ShellChannel.CLOSE) {
578
+ this._state = { status: 'idle' };
579
+ this.log.debug(`ShellSession: CLOSE frame received in pending queue (shellId=${this.shellId})`);
580
+ return;
581
+ }
582
+ if (frame.channel === ShellChannel.STATUS) {
583
+ try {
584
+ const s = frame.json();
585
+ if (this._isTerminationStatus(s)) {
586
+ this._exitCode = this._parseExitCode(s);
587
+ this._state = { status: 'idle' };
588
+ yield frame;
589
+ return;
590
+ }
591
+ }
592
+ catch (err) {
593
+ this.log.debug(`ShellSession: malformed STATUS frame in pending queue (shellId=${this.shellId}): ${String(err)}`);
594
+ }
595
+ }
596
+ yield frame;
597
+ }
598
+ if (this._state.status !== 'open')
599
+ return;
600
+ let raw;
601
+ try {
602
+ raw = await this._recvRaw(state.messageIterator);
603
+ }
604
+ catch (err) {
605
+ if (this._isClosed())
606
+ return;
607
+ // A socket 'error' aborts this read before the authoritative 'close' arrives, so
608
+ // the abort error carries no closeCode. Reconnecting on that null code would
609
+ // pre-empt the real code (e.g. 4000 kicked / 1003 text-frames): _ensureReconnect
610
+ // would flip state to 'reconnecting', and the close handler's terminal decision
611
+ // (which guards on status 'open') would then be silently dropped. Wait for the
612
+ // 'close' first — `ws` always emits it after 'error' on a connected socket, and
613
+ // its handler sets _closeError and owns the reconnect decision (spec §7).
614
+ if (this._extractCloseCode(err) === null && this._closeError === null) {
615
+ await this._waitForClose(state.ws);
616
+ if (this._isClosed())
617
+ return;
618
+ }
619
+ // Start-or-join reconnect with the authoritative close code. The close/error
620
+ // handler for this socket may have already kicked off the attempt
621
+ // (iterator-independent path); either way we await the single shared attempt here
622
+ // rather than duplicating close-code logic.
623
+ const closeCode = this._extractCloseCode(this._closeError ?? err);
624
+ const didReconnect = await this._ensureReconnect(closeCode);
625
+ if (this._isClosed())
626
+ return;
627
+ if (!didReconnect)
628
+ return; // terminal close, no config, or budget exhausted
629
+ continue;
630
+ }
631
+ const frame = this.framer.decode(raw);
632
+ if (frame.channel === ShellChannel.CLOSE) {
633
+ this._state = { status: 'idle' };
634
+ this.log.debug(`ShellSession: CLOSE frame received (shellId=${this.shellId})`);
635
+ return;
636
+ }
637
+ // HEARTBEAT — server echo of client keepalive, not application data.
638
+ if (frame.channel === ShellChannel.HEARTBEAT)
639
+ continue;
640
+ if (frame.channel === ShellChannel.STATUS) {
641
+ try {
642
+ const s = frame.json();
643
+ if (this._isConfirmationStatus(s)) {
644
+ // A confirmation frame in the data stream. Per spec §6 the reconnection
645
+ // confirmation (carrying bytesDropped) is sent once *before* replay and is
646
+ // consumed by _readMetadataFrame, not here — so this path normally does not
647
+ // fire on reconnect. Record bytesDropped defensively in case a confirmation
648
+ // reaches the stream, then swallow it (not application output).
649
+ this._recordBytesDropped(s['metadata']);
650
+ continue;
651
+ }
652
+ if (this._isTerminationStatus(s)) {
653
+ this._exitCode = this._parseExitCode(s);
654
+ this._state = { status: 'idle' };
655
+ yield frame;
656
+ return;
657
+ }
658
+ }
659
+ catch (err) {
660
+ this.log.debug(`ShellSession: malformed STATUS frame (shellId=${this.shellId}): ${String(err)}`);
661
+ }
662
+ }
663
+ yield frame;
664
+ }
665
+ }
666
+ finally {
667
+ // Safety net: stop keepalive if close() fires while the generator is suspended
668
+ // at a yield point — close() transitions to 'closed' but state.keepaliveTimer
669
+ // was captured before that transition, so it still holds the live timer handle.
670
+ this._stopKeepalive(state?.keepaliveTimer ?? null);
671
+ }
672
+ }
673
+ /** Returns true when close() has been called. Used after await points to guard
674
+ * against close() firing while the method was suspended. A method call prevents
675
+ * TypeScript from narrowing away 'closed' comparisons after state assignments. */
676
+ _isClosed() {
677
+ return this._state.status === 'closed';
678
+ }
679
+ _extractCloseCode(err) {
680
+ if (err !== null && typeof err === 'object' && 'closeCode' in err) {
681
+ return err.closeCode;
682
+ }
683
+ return null;
684
+ }
685
+ /**
686
+ * Wait for the socket's authoritative 'close' to land after an 'error' woke the read
687
+ * loop early. The 'close' handler sets `_closeError` (carrying the real close code) and
688
+ * makes the reconnect decision, so this resolves as soon as `_closeError` is populated.
689
+ * Bounded by a short timeout in case 'close' never follows (it always does on a
690
+ * connected `ws`, but we must not hang the iterator on a misbehaving socket).
691
+ */
692
+ async _waitForClose(ws) {
693
+ if (this._closeError !== null || this._isClosed())
694
+ return;
695
+ const ac = new AbortController();
696
+ const timer = globalThis.setTimeout(() => ac.abort(), DEFAULT_METADATA_TIMEOUT);
697
+ try {
698
+ await once(ws, 'close', { signal: ac.signal });
699
+ }
700
+ catch {
701
+ // Timed out or aborted — fall through; caller reconnects with whatever code is set.
702
+ }
703
+ finally {
704
+ globalThis.clearTimeout(timer);
705
+ }
706
+ }
707
+ /**
708
+ * Decide whether a close code is auto-reconnectable, then start-or-join a reconnect.
709
+ *
710
+ * This is the single entry point for triggering reconnection. It is called from
711
+ * wherever a disconnect is observed — the iterator's read error, the `ws.on('close')`
712
+ * handler (dead-detection / silent drop while no one is iterating), or the keepalive
713
+ * pong-timeout — so restoring the *connection* no longer depends on an active `for await`
714
+ * loop. (Receiving the server's replayed output after reattach still requires iterating;
715
+ * see the class docstring.)
716
+ *
717
+ * Returns a promise that resolves to the reconnect outcome (true = recovered).
718
+ * Returns immediately with `false` for terminal close codes (4000 kicked, 1003 text,
719
+ * 1000 normal) or when no `reconnectConfig` is set. Concurrent callers share one
720
+ * in-flight attempt via `_reconnectPromise`.
721
+ */
722
+ _ensureReconnect(closeCode) {
723
+ // Join the in-flight attempt while it is still running — that spans both 'reconnecting'
724
+ // (backoff sleep) and 'connecting' (a retry's _connectWithUpgrade in progress). A second
725
+ // observer of the SAME disconnect (e.g. the iterator resuming after _waitForClose, while
726
+ // the close handler already armed the attempt and it has advanced to 'connecting') must
727
+ // join, not start a duplicate attempt. We deliberately do NOT join once status is 'open':
728
+ // a disconnect that arrives after a successful reattach (the window where the new socket
729
+ // is promoted to 'open' but _reconnectPromise has not yet been cleared in .finally) must
730
+ // be processed as a NEW event — otherwise a terminal 4000/1003 on the new socket is
731
+ // swallowed (kicked never set) and a fresh drop arms no new reconnect.
732
+ if (this._reconnectPromise && (this._state.status === 'reconnecting' || this._state.status === 'connecting')) {
733
+ return this._reconnectPromise;
734
+ }
735
+ if (this._isClosed())
736
+ return Promise.resolve(false);
737
+ // Terminal close codes (never auto-reconnect) and the no-config case all stop the
738
+ // session. Classify once, emit any code-specific warning, then do a SINGLE shared
739
+ // idle transition rather than repeating it per branch.
740
+ // 4000 kicked · 1003 text-frames-unsupported · 1000 normal close · no reconnectConfig
741
+ const terminal = closeCode === 4000 || closeCode === 1003 || closeCode === 1000 || !this.reconnectConfig;
742
+ if (terminal) {
743
+ if (closeCode === 4000) {
744
+ this._kicked = true;
745
+ this.log.warn(`ShellSession: kicked by new connection (close 4000, shellId=${this.shellId})`);
746
+ }
747
+ else if (closeCode === 1003) {
748
+ this.log.warn(`ShellSession: Server closed with 1003 (text frames not supported). ` +
749
+ `Open a new ShellSession — do not reconnect.`);
750
+ }
751
+ else if (closeCode === 1001 && !this.reconnectConfig) {
752
+ this.log.warn(`ShellSession: Server sent 1001 Going Away but no reconnectConfig — ` +
753
+ `stopping. Reconnect with shellId=${this.shellId}`);
754
+ }
755
+ if (this._state.status === 'open')
756
+ this._state = { status: 'idle' };
757
+ return Promise.resolve(false);
758
+ }
759
+ // Reconnectable disconnect — stop keepalive on the dead connection and enter the loop.
760
+ if (closeCode === 1001) {
761
+ this.log.warn(`ShellSession: Server sent 1001 Going Away — reconnecting (shellId=${this.shellId})`);
762
+ }
763
+ if (this._state.status === 'open')
764
+ this._stopKeepalive(this._state.keepaliveTimer);
765
+ this._state = { status: 'reconnecting' };
766
+ const attempt = this._reconnectWithBackoff(Date.now())
767
+ .then((didReconnect) => {
768
+ if (!didReconnect && !this._isClosed()) {
769
+ this.log.warn(`ShellSession: reconnect exhausted (shellId=${this.shellId})`);
770
+ this._state = { status: 'idle' };
771
+ }
772
+ return didReconnect;
773
+ })
774
+ .finally(() => {
775
+ // Only clear if we are still the current attempt — a newer _ensureReconnect may
776
+ // have replaced _reconnectPromise (e.g. a drop right after this one recovered).
777
+ if (this._reconnectPromise === attempt)
778
+ this._reconnectPromise = null;
779
+ });
780
+ this._reconnectPromise = attempt;
781
+ return this._reconnectPromise;
782
+ }
783
+ // ── Two-loop exponential backoff reconnect (mirrors Python SDK) ────────────
784
+ async _reconnectWithBackoff(startTime) {
785
+ const cfg = this.reconnectConfig;
786
+ // reconnectWindow: null means unlimited; undefined falls back to default.
787
+ const window = cfg.reconnectWindow !== undefined ? cfg.reconnectWindow : DEFAULT_RECONNECT_WINDOW;
788
+ while (true) {
789
+ if (window !== null) {
790
+ const elapsed = Date.now() - startTime;
791
+ if (elapsed >= window) {
792
+ this.log.warn(`ShellSession: Reconnection window of ${window}ms expired after ${elapsed}ms ` + `(shellId=${this.shellId})`);
793
+ return false;
794
+ }
795
+ }
796
+ const success = await this._runInnerRetryLoop(cfg, startTime, window);
797
+ if (success)
798
+ return true;
799
+ let outerDelay = cfg.outerLoopDelay ?? DEFAULT_OUTER_LOOP_DELAY;
800
+ // Cap outer sleep to remaining window so we don't overshoot a short window
801
+ // by up to outerDelay ms (e.g. a 5s window with 30s outerDelay would overshoot
802
+ // by 25s waiting for a retry that the window check will immediately reject).
803
+ if (window !== null) {
804
+ const remaining = window - (Date.now() - startTime);
805
+ outerDelay = Math.min(outerDelay, Math.max(0, remaining));
806
+ }
807
+ this.log.info(`ShellSession: Inner loop exhausted, waiting ${outerDelay}ms before next outer retry ` +
808
+ `(shellId=${this.shellId})`);
809
+ await _sleep(outerDelay, this._sessionController.signal);
810
+ if (this._isClosed())
811
+ return false;
812
+ }
813
+ }
814
+ async _runInnerRetryLoop(cfg, startTime, window) {
815
+ const maxRetries = cfg.maxRetries ?? DEFAULT_MAX_RETRIES;
816
+ const baseDelay = cfg.baseDelay ?? DEFAULT_BASE_DELAY;
817
+ const maxDelay = cfg.maxDelay ?? DEFAULT_MAX_DELAY;
818
+ let attempt = 0;
819
+ while (attempt < maxRetries) {
820
+ attempt++;
821
+ if (window !== null && Date.now() - startTime >= window)
822
+ return false;
823
+ // Attempt first — no pre-attempt sleep. Sleep only after failure so that
824
+ // transient disconnects reconnect immediately on the first try.
825
+ this.log.info(`ShellSession: Reconnect attempt ${attempt}/${maxRetries} (shellId=${this.shellId})`);
826
+ try {
827
+ await this._connectWithUpgrade();
828
+ // close() may have fired while connectFn was awaiting — _connectWithUpgrade returns void
829
+ // rather than throwing in that case, so guard here before claiming a successful reconnect.
830
+ if (this._isClosed())
831
+ return false;
832
+ this.log.info(`ShellSession: Reconnected (reconnected=${this.reconnected}, shellId=${this.shellId})`);
833
+ if (cfg.onReconnect) {
834
+ try {
835
+ await cfg.onReconnect(this.reconnected);
836
+ }
837
+ catch (err) {
838
+ this.log.warn(`ShellSession: onReconnect callback threw (shellId=${this.shellId}): ${String(err)}`);
839
+ }
840
+ }
841
+ return true;
842
+ }
843
+ catch (err) {
844
+ this.log.warn(`ShellSession: Reconnect attempt ${attempt} failed: ${String(err)} (shellId=${this.shellId})`);
845
+ }
846
+ if (this._isClosed())
847
+ return false;
848
+ // Re-check window after a slow _connectWithUpgrade so the backoff sleep
849
+ // doesn't fire after the window has already expired mid-attempt.
850
+ if (window !== null && Date.now() - startTime >= window)
851
+ return false;
852
+ // Exponential backoff with ±25% jitter to avoid thundering herd on
853
+ // simultaneous reconnects from multiple clients.
854
+ // Use _sessionController.signal (only aborted by close()) — not
855
+ // _abortController, which is already aborted by the WS close event.
856
+ const base = Math.min(baseDelay * Math.pow(2, attempt - 1), maxDelay);
857
+ const jitter = base * 0.25 * (Math.random() * 2 - 1);
858
+ const delay = base + jitter;
859
+ this.log.info(`ShellSession: Waiting ${Math.round(delay)}ms before next attempt (shellId=${this.shellId})`);
860
+ await _sleep(delay, this._sessionController.signal);
861
+ if (this._isClosed())
862
+ return false;
863
+ }
864
+ return false;
865
+ }
866
+ }
867
/**
 * Abortable sleep.
 *
 * Resolves after `ms` milliseconds, or as soon as `signal` is aborted,
 * whichever happens first. It never rejects: an abort only ends the wait
 * early, and callers re-check session state afterwards.
 *
 * @param {number} ms - Delay in milliseconds.
 * @param {AbortSignal} [signal] - Optional signal that cuts the sleep short.
 *   If it is already aborted, the promise resolves without starting a timer.
 * @returns {Promise<void>}
 */
function _sleep(ms, signal) {
    return new Promise((resolve) => {
        if (signal?.aborted) {
            resolve();
            return;
        }
        const onAbort = () => {
            globalThis.clearTimeout(timer);
            resolve();
        };
        const timer = globalThis.setTimeout(() => {
            // Detach the abort listener when the timer wins. `{ once: true }` only
            // removes it after an abort fires, so without this every completed sleep
            // would leave one more listener (and closure) on a long-lived signal.
            signal?.removeEventListener('abort', onAbort);
            resolve();
        }, ms);
        signal?.addEventListener('abort', onAbort, { once: true });
    });
}
880
+ //# sourceMappingURL=session.js.map